author    Dimitry Andric <dim@FreeBSD.org>  2016-12-26 20:36:37 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2016-12-26 20:36:37 +0000
commit    b6d42e34c27d79488e27db71466f4e5cece05910 (patch)
tree      ab60b4cdd6e430dda1f292a46a77ddb744723f31 /contrib/llvm/tools/clang/lib
parent    d76705554f5443404be5a5e89f2f5f5ebf42cf98 (diff)
Diffstat (limited to 'contrib/llvm/tools/clang/lib')
-rw-r--r--  contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp | 12
-rw-r--r--  contrib/llvm/tools/clang/lib/ARCMigrate/ObjCMT.cpp | 79
-rw-r--r--  contrib/llvm/tools/clang/lib/ARCMigrate/TransProperties.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/APValue.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ASTContext.cpp | 501
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ASTDiagnostic.cpp | 56
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp | 163
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp | 915
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ASTTypeTraits.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/CXXInheritance.cpp | 15
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/Comment.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/CommentSema.cpp | 11
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/Decl.cpp | 165
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclBase.cpp | 68
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp | 251
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp | 115
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclOpenMP.cpp | 50
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp | 86
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp | 285
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/DeclarationName.cpp | 79
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/Expr.cpp | 266
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp | 124
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp | 828
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp | 754
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/Mangle.cpp | 28
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp | 247
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/NSAPI.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/NestedNameSpecifier.cpp | 17
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp | 398
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/ParentMap.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/RecordLayout.cpp | 32
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp | 182
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/Stmt.cpp | 44
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp | 600
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp | 193
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp | 216
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/TemplateBase.cpp | 9
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/TemplateName.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/Type.cpp | 181
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/TypeLoc.cpp | 32
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp | 192
-rw-r--r--  contrib/llvm/tools/clang/lib/AST/VTableBuilder.cpp | 15
-rw-r--r--  contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchFinder.cpp | 115
-rw-r--r--  contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 217
-rw-r--r--  contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h | 49
-rw-r--r--  contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp | 41
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp | 41
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp | 57
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/BodyFarm.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/CFG.cpp | 62
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/CallGraph.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/FormatString.cpp | 43
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/FormatStringParsing.h | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp | 11
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Analysis/ScanfFormatString.cpp | 11
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/Builtins.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/Cuda.cpp | 171
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/FileManager.cpp | 10
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/LangOptions.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/Module.cpp | 11
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp | 203
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/OperatorPrecedence.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp | 45
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/TargetInfo.cpp | 12
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/Targets.cpp | 2394
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/Version.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp | 363
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h | 53
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp | 442
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp | 253
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp | 133
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h | 26
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp | 1666
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp | 117
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp | 88
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp | 662
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp | 378
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp | 21
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h | 22
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp | 710
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h | 42
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp | 108
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp | 37
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp | 36
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp | 439
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp | 58
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp | 62
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp | 53
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp | 157
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp | 84
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h | 19
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp | 669
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp | 296
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp | 672
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp | 23
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp | 44
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4172
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h | 567
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 396
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 179
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 12
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp | 139
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp | 1988
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp | 109
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGValue.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp | 52
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp | 190
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp | 306
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h | 346
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp | 728
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h | 103
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp | 103
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h | 12
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp | 38
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h | 75
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp | 137
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h | 9
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp | 132
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp | 176
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp | 130
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 89
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp | 830
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp | 883
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Action.cpp | 232
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Compilation.cpp | 52
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/CrossWindowsToolChain.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Driver.cpp | 773
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Job.cpp | 44
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp | 135
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp | 227
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp | 80
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp | 1305
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/ToolChains.h | 115
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Tools.cpp | 1520
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Tools.h | 61
-rw-r--r--  contrib/llvm/tools/clang/lib/Driver/Types.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp | 150
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h | 67
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp | 72
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/Encoding.h | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/Format.cpp | 1642
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/FormatToken.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/FormatToken.h | 59
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp | 597
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h | 97
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp | 442
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h | 36
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp | 138
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h | 108
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp | 231
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h | 22
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp | 186
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp | 32
-rw-r--r--  contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp | 25
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp | 77
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/CacheTokens.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp | 162
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp | 117
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp | 542
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp | 18
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/DependencyFile.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/DiagnosticRenderer.cpp | 50
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp | 64
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp | 103
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/HeaderIncludeGen.cpp | 108
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/InitHeaderSearch.cpp | 67
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp | 93
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/LayoutOverrideSource.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp | 185
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp | 36
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp | 53
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp | 41
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/Rewrite/HTMLPrint.cpp | 15
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp | 208
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp | 107
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp | 3
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/TestModuleFileExtension.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp | 30
-rw-r--r--  contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp | 105
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__clang_cuda_cmath.h | 148
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__clang_cuda_intrinsics.h | 322
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__clang_cuda_math_forward_declares.h | 263
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h | 170
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h | 95
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h | 28
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/altivec.h | 8545
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/ammintrin.h | 81
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/arm_acle.h | 136
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx2intrin.h | 244
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h | 1094
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512cdintrin.h | 13
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h | 941
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h | 158
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h | 7700
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512ifmaintrin.h | 92
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512ifmavlintrin.h | 149
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512pfintrin.h | 111
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vbmiintrin.h | 137
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vbmivlintrin.h | 247
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h | 1328
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlcdintrin.h | 263
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h | 464
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h | 5474
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/avxintrin.h | 1890
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/bmiintrin.h | 393
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/clflushoptintrin.h | 41
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/cpuid.h | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/cuda_builtin_vars.h | 44
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/emmintrin.h | 1296
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/f16cintrin.h | 85
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/float.h | 14
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/fma4intrin.h | 64
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/fmaintrin.h | 64
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/htmintrin.h | 20
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h | 110
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/ia32intrin.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/immintrin.h | 111
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/intrin.h (renamed from contrib/llvm/tools/clang/lib/Headers/Intrin.h) | 45
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/inttypes.h | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/mm3dnow.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/mmintrin.h | 1070
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/module.modulemap | 91
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/msa.h | 583
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/mwaitxintrin.h | 47
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/opencl-c.h | 16962
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/pkuintrin.h | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/pmmintrin.h | 215
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/popcntintrin.h | 40
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/smmintrin.h | 21
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/tbmintrin.h | 72
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/tmmintrin.h | 552
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/unwind.h | 19
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/x86intrin.h | 28
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xmmintrin.h | 2255
-rw-r--r--  contrib/llvm/tools/clang/lib/Headers/xopintrin.h | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/CodegenNameGenerator.cpp | 195
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/CommentToXML.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexBody.cpp | 359
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp | 454
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp | 385
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp | 202
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp | 176
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp | 334
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/IndexingContext.h | 121
-rw-r--r--  contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp | 83
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp | 167
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp | 99
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/Lexer.cpp | 43
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp | 157
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp | 101
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp | 26
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp | 375
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp | 85
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp | 76
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp | 420
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/Pragma.cpp | 40
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp | 30
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseAST.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp | 31
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp | 249
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp | 189
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp | 180
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp | 150
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseInit.cpp | 18
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp | 201
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp | 1133
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp | 275
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp | 192
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp | 82
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp | 73
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp | 193
-rw-r--r--  contrib/llvm/tools/clang/lib/Parse/Parser.cpp | 133
-rw-r--r--  contrib/llvm/tools/clang/lib/Rewrite/HTMLRewrite.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Rewrite/RewriteRope.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp | 65
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/AttributeList.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/CodeCompleteConsumer.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/IdentifierResolver.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/JumpDiagnostics.cpp | 328
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/MultiplexExternalSemaSource.cpp | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/Scope.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/ScopeInfo.cpp | 13
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/Sema.cpp | 132
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaAccess.cpp | 51
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaAttr.cpp | 275
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp | 320
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaCXXScopeSpec.cpp | 37
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp | 182
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp | 2120
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp | 67
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp | 1580
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp | 814
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp | 1414
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp | 228
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp | 17
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp | 818
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp | 594
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaExprMember.cpp | 83
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp | 215
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp | 571
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp | 89
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp | 299
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp | 316
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp | 6525
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp | 652
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaPseudoObject.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp | 404
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp | 20
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp | 102
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp | 459
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp | 449
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp | 145
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 441
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp | 20
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/SemaType.cpp | 504
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/TreeTransform.h | 746
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.cpp | 36
-rw-r--r--  contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTCommon.cpp | 47
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp | 412
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp | 326
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp | 383
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp | 1357
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp | 699
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp | 1485
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/Module.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp | 16
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 97
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp | 1145
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp | 17
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/Checkers.td | 647
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp | 32
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp | 85
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp | 21
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp | 101
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp | 115
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h | 111
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp | 190
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h | 107
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp | 284
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.h | 97
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h | 68
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 41
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp | 36
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp | 380
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp | 294
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp | 324
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp | 15
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp | 28
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp | 3
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp | 1
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 12
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp | 96
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp | 101
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp | 38
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp | 8
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp | 29
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp | 11
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 65
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp | 43
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 6
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/IssueHash.cpp | 9
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 128
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp | 48
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 18
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 7
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp | 2
-rw-r--r--  contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp | 22
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp | 4
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp | 5
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/Core/QualTypeNames.cpp | 479
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp | 84
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/FixIt.cpp | 31
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp | 29
-rw-r--r--  contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp | 81
433 files changed, 104994 insertions, 29236 deletions
diff --git a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp
index 8c04c8371cef..da93d8418e78 100644
--- a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp
+++ b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMT.cpp
@@ -21,6 +21,7 @@
#include "clang/Serialization/ASTReader.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <utility>
using namespace clang;
using namespace arcmt;
@@ -508,8 +509,8 @@ MigrationProcess::MigrationProcess(
const CompilerInvocation &CI,
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticConsumer *diagClient, StringRef outputDir)
- : OrigCI(CI), PCHContainerOps(PCHContainerOps), DiagClient(diagClient),
- HadARCErrors(false) {
+ : OrigCI(CI), PCHContainerOps(std::move(PCHContainerOps)),
+ DiagClient(diagClient), HadARCErrors(false) {
if (!outputDir.empty()) {
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
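
Note: the ARCMT.cpp hunk above is the standard sink-argument idiom, which is also why <utility> is now included: a std::shared_ptr taken by value is moved into the member instead of copied, saving an atomic reference-count bump when the caller passes an rvalue. A minimal sketch of the pattern, with hypothetical names rather than the clang types:

#include <memory>
#include <utility>

struct Ops {};

class Process {
  std::shared_ptr<Ops> ContainerOps; // shared ownership kept by the member
public:
  // Taking the shared_ptr by value lets an rvalue argument reach the
  // member by moves alone; a copy (and refcount bump) only happens when
  // the caller actually passes an lvalue.
  explicit Process(std::shared_ptr<Ops> ops)
      : ContainerOps(std::move(ops)) {}
};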
diff --git a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp
index 39a922f426c3..0a5473ab19ec 100644
--- a/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp
+++ b/contrib/llvm/tools/clang/lib/ARCMigrate/ARCMTActions.cpp
@@ -25,8 +25,8 @@ bool CheckAction::BeginInvocation(CompilerInstance &CI) {
return true;
}
-CheckAction::CheckAction(FrontendAction *WrappedAction)
- : WrapperFrontendAction(WrappedAction) {}
+CheckAction::CheckAction(std::unique_ptr<FrontendAction> WrappedAction)
+ : WrapperFrontendAction(std::move(WrappedAction)) {}
bool ModifyAction::BeginInvocation(CompilerInstance &CI) {
return !arcmt::applyTransformations(CI.getInvocation(), getCurrentInput(),
@@ -34,8 +34,8 @@ bool ModifyAction::BeginInvocation(CompilerInstance &CI) {
CI.getDiagnostics().getClient());
}
-ModifyAction::ModifyAction(FrontendAction *WrappedAction)
- : WrapperFrontendAction(WrappedAction) {}
+ModifyAction::ModifyAction(std::unique_ptr<FrontendAction> WrappedAction)
+ : WrapperFrontendAction(std::move(WrappedAction)) {}
bool MigrateAction::BeginInvocation(CompilerInstance &CI) {
if (arcmt::migrateWithTemporaryFiles(
@@ -49,11 +49,11 @@ bool MigrateAction::BeginInvocation(CompilerInstance &CI) {
return true;
}
-MigrateAction::MigrateAction(FrontendAction *WrappedAction,
+MigrateAction::MigrateAction(std::unique_ptr<FrontendAction> WrappedAction,
StringRef migrateDir,
StringRef plistOut,
bool emitPremigrationARCErrors)
- : WrapperFrontendAction(WrappedAction), MigrateDir(migrateDir),
+ : WrapperFrontendAction(std::move(WrappedAction)), MigrateDir(migrateDir),
PlistOut(plistOut), EmitPremigrationARCErros(emitPremigrationARCErrors) {
if (MigrateDir.empty())
MigrateDir = "."; // user current directory if none is given.
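
Note: the three constructors above switch from a raw FrontendAction* to std::unique_ptr<FrontendAction>, so the transfer of ownership into WrapperFrontendAction is spelled out in the type system. A hedged sketch of the shape of the change (hypothetical Action/Wrapper types, not clang's):

#include <memory>
#include <utility>

struct Action { virtual ~Action() = default; };
struct InnerAction : Action {};

class Wrapper : public Action {
  std::unique_ptr<Action> Wrapped; // sole owner of the wrapped action
public:
  // Ownership is part of the signature; callers must hand the action over.
  explicit Wrapper(std::unique_ptr<Action> wrapped)
      : Wrapped(std::move(wrapped)) {}
};

int main() {
  Wrapper w(std::make_unique<InnerAction>()); // no ambient raw pointer left behind
}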
diff --git a/contrib/llvm/tools/clang/lib/ARCMigrate/ObjCMT.cpp b/contrib/llvm/tools/clang/lib/ARCMigrate/ObjCMT.cpp
index 50b113660d3a..241a7246b621 100644
--- a/contrib/llvm/tools/clang/lib/ARCMigrate/ObjCMT.cpp
+++ b/contrib/llvm/tools/clang/lib/ARCMigrate/ObjCMT.cpp
@@ -177,12 +177,13 @@ protected:
}
};
-}
+} // end anonymous namespace
-ObjCMigrateAction::ObjCMigrateAction(FrontendAction *WrappedAction,
+ObjCMigrateAction::ObjCMigrateAction(
+ std::unique_ptr<FrontendAction> WrappedAction,
StringRef migrateDir,
unsigned migrateAction)
- : WrapperFrontendAction(WrappedAction), MigrateDir(migrateDir),
+ : WrapperFrontendAction(std::move(WrappedAction)), MigrateDir(migrateDir),
ObjCMigAction(migrateAction),
CompInst(nullptr) {
if (MigrateDir.empty())
@@ -306,7 +307,6 @@ namespace {
}
return true;
}
-
class ObjCMigrator : public RecursiveASTVisitor<ObjCMigrator> {
ObjCMigrateASTConsumer &Consumer;
@@ -369,7 +369,7 @@ public:
return true;
}
};
-}
+} // end anonymous namespace
void ObjCMigrateASTConsumer::migrateDecl(Decl *D) {
if (!D)
@@ -588,7 +588,7 @@ void ObjCMigrateASTConsumer::migrateObjCContainerDecl(ASTContext &Ctx,
if (!(ASTMigrateActions & FrontendOptions::ObjCMT_ReturnsInnerPointerProperty))
return;
- for (auto *Prop : D->properties()) {
+ for (auto *Prop : D->instance_properties()) {
if ((ASTMigrateActions & FrontendOptions::ObjCMT_Annotation) &&
!Prop->isDeprecated())
migratePropertyNsReturnsInnerPointer(Ctx, Prop);
@@ -605,7 +605,7 @@ ClassImplementsAllMethodsAndProperties(ASTContext &Ctx,
// in class interface.
bool HasAtleastOneRequiredProperty = false;
if (const ObjCProtocolDecl *PDecl = Protocol->getDefinition())
- for (const auto *Property : PDecl->properties()) {
+ for (const auto *Property : PDecl->instance_properties()) {
if (Property->getPropertyImplementation() == ObjCPropertyDecl::Optional)
continue;
HasAtleastOneRequiredProperty = true;
@@ -615,7 +615,8 @@ ClassImplementsAllMethodsAndProperties(ASTContext &Ctx,
// or dynamic declaration. Class is implementing a property coming from
// another protocol. This still makes the target protocol as conforming.
if (!ImpDecl->FindPropertyImplDecl(
- Property->getDeclName().getAsIdentifierInfo()))
+ Property->getDeclName().getAsIdentifierInfo(),
+ Property->getQueryKind()))
return false;
}
else if (ObjCPropertyDecl *ClassProperty = dyn_cast<ObjCPropertyDecl>(R[0])) {
@@ -770,23 +771,11 @@ static void rewriteToNSMacroDecl(ASTContext &Ctx,
ClassString += ", ";
ClassString += TypedefDcl->getIdentifier()->getName();
- ClassString += ')';
- SourceLocation EndLoc;
- if (EnumDcl->getIntegerTypeSourceInfo()) {
- TypeSourceInfo *TSourceInfo = EnumDcl->getIntegerTypeSourceInfo();
- TypeLoc TLoc = TSourceInfo->getTypeLoc();
- EndLoc = TLoc.getLocEnd();
- const char *lbrace = Ctx.getSourceManager().getCharacterData(EndLoc);
- unsigned count = 0;
- if (lbrace)
- while (lbrace[count] != '{')
- ++count;
- if (count > 0)
- EndLoc = EndLoc.getLocWithOffset(count-1);
- }
- else
- EndLoc = EnumDcl->getLocStart();
- SourceRange R(EnumDcl->getLocStart(), EndLoc);
+ ClassString += ") ";
+ SourceLocation EndLoc = EnumDcl->getBraceRange().getBegin();
+ if (EndLoc.isInvalid())
+ return;
+ CharSourceRange R = CharSourceRange::getCharRange(EnumDcl->getLocStart(), EndLoc);
commit.replace(R, ClassString);
// This is to remove spaces between '}' and typedef name.
SourceLocation StartTypedefLoc = EnumDcl->getLocEnd();
@@ -1104,7 +1093,6 @@ static bool AvailabilityAttrsMatch(Attr *At1, Attr *At2) {
versionsMatch(Deprecated1, Deprecated2) &&
versionsMatch(Obsoleted1, Obsoleted2) &&
IsUnavailable1 == IsUnavailable2);
-
}
static bool MatchTwoAttributeLists(const AttrVec &Attrs1, const AttrVec &Attrs2,
@@ -1509,7 +1497,6 @@ void ObjCMigrateASTConsumer::AddCFAnnotations(ASTContext &Ctx,
}
}
-
ObjCMigrateASTConsumer::CF_BRIDGING_KIND
ObjCMigrateASTConsumer::migrateAddFunctionAnnotation(
ASTContext &Ctx,
@@ -1683,7 +1670,6 @@ void ObjCMigrateASTConsumer::migrateAddMethodAnnotation(
return;
}
}
- return;
}
namespace {
@@ -1700,7 +1686,7 @@ public:
return true;
}
};
-} // anonymous namespace
+} // end anonymous namespace
static bool hasSuperInitCall(const ObjCMethodDecl *MD) {
return !SuperInitChecker().TraverseStmt(MD->getBody());
@@ -1739,6 +1725,11 @@ bool ObjCMigrateASTConsumer::InsertFoundation(ASTContext &Ctx,
return true;
if (Loc.isInvalid())
return false;
+ auto *nsEnumId = &Ctx.Idents.get("NS_ENUM");
+ if (PP.getMacroDefinitionAtLoc(nsEnumId, Loc)) {
+ FoundationIncluded = true;
+ return true;
+ }
edit::Commit commit(*Editor);
if (Ctx.getLangOpts().Modules)
commit.insert(Loc, "#ifndef NS_ENUM\n@import Foundation;\n#endif\n");
@@ -1841,7 +1832,7 @@ private:
}
};
-}
+} // end anonymous namespace
void ObjCMigrateASTConsumer::HandleTranslationUnit(ASTContext &Ctx) {
@@ -1897,18 +1888,20 @@ void ObjCMigrateASTConsumer::HandleTranslationUnit(ASTContext &Ctx) {
if (++N == DEnd)
continue;
if (const EnumDecl *ED = dyn_cast<EnumDecl>(*N)) {
- if (++N != DEnd)
- if (const TypedefDecl *TDF = dyn_cast<TypedefDecl>(*N)) {
- // prefer typedef-follows-enum to enum-follows-typedef pattern.
- if (migrateNSEnumDecl(Ctx, ED, TDF)) {
- ++D; ++D;
- CacheObjCNSIntegerTypedefed(TD);
- continue;
+ if (canModify(ED)) {
+ if (++N != DEnd)
+ if (const TypedefDecl *TDF = dyn_cast<TypedefDecl>(*N)) {
+ // prefer typedef-follows-enum to enum-follows-typedef pattern.
+ if (migrateNSEnumDecl(Ctx, ED, TDF)) {
+ ++D; ++D;
+ CacheObjCNSIntegerTypedefed(TD);
+ continue;
+ }
}
+ if (migrateNSEnumDecl(Ctx, ED, TD)) {
+ ++D;
+ continue;
}
- if (migrateNSEnumDecl(Ctx, ED, TD)) {
- ++D;
- continue;
}
}
CacheObjCNSIntegerTypedefed(TD);
@@ -2042,7 +2035,7 @@ struct EditEntry {
EditEntry() : File(), Offset(), RemoveLen() {}
};
-}
+} // end anonymous namespace
namespace llvm {
template<> struct DenseMapInfo<EditEntry> {
@@ -2071,7 +2064,7 @@ template<> struct DenseMapInfo<EditEntry> {
LHS.Text == RHS.Text;
}
};
-}
+} // end namespace llvm
namespace {
class RemapFileParser {
@@ -2153,7 +2146,7 @@ private:
Entries.push_back(Entry);
}
};
-}
+} // end anonymous namespace
static bool reportDiag(const Twine &Err, DiagnosticsEngine &Diag) {
Diag.Report(Diag.getCustomDiagID(DiagnosticsEngine::Error, "%0"))
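
Note: in the rewriteToNSMacroDecl hunk above, the hand-rolled scan for '{' is replaced by EnumDcl->getBraceRange().getBegin(), and the edit becomes a half-open character range, so the brace and enum body survive the rewrite. A rough string-level illustration of that half-open semantics (the real code edits through clang's edit::Commit and also cleans up the trailing typedef name separately):

#include <iostream>
#include <string>

int main() {
  std::string Src = "typedef enum { Red, Green } Color;";
  std::size_t Begin = Src.find("enum"); // stand-in for EnumDcl->getLocStart()
  std::size_t Brace = Src.find('{');    // stand-in for getBraceRange().getBegin()
  // Like CharSourceRange::getCharRange(Begin, Brace), the range is
  // half-open: everything before the brace is replaced, the brace stays.
  Src.replace(Begin, Brace - Begin, "NS_ENUM(NSInteger, Color) ");
  std::cout << Src << '\n'; // typedef NS_ENUM(NSInteger, Color) { Red, Green } Color;
}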
diff --git a/contrib/llvm/tools/clang/lib/ARCMigrate/TransProperties.cpp b/contrib/llvm/tools/clang/lib/ARCMigrate/TransProperties.cpp
index 8667bc2a37da..389b03666bf7 100644
--- a/contrib/llvm/tools/clang/lib/ARCMigrate/TransProperties.cpp
+++ b/contrib/llvm/tools/clang/lib/ARCMigrate/TransProperties.cpp
@@ -76,7 +76,7 @@ public:
static void collectProperties(ObjCContainerDecl *D, AtPropDeclsTy &AtProps,
AtPropDeclsTy *PrevAtProps = nullptr) {
- for (auto *Prop : D->properties()) {
+ for (auto *Prop : D->instance_properties()) {
if (Prop->getAtLoc().isInvalid())
continue;
unsigned RawLoc = Prop->getAtLoc().getRawEncoding();
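
Note: here, as in ObjCMT.cpp above, properties() becomes instance_properties(). This import coincides with clang gaining Objective-C class properties, and the migration passes must only touch instance properties. A toy model of the distinction (hypothetical types, not the ObjCContainerDecl API):

#include <vector>

struct Property { bool IsClassProperty = false; };

struct Container {
  std::vector<Property> Props;
  // Every property, class and instance alike.
  const std::vector<Property> &properties() const { return Props; }
  // The filtered view the migration code now iterates.
  std::vector<const Property *> instance_properties() const {
    std::vector<const Property *> Out;
    for (const Property &P : Props)
      if (!P.IsClassProperty)
        Out.push_back(&P);
    return Out;
  }
};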
diff --git a/contrib/llvm/tools/clang/lib/AST/APValue.cpp b/contrib/llvm/tools/clang/lib/AST/APValue.cpp
index 91f1e20d73b6..3c587331ed07 100644
--- a/contrib/llvm/tools/clang/lib/AST/APValue.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/APValue.cpp
@@ -255,7 +255,7 @@ void APValue::swap(APValue &RHS) {
memcpy(RHS.Data.buffer, TmpData, DataSize);
}
-void APValue::dump() const {
+LLVM_DUMP_METHOD void APValue::dump() const {
dump(llvm::errs());
llvm::errs() << '\n';
}
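
Note: LLVM_DUMP_METHOD marks debugger-only entry points like dump() so the optimizer neither inlines nor strips them; the symbol stays callable from a debugger even though nothing in the program references it. A simplified stand-in for the macro (assumption: GCC/Clang-style attributes; the real definition lives in llvm/Support/Compiler.h and is compiler-conditional):

#include <cstdio>

#if defined(__GNUC__) || defined(__clang__)
#define MY_DUMP_METHOD __attribute__((noinline)) __attribute__((used))
#else
#define MY_DUMP_METHOD
#endif

struct Value {
  int V = 0;
  // Kept out of line and marked "used" so `p v.dump()` works in a debugger.
  MY_DUMP_METHOD void dump() const { std::printf("Value(%d)\n", V); }
};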
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp b/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp
index cff82e9b1003..55033b238c66 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTConsumer.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTConsumer.h"
-#include "llvm/Bitcode/BitstreamReader.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclGroup.h"
using namespace clang;
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
index 64386967b220..6aad4d1d570b 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp
@@ -58,7 +58,7 @@ unsigned ASTContext::NumImplicitDestructors;
unsigned ASTContext::NumImplicitDestructorsDeclared;
enum FloatingRank {
- HalfRank, FloatRank, DoubleRank, LongDoubleRank
+ HalfRank, FloatRank, DoubleRank, LongDoubleRank, Float128Rank
};
RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const {
@@ -633,9 +633,8 @@ ASTContext::getCanonicalTemplateTemplateParmDecl(
NTTP->getPosition(), nullptr,
T,
TInfo,
- ExpandedTypes.data(),
- ExpandedTypes.size(),
- ExpandedTInfos.data());
+ ExpandedTypes,
+ ExpandedTInfos);
} else {
Param = NonTypeTemplateParmDecl::Create(*this, getTranslationUnitDecl(),
SourceLocation(),
@@ -734,16 +733,16 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM,
DependentTemplateSpecializationTypes(this_()),
SubstTemplateTemplateParmPacks(this_()),
GlobalNestedNameSpecifier(nullptr), Int128Decl(nullptr),
- UInt128Decl(nullptr), Float128StubDecl(nullptr),
- BuiltinVaListDecl(nullptr), BuiltinMSVaListDecl(nullptr),
- ObjCIdDecl(nullptr), ObjCSelDecl(nullptr), ObjCClassDecl(nullptr),
- ObjCProtocolClassDecl(nullptr), BOOLDecl(nullptr),
- CFConstantStringTypeDecl(nullptr), ObjCInstanceTypeDecl(nullptr),
- FILEDecl(nullptr), jmp_bufDecl(nullptr), sigjmp_bufDecl(nullptr),
- ucontext_tDecl(nullptr), BlockDescriptorType(nullptr),
- BlockDescriptorExtendedType(nullptr), cudaConfigureCallDecl(nullptr),
- FirstLocalImport(), LastLocalImport(), ExternCContext(nullptr),
- MakeIntegerSeqDecl(nullptr), SourceMgr(SM), LangOpts(LOpts),
+ UInt128Decl(nullptr), BuiltinVaListDecl(nullptr),
+ BuiltinMSVaListDecl(nullptr), ObjCIdDecl(nullptr), ObjCSelDecl(nullptr),
+ ObjCClassDecl(nullptr), ObjCProtocolClassDecl(nullptr), BOOLDecl(nullptr),
+ CFConstantStringTagDecl(nullptr), CFConstantStringTypeDecl(nullptr),
+ ObjCInstanceTypeDecl(nullptr), FILEDecl(nullptr), jmp_bufDecl(nullptr),
+ sigjmp_bufDecl(nullptr), ucontext_tDecl(nullptr),
+ BlockDescriptorType(nullptr), BlockDescriptorExtendedType(nullptr),
+ cudaConfigureCallDecl(nullptr), FirstLocalImport(), LastLocalImport(),
+ ExternCContext(nullptr), MakeIntegerSeqDecl(nullptr),
+ TypePackElementDecl(nullptr), SourceMgr(SM), LangOpts(LOpts),
SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)),
AddrSpaceMap(nullptr), Target(nullptr), AuxTarget(nullptr),
PrintingPolicy(LOpts), Idents(idents), Selectors(sels),
@@ -816,7 +815,7 @@ void ASTContext::AddDeallocation(void (*Callback)(void*), void *Data) {
void
ASTContext::setExternalSource(IntrusiveRefCntPtr<ExternalASTSource> Source) {
- ExternalSource = Source;
+ ExternalSource = std::move(Source);
}
void ASTContext::PrintStats() const {
@@ -928,6 +927,14 @@ ASTContext::getMakeIntegerSeqDecl() const {
return MakeIntegerSeqDecl;
}
+BuiltinTemplateDecl *
+ASTContext::getTypePackElementDecl() const {
+ if (!TypePackElementDecl)
+ TypePackElementDecl = buildBuiltinTemplateDecl(BTK__type_pack_element,
+ getTypePackElementName());
+ return TypePackElementDecl;
+}
+
RecordDecl *ASTContext::buildImplicitRecord(StringRef Name,
RecordDecl::TagKind TK) const {
SourceLocation Loc;
@@ -966,14 +973,6 @@ TypedefDecl *ASTContext::getUInt128Decl() const {
return UInt128Decl;
}
-TypeDecl *ASTContext::getFloat128StubType() const {
- assert(LangOpts.CPlusPlus && "should only be called for c++");
- if (!Float128StubDecl)
- Float128StubDecl = buildImplicitRecord("__float128");
-
- return Float128StubDecl;
-}
-
void ASTContext::InitBuiltinType(CanQualType &R, BuiltinType::Kind K) {
BuiltinType *Ty = new (*this, TypeAlignment) BuiltinType(K);
R = CanQualType::CreateUnsafe(QualType(Ty, 0));
@@ -1022,6 +1021,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
InitBuiltinType(DoubleTy, BuiltinType::Double);
InitBuiltinType(LongDoubleTy, BuiltinType::LongDouble);
+ // GNU extension, __float128 for IEEE quadruple precision
+ InitBuiltinType(Float128Ty, BuiltinType::Float128);
+
// GNU extension, 128-bit integers.
InitBuiltinType(Int128Ty, BuiltinType::Int128);
InitBuiltinType(UnsignedInt128Ty, BuiltinType::UInt128);
@@ -1083,26 +1085,17 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
FloatComplexTy = getComplexType(FloatTy);
DoubleComplexTy = getComplexType(DoubleTy);
LongDoubleComplexTy = getComplexType(LongDoubleTy);
+ Float128ComplexTy = getComplexType(Float128Ty);
// Builtin types for 'id', 'Class', and 'SEL'.
InitBuiltinType(ObjCBuiltinIdTy, BuiltinType::ObjCId);
InitBuiltinType(ObjCBuiltinClassTy, BuiltinType::ObjCClass);
InitBuiltinType(ObjCBuiltinSelTy, BuiltinType::ObjCSel);
- if (LangOpts.OpenCL) {
- InitBuiltinType(OCLImage1dTy, BuiltinType::OCLImage1d);
- InitBuiltinType(OCLImage1dArrayTy, BuiltinType::OCLImage1dArray);
- InitBuiltinType(OCLImage1dBufferTy, BuiltinType::OCLImage1dBuffer);
- InitBuiltinType(OCLImage2dTy, BuiltinType::OCLImage2d);
- InitBuiltinType(OCLImage2dArrayTy, BuiltinType::OCLImage2dArray);
- InitBuiltinType(OCLImage2dDepthTy, BuiltinType::OCLImage2dDepth);
- InitBuiltinType(OCLImage2dArrayDepthTy, BuiltinType::OCLImage2dArrayDepth);
- InitBuiltinType(OCLImage2dMSAATy, BuiltinType::OCLImage2dMSAA);
- InitBuiltinType(OCLImage2dArrayMSAATy, BuiltinType::OCLImage2dArrayMSAA);
- InitBuiltinType(OCLImage2dMSAADepthTy, BuiltinType::OCLImage2dMSAADepth);
- InitBuiltinType(OCLImage2dArrayMSAADepthTy,
- BuiltinType::OCLImage2dArrayMSAADepth);
- InitBuiltinType(OCLImage3dTy, BuiltinType::OCLImage3d);
+ if (LangOpts.OpenCL) {
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ InitBuiltinType(SingletonId, BuiltinType::Id);
+#include "clang/Basic/OpenCLImageTypes.def"
InitBuiltinType(OCLSamplerTy, BuiltinType::OCLSampler);
InitBuiltinType(OCLEventTy, BuiltinType::OCLEvent);
@@ -1268,34 +1261,37 @@ void ASTContext::setInstantiatedFromUnnamedFieldDecl(FieldDecl *Inst,
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_begin(const CXXMethodDecl *Method) const {
- llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos
- = OverriddenMethods.find(Method->getCanonicalDecl());
+ llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
+ OverriddenMethods.find(Method->getCanonicalDecl());
if (Pos == OverriddenMethods.end())
return nullptr;
-
return Pos->second.begin();
}
ASTContext::overridden_cxx_method_iterator
ASTContext::overridden_methods_end(const CXXMethodDecl *Method) const {
- llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos
- = OverriddenMethods.find(Method->getCanonicalDecl());
+ llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
+ OverriddenMethods.find(Method->getCanonicalDecl());
if (Pos == OverriddenMethods.end())
return nullptr;
-
return Pos->second.end();
}
unsigned
ASTContext::overridden_methods_size(const CXXMethodDecl *Method) const {
- llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos
- = OverriddenMethods.find(Method->getCanonicalDecl());
+ llvm::DenseMap<const CXXMethodDecl *, CXXMethodVector>::const_iterator Pos =
+ OverriddenMethods.find(Method->getCanonicalDecl());
if (Pos == OverriddenMethods.end())
return 0;
-
return Pos->second.size();
}
+ASTContext::overridden_method_range
+ASTContext::overridden_methods(const CXXMethodDecl *Method) const {
+ return overridden_method_range(overridden_methods_begin(Method),
+ overridden_methods_end(Method));
+}
+
void ASTContext::addOverriddenMethod(const CXXMethodDecl *Method,
const CXXMethodDecl *Overridden) {
assert(Method->isCanonicalDecl() && Overridden->isCanonicalDecl());
@@ -1350,6 +1346,7 @@ const llvm::fltSemantics &ASTContext::getFloatTypeSemantics(QualType T) const {
case BuiltinType::Float: return Target->getFloatFormat();
case BuiltinType::Double: return Target->getDoubleFormat();
case BuiltinType::LongDouble: return Target->getLongDoubleFormat();
+ case BuiltinType::Float128: return Target->getFloat128Format();
}
}
@@ -1480,7 +1477,7 @@ static getConstantArrayInfoInChars(const ASTContext &Context,
unsigned Align = EltInfo.second.getQuantity();
if (!Context.getTargetInfo().getCXXABI().isMicrosoft() ||
Context.getTargetInfo().getPointerWidth(0) == 64)
- Width = llvm::RoundUpToAlignment(Width, Align);
+ Width = llvm::alignTo(Width, Align);
return std::make_pair(CharUnits::fromQuantity(Width),
CharUnits::fromQuantity(Align));
}
@@ -1564,7 +1561,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
Align = EltInfo.Align;
if (!getTargetInfo().getCXXABI().isMicrosoft() ||
getTargetInfo().getPointerWidth(0) == 64)
- Width = llvm::RoundUpToAlignment(Width, Align);
+ Width = llvm::alignTo(Width, Align);
break;
}
case Type::ExtVector:
@@ -1577,7 +1574,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
// This happens for non-power-of-2 length vectors.
if (Align & (Align-1)) {
Align = llvm::NextPowerOf2(Align);
- Width = llvm::RoundUpToAlignment(Width, Align);
+ Width = llvm::alignTo(Width, Align);
}
// Adjust the alignment based on the target max.
uint64_t TargetVectorAlign = Target->getMaxVectorAlign();
@@ -1660,6 +1657,10 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
Width = Target->getLongDoubleWidth();
Align = Target->getLongDoubleAlign();
break;
+ case BuiltinType::Float128:
+ Width = Target->getFloat128Width();
+ Align = Target->getFloat128Align();
+ break;
case BuiltinType::NullPtr:
Width = Target->getPointerWidth(0); // C++ 3.9.1p11: sizeof(nullptr_t)
Align = Target->getPointerAlign(0); // == sizeof(void*)
@@ -1680,18 +1681,10 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
case BuiltinType::OCLQueue:
case BuiltinType::OCLNDRange:
case BuiltinType::OCLReserveID:
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
+
// Currently these types are pointers to opaque types.
Width = Target->getPointerWidth(0);
Align = Target->getPointerAlign(0);
@@ -1903,8 +1896,8 @@ unsigned ASTContext::getPreferredTypeAlign(const Type *T) const {
if (T->isMemberPointerType())
return getPreferredTypeAlign(getPointerDiffType().getTypePtr());
- if (Target->getTriple().getArch() == llvm::Triple::xcore)
- return ABIAlign; // Never overalign on XCore.
+ if (!Target->allowsLargerPreferedTypeAlignment())
+ return ABIAlign;
// Double and long long should be naturally aligned if possible.
if (const ComplexType *CT = T->getAs<ComplexType>())
@@ -2991,13 +2984,18 @@ ASTContext::getDependentSizedExtVectorType(QualType vecType,
return QualType(New, 0);
}
+/// \brief Determine whether \p T is canonical as the result type of a function.
+static bool isCanonicalResultType(QualType T) {
+ return T.isCanonical() &&
+ (T.getObjCLifetime() == Qualifiers::OCL_None ||
+ T.getObjCLifetime() == Qualifiers::OCL_ExplicitNone);
+}
+
/// getFunctionNoProtoType - Return a K&R style C function type like 'int()'.
///
QualType
ASTContext::getFunctionNoProtoType(QualType ResultTy,
const FunctionType::ExtInfo &Info) const {
- const CallingConv CallConv = Info.getCC();
-
// Unique functions, to guarantee there is only one function of a particular
// structure.
llvm::FoldingSetNodeID ID;
@@ -3009,8 +3007,9 @@ ASTContext::getFunctionNoProtoType(QualType ResultTy,
return QualType(FT, 0);
QualType Canonical;
- if (!ResultTy.isCanonical()) {
- Canonical = getFunctionNoProtoType(getCanonicalType(ResultTy), Info);
+ if (!isCanonicalResultType(ResultTy)) {
+ Canonical =
+ getFunctionNoProtoType(getCanonicalFunctionResultType(ResultTy), Info);
// Get the new insert position for the node we care about.
FunctionNoProtoType *NewIP =
@@ -3018,21 +3017,13 @@ ASTContext::getFunctionNoProtoType(QualType ResultTy,
assert(!NewIP && "Shouldn't be in the map!"); (void)NewIP;
}
- FunctionProtoType::ExtInfo newInfo = Info.withCallingConv(CallConv);
FunctionNoProtoType *New = new (*this, TypeAlignment)
- FunctionNoProtoType(ResultTy, Canonical, newInfo);
+ FunctionNoProtoType(ResultTy, Canonical, Info);
Types.push_back(New);
FunctionNoProtoTypes.InsertNode(New, InsertPos);
return QualType(New, 0);
}
-/// \brief Determine whether \p T is canonical as the result type of a function.
-static bool isCanonicalResultType(QualType T) {
- return T.isCanonical() &&
- (T.getObjCLifetime() == Qualifiers::OCL_None ||
- T.getObjCLifetime() == Qualifiers::OCL_ExplicitNone);
-}
-
CanQualType
ASTContext::getCanonicalFunctionResultType(QualType ResultType) const {
CanQualType CanResultType = getCanonicalType(ResultType);
@@ -3099,12 +3090,13 @@ ASTContext::getFunctionType(QualType ResultTy, ArrayRef<QualType> ArgArray,
// them for three variable size arrays at the end:
// - parameter types
// - exception types
- // - consumed-arguments flags
+ // - extended parameter information
// Instead of the exception types, there could be a noexcept
// expression, or information used to resolve the exception
// specification.
size_t Size = sizeof(FunctionProtoType) +
NumArgs * sizeof(QualType);
+
if (EPI.ExceptionSpec.Type == EST_Dynamic) {
Size += EPI.ExceptionSpec.Exceptions.size() * sizeof(QualType);
} else if (EPI.ExceptionSpec.Type == EST_ComputedNoexcept) {
@@ -3114,8 +3106,16 @@ ASTContext::getFunctionType(QualType ResultTy, ArrayRef<QualType> ArgArray,
} else if (EPI.ExceptionSpec.Type == EST_Unevaluated) {
Size += sizeof(FunctionDecl*);
}
- if (EPI.ConsumedParameters)
- Size += NumArgs * sizeof(bool);
+
+ // Put the ExtParameterInfos last. If all were equal, it would make
+ // more sense to put these before the exception specification, because
+ // it's much easier to skip past them compared to the elaborate switch
+ // required to skip the exception specification. However, all is not
+ // equal; ExtParameterInfos are used to model very uncommon features,
+ // and it's better not to burden the more common paths.
+ if (EPI.ExtParameterInfos) {
+ Size += NumArgs * sizeof(FunctionProtoType::ExtParameterInfo);
+ }
FunctionProtoType *FTP = (FunctionProtoType*) Allocate(Size, TypeAlignment);
FunctionProtoType::ExtProtoInfo newEPI = EPI;
@@ -3393,23 +3393,19 @@ ASTContext::getTemplateSpecializationType(TemplateName Template,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
-
- unsigned NumArgs = Args.size();
SmallVector<TemplateArgument, 4> ArgVec;
- ArgVec.reserve(NumArgs);
- for (unsigned i = 0; i != NumArgs; ++i)
- ArgVec.push_back(Args[i].getArgument());
+ ArgVec.reserve(Args.size());
+ for (const TemplateArgumentLoc &Arg : Args.arguments())
+ ArgVec.push_back(Arg.getArgument());
- return getTemplateSpecializationType(Template, ArgVec.data(), NumArgs,
- Underlying);
+ return getTemplateSpecializationType(Template, ArgVec, Underlying);
}
#ifndef NDEBUG
-static bool hasAnyPackExpansions(const TemplateArgument *Args,
- unsigned NumArgs) {
- for (unsigned I = 0; I != NumArgs; ++I)
- if (Args[I].isPackExpansion())
+static bool hasAnyPackExpansions(ArrayRef<TemplateArgument> Args) {
+ for (const TemplateArgument &Arg : Args)
+ if (Arg.isPackExpansion())
return true;
return false;
@@ -3418,8 +3414,7 @@ static bool hasAnyPackExpansions(const TemplateArgument *Args,
QualType
ASTContext::getTemplateSpecializationType(TemplateName Template,
- const TemplateArgument *Args,
- unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
QualType Underlying) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
@@ -3436,32 +3431,29 @@ ASTContext::getTemplateSpecializationType(TemplateName Template,
else {
// We can get here with an alias template when the specialization contains
// a pack expansion that does not match up with a parameter pack.
- assert((!IsTypeAlias || hasAnyPackExpansions(Args, NumArgs)) &&
+ assert((!IsTypeAlias || hasAnyPackExpansions(Args)) &&
"Caller must compute aliased type");
IsTypeAlias = false;
- CanonType = getCanonicalTemplateSpecializationType(Template, Args,
- NumArgs);
+ CanonType = getCanonicalTemplateSpecializationType(Template, Args);
}
// Allocate the (non-canonical) template specialization type, but don't
// try to unique it: these types typically have location information that
// we don't unique and don't want to lose.
void *Mem = Allocate(sizeof(TemplateSpecializationType) +
- sizeof(TemplateArgument) * NumArgs +
+ sizeof(TemplateArgument) * Args.size() +
(IsTypeAlias? sizeof(QualType) : 0),
TypeAlignment);
TemplateSpecializationType *Spec
- = new (Mem) TemplateSpecializationType(Template, Args, NumArgs, CanonType,
+ = new (Mem) TemplateSpecializationType(Template, Args, CanonType,
IsTypeAlias ? Underlying : QualType());
Types.push_back(Spec);
return QualType(Spec, 0);
}
-QualType
-ASTContext::getCanonicalTemplateSpecializationType(TemplateName Template,
- const TemplateArgument *Args,
- unsigned NumArgs) const {
+QualType ASTContext::getCanonicalTemplateSpecializationType(
+ TemplateName Template, ArrayRef<TemplateArgument> Args) const {
assert(!Template.getAsDependentTemplateName() &&
"No dependent template names here!");
@@ -3472,15 +3464,16 @@ ASTContext::getCanonicalTemplateSpecializationType(TemplateName Template,
// Build the canonical template specialization type.
TemplateName CanonTemplate = getCanonicalTemplateName(Template);
SmallVector<TemplateArgument, 4> CanonArgs;
+ unsigned NumArgs = Args.size();
CanonArgs.reserve(NumArgs);
- for (unsigned I = 0; I != NumArgs; ++I)
- CanonArgs.push_back(getCanonicalTemplateArgument(Args[I]));
+ for (const TemplateArgument &Arg : Args)
+ CanonArgs.push_back(getCanonicalTemplateArgument(Arg));
// Determine whether this canonical template specialization type already
// exists.
llvm::FoldingSetNodeID ID;
TemplateSpecializationType::Profile(ID, CanonTemplate,
- CanonArgs.data(), NumArgs, *this);
+ CanonArgs, *this);
void *InsertPos = nullptr;
TemplateSpecializationType *Spec
@@ -3492,7 +3485,7 @@ ASTContext::getCanonicalTemplateSpecializationType(TemplateName Template,
sizeof(TemplateArgument) * NumArgs),
TypeAlignment);
Spec = new (Mem) TemplateSpecializationType(CanonTemplate,
- CanonArgs.data(), NumArgs,
+ CanonArgs,
QualType(), QualType());
Types.push_back(Spec);
TemplateSpecializationTypes.InsertNode(Spec, InsertPos);
@@ -3592,9 +3585,7 @@ ASTContext::getDependentTemplateSpecializationType(
SmallVector<TemplateArgument, 16> ArgCopy;
for (unsigned I = 0, E = Args.size(); I != E; ++I)
ArgCopy.push_back(Args[I].getArgument());
- return getDependentTemplateSpecializationType(Keyword, NNS, Name,
- ArgCopy.size(),
- ArgCopy.data());
+ return getDependentTemplateSpecializationType(Keyword, NNS, Name, ArgCopy);
}
QualType
@@ -3602,14 +3593,13 @@ ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS,
const IdentifierInfo *Name,
- unsigned NumArgs,
- const TemplateArgument *Args) const {
+ ArrayRef<TemplateArgument> Args) const {
assert((!NNS || NNS->isDependent()) &&
"nested-name-specifier must be dependent");
llvm::FoldingSetNodeID ID;
DependentTemplateSpecializationType::Profile(ID, *this, Keyword, NNS,
- Name, NumArgs, Args);
+ Name, Args);
void *InsertPos = nullptr;
DependentTemplateSpecializationType *T
@@ -3623,6 +3613,7 @@ ASTContext::getDependentTemplateSpecializationType(
if (Keyword == ETK_None) CanonKeyword = ETK_Typename;
bool AnyNonCanonArgs = false;
+ unsigned NumArgs = Args.size();
SmallVector<TemplateArgument, 16> CanonArgs(NumArgs);
for (unsigned I = 0; I != NumArgs; ++I) {
CanonArgs[I] = getCanonicalTemplateArgument(Args[I]);
@@ -3633,8 +3624,8 @@ ASTContext::getDependentTemplateSpecializationType(
QualType Canon;
if (AnyNonCanonArgs || CanonNNS != NNS || CanonKeyword != Keyword) {
Canon = getDependentTemplateSpecializationType(CanonKeyword, CanonNNS,
- Name, NumArgs,
- CanonArgs.data());
+ Name,
+ CanonArgs);
// Find the insert position again.
DependentTemplateSpecializationTypes.FindNodeOrInsertPos(ID, InsertPos);
@@ -3644,7 +3635,7 @@ ASTContext::getDependentTemplateSpecializationType(
sizeof(TemplateArgument) * NumArgs),
TypeAlignment);
T = new (Mem) DependentTemplateSpecializationType(Keyword, NNS,
- Name, NumArgs, Args, Canon);
+ Name, Args, Canon);
Types.push_back(T);
DependentTemplateSpecializationTypes.InsertNode(T, InsertPos);
return QualType(T, 0);
@@ -4012,13 +4003,35 @@ QualType ASTContext::getUnaryTransformType(QualType BaseType,
QualType UnderlyingType,
UnaryTransformType::UTTKind Kind)
const {
- UnaryTransformType *Ty =
- new (*this, TypeAlignment) UnaryTransformType (BaseType, UnderlyingType,
- Kind,
- UnderlyingType->isDependentType() ?
- QualType() : getCanonicalType(UnderlyingType));
- Types.push_back(Ty);
- return QualType(Ty, 0);
+ UnaryTransformType *ut = nullptr;
+
+ if (BaseType->isDependentType()) {
+ // Look in the folding set for an existing type.
+ llvm::FoldingSetNodeID ID;
+ DependentUnaryTransformType::Profile(ID, getCanonicalType(BaseType), Kind);
+
+ void *InsertPos = nullptr;
+ DependentUnaryTransformType *Canon
+ = DependentUnaryTransformTypes.FindNodeOrInsertPos(ID, InsertPos);
+
+ if (!Canon) {
+ // Build a new, canonical __underlying_type(type) type.
+ Canon = new (*this, TypeAlignment)
+ DependentUnaryTransformType(*this, getCanonicalType(BaseType),
+ Kind);
+ DependentUnaryTransformTypes.InsertNode(Canon, InsertPos);
+ }
+ ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
+ QualType(), Kind,
+ QualType(Canon, 0));
+ } else {
+ QualType CanonType = getCanonicalType(UnderlyingType);
+ ut = new (*this, TypeAlignment) UnaryTransformType (BaseType,
+ UnderlyingType, Kind,
+ CanonType);
+ }
+ Types.push_back(ut);
+ return QualType(ut, 0);
}
/// getAutoType - Return the uniqued reference to the 'auto' type which has been
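The new dependent branch follows clang's usual FoldingSet uniquing idiom:
profile a key, probe the set, and insert at the remembered position on a miss.
A standalone sketch of the idiom with stand-in types (CanonNode and its fields
are illustrative, not clang's):

    #include "llvm/ADT/FoldingSet.h"

    struct CanonNode : llvm::FoldingSetNode {
      unsigned BaseID, Kind;
      CanonNode(unsigned B, unsigned K) : BaseID(B), Kind(K) {}
      void Profile(llvm::FoldingSetNodeID &ID) {
        ID.AddInteger(BaseID);
        ID.AddInteger(Kind);
      }
    };

    CanonNode *getOrCreate(llvm::FoldingSet<CanonNode> &Set,
                           unsigned BaseID, unsigned Kind) {
      // Profile the key exactly as the node profiles itself.
      llvm::FoldingSetNodeID ID;
      ID.AddInteger(BaseID);
      ID.AddInteger(Kind);

      void *InsertPos = nullptr;
      if (CanonNode *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
        return Existing;              // reuse the uniqued node

      CanonNode *New = new CanonNode(BaseID, Kind);
      Set.InsertNode(New, InsertPos); // remember it at the probed position
      return New;
    }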
@@ -4623,6 +4636,7 @@ static FloatingRank getFloatingRank(QualType T) {
case BuiltinType::Float: return FloatRank;
case BuiltinType::Double: return DoubleRank;
case BuiltinType::LongDouble: return LongDoubleRank;
+ case BuiltinType::Float128: return Float128Rank;
}
}
@@ -4639,6 +4653,7 @@ QualType ASTContext::getFloatingTypeOfSizeWithinDomain(QualType Size,
case FloatRank: return FloatComplexTy;
case DoubleRank: return DoubleComplexTy;
case LongDoubleRank: return LongDoubleComplexTy;
+ case Float128Rank: return Float128ComplexTy;
}
}
@@ -4648,6 +4663,7 @@ QualType ASTContext::getFloatingTypeOfSizeWithinDomain(QualType Size,
case FloatRank: return FloatTy;
case DoubleRank: return DoubleTy;
case LongDoubleRank: return LongDoubleTy;
+ case Float128Rank: return Float128Ty;
}
llvm_unreachable("getFloatingRank(): illegal value for rank");
}
@@ -4868,40 +4884,63 @@ int ASTContext::getIntegerTypeOrder(QualType LHS, QualType RHS) const {
return 1;
}
-// getCFConstantStringType - Return the type used for constant CFStrings.
-QualType ASTContext::getCFConstantStringType() const {
+TypedefDecl *ASTContext::getCFConstantStringDecl() const {
if (!CFConstantStringTypeDecl) {
- CFConstantStringTypeDecl = buildImplicitRecord("NSConstantString");
- CFConstantStringTypeDecl->startDefinition();
+ assert(!CFConstantStringTagDecl &&
+ "tag and typedef should be initialized together");
+ CFConstantStringTagDecl = buildImplicitRecord("__NSConstantString_tag");
+ CFConstantStringTagDecl->startDefinition();
QualType FieldTypes[4];
+ const char *FieldNames[4];
// const int *isa;
FieldTypes[0] = getPointerType(IntTy.withConst());
+ FieldNames[0] = "isa";
// int flags;
FieldTypes[1] = IntTy;
+ FieldNames[1] = "flags";
// const char *str;
FieldTypes[2] = getPointerType(CharTy.withConst());
+ FieldNames[2] = "str";
// long length;
FieldTypes[3] = LongTy;
+ FieldNames[3] = "length";
// Create fields
for (unsigned i = 0; i < 4; ++i) {
- FieldDecl *Field = FieldDecl::Create(*this, CFConstantStringTypeDecl,
+ FieldDecl *Field = FieldDecl::Create(*this, CFConstantStringTagDecl,
+ SourceLocation(),
SourceLocation(),
- SourceLocation(), nullptr,
+ &Idents.get(FieldNames[i]),
FieldTypes[i], /*TInfo=*/nullptr,
/*BitWidth=*/nullptr,
/*Mutable=*/false,
ICIS_NoInit);
Field->setAccess(AS_public);
- CFConstantStringTypeDecl->addDecl(Field);
+ CFConstantStringTagDecl->addDecl(Field);
}
- CFConstantStringTypeDecl->completeDefinition();
+ CFConstantStringTagDecl->completeDefinition();
+ // This type is designed to be compatible with NSConstantString, but cannot
+ // use the same name, since NSConstantString is an interface.
+ auto tagType = getTagDeclType(CFConstantStringTagDecl);
+ CFConstantStringTypeDecl =
+ buildImplicitTypedef(tagType, "__NSConstantString");
}
- return getTagDeclType(CFConstantStringTypeDecl);
+ return CFConstantStringTypeDecl;
+}
+
+RecordDecl *ASTContext::getCFConstantStringTagDecl() const {
+ if (!CFConstantStringTagDecl)
+ getCFConstantStringDecl(); // Build the tag and the typedef.
+ return CFConstantStringTagDecl;
+}
+
+// getCFConstantStringType - Return the type used for constant CFStrings.
+QualType ASTContext::getCFConstantStringType() const {
+ return getTypedefType(getCFConstantStringDecl());
}
QualType ASTContext::getObjCSuperType() const {
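Written out as source, the pair of declarations this hunk now builds
implicitly is equivalent to:

    struct __NSConstantString_tag {
      const int *isa;
      int flags;
      const char *str;
      long length;
    };
    typedef struct __NSConstantString_tag __NSConstantString;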
@@ -4914,9 +4953,13 @@ QualType ASTContext::getObjCSuperType() const {
}
void ASTContext::setCFConstantStringType(QualType T) {
- const RecordType *Rec = T->getAs<RecordType>();
- assert(Rec && "Invalid CFConstantStringType");
- CFConstantStringTypeDecl = Rec->getDecl();
+ const TypedefType *TD = T->getAs<TypedefType>();
+ assert(TD && "Invalid CFConstantStringType");
+ CFConstantStringTypeDecl = cast<TypedefDecl>(TD->getDecl());
+ auto TagType =
+ CFConstantStringTypeDecl->getUnderlyingType()->getAs<RecordType>();
+ assert(TagType && "Invalid CFConstantStringType");
+ CFConstantStringTagDecl = TagType->getDecl();
}
QualType ASTContext::getBlockDescriptorType() const {
@@ -5096,6 +5139,27 @@ bool ASTContext::isMSStaticDataMemberInlineDefinition(const VarDecl *VD) const {
!VD->getFirstDecl()->isOutOfLine() && VD->getFirstDecl()->hasInit();
}
+ASTContext::InlineVariableDefinitionKind
+ASTContext::getInlineVariableDefinitionKind(const VarDecl *VD) const {
+ if (!VD->isInline())
+ return InlineVariableDefinitionKind::None;
+
+ // In almost all cases, it's a weak definition.
+ auto *First = VD->getFirstDecl();
+ if (!First->isConstexpr() || First->isInlineSpecified() ||
+ !VD->isStaticDataMember())
+ return InlineVariableDefinitionKind::Weak;
+
+ // If there's a file-context declaration in this translation unit, it's a
+ // non-discardable definition.
+ for (auto *D : VD->redecls())
+ if (D->getLexicalDeclContext()->isFileContext())
+ return InlineVariableDefinitionKind::Strong;
+
+ // If we've not seen one yet, we don't know.
+ return InlineVariableDefinitionKind::WeakUnknown;
+}
+
static inline
std::string charUnitsToString(const CharUnits &CU) {
return llvm::itostr(CU.getQuantity());
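In C++1z terms, the three kinds correspond to declarations like the following
sketch (variable names are illustrative):

    inline int WeakVar = 1;           // Weak: explicitly inline, linkonce_odr.

    struct S {
      static constexpr int CVar = 2;  // implicitly an inline variable in C++1z
    };

    // Until a file-scope redeclaration such as this one is seen, the kind of
    // S::CVar's definition is WeakUnknown; once seen, it becomes Strong.
    constexpr int S::CVar;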
@@ -5122,7 +5186,7 @@ std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const {
SourceLocation Loc;
CharUnits PtrSize = getTypeSizeInChars(VoidPtrTy);
CharUnits ParmOffset = PtrSize;
- for (auto PI : Decl->params()) {
+ for (auto PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
@@ -5137,7 +5201,7 @@ std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const {
// Argument types.
ParmOffset = PtrSize;
- for (auto PVDecl : Decl->params()) {
+ for (auto PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const ArrayType *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
@@ -5165,7 +5229,7 @@ bool ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl,
getObjCEncodingForType(Decl->getReturnType(), S);
CharUnits ParmOffset;
// Compute size of all parameters.
- for (auto PI : Decl->params()) {
+ for (auto PI : Decl->parameters()) {
QualType PType = PI->getType();
CharUnits sz = getObjCEncodingTypeSize(PType);
if (sz.isZero())
@@ -5179,7 +5243,7 @@ bool ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl,
ParmOffset = CharUnits::Zero();
// Argument types.
- for (auto PVDecl : Decl->params()) {
+ for (auto PVDecl : Decl->parameters()) {
QualType PType = PVDecl->getOriginalType();
if (const ArrayType *AT =
dyn_cast<ArrayType>(PType->getCanonicalTypeInternal())) {
@@ -5450,6 +5514,7 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C,
case BuiltinType::LongDouble: return 'D';
case BuiltinType::NullPtr: return '*'; // like char*
+ case BuiltinType::Float128:
case BuiltinType::Half:
// FIXME: potentially need @encodes for these!
return ' ';
@@ -5460,18 +5525,9 @@ static char getObjCEncodingForPrimitiveKind(const ASTContext *C,
llvm_unreachable("@encoding ObjC primitive type");
// OpenCL and placeholder types don't need @encodings.
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
@@ -5691,8 +5747,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S,
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
llvm::raw_string_ostream OS(S);
TemplateSpecializationType::PrintTemplateArgumentList(OS,
- TemplateArgs.data(),
- TemplateArgs.size(),
+ TemplateArgs.asArray(),
(*this).getPrintingPolicy());
}
} else {
@@ -5913,7 +5968,7 @@ void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl,
QualType *NotEncodedT) const {
assert(RDecl && "Expected non-null RecordDecl");
assert(!RDecl->isUnion() && "Should not be called for unions");
- if (!RDecl->getDefinition())
+ if (!RDecl->getDefinition() || RDecl->getDefinition()->isInvalidDecl())
return;
CXXRecordDecl *CXXRec = dyn_cast<CXXRecordDecl>(RDecl);
@@ -6354,6 +6409,7 @@ CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) {
// };
VaListDecl->completeDefinition();
+ Context->VaListTagDecl = VaListDecl;
// typedef struct __va_list __builtin_va_list;
QualType T = Context->getRecordType(VaListDecl);
@@ -7132,6 +7188,11 @@ QualType ASTContext::areCommonBaseCompatible(
if (!LDecl || !RDecl)
return QualType();
+ // When either LHS or RHS is a kindof type, we should return a kindof type.
+ // For example, for common base of kindof(ASub1) and kindof(ASub2), we return
+ // kindof(A).
+ bool anyKindOf = LHS->isKindOfType() || RHS->isKindOfType();
+
// Follow the left-hand side up the class hierarchy until we either hit a
// root or find the RHS. Record the ancestors in case we don't find it.
llvm::SmallDenseMap<const ObjCInterfaceDecl *, const ObjCObjectType *, 4>
@@ -7166,10 +7227,12 @@ QualType ASTContext::areCommonBaseCompatible(
anyChanges = true;
// If anything in the LHS will have changed, build a new result type.
- if (anyChanges) {
+ // If we need to return a kindof type but LHS is not a kindof type, we
+ // build a new result type.
+ if (anyChanges || LHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(LHS->getInterface());
Result = getObjCObjectType(Result, LHSTypeArgs, Protocols,
- LHS->isKindOfType());
+ anyKindOf || LHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
@@ -7214,10 +7277,12 @@ QualType ASTContext::areCommonBaseCompatible(
if (!Protocols.empty())
anyChanges = true;
- if (anyChanges) {
+ // If we need to return a kindof type but RHS is not a kindof type, we
+ // build a new result type.
+ if (anyChanges || RHS->isKindOfType() != anyKindOf) {
QualType Result = getObjCInterfaceType(RHS->getInterface());
Result = getObjCObjectType(Result, RHSTypeArgs, Protocols,
- RHS->isKindOfType());
+ anyKindOf || RHS->isKindOfType());
return getObjCObjectPointerType(Result);
}
@@ -7461,8 +7526,7 @@ QualType ASTContext::mergeFunctionTypes(QualType lhs, QualType rhs,
if (lproto->getTypeQuals() != rproto->getTypeQuals())
return QualType();
- if (LangOpts.ObjCAutoRefCount &&
- !FunctionTypesMatchOnNSConsumedAttrs(rproto, lproto))
+ if (!doFunctionTypesMatchOnExtParameterInfos(rproto, lproto))
return QualType();
// Check parameter type compatibility
@@ -7587,6 +7651,15 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
Qualifiers LQuals = LHSCan.getLocalQualifiers();
Qualifiers RQuals = RHSCan.getLocalQualifiers();
if (LQuals != RQuals) {
+ if (getLangOpts().OpenCL) {
+ if (LHSCan.getUnqualifiedType() != RHSCan.getUnqualifiedType() ||
+ LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers())
+ return QualType();
+ if (LQuals.isAddressSpaceSupersetOf(RQuals))
+ return LHS;
+ if (RQuals.isAddressSpaceSupersetOf(LQuals))
+ return RHS;
+ }
// If any of these qualifiers are different, we have a type
// mismatch.
if (LQuals.getCVRQualifiers() != RQuals.getCVRQualifiers() ||
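The superset test models OpenCL C 2.0 s6.5.5, where every address space except
__constant converts implicitly to __generic. A simplified sketch of that rule,
assuming this reading of isAddressSpaceSupersetOf:

    enum class AddrSpace { Private, Local, Global, Constant, Generic };

    // A is a superset of B when they match exactly, or when A is generic and
    // B is anything other than constant (simplified model).
    static bool isSupersetOf(AddrSpace A, AddrSpace B) {
      return A == B ||
             (A == AddrSpace::Generic && B != AddrSpace::Constant);
    }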
@@ -7850,21 +7923,26 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
llvm_unreachable("Invalid Type::Class!");
}
-bool ASTContext::FunctionTypesMatchOnNSConsumedAttrs(
- const FunctionProtoType *FromFunctionType,
- const FunctionProtoType *ToFunctionType) {
- if (FromFunctionType->hasAnyConsumedParams() !=
- ToFunctionType->hasAnyConsumedParams())
+bool ASTContext::doFunctionTypesMatchOnExtParameterInfos(
+ const FunctionProtoType *firstFnType,
+ const FunctionProtoType *secondFnType) {
+ // Fast path: if the first type doesn't have ext parameter infos,
+  // we match if and only if the second type also doesn't have them.
+ if (!firstFnType->hasExtParameterInfos())
+ return !secondFnType->hasExtParameterInfos();
+
+ // Otherwise, we can only match if the second type has them.
+ if (!secondFnType->hasExtParameterInfos())
return false;
- FunctionProtoType::ExtProtoInfo FromEPI =
- FromFunctionType->getExtProtoInfo();
- FunctionProtoType::ExtProtoInfo ToEPI =
- ToFunctionType->getExtProtoInfo();
- if (FromEPI.ConsumedParameters && ToEPI.ConsumedParameters)
- for (unsigned i = 0, n = FromFunctionType->getNumParams(); i != n; ++i) {
- if (FromEPI.ConsumedParameters[i] != ToEPI.ConsumedParameters[i])
- return false;
- }
+
+ auto firstEPI = firstFnType->getExtParameterInfos();
+ auto secondEPI = secondFnType->getExtParameterInfos();
+ assert(firstEPI.size() == secondEPI.size());
+
+ for (size_t i = 0, n = firstEPI.size(); i != n; ++i) {
+ if (firstEPI[i] != secondEPI[i])
+ return false;
+ }
return true;
}
@@ -8374,22 +8452,29 @@ static GVALinkage basicGVALinkageForFunction(const ASTContext &Context,
return GVA_DiscardableODR;
}
-static GVALinkage adjustGVALinkageForAttributes(GVALinkage L, const Decl *D) {
+static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
+ GVALinkage L, const Decl *D) {
// See http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx
// dllexport/dllimport on inline functions.
if (D->hasAttr<DLLImportAttr>()) {
if (L == GVA_DiscardableODR || L == GVA_StrongODR)
return GVA_AvailableExternally;
- } else if (D->hasAttr<DLLExportAttr>() || D->hasAttr<CUDAGlobalAttr>()) {
+ } else if (D->hasAttr<DLLExportAttr>()) {
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
+ } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice &&
+ D->hasAttr<CUDAGlobalAttr>()) {
+    // Device-side functions with the __global__ attribute must always be
+    // visible externally so they can be launched from the host.
+ if (L == GVA_DiscardableODR || L == GVA_Internal)
+ return GVA_StrongODR;
}
return L;
}
GVALinkage ASTContext::GetGVALinkageForFunction(const FunctionDecl *FD) const {
- return adjustGVALinkageForAttributes(basicGVALinkageForFunction(*this, FD),
- FD);
+ return adjustGVALinkageForAttributes(
+ *this, basicGVALinkageForFunction(*this, FD), FD);
}
static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
@@ -8422,15 +8507,31 @@ static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
if (Context.isMSStaticDataMemberInlineDefinition(VD))
return GVA_DiscardableODR;
+ // Most non-template variables have strong linkage; inline variables are
+ // linkonce_odr or (occasionally, for compatibility) weak_odr.
+ GVALinkage StrongLinkage;
+ switch (Context.getInlineVariableDefinitionKind(VD)) {
+ case ASTContext::InlineVariableDefinitionKind::None:
+ StrongLinkage = GVA_StrongExternal;
+ break;
+ case ASTContext::InlineVariableDefinitionKind::Weak:
+ case ASTContext::InlineVariableDefinitionKind::WeakUnknown:
+ StrongLinkage = GVA_DiscardableODR;
+ break;
+ case ASTContext::InlineVariableDefinitionKind::Strong:
+ StrongLinkage = GVA_StrongODR;
+ break;
+ }
+
switch (VD->getTemplateSpecializationKind()) {
case TSK_Undeclared:
- return GVA_StrongExternal;
+ return StrongLinkage;
case TSK_ExplicitSpecialization:
return Context.getTargetInfo().getCXXABI().isMicrosoft() &&
VD->isStaticDataMember()
? GVA_StrongODR
- : GVA_StrongExternal;
+ : StrongLinkage;
case TSK_ExplicitInstantiationDefinition:
return GVA_StrongODR;
@@ -8446,8 +8547,8 @@ static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
}
GVALinkage ASTContext::GetGVALinkageForVariable(const VarDecl *VD) {
- return adjustGVALinkageForAttributes(basicGVALinkageForVariable(*this, VD),
- VD);
+ return adjustGVALinkageForAttributes(
+ *this, basicGVALinkageForVariable(*this, VD), VD);
}
bool ASTContext::DeclMustBeEmitted(const Decl *D) {
@@ -8464,8 +8565,17 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {
// We never need to emit an uninstantiated function template.
if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate)
return false;
- } else if (isa<OMPThreadPrivateDecl>(D))
+ } else if (isa<PragmaCommentDecl>(D))
+ return true;
+ else if (isa<OMPThreadPrivateDecl>(D) ||
+ D->hasAttr<OMPDeclareTargetDeclAttr>())
return true;
+ else if (isa<PragmaDetectMismatchDecl>(D))
+ return true;
+ else if (isa<OMPThreadPrivateDecl>(D))
+ return !D->getDeclContext()->isDependentContext();
+ else if (isa<OMPDeclareReductionDecl>(D))
+ return !D->getDeclContext()->isDependentContext();
else
return false;
@@ -8545,8 +8655,25 @@ CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
if (IsCXXMethod)
return ABI->getDefaultMethodCallConv(IsVariadic);
- if (LangOpts.MRTD && !IsVariadic) return CC_X86StdCall;
-
+ switch (LangOpts.getDefaultCallingConv()) {
+ case LangOptions::DCC_None:
+ break;
+ case LangOptions::DCC_CDecl:
+ return CC_C;
+ case LangOptions::DCC_FastCall:
+ if (getTargetInfo().hasFeature("sse2"))
+ return CC_X86FastCall;
+ break;
+ case LangOptions::DCC_StdCall:
+ if (!IsVariadic)
+ return CC_X86StdCall;
+ break;
+ case LangOptions::DCC_VectorCall:
+ // __vectorcall cannot be applied to variadic functions.
+ if (!IsVariadic)
+ return CC_X86VectorCall;
+ break;
+ }
return Target->getDefaultCallingConv(TargetInfo::CCMT_Unknown);
}
@@ -8626,6 +8753,8 @@ QualType ASTContext::getRealTypeForBitwidth(unsigned DestWidth) const {
return DoubleTy;
case TargetInfo::LongDouble:
return LongDoubleTy;
+ case TargetInfo::Float128:
+ return Float128Ty;
case TargetInfo::NoFloat:
return QualType();
}
@@ -8639,8 +8768,7 @@ void ASTContext::setManglingNumber(const NamedDecl *ND, unsigned Number) {
}
unsigned ASTContext::getManglingNumber(const NamedDecl *ND) const {
- llvm::DenseMap<const NamedDecl *, unsigned>::const_iterator I =
- MangleNumbers.find(ND);
+ auto I = MangleNumbers.find(ND);
return I != MangleNumbers.end() ? I->second : 1;
}
@@ -8650,8 +8778,7 @@ void ASTContext::setStaticLocalNumber(const VarDecl *VD, unsigned Number) {
}
unsigned ASTContext::getStaticLocalNumber(const VarDecl *VD) const {
- llvm::DenseMap<const VarDecl *, unsigned>::const_iterator I =
- StaticLocalNumbers.find(VD);
+ auto I = StaticLocalNumbers.find(VD);
return I != StaticLocalNumbers.end() ? I->second : 1;
}
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTDiagnostic.cpp b/contrib/llvm/tools/clang/lib/AST/ASTDiagnostic.cpp
index 2ab5a32917ae..0f5a8b5ae892 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTDiagnostic.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTDiagnostic.cpp
@@ -10,6 +10,7 @@
// This file implements a diagnostic formatting hook for AST elements.
//
//===----------------------------------------------------------------------===//
+
#include "clang/AST/ASTDiagnostic.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
@@ -118,7 +119,7 @@ static QualType Desugar(ASTContext &Context, QualType QT, bool &ShouldAKA) {
if (DesugarArgument) {
ShouldAKA = true;
QT = Context.getTemplateSpecializationType(
- TST->getTemplateName(), Args.data(), Args.size(), QT);
+ TST->getTemplateName(), Args, QT);
}
break;
}
@@ -443,7 +444,6 @@ void clang::FormatASTNodeDiagnosticArgument(
NeedQuotes = false;
break;
}
-
}
if (NeedQuotes) {
@@ -497,7 +497,7 @@ class TemplateDiff {
enum DiffKind {
/// Incomplete or invalid node.
Invalid,
- /// Another level of templates, requires that
+ /// Another level of templates
Template,
/// Type difference, all type differences except those falling under
/// the Template difference.
@@ -616,7 +616,7 @@ class TemplateDiff {
SetDefault(FromDefault, ToDefault);
}
- void SetIntegerDiff(llvm::APSInt FromInt, llvm::APSInt ToInt,
+ void SetIntegerDiff(const llvm::APSInt &FromInt, const llvm::APSInt &ToInt,
bool IsValidFromInt, bool IsValidToInt,
QualType FromIntType, QualType ToIntType,
Expr *FromExpr, Expr *ToExpr, bool FromDefault,
@@ -653,7 +653,7 @@ class TemplateDiff {
void SetFromDeclarationAndToIntegerDiff(
ValueDecl *FromValueDecl, bool FromAddressOf, bool FromNullPtr,
- Expr *FromExpr, llvm::APSInt ToInt, bool IsValidToInt,
+ Expr *FromExpr, const llvm::APSInt &ToInt, bool IsValidToInt,
QualType ToIntType, Expr *ToExpr, bool FromDefault, bool ToDefault) {
assert(FlatTree[CurrentNode].Kind == Invalid && "Node is not empty.");
FlatTree[CurrentNode].Kind = FromDeclarationAndToInteger;
@@ -669,7 +669,7 @@ class TemplateDiff {
}
void SetFromIntegerAndToDeclarationDiff(
- llvm::APSInt FromInt, bool IsValidFromInt, QualType FromIntType,
+ const llvm::APSInt &FromInt, bool IsValidFromInt, QualType FromIntType,
Expr *FromExpr, ValueDecl *ToValueDecl, bool ToAddressOf,
bool ToNullPtr, Expr *ToExpr, bool FromDefault, bool ToDefault) {
assert(FlatTree[CurrentNode].Kind == Invalid && "Node is not empty.");
@@ -917,6 +917,8 @@ class TemplateDiff {
/// template argument.
InternalIterator(const TemplateSpecializationType *TST)
: TST(TST), Index(0), CurrentTA(nullptr), EndTA(nullptr) {
+ if (!TST) return;
+
if (isEnd()) return;
// Set to first template argument. If not a parameter pack, done.
@@ -937,11 +939,13 @@ class TemplateDiff {
/// isEnd - Returns true if the iterator is one past the end.
bool isEnd() const {
+ assert(TST && "InternalIterator is invalid with a null TST.");
return Index >= TST->getNumArgs();
}
/// &operator++ - Increment the iterator to the next template argument.
InternalIterator &operator++() {
+ assert(TST && "InternalIterator is invalid with a null TST.");
if (isEnd()) {
return *this;
}
@@ -977,6 +981,7 @@ class TemplateDiff {
/// operator* - Returns the appropriate TemplateArgument.
reference operator*() const {
+ assert(TST && "InternalIterator is invalid with a null TST.");
assert(!isEnd() && "Index exceeds number of arguments.");
if (CurrentTA == EndTA)
return TST->getArg(Index);
@@ -986,23 +991,27 @@ class TemplateDiff {
/// operator-> - Allow access to the underlying TemplateArgument.
pointer operator->() const {
+ assert(TST && "InternalIterator is invalid with a null TST.");
return &operator*();
}
};
+ bool UseDesugaredIterator;
InternalIterator SugaredIterator;
InternalIterator DesugaredIterator;
public:
TSTiterator(ASTContext &Context, const TemplateSpecializationType *TST)
- : SugaredIterator(TST),
+ : UseDesugaredIterator(TST->isSugared() && !TST->isTypeAlias()),
+ SugaredIterator(TST),
DesugaredIterator(
GetTemplateSpecializationType(Context, TST->desugar())) {}
/// &operator++ - Increment the iterator to the next template argument.
TSTiterator &operator++() {
++SugaredIterator;
- ++DesugaredIterator;
+ if (UseDesugaredIterator)
+ ++DesugaredIterator;
return *this;
}
@@ -1024,11 +1033,13 @@ class TemplateDiff {
/// hasDesugaredTA - Returns true if there is another TemplateArgument
/// available.
bool hasDesugaredTA() const {
- return !DesugaredIterator.isEnd();
+ return UseDesugaredIterator && !DesugaredIterator.isEnd();
}
/// getDesugaredTA - Returns the desugared TemplateArgument.
reference getDesugaredTA() const {
+ assert(UseDesugaredIterator &&
+ "Desugared TemplateArgument should not be used.");
return *DesugaredIterator;
}
};
@@ -1055,8 +1066,7 @@ class TemplateDiff {
Ty = Context.getTemplateSpecializationType(
TemplateName(CTSD->getSpecializedTemplate()),
- CTSD->getTemplateArgs().data(),
- CTSD->getTemplateArgs().size(),
+ CTSD->getTemplateArgs().asArray(),
Ty.getLocalUnqualifiedType().getCanonicalType());
return Ty->getAs<TemplateSpecializationType>();
@@ -1523,12 +1533,14 @@ class TemplateDiff {
OS << FromTD->getNameAsString() << '<';
Tree.MoveToChild();
unsigned NumElideArgs = 0;
+ bool AllArgsElided = true;
do {
if (ElideType) {
if (Tree.NodeIsSame()) {
++NumElideArgs;
continue;
}
+ AllArgsElided = false;
if (NumElideArgs > 0) {
PrintElideArgs(NumElideArgs, Indent);
NumElideArgs = 0;
@@ -1539,8 +1551,12 @@ class TemplateDiff {
if (Tree.HasNextSibling())
OS << ", ";
} while (Tree.AdvanceSibling());
- if (NumElideArgs > 0)
- PrintElideArgs(NumElideArgs, Indent);
+ if (NumElideArgs > 0) {
+ if (AllArgsElided)
+ OS << "...";
+ else
+ PrintElideArgs(NumElideArgs, Indent);
+ }
Tree.Parent();
OS << ">";
@@ -1622,7 +1638,6 @@ class TemplateDiff {
Unbold();
OS << "]";
}
- return;
}
/// PrintExpr - Prints out the expr template arguments, highlighting argument
@@ -1695,7 +1710,7 @@ class TemplateDiff {
/// PrintAPSInt - Handles printing of integral arguments, highlighting
/// argument differences.
- void PrintAPSInt(llvm::APSInt FromInt, llvm::APSInt ToInt,
+ void PrintAPSInt(const llvm::APSInt &FromInt, const llvm::APSInt &ToInt,
bool IsValidFromInt, bool IsValidToInt, QualType FromIntType,
QualType ToIntType, Expr *FromExpr, Expr *ToExpr,
bool FromDefault, bool ToDefault, bool Same) {
@@ -1728,8 +1743,8 @@ class TemplateDiff {
   /// PrintAPSInt - If valid, print the APSInt. If the expression gives
   /// more information, print it too.
- void PrintAPSInt(llvm::APSInt Val, Expr *E, bool Valid, QualType IntType,
- bool PrintType) {
+ void PrintAPSInt(const llvm::APSInt &Val, Expr *E, bool Valid,
+ QualType IntType, bool PrintType) {
Bold();
if (Valid) {
if (HasExtraInfo(E)) {
@@ -1834,14 +1849,13 @@ class TemplateDiff {
Unbold();
OS << ']';
}
-
}
/// PrintValueDeclAndInteger - Uses the print functions for ValueDecl and
/// APSInt to print a mixed difference.
void PrintValueDeclAndInteger(ValueDecl *VD, bool NeedAddressOf,
bool IsNullPtr, Expr *VDExpr, bool DefaultDecl,
- llvm::APSInt Val, QualType IntType,
+ const llvm::APSInt &Val, QualType IntType,
Expr *IntExpr, bool DefaultInt) {
if (!PrintTree) {
OS << (DefaultDecl ? "(default) " : "");
@@ -1861,7 +1875,7 @@ class TemplateDiff {
/// PrintIntegerAndValueDecl - Uses the print functions for APSInt and
/// ValueDecl to print a mixed difference.
- void PrintIntegerAndValueDecl(llvm::APSInt Val, QualType IntType,
+ void PrintIntegerAndValueDecl(const llvm::APSInt &Val, QualType IntType,
Expr *IntExpr, bool DefaultInt, ValueDecl *VD,
bool NeedAddressOf, bool IsNullPtr,
Expr *VDExpr, bool DefaultDecl) {
@@ -2016,7 +2030,7 @@ public:
return true;
}
}; // end class TemplateDiff
-} // end namespace
+} // end anonymous namespace
/// FormatTemplateTypeDiff - A helper static function to start the template
/// diff and return the properly formatted string. Returns true if the diff
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp
index 4622a75ac2c6..872ba356a9b2 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTDumper.cpp
@@ -18,13 +18,14 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclLookups.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclVisitor.h"
+#include "clang/AST/LocInfoType.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/TypeVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
-#include "clang/Sema/LocInfoType.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace clang::comments;
@@ -403,6 +404,9 @@ namespace {
void VisitAtomicType(const AtomicType *T) {
dumpTypeAsChild(T->getValueType());
}
+ void VisitPipeType(const PipeType *T) {
+ dumpTypeAsChild(T->getElementType());
+ }
void VisitAdjustedType(const AdjustedType *T) {
dumpTypeAsChild(T->getOriginalType());
}
@@ -426,6 +430,14 @@ namespace {
void VisitVarDecl(const VarDecl *D);
void VisitFileScopeAsmDecl(const FileScopeAsmDecl *D);
void VisitImportDecl(const ImportDecl *D);
+ void VisitPragmaCommentDecl(const PragmaCommentDecl *D);
+ void VisitPragmaDetectMismatchDecl(const PragmaDetectMismatchDecl *D);
+ void VisitCapturedDecl(const CapturedDecl *D);
+
+ // OpenMP decls
+ void VisitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D);
+ void VisitOMPDeclareReductionDecl(const OMPDeclareReductionDecl *D);
+ void VisitOMPCapturedExprDecl(const OMPCapturedExprDecl *D);
// C++ Decls
void VisitNamespaceDecl(const NamespaceDecl *D);
@@ -462,6 +474,7 @@ namespace {
void VisitUnresolvedUsingTypenameDecl(const UnresolvedUsingTypenameDecl *D);
void VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D);
void VisitUsingShadowDecl(const UsingShadowDecl *D);
+ void VisitConstructorUsingShadowDecl(const ConstructorUsingShadowDecl *D);
void VisitLinkageSpecDecl(const LinkageSpecDecl *D);
void VisitAccessSpecDecl(const AccessSpecDecl *D);
void VisitFriendDecl(const FriendDecl *D);
@@ -487,6 +500,10 @@ namespace {
void VisitLabelStmt(const LabelStmt *Node);
void VisitGotoStmt(const GotoStmt *Node);
void VisitCXXCatchStmt(const CXXCatchStmt *Node);
+ void VisitCapturedStmt(const CapturedStmt *Node);
+
+ // OpenMP
+ void VisitOMPExecutableDirective(const OMPExecutableDirective *Node);
// Exprs
void VisitExpr(const Expr *Node);
@@ -697,6 +714,12 @@ void ASTDumper::dumpTypeAsChild(const Type *T) {
}
void ASTDumper::dumpBareDeclRef(const Decl *D) {
+ if (!D) {
+ ColorScope Color(*this, NullColor);
+ OS << "<<<NULL>>>";
+ return;
+ }
+
{
ColorScope Color(*this, DeclKindNameColor);
OS << D->getDeclKindName();
@@ -819,8 +842,6 @@ void ASTDumper::dumpAttr(const Attr *A) {
switch (A->getKind()) {
#define ATTR(X) case attr::X: OS << #X; break;
#include "clang/Basic/AttrList.inc"
- default:
- llvm_unreachable("unexpected attribute kind");
}
OS << "Attr";
}
@@ -1140,10 +1161,8 @@ void ASTDumper::VisitFunctionDecl(const FunctionDecl *D) {
if (!D->param_begin() && D->getNumParams())
dumpChild([=] { OS << "<<NULL params x " << D->getNumParams() << ">>"; });
else
- for (FunctionDecl::param_const_iterator I = D->param_begin(),
- E = D->param_end();
- I != E; ++I)
- dumpDecl(*I);
+ for (const ParmVarDecl *Parameter : D->parameters())
+ dumpDecl(Parameter);
if (const CXXConstructorDecl *C = dyn_cast<CXXConstructorDecl>(D))
for (CXXConstructorDecl::init_const_iterator I = C->init_begin(),
@@ -1184,6 +1203,10 @@ void ASTDumper::VisitVarDecl(const VarDecl *D) {
OS << " __module_private__";
if (D->isNRVOVariable())
OS << " nrvo";
+ if (D->isInline())
+ OS << " inline";
+ if (D->isConstexpr())
+ OS << " constexpr";
if (D->hasInit()) {
switch (D->getInitStyle()) {
case VarDecl::CInit: OS << " cinit"; break;
@@ -1202,6 +1225,56 @@ void ASTDumper::VisitImportDecl(const ImportDecl *D) {
OS << ' ' << D->getImportedModule()->getFullModuleName();
}
+void ASTDumper::VisitPragmaCommentDecl(const PragmaCommentDecl *D) {
+ OS << ' ';
+ switch (D->getCommentKind()) {
+ case PCK_Unknown: llvm_unreachable("unexpected pragma comment kind");
+ case PCK_Compiler: OS << "compiler"; break;
+ case PCK_ExeStr: OS << "exestr"; break;
+ case PCK_Lib: OS << "lib"; break;
+ case PCK_Linker: OS << "linker"; break;
+ case PCK_User: OS << "user"; break;
+ }
+ StringRef Arg = D->getArg();
+ if (!Arg.empty())
+ OS << " \"" << Arg << "\"";
+}
+
+void ASTDumper::VisitPragmaDetectMismatchDecl(
+ const PragmaDetectMismatchDecl *D) {
+ OS << " \"" << D->getName() << "\" \"" << D->getValue() << "\"";
+}
+
+void ASTDumper::VisitCapturedDecl(const CapturedDecl *D) {
+ dumpStmt(D->getBody());
+}
+
+//===----------------------------------------------------------------------===//
+// OpenMP Declarations
+//===----------------------------------------------------------------------===//
+
+void ASTDumper::VisitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
+ for (auto *E : D->varlists())
+ dumpStmt(E);
+}
+
+void ASTDumper::VisitOMPDeclareReductionDecl(const OMPDeclareReductionDecl *D) {
+ dumpName(D);
+ dumpType(D->getType());
+ OS << " combiner";
+ dumpStmt(D->getCombiner());
+ if (auto *Initializer = D->getInitializer()) {
+ OS << " initializer";
+ dumpStmt(Initializer);
+ }
+}
+
+void ASTDumper::VisitOMPCapturedExprDecl(const OMPCapturedExprDecl *D) {
+ dumpName(D);
+ dumpType(D->getType());
+ dumpStmt(D->getInit());
+}
+
//===----------------------------------------------------------------------===//
// C++ Declarations
//===----------------------------------------------------------------------===//
@@ -1425,6 +1498,31 @@ void ASTDumper::VisitUsingShadowDecl(const UsingShadowDecl *D) {
dumpTypeAsChild(TD->getTypeForDecl());
}
+void ASTDumper::VisitConstructorUsingShadowDecl(
+ const ConstructorUsingShadowDecl *D) {
+ if (D->constructsVirtualBase())
+ OS << " virtual";
+
+ dumpChild([=] {
+ OS << "target ";
+ dumpBareDeclRef(D->getTargetDecl());
+ });
+
+ dumpChild([=] {
+ OS << "nominated ";
+ dumpBareDeclRef(D->getNominatedBaseClass());
+ OS << ' ';
+ dumpBareDeclRef(D->getNominatedBaseClassShadowDecl());
+ });
+
+ dumpChild([=] {
+ OS << "constructed ";
+ dumpBareDeclRef(D->getConstructedBaseClass());
+ OS << ' ';
+ dumpBareDeclRef(D->getConstructedBaseClassShadowDecl());
+ });
+}
+
void ASTDumper::VisitLinkageSpecDecl(const LinkageSpecDecl *D) {
switch (D->getLanguage()) {
case LinkageSpecDecl::lang_c: OS << " C"; break;
@@ -1484,10 +1582,8 @@ void ASTDumper::VisitObjCMethodDecl(const ObjCMethodDecl *D) {
if (D->isThisDeclarationADefinition()) {
dumpDeclContext(D);
} else {
- for (ObjCMethodDecl::param_const_iterator I = D->param_begin(),
- E = D->param_end();
- I != E; ++I)
- dumpDecl(*I);
+ for (const ParmVarDecl *Parameter : D->parameters())
+ dumpDecl(Parameter);
}
if (D->isVariadic())
@@ -1597,6 +1693,8 @@ void ASTDumper::VisitObjCPropertyDecl(const ObjCPropertyDecl *D) {
OS << " strong";
if (Attrs & ObjCPropertyDecl::OBJC_PR_unsafe_unretained)
OS << " unsafe_unretained";
+ if (Attrs & ObjCPropertyDecl::OBJC_PR_class)
+ OS << " class";
if (Attrs & ObjCPropertyDecl::OBJC_PR_getter)
dumpDeclRef(D->getGetterMethodDecl(), "getter");
if (Attrs & ObjCPropertyDecl::OBJC_PR_setter)
@@ -1615,7 +1713,7 @@ void ASTDumper::VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D) {
}
void ASTDumper::VisitBlockDecl(const BlockDecl *D) {
- for (auto I : D->params())
+ for (auto I : D->parameters())
dumpDecl(I);
if (D->isVariadic())
@@ -1707,6 +1805,41 @@ void ASTDumper::VisitCXXCatchStmt(const CXXCatchStmt *Node) {
dumpDecl(Node->getExceptionDecl());
}
+void ASTDumper::VisitCapturedStmt(const CapturedStmt *Node) {
+ VisitStmt(Node);
+ dumpDecl(Node->getCapturedDecl());
+}
+
+//===----------------------------------------------------------------------===//
+// OpenMP dumping methods.
+//===----------------------------------------------------------------------===//
+
+void ASTDumper::VisitOMPExecutableDirective(
+ const OMPExecutableDirective *Node) {
+ VisitStmt(Node);
+ for (auto *C : Node->clauses()) {
+ dumpChild([=] {
+ if (!C) {
+ ColorScope Color(*this, NullColor);
+ OS << "<<<NULL>>> OMPClause";
+ return;
+ }
+ {
+ ColorScope Color(*this, AttrColor);
+ StringRef ClauseName(getOpenMPClauseName(C->getClauseKind()));
+ OS << "OMP" << ClauseName.substr(/*Start=*/0, /*N=*/1).upper()
+ << ClauseName.drop_front() << "Clause";
+ }
+ dumpPointer(C);
+ dumpSourceRange(SourceRange(C->getLocStart(), C->getLocEnd()));
+ if (C->isImplicit())
+ OS << " <implicit>";
+ for (auto *S : C->children())
+ dumpStmt(S);
+ });
+ }
+}
+
//===----------------------------------------------------------------------===//
// Expr dumping methods.
//===----------------------------------------------------------------------===//
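The clause label in the OpenMP dump above is built by upper-casing the first
character of the clause name. The formatting step in isolation
(printClauseLabel is an illustrative wrapper, not a clang API):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"

    // "firstprivate" -> "OMPFirstprivateClause"
    static void printClauseLabel(llvm::raw_ostream &OS, llvm::StringRef Name) {
      OS << "OMP" << Name.substr(0, 1).upper() << Name.drop_front() << "Clause";
    }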
@@ -2076,8 +2209,10 @@ void ASTDumper::VisitObjCMessageExpr(const ObjCMessageExpr *Node) {
void ASTDumper::VisitObjCBoxedExpr(const ObjCBoxedExpr *Node) {
VisitExpr(Node);
- OS << " selector=";
- Node->getBoxingMethod()->getSelector().print(OS);
+ if (auto *BoxingMethod = Node->getBoxingMethod()) {
+ OS << " selector=";
+ BoxingMethod->getSelector().print(OS);
+ }
}
void ASTDumper::VisitObjCAtCatchStmt(const ObjCAtCatchStmt *Node) {
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
index 916f1081798d..bc1f9f96a06b 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTImporter.cpp
@@ -29,7 +29,7 @@ namespace clang {
public DeclVisitor<ASTNodeImporter, Decl *>,
public StmtVisitor<ASTNodeImporter, Stmt *> {
ASTImporter &Importer;
-
+
public:
explicit ASTNodeImporter(ASTImporter &Importer) : Importer(Importer) { }
@@ -64,11 +64,12 @@ namespace clang {
QualType VisitDecltypeType(const DecltypeType *T);
QualType VisitUnaryTransformType(const UnaryTransformType *T);
QualType VisitAutoType(const AutoType *T);
+ QualType VisitInjectedClassNameType(const InjectedClassNameType *T);
// FIXME: DependentDecltypeType
QualType VisitRecordType(const RecordType *T);
QualType VisitEnumType(const EnumType *T);
QualType VisitAttributedType(const AttributedType *T);
- // FIXME: TemplateTypeParmType
+ QualType VisitTemplateTypeParmType(const TemplateTypeParmType *T);
// FIXME: SubstTemplateTypeParmType
QualType VisitTemplateSpecializationType(const TemplateSpecializationType *T);
QualType VisitElaboratedType(const ElaboratedType *T);
@@ -86,6 +87,10 @@ namespace clang {
void ImportDeclarationNameLoc(const DeclarationNameInfo &From,
DeclarationNameInfo& To);
void ImportDeclContext(DeclContext *FromDC, bool ForceImport = false);
+
+ typedef DesignatedInitExpr::Designator Designator;
+ Designator ImportDesignator(const Designator &D);
+
/// \brief What we should import from the definition.
enum ImportDefinitionKind {
@@ -130,11 +135,13 @@ namespace clang {
bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);
bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);
Decl *VisitDecl(Decl *D);
+ Decl *VisitAccessSpecDecl(AccessSpecDecl *D);
Decl *VisitTranslationUnitDecl(TranslationUnitDecl *D);
Decl *VisitNamespaceDecl(NamespaceDecl *D);
Decl *VisitTypedefNameDecl(TypedefNameDecl *D, bool IsAlias);
Decl *VisitTypedefDecl(TypedefDecl *D);
Decl *VisitTypeAliasDecl(TypeAliasDecl *D);
+ Decl *VisitLabelDecl(LabelDecl *D);
Decl *VisitEnumDecl(EnumDecl *D);
Decl *VisitRecordDecl(RecordDecl *D);
Decl *VisitEnumConstantDecl(EnumConstantDecl *D);
@@ -174,6 +181,7 @@ namespace clang {
DeclGroupRef ImportDeclGroup(DeclGroupRef DG);
Stmt *VisitStmt(Stmt *S);
+ Stmt *VisitGCCAsmStmt(GCCAsmStmt *S);
Stmt *VisitDeclStmt(DeclStmt *S);
Stmt *VisitNullStmt(NullStmt *S);
Stmt *VisitCompoundStmt(CompoundStmt *S);
@@ -191,7 +199,6 @@ namespace clang {
Stmt *VisitContinueStmt(ContinueStmt *S);
Stmt *VisitBreakStmt(BreakStmt *S);
Stmt *VisitReturnStmt(ReturnStmt *S);
- // FIXME: GCCAsmStmt
// FIXME: MSAsmStmt
// FIXME: SEHExceptStmt
// FIXME: SEHFinallyStmt
@@ -212,21 +219,69 @@ namespace clang {
// Importing expressions
Expr *VisitExpr(Expr *E);
+ Expr *VisitVAArgExpr(VAArgExpr *E);
+ Expr *VisitGNUNullExpr(GNUNullExpr *E);
+ Expr *VisitPredefinedExpr(PredefinedExpr *E);
Expr *VisitDeclRefExpr(DeclRefExpr *E);
+ Expr *VisitImplicitValueInitExpr(ImplicitValueInitExpr *ILE);
+ Expr *VisitDesignatedInitExpr(DesignatedInitExpr *E);
+ Expr *VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E);
Expr *VisitIntegerLiteral(IntegerLiteral *E);
+ Expr *VisitFloatingLiteral(FloatingLiteral *E);
Expr *VisitCharacterLiteral(CharacterLiteral *E);
+ Expr *VisitStringLiteral(StringLiteral *E);
+ Expr *VisitCompoundLiteralExpr(CompoundLiteralExpr *E);
+ Expr *VisitAtomicExpr(AtomicExpr *E);
+ Expr *VisitAddrLabelExpr(AddrLabelExpr *E);
Expr *VisitParenExpr(ParenExpr *E);
+ Expr *VisitParenListExpr(ParenListExpr *E);
+ Expr *VisitStmtExpr(StmtExpr *E);
Expr *VisitUnaryOperator(UnaryOperator *E);
Expr *VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E);
Expr *VisitBinaryOperator(BinaryOperator *E);
+ Expr *VisitConditionalOperator(ConditionalOperator *E);
+ Expr *VisitBinaryConditionalOperator(BinaryConditionalOperator *E);
+ Expr *VisitOpaqueValueExpr(OpaqueValueExpr *E);
Expr *VisitCompoundAssignOperator(CompoundAssignOperator *E);
Expr *VisitImplicitCastExpr(ImplicitCastExpr *E);
Expr *VisitCStyleCastExpr(CStyleCastExpr *E);
Expr *VisitCXXConstructExpr(CXXConstructExpr *E);
+ Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E);
+ Expr *VisitCXXThisExpr(CXXThisExpr *E);
+ Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E);
Expr *VisitMemberExpr(MemberExpr *E);
Expr *VisitCallExpr(CallExpr *E);
+ Expr *VisitInitListExpr(InitListExpr *E);
+ Expr *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E);
+ Expr *VisitCXXNamedCastExpr(CXXNamedCastExpr *E);
+
+ template<typename IIter, typename OIter>
+ void ImportArray(IIter Ibegin, IIter Iend, OIter Obegin) {
+ typedef typename std::remove_reference<decltype(*Obegin)>::type ItemT;
+ ASTImporter &ImporterRef = Importer;
+ std::transform(Ibegin, Iend, Obegin,
+ [&ImporterRef](ItemT From) -> ItemT {
+ return ImporterRef.Import(From);
+ });
+ }
+
+ template<typename IIter, typename OIter>
+ bool ImportArrayChecked(IIter Ibegin, IIter Iend, OIter Obegin) {
+ typedef typename std::remove_reference<decltype(**Obegin)>::type ItemT;
+ ASTImporter &ImporterRef = Importer;
+ bool Failed = false;
+ std::transform(Ibegin, Iend, Obegin,
+ [&ImporterRef, &Failed](ItemT *From) -> ItemT * {
+ ItemT *To = ImporterRef.Import(From);
+ if (!To && From)
+ Failed = true;
+ return To;
+ });
+ return Failed;
+ }
};
}
+
using namespace clang;
//----------------------------------------------------------------------------
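ImportArrayChecked above wraps std::transform so that a null result for a
non-null input poisons the whole batch, while null inputs pass through
untouched. A self-contained sketch of the same contract with a toy importer
(Item and importItem are stand-ins for the AST nodes and ASTImporter::Import):

    #include <algorithm>
    #include <vector>

    struct Item { int Value; };

    // Stand-in importer: null means the import failed.
    static Item *importItem(Item *From) {
      if (!From || From->Value < 0)
        return nullptr;
      return new Item{From->Value};
    }

    static bool importAllChecked(const std::vector<Item *> &In,
                                 std::vector<Item *> &Out) {
      bool Failed = false;
      Out.resize(In.size());
      std::transform(In.begin(), In.end(), Out.begin(),
                     [&Failed](Item *From) -> Item * {
                       Item *To = importItem(From);
                       if (!To && From)
                         Failed = true;  // non-null input failed to import
                       return To;
                     });
      return Failed;  // true on any failure, as in ImportArrayChecked
    }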
@@ -618,8 +673,8 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
if (!IsStructurallyEquivalent(Context, Function1->getReturnType(),
Function2->getReturnType()))
return false;
- if (Function1->getExtInfo() != Function2->getExtInfo())
- return false;
+ if (Function1->getExtInfo() != Function2->getExtInfo())
+ return false;
break;
}
@@ -974,7 +1029,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
/// including the next assigned index (if none of them match). Returns an
/// empty option if the context is not a record, i.e.. if the anonymous
/// struct/union is at namespace or block scope.
-static Optional<unsigned> findAnonymousStructOrUnionIndex(RecordDecl *Anon) {
+static Optional<unsigned> findUntaggedStructOrUnionIndex(RecordDecl *Anon) {
ASTContext &Context = Anon->getASTContext();
QualType AnonTy = Context.getRecordType(Anon);
@@ -985,13 +1040,29 @@ static Optional<unsigned> findAnonymousStructOrUnionIndex(RecordDecl *Anon) {
unsigned Index = 0;
for (const auto *D : Owner->noload_decls()) {
const auto *F = dyn_cast<FieldDecl>(D);
- if (!F || !F->isAnonymousStructOrUnion())
+ if (!F)
continue;
- if (Context.hasSameType(F->getType(), AnonTy))
- break;
+ if (F->isAnonymousStructOrUnion()) {
+ if (Context.hasSameType(F->getType(), AnonTy))
+ break;
+ ++Index;
+ continue;
+ }
- ++Index;
+ // If the field looks like this:
+ // struct { ... } A;
+ QualType FieldType = F->getType();
+ if (const auto *RecType = dyn_cast<RecordType>(FieldType)) {
+ const RecordDecl *RecDecl = RecType->getDecl();
+ if (RecDecl->getDeclContext() == Owner &&
+ !RecDecl->getIdentifier()) {
+ if (Context.hasSameType(FieldType, AnonTy))
+ break;
+ ++Index;
+ continue;
+ }
+ }
}
return Index;
@@ -1013,8 +1084,8 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
if (D1->isAnonymousStructOrUnion() && D2->isAnonymousStructOrUnion()) {
// If both anonymous structs/unions are in a record context, make sure
// they occur in the same location in the context records.
- if (Optional<unsigned> Index1 = findAnonymousStructOrUnionIndex(D1)) {
- if (Optional<unsigned> Index2 = findAnonymousStructOrUnionIndex(D2)) {
+ if (Optional<unsigned> Index1 = findUntaggedStructOrUnionIndex(D1)) {
+ if (Optional<unsigned> Index2 = findUntaggedStructOrUnionIndex(D2)) {
if (*Index1 != *Index2)
return false;
}
@@ -1480,6 +1551,10 @@ QualType ASTNodeImporter::VisitType(const Type *T) {
QualType ASTNodeImporter::VisitBuiltinType(const BuiltinType *T) {
switch (T->getKind()) {
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ return Importer.getToContext().SingletonId;
+#include "clang/Basic/OpenCLImageTypes.def"
#define SHARED_SINGLETON_TYPE(Expansion)
#define BUILTIN_TYPE(Id, SingletonId) \
case BuiltinType::Id: return Importer.getToContext().SingletonId;
@@ -1758,6 +1833,28 @@ QualType ASTNodeImporter::VisitAutoType(const AutoType *T) {
/*IsDependent*/false);
}
+QualType ASTNodeImporter::VisitInjectedClassNameType(
+ const InjectedClassNameType *T) {
+ CXXRecordDecl *D = cast_or_null<CXXRecordDecl>(Importer.Import(T->getDecl()));
+ if (!D)
+ return QualType();
+
+ QualType InjType = Importer.Import(T->getInjectedSpecializationType());
+ if (InjType.isNull())
+ return QualType();
+
+ // FIXME: ASTContext::getInjectedClassNameType is not suitable for AST reading
+ // See comments in InjectedClassNameType definition for details
+ // return Importer.getToContext().getInjectedClassNameType(D, InjType);
+ enum {
+ TypeAlignmentInBits = 4,
+ TypeAlignment = 1 << TypeAlignmentInBits
+ };
+
+ return QualType(new (Importer.getToContext(), TypeAlignment)
+ InjectedClassNameType(D, InjType), 0);
+}
+
QualType ASTNodeImporter::VisitRecordType(const RecordType *T) {
RecordDecl *ToDecl
= dyn_cast_or_null<RecordDecl>(Importer.Import(T->getDecl()));
@@ -1797,6 +1894,18 @@ QualType ASTNodeImporter::VisitAttributedType(const AttributedType *T) {
ToModifiedType, ToEquivalentType);
}
+
+QualType ASTNodeImporter::VisitTemplateTypeParmType(
+ const TemplateTypeParmType *T) {
+ TemplateTypeParmDecl *ParmDecl =
+ cast_or_null<TemplateTypeParmDecl>(Importer.Import(T->getDecl()));
+ if (!ParmDecl && T->getDecl())
+ return QualType();
+
+ return Importer.getToContext().getTemplateTypeParmType(
+ T->getDepth(), T->getIndex(), T->isParameterPack(), ParmDecl);
+}
+
QualType ASTNodeImporter::VisitTemplateSpecializationType(
const TemplateSpecializationType *T) {
TemplateName ToTemplate = Importer.Import(T->getTemplateName());
@@ -1816,8 +1925,7 @@ QualType ASTNodeImporter::VisitTemplateSpecializationType(
return QualType();
}
return Importer.getToContext().getTemplateSpecializationType(ToTemplate,
- ToTemplateArgs.data(),
- ToTemplateArgs.size(),
+ ToTemplateArgs,
ToCanonType);
}
@@ -2023,6 +2131,9 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To,
ToData.HasInClassInitializer = FromData.HasInClassInitializer;
ToData.HasUninitializedReferenceMember
= FromData.HasUninitializedReferenceMember;
+ ToData.HasUninitializedFields = FromData.HasUninitializedFields;
+ ToData.HasInheritedConstructor = FromData.HasInheritedConstructor;
+ ToData.HasInheritedAssignment = FromData.HasInheritedAssignment;
ToData.NeedOverloadResolutionForMoveConstructor
= FromData.NeedOverloadResolutionForMoveConstructor;
ToData.NeedOverloadResolutionForMoveAssignment
@@ -2038,6 +2149,8 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To,
ToData.HasIrrelevantDestructor = FromData.HasIrrelevantDestructor;
ToData.HasConstexprNonCopyMoveConstructor
= FromData.HasConstexprNonCopyMoveConstructor;
+ ToData.HasDefaultedDefaultConstructor
+ = FromData.HasDefaultedDefaultConstructor;
ToData.DefaultedDefaultConstructorIsConstexpr
= FromData.DefaultedDefaultConstructorIsConstexpr;
ToData.HasConstexprDefaultConstructor
@@ -2316,6 +2429,31 @@ Decl *ASTNodeImporter::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
return ToD;
}
+Decl *ASTNodeImporter::VisitAccessSpecDecl(AccessSpecDecl *D) {
+
+ SourceLocation Loc = Importer.Import(D->getLocation());
+ SourceLocation ColonLoc = Importer.Import(D->getColonLoc());
+
+ // Import the context of this declaration.
+ DeclContext *DC = Importer.ImportContext(D->getDeclContext());
+ if (!DC)
+ return nullptr;
+
+ AccessSpecDecl *accessSpecDecl
+ = AccessSpecDecl::Create(Importer.getToContext(), D->getAccess(),
+ DC, Loc, ColonLoc);
+
+ if (!accessSpecDecl)
+ return nullptr;
+
+ // Lexical DeclContext and Semantic DeclContext
+ // is always the same for the accessSpec.
+ accessSpecDecl->setLexicalDeclContext(DC);
+ DC->addDeclInternal(accessSpecDecl);
+
+ return accessSpecDecl;
+}
+
Decl *ASTNodeImporter::VisitNamespaceDecl(NamespaceDecl *D) {
// Import the major distinguishing characteristics of this namespace.
DeclContext *DC, *LexicalDC;
@@ -2464,6 +2602,39 @@ Decl *ASTNodeImporter::VisitTypeAliasDecl(TypeAliasDecl *D) {
return VisitTypedefNameDecl(D, /*IsAlias=*/true);
}
+Decl *ASTNodeImporter::VisitLabelDecl(LabelDecl *D) {
+ // Import the major distinguishing characteristics of this label.
+ DeclContext *DC, *LexicalDC;
+ DeclarationName Name;
+ SourceLocation Loc;
+ NamedDecl *ToD;
+ if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
+ return nullptr;
+ if (ToD)
+ return ToD;
+
+ assert(LexicalDC->isFunctionOrMethod());
+
+ LabelDecl *ToLabel = D->isGnuLocal()
+ ? LabelDecl::Create(Importer.getToContext(),
+ DC, Importer.Import(D->getLocation()),
+ Name.getAsIdentifierInfo(),
+ Importer.Import(D->getLocStart()))
+ : LabelDecl::Create(Importer.getToContext(),
+ DC, Importer.Import(D->getLocation()),
+ Name.getAsIdentifierInfo());
+ Importer.Imported(D, ToLabel);
+
+ LabelStmt *Label = cast_or_null<LabelStmt>(Importer.Import(D->getStmt()));
+ if (!Label)
+ return nullptr;
+
+ ToLabel->setStmt(Label);
+ ToLabel->setLexicalDeclContext(LexicalDC);
+ LexicalDC->addDeclInternal(ToLabel);
+ return ToLabel;
+}
+
Decl *ASTNodeImporter::VisitEnumDecl(EnumDecl *D) {
// Import the major distinguishing characteristics of this enum.
DeclContext *DC, *LexicalDC;
@@ -2594,9 +2765,9 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
// If both anonymous structs/unions are in a record context, make sure
// they occur in the same location in the context records.
if (Optional<unsigned> Index1
- = findAnonymousStructOrUnionIndex(D)) {
+ = findUntaggedStructOrUnionIndex(D)) {
if (Optional<unsigned> Index2 =
- findAnonymousStructOrUnionIndex(FoundRecord)) {
+ findUntaggedStructOrUnionIndex(FoundRecord)) {
if (*Index1 != *Index2)
continue;
}
@@ -2654,11 +2825,35 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
RecordDecl *D2 = AdoptDecl;
SourceLocation StartLoc = Importer.Import(D->getLocStart());
if (!D2) {
- if (isa<CXXRecordDecl>(D)) {
- CXXRecordDecl *D2CXX = CXXRecordDecl::Create(Importer.getToContext(),
- D->getTagKind(),
- DC, StartLoc, Loc,
- Name.getAsIdentifierInfo());
+ CXXRecordDecl *D2CXX = nullptr;
+ if (CXXRecordDecl *DCXX = llvm::dyn_cast<CXXRecordDecl>(D)) {
+ if (DCXX->isLambda()) {
+ TypeSourceInfo *TInfo = Importer.Import(DCXX->getLambdaTypeInfo());
+ D2CXX = CXXRecordDecl::CreateLambda(Importer.getToContext(),
+ DC, TInfo, Loc,
+ DCXX->isDependentLambda(),
+ DCXX->isGenericLambda(),
+ DCXX->getLambdaCaptureDefault());
+ Decl *CDecl = Importer.Import(DCXX->getLambdaContextDecl());
+ if (DCXX->getLambdaContextDecl() && !CDecl)
+ return nullptr;
+ D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), CDecl);
+ } else if (DCXX->isInjectedClassName()) {
+ // We have to be careful to do a similar dance to the one in
+ // Sema::ActOnStartCXXMemberDeclarations
+ CXXRecordDecl *const PrevDecl = nullptr;
+ const bool DelayTypeCreation = true;
+ D2CXX = CXXRecordDecl::Create(
+ Importer.getToContext(), D->getTagKind(), DC, StartLoc, Loc,
+ Name.getAsIdentifierInfo(), PrevDecl, DelayTypeCreation);
+ Importer.getToContext().getTypeDeclType(
+ D2CXX, llvm::dyn_cast<CXXRecordDecl>(DC));
+ } else {
+ D2CXX = CXXRecordDecl::Create(Importer.getToContext(),
+ D->getTagKind(),
+ DC, StartLoc, Loc,
+ Name.getAsIdentifierInfo());
+ }
D2 = D2CXX;
D2->setAccess(D->getAccess());
} else {
@@ -2830,7 +3025,7 @@ Decl *ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
// Import the function parameters.
SmallVector<ParmVarDecl *, 8> Parameters;
- for (auto P : D->params()) {
+ for (auto P : D->parameters()) {
ParmVarDecl *ToP = cast_or_null<ParmVarDecl>(Importer.Import(P));
if (!ToP)
return nullptr;
@@ -2851,6 +3046,22 @@ Decl *ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
D->isInlineSpecified(),
D->isImplicit(),
D->isConstexpr());
+ if (unsigned NumInitializers = FromConstructor->getNumCtorInitializers()) {
+ SmallVector<CXXCtorInitializer *, 4> CtorInitializers;
+ for (CXXCtorInitializer *I : FromConstructor->inits()) {
+ CXXCtorInitializer *ToI =
+ cast_or_null<CXXCtorInitializer>(Importer.Import(I));
+ if (!ToI && I)
+ return nullptr;
+ CtorInitializers.push_back(ToI);
+ }
+ CXXCtorInitializer **Memory =
+ new (Importer.getToContext()) CXXCtorInitializer *[NumInitializers];
+ std::copy(CtorInitializers.begin(), CtorInitializers.end(), Memory);
+ CXXConstructorDecl *ToCtor = llvm::cast<CXXConstructorDecl>(ToFunction);
+ ToCtor->setCtorInitializers(Memory);
+ ToCtor->setNumCtorInitializers(NumInitializers);
+ }
} else if (isa<CXXDestructorDecl>(D)) {
ToFunction = CXXDestructorDecl::Create(Importer.getToContext(),
cast<CXXRecordDecl>(DC),
@@ -3009,8 +3220,13 @@ Decl *ASTNodeImporter::VisitFieldDecl(FieldDecl *D) {
D->getInClassInitStyle());
ToField->setAccess(D->getAccess());
ToField->setLexicalDeclContext(LexicalDC);
- if (ToField->hasInClassInitializer())
- ToField->setInClassInitializer(D->getInClassInitializer());
+ if (Expr *FromInitializer = D->getInClassInitializer()) {
+ Expr *ToInitializer = Importer.Import(FromInitializer);
+ if (ToInitializer)
+ ToField->setInClassInitializer(ToInitializer);
+ else
+ return nullptr;
+ }
ToField->setImplicit(D->isImplicit());
Importer.Imported(D, ToField);
LexicalDC->addDeclInternal(ToField);
@@ -3075,7 +3291,7 @@ Decl *ASTNodeImporter::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
IndirectFieldDecl *ToIndirectField = IndirectFieldDecl::Create(
Importer.getToContext(), DC, Loc, Name.getAsIdentifierInfo(), T,
- NamedChain, D->getChainingSize());
+ {NamedChain, D->getChainingSize()});
for (const auto *Attr : D->attrs())
ToIndirectField->addAttr(Attr->clone(Importer.getToContext()));
@@ -3418,7 +3634,7 @@ Decl *ASTNodeImporter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
// Import the parameters
SmallVector<ParmVarDecl *, 5> ToParams;
- for (auto *FromP : D->params()) {
+ for (auto *FromP : D->parameters()) {
ParmVarDecl *ToP = cast_or_null<ParmVarDecl>(Importer.Import(FromP));
if (!ToP)
return nullptr;
@@ -4061,7 +4277,8 @@ Decl *ASTNodeImporter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
}
ObjCPropertyImplDecl *ToImpl
- = InImpl->FindPropertyImplDecl(Property->getIdentifier());
+ = InImpl->FindPropertyImplDecl(Property->getIdentifier(),
+ Property->getQueryKind());
if (!ToImpl) {
ToImpl = ObjCPropertyImplDecl::Create(Importer.getToContext(), DC,
Importer.Import(D->getLocStart()),
@@ -4246,16 +4463,16 @@ Decl *ASTNodeImporter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
CXXRecordDecl *DTemplated = D->getTemplatedDecl();
// Create the declaration that is being templated.
- SourceLocation StartLoc = Importer.Import(DTemplated->getLocStart());
- SourceLocation IdLoc = Importer.Import(DTemplated->getLocation());
- CXXRecordDecl *D2Templated = CXXRecordDecl::Create(Importer.getToContext(),
- DTemplated->getTagKind(),
- DC, StartLoc, IdLoc,
- Name.getAsIdentifierInfo());
- D2Templated->setAccess(DTemplated->getAccess());
- D2Templated->setQualifierInfo(Importer.Import(DTemplated->getQualifierLoc()));
- D2Templated->setLexicalDeclContext(LexicalDC);
-
+ CXXRecordDecl *D2Templated = cast_or_null<CXXRecordDecl>(
+ Importer.Import(DTemplated));
+ if (!D2Templated)
+ return nullptr;
+
+ // Resolve possible cyclic import.
+ if (Decl *AlreadyImported = Importer.GetAlreadyImportedOrNull(D))
+ return AlreadyImported;
+
// Create the class template declaration itself.
TemplateParameterList *TemplateParams
= ImportTemplateParameterList(D->getTemplateParameters());
@@ -4351,8 +4568,7 @@ Decl *ASTNodeImporter::VisitClassTemplateSpecializationDecl(
D->getTagKind(), DC,
StartLoc, IdLoc,
ClassTemplate,
- TemplateArgs.data(),
- TemplateArgs.size(),
+ TemplateArgs,
/*PrevDecl=*/nullptr);
D2->setSpecializationKind(D->getSpecializationKind());
@@ -4553,7 +4769,7 @@ Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl(
// Create a new specialization.
D2 = VarTemplateSpecializationDecl::Create(
Importer.getToContext(), DC, StartLoc, IdLoc, VarTemplate, T, TInfo,
- D->getStorageClass(), TemplateArgs.data(), TemplateArgs.size());
+ D->getStorageClass(), TemplateArgs);
D2->setSpecializationKind(D->getSpecializationKind());
D2->setTemplateArgsInfo(D->getTemplateArgsInfo());
@@ -4599,7 +4815,78 @@ DeclGroupRef ASTNodeImporter::ImportDeclGroup(DeclGroupRef DG) {
<< S->getStmtClassName();
return nullptr;
}
-
+
+Stmt *ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
+ SmallVector<IdentifierInfo *, 4> Names;
+ for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) {
+ IdentifierInfo *ToII = Importer.Import(S->getOutputIdentifier(I));
+ if (!ToII)
+ return nullptr;
+ Names.push_back(ToII);
+ }
+ for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) {
+ IdentifierInfo *ToII = Importer.Import(S->getInputIdentifier(I));
+ if (!ToII)
+ return nullptr;
+ Names.push_back(ToII);
+ }
+
+ SmallVector<StringLiteral *, 4> Clobbers;
+ for (unsigned I = 0, E = S->getNumClobbers(); I != E; I++) {
+ StringLiteral *Clobber = cast_or_null<StringLiteral>(
+ Importer.Import(S->getClobberStringLiteral(I)));
+ if (!Clobber)
+ return nullptr;
+ Clobbers.push_back(Clobber);
+ }
+
+ SmallVector<StringLiteral *, 4> Constraints;
+ for (unsigned I = 0, E = S->getNumOutputs(); I != E; I++) {
+ StringLiteral *Output = cast_or_null<StringLiteral>(
+ Importer.Import(S->getOutputConstraintLiteral(I)));
+ if (!Output)
+ return nullptr;
+ Constraints.push_back(Output);
+ }
+
+ for (unsigned I = 0, E = S->getNumInputs(); I != E; I++) {
+ StringLiteral *Input = cast_or_null<StringLiteral>(
+ Importer.Import(S->getInputConstraintLiteral(I)));
+ if (!Input)
+ return nullptr;
+ Constraints.push_back(Input);
+ }
+
+ SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs());
+ if (ImportArrayChecked(S->begin_outputs(), S->end_outputs(), Exprs.begin()))
+ return nullptr;
+
+ if (ImportArrayChecked(S->begin_inputs(), S->end_inputs(),
+ Exprs.begin() + S->getNumOutputs()))
+ return nullptr;
+
+ StringLiteral *AsmStr = cast_or_null<StringLiteral>(
+ Importer.Import(S->getAsmString()));
+ if (!AsmStr)
+ return nullptr;
+
+ return new (Importer.getToContext()) GCCAsmStmt(
+ Importer.getToContext(),
+ Importer.Import(S->getAsmLoc()),
+ S->isSimple(),
+ S->isVolatile(),
+ S->getNumOutputs(),
+ S->getNumInputs(),
+ Names.data(),
+ Constraints.data(),
+ Exprs.data(),
+ AsmStr,
+ S->getNumClobbers(),
+ Clobbers.data(),
+ Importer.Import(S->getRParenLoc()));
+}
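+
+// Ordering rebuilt above, sketched on an example:
+//   asm("add %1, %0" : [res] "=r"(out) : [val] "r"(in) : "cc");
+// Names and Constraints each list the outputs first ("res" / "=r"), then the
+// inputs ("val" / "r"); Exprs is laid out the same way; Clobbers holds "cc".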
+
Stmt *ASTNodeImporter::VisitDeclStmt(DeclStmt *S) {
DeclGroupRef ToDG = ImportDeclGroup(S->getDeclGroup());
for (Decl *ToD : ToDG) {
@@ -4618,16 +4905,11 @@ Stmt *ASTNodeImporter::VisitNullStmt(NullStmt *S) {
}
Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) {
- SmallVector<Stmt *, 4> ToStmts(S->size());
- auto &_Importer = this->Importer;
- std::transform(S->body_begin(), S->body_end(), ToStmts.begin(),
- [&_Importer](Stmt *CS) -> Stmt * {
- return _Importer.Import(CS);
- });
- for (Stmt *ToS : ToStmts) {
- if (!ToS)
- return nullptr;
- }
+ llvm::SmallVector<Stmt *, 8> ToStmts(S->size());
+
+ if (ImportArrayChecked(S->body_begin(), S->body_end(), ToStmts.begin()))
+ return nullptr;
+
SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());
SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc());
return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(),
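+
+// A plausible shape for the ImportArrayChecked helper used above and below
+// (a sketch; the real definition presumably lives earlier in this file):
+// import each element, store it, and report an error if any came back null.
+//
+//   template <typename InIt, typename OutIt>
+//   bool ImportArrayChecked(InIt From, InIt FromEnd, OutIt To) {
+//     typedef typename std::remove_pointer<
+//         typename std::iterator_traits<OutIt>::value_type>::type ItemT;
+//     for (; From != FromEnd; ++From, ++To) {
+//       *To = cast_or_null<ItemT>(Importer.Import(*From));
+//       if (!*To)
+//         return true; // true means an element failed to import
+//     }
+//     return false;
+//   }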
@@ -4695,6 +4977,9 @@ Stmt *ASTNodeImporter::VisitAttributedStmt(AttributedStmt *S) {
Stmt *ASTNodeImporter::VisitIfStmt(IfStmt *S) {
SourceLocation ToIfLoc = Importer.Import(S->getIfLoc());
+ Stmt *ToInit = Importer.Import(S->getInit());
+ if (!ToInit && S->getInit())
+ return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
@@ -4713,12 +4998,17 @@ Stmt *ASTNodeImporter::VisitIfStmt(IfStmt *S) {
if (!ToElseStmt && S->getElse())
return nullptr;
return new (Importer.getToContext()) IfStmt(Importer.getToContext(),
- ToIfLoc, ToConditionVariable,
+ ToIfLoc, S->isConstexpr(),
+ ToInit,
+ ToConditionVariable,
ToCondition, ToThenStmt,
ToElseLoc, ToElseStmt);
}
Stmt *ASTNodeImporter::VisitSwitchStmt(SwitchStmt *S) {
+ Stmt *ToInit = Importer.Import(S->getInit());
+ if (!ToInit && S->getInit())
+ return nullptr;
VarDecl *ToConditionVariable = nullptr;
if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
ToConditionVariable =
@@ -4730,8 +5020,8 @@ Stmt *ASTNodeImporter::VisitSwitchStmt(SwitchStmt *S) {
if (!ToCondition && S->getCond())
return nullptr;
SwitchStmt *ToStmt = new (Importer.getToContext()) SwitchStmt(
- Importer.getToContext(), ToConditionVariable,
- ToCondition);
+ Importer.getToContext(), ToInit,
+ ToConditionVariable, ToCondition);
Stmt *ToBody = Importer.Import(S->getBody());
if (!ToBody && S->getBody())
return nullptr;
@@ -4905,9 +5195,13 @@ Stmt *ASTNodeImporter::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
dyn_cast_or_null<DeclStmt>(Importer.Import(S->getRangeStmt()));
if (!ToRange && S->getRangeStmt())
return nullptr;
- DeclStmt *ToBeginEnd =
- dyn_cast_or_null<DeclStmt>(Importer.Import(S->getBeginEndStmt()));
- if (!ToBeginEnd && S->getBeginEndStmt())
+ DeclStmt *ToBegin =
+ dyn_cast_or_null<DeclStmt>(Importer.Import(S->getBeginStmt()));
+ if (!ToBegin && S->getBeginStmt())
+ return nullptr;
+ DeclStmt *ToEnd =
+ dyn_cast_or_null<DeclStmt>(Importer.Import(S->getEndStmt()));
+ if (!ToEnd && S->getEndStmt())
return nullptr;
Expr *ToCond = Importer.Import(S->getCond());
if (!ToCond && S->getCond())
@@ -4926,7 +5220,7 @@ Stmt *ASTNodeImporter::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
SourceLocation ToCoawaitLoc = Importer.Import(S->getCoawaitLoc());
SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
- return new (Importer.getToContext()) CXXForRangeStmt(ToRange, ToBeginEnd,
+ return new (Importer.getToContext()) CXXForRangeStmt(ToRange, ToBegin, ToEnd,
ToCond, ToInc,
ToLoopVar, ToBody,
ToForLoc, ToCoawaitLoc,
@@ -5042,6 +5336,48 @@ Expr *ASTNodeImporter::VisitExpr(Expr *E) {
return nullptr;
}
+Expr *ASTNodeImporter::VisitVAArgExpr(VAArgExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ Expr *SubExpr = Importer.Import(E->getSubExpr());
+ if (!SubExpr && E->getSubExpr())
+ return nullptr;
+
+ TypeSourceInfo *TInfo = Importer.Import(E->getWrittenTypeInfo());
+ if (!TInfo)
+ return nullptr;
+
+ return new (Importer.getToContext()) VAArgExpr(
+ Importer.Import(E->getBuiltinLoc()), SubExpr, TInfo,
+ Importer.Import(E->getRParenLoc()), T, E->isMicrosoftABI());
+}
+
+Expr *ASTNodeImporter::VisitGNUNullExpr(GNUNullExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ return new (Importer.getToContext()) GNUNullExpr(
+ T, Importer.Import(E->getExprLoc()));
+}
+
+Expr *ASTNodeImporter::VisitPredefinedExpr(PredefinedExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ StringLiteral *SL = cast_or_null<StringLiteral>(
+ Importer.Import(E->getFunctionName()));
+ if (!SL && E->getFunctionName())
+ return nullptr;
+
+ return new (Importer.getToContext()) PredefinedExpr(
+ Importer.Import(E->getExprLoc()), T, E->getIdentType(), SL);
+}
+
Expr *ASTNodeImporter::VisitDeclRefExpr(DeclRefExpr *E) {
ValueDecl *ToD = cast_or_null<ValueDecl>(Importer.Import(E->getDecl()));
if (!ToD)
@@ -5072,6 +5408,74 @@ Expr *ASTNodeImporter::VisitDeclRefExpr(DeclRefExpr *E) {
return DRE;
}
+Expr *ASTNodeImporter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+    return nullptr;
+
+ return new (Importer.getToContext()) ImplicitValueInitExpr(T);
+}
+
+ASTNodeImporter::Designator
+ASTNodeImporter::ImportDesignator(const Designator &D) {
+ if (D.isFieldDesignator()) {
+ IdentifierInfo *ToFieldName = Importer.Import(D.getFieldName());
+ // Caller checks for import error
+ return Designator(ToFieldName, Importer.Import(D.getDotLoc()),
+ Importer.Import(D.getFieldLoc()));
+ }
+ if (D.isArrayDesignator())
+ return Designator(D.getFirstExprIndex(),
+ Importer.Import(D.getLBracketLoc()),
+ Importer.Import(D.getRBracketLoc()));
+
+ assert(D.isArrayRangeDesignator());
+ return Designator(D.getFirstExprIndex(),
+ Importer.Import(D.getLBracketLoc()),
+ Importer.Import(D.getEllipsisLoc()),
+ Importer.Import(D.getRBracketLoc()));
+}
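+
+// The three Designator forms above correspond to, e.g.:
+//   struct P p  = { .x = 1 };        // field designator
+//   int    a[4] = { [2] = 1 };       // array designator
+//   int    b[8] = { [2 ... 5] = 1 }; // GNU array-range designator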
+
+Expr *ASTNodeImporter::VisitDesignatedInitExpr(DesignatedInitExpr *DIE) {
+ Expr *Init = cast_or_null<Expr>(Importer.Import(DIE->getInit()));
+ if (!Init)
+ return nullptr;
+
+ SmallVector<Expr *, 4> IndexExprs(DIE->getNumSubExprs() - 1);
+  // Import the remaining sub-expressions; the first one (index 0) is Init
+  // itself.
+ for (unsigned I = 1, E = DIE->getNumSubExprs(); I < E; I++) {
+ if (Expr *Arg = cast_or_null<Expr>(Importer.Import(DIE->getSubExpr(I))))
+ IndexExprs[I - 1] = Arg;
+ else
+ return nullptr;
+ }
+
+ SmallVector<Designator, 4> Designators(DIE->size());
+ llvm::transform(DIE->designators(), Designators.begin(),
+ [this](const Designator &D) -> Designator {
+ return ImportDesignator(D);
+ });
+
+ for (const Designator &D : DIE->designators())
+ if (D.isFieldDesignator() && !D.getFieldName())
+ return nullptr;
+
+ return DesignatedInitExpr::Create(
+ Importer.getToContext(), Designators,
+ IndexExprs, Importer.Import(DIE->getEqualOrColonLoc()),
+ DIE->usesGNUSyntax(), Init);
+}
+
+Expr *ASTNodeImporter::VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ return new (Importer.getToContext())
+ CXXNullPtrLiteralExpr(T, Importer.Import(E->getLocation()));
+}
+
Expr *ASTNodeImporter::VisitIntegerLiteral(IntegerLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
@@ -5082,6 +5486,16 @@ Expr *ASTNodeImporter::VisitIntegerLiteral(IntegerLiteral *E) {
Importer.Import(E->getLocation()));
}
+Expr *ASTNodeImporter::VisitFloatingLiteral(FloatingLiteral *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ return FloatingLiteral::Create(Importer.getToContext(),
+ E->getValue(), E->isExact(), T,
+ Importer.Import(E->getLocation()));
+}
+
Expr *ASTNodeImporter::VisitCharacterLiteral(CharacterLiteral *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
@@ -5092,6 +5506,67 @@ Expr *ASTNodeImporter::VisitCharacterLiteral(CharacterLiteral *E) {
Importer.Import(E->getLocation()));
}
+Expr *ASTNodeImporter::VisitStringLiteral(StringLiteral *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ SmallVector<SourceLocation, 4> Locations(E->getNumConcatenated());
+ ImportArray(E->tokloc_begin(), E->tokloc_end(), Locations.begin());
+
+ return StringLiteral::Create(Importer.getToContext(), E->getBytes(),
+ E->getKind(), E->isPascal(), T,
+ Locations.data(), Locations.size());
+}
+
+Expr *ASTNodeImporter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ TypeSourceInfo *TInfo = Importer.Import(E->getTypeSourceInfo());
+ if (!TInfo)
+ return nullptr;
+
+ Expr *Init = Importer.Import(E->getInitializer());
+ if (!Init)
+ return nullptr;
+
+ return new (Importer.getToContext()) CompoundLiteralExpr(
+ Importer.Import(E->getLParenLoc()), TInfo, T, E->getValueKind(),
+ Init, E->isFileScope());
+}
+
+Expr *ASTNodeImporter::VisitAtomicExpr(AtomicExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ SmallVector<Expr *, 6> Exprs(E->getNumSubExprs());
+ if (ImportArrayChecked(
+ E->getSubExprs(), E->getSubExprs() + E->getNumSubExprs(),
+ Exprs.begin()))
+ return nullptr;
+
+ return new (Importer.getToContext()) AtomicExpr(
+ Importer.Import(E->getBuiltinLoc()), Exprs, T, E->getOp(),
+ Importer.Import(E->getRParenLoc()));
+}
+
+Expr *ASTNodeImporter::VisitAddrLabelExpr(AddrLabelExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ LabelDecl *ToLabel = cast_or_null<LabelDecl>(Importer.Import(E->getLabel()));
+ if (!ToLabel)
+ return nullptr;
+
+ return new (Importer.getToContext()) AddrLabelExpr(
+ Importer.Import(E->getAmpAmpLoc()), Importer.Import(E->getLabelLoc()),
+ ToLabel, T);
+}
+
Expr *ASTNodeImporter::VisitParenExpr(ParenExpr *E) {
Expr *SubExpr = Importer.Import(E->getSubExpr());
if (!SubExpr)
@@ -5103,6 +5578,31 @@ Expr *ASTNodeImporter::VisitParenExpr(ParenExpr *E) {
SubExpr);
}
+Expr *ASTNodeImporter::VisitParenListExpr(ParenListExpr *E) {
+ SmallVector<Expr *, 4> Exprs(E->getNumExprs());
+ if (ImportArrayChecked(
+ E->getExprs(), E->getExprs() + E->getNumExprs(), Exprs.begin()))
+ return nullptr;
+
+ return new (Importer.getToContext()) ParenListExpr(
+ Importer.getToContext(), Importer.Import(E->getLParenLoc()),
+      Exprs, Importer.Import(E->getRParenLoc()));
+}
+
+Expr *ASTNodeImporter::VisitStmtExpr(StmtExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ CompoundStmt *ToSubStmt = cast_or_null<CompoundStmt>(
+ Importer.Import(E->getSubStmt()));
+ if (!ToSubStmt && E->getSubStmt())
+ return nullptr;
+
+ return new (Importer.getToContext()) StmtExpr(ToSubStmt, T,
+ Importer.Import(E->getLParenLoc()), Importer.Import(E->getRParenLoc()));
+}
+
Expr *ASTNodeImporter::VisitUnaryOperator(UnaryOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
@@ -5163,6 +5663,76 @@ Expr *ASTNodeImporter::VisitBinaryOperator(BinaryOperator *E) {
E->isFPContractable());
}
+Expr *ASTNodeImporter::VisitConditionalOperator(ConditionalOperator *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ Expr *ToLHS = Importer.Import(E->getLHS());
+ if (!ToLHS)
+ return nullptr;
+
+ Expr *ToRHS = Importer.Import(E->getRHS());
+ if (!ToRHS)
+ return nullptr;
+
+ Expr *ToCond = Importer.Import(E->getCond());
+ if (!ToCond)
+ return nullptr;
+
+ return new (Importer.getToContext()) ConditionalOperator(
+ ToCond, Importer.Import(E->getQuestionLoc()),
+ ToLHS, Importer.Import(E->getColonLoc()),
+ ToRHS, T, E->getValueKind(), E->getObjectKind());
+}
+
+Expr *ASTNodeImporter::VisitBinaryConditionalOperator(
+ BinaryConditionalOperator *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ Expr *Common = Importer.Import(E->getCommon());
+ if (!Common)
+ return nullptr;
+
+ Expr *Cond = Importer.Import(E->getCond());
+ if (!Cond)
+ return nullptr;
+
+ OpaqueValueExpr *OpaqueValue = cast_or_null<OpaqueValueExpr>(
+ Importer.Import(E->getOpaqueValue()));
+ if (!OpaqueValue)
+ return nullptr;
+
+ Expr *TrueExpr = Importer.Import(E->getTrueExpr());
+ if (!TrueExpr)
+ return nullptr;
+
+ Expr *FalseExpr = Importer.Import(E->getFalseExpr());
+ if (!FalseExpr)
+ return nullptr;
+
+ return new (Importer.getToContext()) BinaryConditionalOperator(
+ Common, OpaqueValue, Cond, TrueExpr, FalseExpr,
+ Importer.Import(E->getQuestionLoc()), Importer.Import(E->getColonLoc()),
+ T, E->getValueKind(), E->getObjectKind());
+}
+
+Expr *ASTNodeImporter::VisitOpaqueValueExpr(OpaqueValueExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ Expr *SourceExpr = Importer.Import(E->getSourceExpr());
+ if (!SourceExpr && E->getSourceExpr())
+ return nullptr;
+
+ return new (Importer.getToContext()) OpaqueValueExpr(
+ Importer.Import(E->getExprLoc()), T, E->getValueKind(),
+ E->getObjectKind(), SourceExpr);
+}
+
Expr *ASTNodeImporter::VisitCompoundAssignOperator(CompoundAssignOperator *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
@@ -5247,21 +5817,14 @@ Expr *ASTNodeImporter::VisitCXXConstructExpr(CXXConstructExpr *E) {
return nullptr;
CXXConstructorDecl *ToCCD =
- dyn_cast<CXXConstructorDecl>(Importer.Import(E->getConstructor()));
- if (!ToCCD && E->getConstructor())
+ dyn_cast_or_null<CXXConstructorDecl>(Importer.Import(E->getConstructor()));
+ if (!ToCCD)
return nullptr;
- size_t NumArgs = E->getNumArgs();
- SmallVector<Expr *, 1> ToArgs(NumArgs);
- ASTImporter &_Importer = Importer;
- std::transform(E->arg_begin(), E->arg_end(), ToArgs.begin(),
- [&_Importer](Expr *AE) -> Expr * {
- return _Importer.Import(AE);
- });
- for (Expr *ToA : ToArgs) {
- if (!ToA)
- return nullptr;
- }
+ SmallVector<Expr *, 6> ToArgs(E->getNumArgs());
+ if (ImportArrayChecked(E->getArgs(), E->getArgs() + E->getNumArgs(),
+ ToArgs.begin()))
+ return nullptr;
return CXXConstructExpr::Create(Importer.getToContext(), T,
Importer.Import(E->getLocation()),
@@ -5274,6 +5837,44 @@ Expr *ASTNodeImporter::VisitCXXConstructExpr(CXXConstructExpr *E) {
Importer.Import(E->getParenOrBraceRange()));
}
+Expr *ASTNodeImporter::VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ Expr *ToFn = Importer.Import(E->getCallee());
+ if (!ToFn)
+ return nullptr;
+
+ SmallVector<Expr *, 4> ToArgs(E->getNumArgs());
+
+ if (ImportArrayChecked(E->arg_begin(), E->arg_end(), ToArgs.begin()))
+ return nullptr;
+
+ return new (Importer.getToContext()) CXXMemberCallExpr(
+ Importer.getToContext(), ToFn, ToArgs, T, E->getValueKind(),
+ Importer.Import(E->getRParenLoc()));
+}
+
+Expr *ASTNodeImporter::VisitCXXThisExpr(CXXThisExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ return new (Importer.getToContext())
+ CXXThisExpr(Importer.Import(E->getLocation()), T, E->isImplicit());
+}
+
+Expr *ASTNodeImporter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) {
+ QualType T = Importer.Import(E->getType());
+ if (T.isNull())
+ return nullptr;
+
+ return new (Importer.getToContext())
+ CXXBoolLiteralExpr(E->getValue(), T, Importer.Import(E->getLocation()));
+}
+
Expr *ASTNodeImporter::VisitMemberExpr(MemberExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
@@ -5342,6 +5943,95 @@ Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {
Importer.Import(E->getRParenLoc()));
}
+Expr *ASTNodeImporter::VisitInitListExpr(InitListExpr *ILE) {
+ QualType T = Importer.Import(ILE->getType());
+ if (T.isNull())
+ return nullptr;
+
+ llvm::SmallVector<Expr *, 4> Exprs(ILE->getNumInits());
+ if (ImportArrayChecked(
+ ILE->getInits(), ILE->getInits() + ILE->getNumInits(), Exprs.begin()))
+ return nullptr;
+
+ ASTContext &ToCtx = Importer.getToContext();
+ InitListExpr *To = new (ToCtx) InitListExpr(
+ ToCtx, Importer.Import(ILE->getLBraceLoc()),
+    Exprs, Importer.Import(ILE->getRBraceLoc()));
+ To->setType(T);
+
+ if (ILE->hasArrayFiller()) {
+ Expr *Filler = Importer.Import(ILE->getArrayFiller());
+ if (!Filler)
+ return nullptr;
+ To->setArrayFiller(Filler);
+ }
+
+ if (FieldDecl *FromFD = ILE->getInitializedFieldInUnion()) {
+ FieldDecl *ToFD = cast_or_null<FieldDecl>(Importer.Import(FromFD));
+ if (!ToFD)
+ return nullptr;
+ To->setInitializedFieldInUnion(ToFD);
+ }
+
+ if (InitListExpr *SyntForm = ILE->getSyntacticForm()) {
+ InitListExpr *ToSyntForm = cast_or_null<InitListExpr>(
+ Importer.Import(SyntForm));
+ if (!ToSyntForm)
+ return nullptr;
+ To->setSyntacticForm(ToSyntForm);
+ }
+
+ To->sawArrayRangeDesignator(ILE->hadArrayRangeDesignator());
+ To->setValueDependent(ILE->isValueDependent());
+ To->setInstantiationDependent(ILE->isInstantiationDependent());
+
+ return To;
+}
+
+Expr *ASTNodeImporter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) {
+ FieldDecl *ToField = llvm::dyn_cast_or_null<FieldDecl>(
+ Importer.Import(DIE->getField()));
+ if (!ToField && DIE->getField())
+ return nullptr;
+
+ return CXXDefaultInitExpr::Create(
+ Importer.getToContext(), Importer.Import(DIE->getLocStart()), ToField);
+}
+
+Expr *ASTNodeImporter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) {
+ QualType ToType = Importer.Import(E->getType());
+ if (ToType.isNull() && !E->getType().isNull())
+ return nullptr;
+ ExprValueKind VK = E->getValueKind();
+ CastKind CK = E->getCastKind();
+ Expr *ToOp = Importer.Import(E->getSubExpr());
+ if (!ToOp && E->getSubExpr())
+ return nullptr;
+ CXXCastPath BasePath;
+ if (ImportCastPath(E, BasePath))
+ return nullptr;
+ TypeSourceInfo *ToWritten = Importer.Import(E->getTypeInfoAsWritten());
+ SourceLocation ToOperatorLoc = Importer.Import(E->getOperatorLoc());
+ SourceLocation ToRParenLoc = Importer.Import(E->getRParenLoc());
+ SourceRange ToAngleBrackets = Importer.Import(E->getAngleBrackets());
+
+ if (isa<CXXStaticCastExpr>(E)) {
+ return CXXStaticCastExpr::Create(
+ Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
+ ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
+ } else if (isa<CXXDynamicCastExpr>(E)) {
+ return CXXDynamicCastExpr::Create(
+ Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
+ ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
+ } else if (isa<CXXReinterpretCastExpr>(E)) {
+ return CXXReinterpretCastExpr::Create(
+ Importer.getToContext(), ToType, VK, CK, ToOp, &BasePath,
+ ToWritten, ToOperatorLoc, ToRParenLoc, ToAngleBrackets);
+ } else {
+ return nullptr;
+ }
+}
+
ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager,
ASTContext &FromContext, FileManager &FromFileManager,
bool MinimalImport)
@@ -5747,6 +6437,72 @@ FileID ASTImporter::Import(FileID FromID) {
return ToID;
}
+CXXCtorInitializer *ASTImporter::Import(CXXCtorInitializer *From) {
+ Expr *ToExpr = Import(From->getInit());
+ if (!ToExpr && From->getInit())
+ return nullptr;
+
+ if (From->isBaseInitializer()) {
+ TypeSourceInfo *ToTInfo = Import(From->getTypeSourceInfo());
+ if (!ToTInfo && From->getTypeSourceInfo())
+ return nullptr;
+
+ return new (ToContext) CXXCtorInitializer(
+ ToContext, ToTInfo, From->isBaseVirtual(), Import(From->getLParenLoc()),
+ ToExpr, Import(From->getRParenLoc()),
+ From->isPackExpansion() ? Import(From->getEllipsisLoc())
+ : SourceLocation());
+ } else if (From->isMemberInitializer()) {
+ FieldDecl *ToField =
+ llvm::cast_or_null<FieldDecl>(Import(From->getMember()));
+ if (!ToField && From->getMember())
+ return nullptr;
+
+ return new (ToContext) CXXCtorInitializer(
+ ToContext, ToField, Import(From->getMemberLocation()),
+ Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()));
+ } else if (From->isIndirectMemberInitializer()) {
+ IndirectFieldDecl *ToIField = llvm::cast_or_null<IndirectFieldDecl>(
+ Import(From->getIndirectMember()));
+ if (!ToIField && From->getIndirectMember())
+ return nullptr;
+
+ return new (ToContext) CXXCtorInitializer(
+ ToContext, ToIField, Import(From->getMemberLocation()),
+ Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()));
+ } else if (From->isDelegatingInitializer()) {
+ TypeSourceInfo *ToTInfo = Import(From->getTypeSourceInfo());
+ if (!ToTInfo && From->getTypeSourceInfo())
+ return nullptr;
+
+ return new (ToContext)
+ CXXCtorInitializer(ToContext, ToTInfo, Import(From->getLParenLoc()),
+ ToExpr, Import(From->getRParenLoc()));
+ } else if (unsigned NumArrayIndices = From->getNumArrayIndices()) {
+ FieldDecl *ToField =
+ llvm::cast_or_null<FieldDecl>(Import(From->getMember()));
+ if (!ToField && From->getMember())
+ return nullptr;
+
+ SmallVector<VarDecl *, 4> ToAIs(NumArrayIndices);
+
+    for (unsigned AII = 0; AII < NumArrayIndices; ++AII) {
+      VarDecl *ToArrayIndex =
+          dyn_cast_or_null<VarDecl>(Import(From->getArrayIndex(AII)));
+      if (!ToArrayIndex && From->getArrayIndex(AII))
+        return nullptr;
+      ToAIs[AII] = ToArrayIndex;
+    }
+
+ return CXXCtorInitializer::Create(
+ ToContext, ToField, Import(From->getMemberLocation()),
+ Import(From->getLParenLoc()), ToExpr, Import(From->getRParenLoc()),
+ ToAIs.data(), NumArrayIndices);
+ } else {
+ return nullptr;
+ }
+}
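+
+// The initializer kinds distinguished above, sketched:
+//   struct D : B {
+//     union { int u; };   // ": u(0)" would be an indirect member initializer
+//     int m;
+//     D() : B(), m(0) {}  // base initializer, then member initializer
+//     D(int) : D() {}     // delegating initializer
+//   };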
+
void ASTImporter::ImportDefinition(Decl *From) {
Decl *To = Import(From);
if (!To)
@@ -5851,7 +6607,12 @@ IdentifierInfo *ASTImporter::Import(const IdentifierInfo *FromId) {
if (!FromId)
return nullptr;
- return &ToContext.Idents.get(FromId->getName());
+ IdentifierInfo *ToId = &ToContext.Idents.get(FromId->getName());
+
+ if (!ToId->getBuiltinID() && FromId->getBuiltinID())
+ ToId->setBuiltinID(FromId->getBuiltinID());
+
+ return ToId;
}
Selector ASTImporter::Import(Selector FromSel) {
@@ -5910,6 +6671,16 @@ void ASTImporter::CompleteDecl (Decl *D) {
}
Decl *ASTImporter::Imported(Decl *From, Decl *To) {
+ if (From->hasAttrs()) {
+ for (Attr *FromAttr : From->getAttrs())
+ To->addAttr(FromAttr->clone(To->getASTContext()));
+ }
+ if (From->isUsed()) {
+ To->setIsUsed();
+ }
+ if (From->isImplicit()) {
+ To->setImplicit();
+ }
ImportedDecls[From] = To;
return To;
}
diff --git a/contrib/llvm/tools/clang/lib/AST/ASTTypeTraits.cpp b/contrib/llvm/tools/clang/lib/AST/ASTTypeTraits.cpp
index ec0671ceb1b5..2336c98fe049 100644
--- a/contrib/llvm/tools/clang/lib/AST/ASTTypeTraits.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ASTTypeTraits.cpp
@@ -22,12 +22,12 @@ namespace ast_type_traits {
const ASTNodeKind::KindInfo ASTNodeKind::AllKindInfo[] = {
{ NKI_None, "<None>" },
- { NKI_None, "CXXCtorInitializer" },
{ NKI_None, "TemplateArgument" },
- { NKI_None, "NestedNameSpecifier" },
{ NKI_None, "NestedNameSpecifierLoc" },
{ NKI_None, "QualType" },
{ NKI_None, "TypeLoc" },
+ { NKI_None, "CXXCtorInitializer" },
+ { NKI_None, "NestedNameSpecifier" },
{ NKI_None, "Decl" },
#define DECL(DERIVED, BASE) { NKI_##BASE, #DERIVED "Decl" },
#include "clang/AST/DeclNodes.inc"
@@ -43,10 +43,6 @@ bool ASTNodeKind::isBaseOf(ASTNodeKind Other, unsigned *Distance) const {
return isBaseOf(KindId, Other.KindId, Distance);
}
-bool ASTNodeKind::isSame(ASTNodeKind Other) const {
- return KindId != NKI_None && KindId == Other.KindId;
-}
-
bool ASTNodeKind::isBaseOf(NodeKindId Base, NodeKindId Derived,
unsigned *Distance) {
if (Base == NKI_None || Derived == NKI_None) return false;
diff --git a/contrib/llvm/tools/clang/lib/AST/CXXInheritance.cpp b/contrib/llvm/tools/clang/lib/AST/CXXInheritance.cpp
index 6785a0c2935a..6a6ca76a0165 100644
--- a/contrib/llvm/tools/clang/lib/AST/CXXInheritance.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/CXXInheritance.cpp
@@ -405,6 +405,21 @@ bool CXXRecordDecl::FindOrdinaryMember(const CXXBaseSpecifier *Specifier,
return false;
}
+bool CXXRecordDecl::FindOMPReductionMember(const CXXBaseSpecifier *Specifier,
+ CXXBasePath &Path,
+ DeclarationName Name) {
+ RecordDecl *BaseRecord =
+ Specifier->getType()->castAs<RecordType>()->getDecl();
+
+ for (Path.Decls = BaseRecord->lookup(Name); !Path.Decls.empty();
+ Path.Decls = Path.Decls.slice(1)) {
+ if (Path.Decls.front()->isInIdentifierNamespace(IDNS_OMPReduction))
+ return true;
+ }
+
+ return false;
+}
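+
+// Likely driven the same way as FindOrdinaryMember (a sketch, assuming the
+// function_ref form of lookupInBases):
+//   CXXBasePaths Paths;
+//   RD->lookupInBases(
+//       [&](const CXXBaseSpecifier *BS, CXXBasePath &P) {
+//         return CXXRecordDecl::FindOMPReductionMember(BS, P, Name);
+//       },
+//       Paths);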
+
bool CXXRecordDecl::
FindNestedNameSpecifierMember(const CXXBaseSpecifier *Specifier,
CXXBasePath &Path,
diff --git a/contrib/llvm/tools/clang/lib/AST/Comment.cpp b/contrib/llvm/tools/clang/lib/AST/Comment.cpp
index d05c5de543ff..893bdc5c17bf 100644
--- a/contrib/llvm/tools/clang/lib/AST/Comment.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Comment.cpp
@@ -157,7 +157,7 @@ void DeclInfo::fill() {
case Decl::CXXConversion: {
const FunctionDecl *FD = cast<FunctionDecl>(CommentDecl);
Kind = FunctionKind;
- ParamVars = llvm::makeArrayRef(FD->param_begin(), FD->getNumParams());
+ ParamVars = FD->parameters();
ReturnType = FD->getReturnType();
unsigned NumLists = FD->getNumTemplateParameterLists();
if (NumLists != 0) {
@@ -177,7 +177,7 @@ void DeclInfo::fill() {
case Decl::ObjCMethod: {
const ObjCMethodDecl *MD = cast<ObjCMethodDecl>(CommentDecl);
Kind = FunctionKind;
- ParamVars = llvm::makeArrayRef(MD->param_begin(), MD->param_size());
+ ParamVars = MD->parameters();
ReturnType = MD->getReturnType();
IsObjCMethod = true;
IsInstanceMethod = MD->isInstanceMethod();
@@ -189,7 +189,7 @@ void DeclInfo::fill() {
Kind = FunctionKind;
TemplateKind = Template;
const FunctionDecl *FD = FTD->getTemplatedDecl();
- ParamVars = llvm::makeArrayRef(FD->param_begin(), FD->getNumParams());
+ ParamVars = FD->parameters();
ReturnType = FD->getReturnType();
TemplateParameters = FTD->getTemplateParameters();
break;
diff --git a/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp b/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
index 98b7e367950c..57bfef08df6e 100644
--- a/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/CommentLexer.cpp
@@ -1,3 +1,12 @@
+//===--- CommentLexer.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentDiagnostic.h"
@@ -44,7 +53,7 @@ namespace {
#include "clang/AST/CommentHTMLTags.inc"
#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
-} // unnamed namespace
+} // end anonymous namespace
StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
// Fast path, first check a few most widely used named character references.
@@ -266,7 +275,7 @@ const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
llvm_unreachable("buffer end hit before '*/' was seen");
}
-} // unnamed namespace
+} // end anonymous namespace
void Lexer::formTokenWithChars(Token &Result, const char *TokEnd,
tok::TokenKind Kind) {
@@ -411,7 +420,6 @@ void Lexer::lexCommentText(Token &T) {
setupAndLexHTMLEndTag(T);
else
formTextToken(T, TokenPtr);
-
return;
}
@@ -604,7 +612,6 @@ void Lexer::lexHTMLCharacterReference(Token &T) {
}
formTokenWithChars(T, TokenPtr, tok::text);
T.setText(Resolved);
- return;
}
void Lexer::setupAndLexHTMLStartTag(Token &T) {
@@ -848,4 +855,3 @@ StringRef Lexer::getSpelling(const Token &Tok,
} // end namespace comments
} // end namespace clang
-
diff --git a/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp b/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp
index 12823c37dfc6..f5f4f70dcbbf 100644
--- a/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/CommentSema.cpp
@@ -23,7 +23,7 @@ namespace comments {
namespace {
#include "clang/AST/CommentHTMLTagsProperties.inc"
-} // unnamed namespace
+} // end anonymous namespace
Sema::Sema(llvm::BumpPtrAllocator &Allocator, const SourceManager &SourceMgr,
DiagnosticsEngine &Diags, CommandTraits &Traits,
@@ -353,8 +353,6 @@ void Sema::actOnTParamCommandParamNameArg(TParamCommandComment *Command,
<< CorrectedName
<< FixItHint::CreateReplacement(ArgRange, CorrectedName);
}
-
- return;
}
void Sema::actOnTParamCommandFinish(TParamCommandComment *Command,
@@ -1002,7 +1000,7 @@ void SimpleTypoCorrector::addDecl(const NamedDecl *ND) {
BestIndex = CurrIndex;
}
}
-} // unnamed namespace
+} // end anonymous namespace
unsigned Sema::correctTypoInParmVarReference(
StringRef Typo,
@@ -1040,7 +1038,7 @@ bool ResolveTParamReferenceHelper(
}
return false;
}
-} // unnamed namespace
+} // end anonymous namespace
bool Sema::resolveTParamReference(
StringRef Name,
@@ -1067,7 +1065,7 @@ void CorrectTypoInTParamReferenceHelper(
Corrector);
}
}
-} // unnamed namespace
+} // end anonymous namespace
StringRef Sema::correctTypoInTParamReference(
StringRef Typo,
@@ -1095,4 +1093,3 @@ Sema::getInlineCommandRenderKind(StringRef Name) const {
} // end namespace comments
} // end namespace clang
-
diff --git a/contrib/llvm/tools/clang/lib/AST/Decl.cpp b/contrib/llvm/tools/clang/lib/AST/Decl.cpp
index 427ca5efcd69..d1e8d25ea044 100644
--- a/contrib/llvm/tools/clang/lib/AST/Decl.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Decl.cpp
@@ -18,6 +18,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
@@ -221,7 +222,7 @@ static Optional<Visibility> getVisibilityOf(const NamedDecl *D,
// implies visibility(default).
if (D->getASTContext().getTargetInfo().getTriple().isOSDarwin()) {
for (const auto *A : D->specific_attrs<AvailabilityAttr>())
- if (A->getPlatform()->getName().equals("macosx"))
+ if (A->getPlatform()->getName().equals("macos"))
return DefaultVisibility;
}
@@ -591,12 +592,14 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D,
if (Var->getStorageClass() == SC_Static)
return LinkageInfo::internal();
- // - a non-volatile object or reference that is explicitly declared const
- // or constexpr and neither explicitly declared extern nor previously
- // declared to have external linkage; or (there is no equivalent in C99)
+ // - a non-inline, non-volatile object or reference that is explicitly
+ // declared const or constexpr and neither explicitly declared extern
+ // nor previously declared to have external linkage; or (there is no
+ // equivalent in C99)
if (Context.getLangOpts().CPlusPlus &&
Var->getType().isConstQualified() &&
- !Var->getType().isVolatileQualified()) {
+ !Var->getType().isVolatileQualified() &&
+ !Var->isInline()) {
const VarDecl *PrevVar = Var->getPreviousDecl();
if (PrevVar)
return getLVForDecl(PrevVar, computation);
@@ -1421,16 +1424,13 @@ void NamedDecl::printQualifiedName(raw_ostream &OS,
Ctx = Ctx->getParent();
}
- for (ContextsTy::reverse_iterator I = Contexts.rbegin(), E = Contexts.rend();
- I != E; ++I) {
- if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(*I)) {
+ for (const DeclContext *DC : reverse(Contexts)) {
+ if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
OS << Spec->getName();
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
- TemplateSpecializationType::PrintTemplateArgumentList(OS,
- TemplateArgs.data(),
- TemplateArgs.size(),
- P);
- } else if (const auto *ND = dyn_cast<NamespaceDecl>(*I)) {
+ TemplateSpecializationType::PrintTemplateArgumentList(
+ OS, TemplateArgs.asArray(), P);
+ } else if (const auto *ND = dyn_cast<NamespaceDecl>(DC)) {
if (P.SuppressUnwrittenScope &&
(ND->isAnonymousNamespace() || ND->isInline()))
continue;
@@ -1440,12 +1440,12 @@ void NamedDecl::printQualifiedName(raw_ostream &OS,
}
else
OS << *ND;
- } else if (const auto *RD = dyn_cast<RecordDecl>(*I)) {
+ } else if (const auto *RD = dyn_cast<RecordDecl>(DC)) {
if (!RD->getIdentifier())
OS << "(anonymous " << RD->getKindName() << ')';
else
OS << *RD;
- } else if (const auto *FD = dyn_cast<FunctionDecl>(*I)) {
+ } else if (const auto *FD = dyn_cast<FunctionDecl>(DC)) {
const FunctionProtoType *FT = nullptr;
if (FD->hasWrittenPrototype())
FT = dyn_cast<FunctionProtoType>(FD->getType()->castAs<FunctionType>());
@@ -1466,7 +1466,7 @@ void NamedDecl::printQualifiedName(raw_ostream &OS,
}
}
OS << ')';
- } else if (const auto *ED = dyn_cast<EnumDecl>(*I)) {
+ } else if (const auto *ED = dyn_cast<EnumDecl>(DC)) {
// C++ [dcl.enum]p10: Each enum-name and each unscoped
// enumerator is declared in the scope that immediately contains
// the enum-specifier. Each scoped enumerator is declared in the
@@ -1476,7 +1476,7 @@ void NamedDecl::printQualifiedName(raw_ostream &OS,
else
continue;
} else {
- OS << *cast<NamedDecl>(*I);
+ OS << *cast<NamedDecl>(DC);
}
OS << "::";
}
@@ -1912,7 +1912,9 @@ VarDecl::isThisDeclarationADefinition(ASTContext &C) const {
// C++ [basic.def]p2:
// A declaration is a definition unless [...] it contains the 'extern'
// specifier or a linkage-specification and neither an initializer [...],
- // it declares a static data member in a class declaration [...].
+ // it declares a non-inline static data member in a class declaration [...],
+ // it declares a static data member outside a class definition and the variable
+ // was defined within the class with the constexpr specifier [...],
// C++1y [temp.expl.spec]p15:
// An explicit specialization of a static data member or an explicit
// specialization of a static data member template is a definition if the
@@ -1922,6 +1924,8 @@ VarDecl::isThisDeclarationADefinition(ASTContext &C) const {
// a static data member template outside the containing class?
if (isStaticDataMember()) {
if (isOutOfLine() &&
+ !(getCanonicalDecl()->isInline() &&
+ getCanonicalDecl()->isConstexpr()) &&
(hasInit() ||
// If the first declaration is out-of-line, this may be an
// instantiation of an out-of-line partial specialization of a variable
@@ -1932,6 +1936,8 @@ VarDecl::isThisDeclarationADefinition(ASTContext &C) const {
TSK_ExplicitSpecialization) ||
isa<VarTemplatePartialSpecializationDecl>(this)))
return Definition;
+ else if (!isOutOfLine() && isInline())
+ return Definition;
else
return DeclarationOnly;
}
@@ -1945,7 +1951,7 @@ VarDecl::isThisDeclarationADefinition(ASTContext &C) const {
if (hasInit())
return Definition;
- if (hasAttr<AliasAttr>())
+ if (hasDefiningAttr())
return Definition;
if (const auto *SAA = getAttr<SelectAnyAttr>())
@@ -2072,18 +2078,6 @@ bool VarDecl::isOutOfLine() const {
return false;
}
-VarDecl *VarDecl::getOutOfLineDefinition() {
- if (!isStaticDataMember())
- return nullptr;
-
- for (auto RD : redecls()) {
- if (RD->getLexicalDeclContext()->isFileContext())
- return RD;
- }
-
- return nullptr;
-}
-
void VarDecl::setInit(Expr *I) {
if (auto *Eval = Init.dyn_cast<EvaluatedStmt *>()) {
Eval->~EvaluatedStmt();
@@ -2448,7 +2442,7 @@ void FunctionDecl::getNameForDiagnostic(
const TemplateArgumentList *TemplateArgs = getTemplateSpecializationArgs();
if (TemplateArgs)
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, TemplateArgs->data(), TemplateArgs->size(), Policy);
+ OS, TemplateArgs->asArray(), Policy);
}
bool FunctionDecl::isVariadic() const {
@@ -2485,7 +2479,7 @@ bool FunctionDecl::hasTrivialBody() const
bool FunctionDecl::isDefined(const FunctionDecl *&Definition) const {
for (auto I : redecls()) {
if (I->IsDeleted || I->IsDefaulted || I->Body || I->IsLateTemplateParsed ||
- I->hasAttr<AliasAttr>()) {
+ I->hasDefiningAttr()) {
Definition = I->IsDeleted ? I->getCanonicalDecl() : I;
return true;
}
@@ -2708,8 +2702,7 @@ unsigned FunctionDecl::getBuiltinID() const {
// declaration, for instance "extern "C" { namespace std { decl } }".
if (!LinkageDecl) {
if (BuiltinID == Builtin::BI__GetExceptionInfo &&
- Context.getTargetInfo().getCXXABI().isMicrosoft() &&
- isInStdNamespace())
+ Context.getTargetInfo().getCXXABI().isMicrosoft())
return Builtin::BI__GetExceptionInfo;
return 0;
}
@@ -2733,6 +2726,12 @@ unsigned FunctionDecl::getBuiltinID() const {
if (getStorageClass() == SC_Static)
return 0;
+ // OpenCL v1.2 s6.9.f - The library functions defined in
+ // the C99 standard headers are not available.
+ if (Context.getLangOpts().OpenCL &&
+ Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
+ return 0;
+
return BuiltinID;
}
@@ -2788,7 +2787,7 @@ unsigned FunctionDecl::getMinRequiredArguments() const {
return getNumParams();
unsigned NumRequiredArgs = 0;
- for (auto *Param : params())
+ for (auto *Param : parameters())
if (!Param->isParameterPack() && !Param->hasDefaultArg())
++NumRequiredArgs;
return NumRequiredArgs;
@@ -2929,16 +2928,22 @@ SourceRange FunctionDecl::getReturnTypeSourceRange() const {
return RTRange;
}
-bool FunctionDecl::hasUnusedResultAttr() const {
+const Attr *FunctionDecl::getUnusedResultAttr() const {
QualType RetType = getReturnType();
if (RetType->isRecordType()) {
const CXXRecordDecl *Ret = RetType->getAsCXXRecordDecl();
const auto *MD = dyn_cast<CXXMethodDecl>(this);
- if (Ret && Ret->hasAttr<WarnUnusedResultAttr>() &&
- !(MD && MD->getCorrespondingMethodInClass(Ret, true)))
- return true;
+ if (Ret && !(MD && MD->getCorrespondingMethodInClass(Ret, true))) {
+ if (const auto *R = Ret->getAttr<WarnUnusedResultAttr>())
+ return R;
+ }
+ } else if (const auto *ET = RetType->getAs<EnumType>()) {
+ if (const EnumDecl *ED = ET->getDecl()) {
+ if (const auto *R = ED->getAttr<WarnUnusedResultAttr>())
+ return R;
+ }
}
- return hasAttr<WarnUnusedResultAttr>();
+ return getAttr<WarnUnusedResultAttr>();
}
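+
+// E.g. with the EnumType case added above, discarding the result of f() is
+// now diagnosed (sketch; [[nodiscard]] maps to WarnUnusedResultAttr):
+//   enum class [[nodiscard]] Error { Ok, Bad };
+//   Error f();
+//   void g() { f(); } // warning: ignoring return value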
/// \brief For an inline function definition in C, or for a gnu_inline function
@@ -3520,6 +3525,7 @@ SourceLocation TagDecl::getOuterLocStart() const {
}
SourceRange TagDecl::getSourceRange() const {
+ SourceLocation RBraceLoc = BraceRange.getEnd();
SourceLocation E = RBraceLoc.isValid() ? RBraceLoc : getLocation();
return SourceRange(getOuterLocStart(), E);
}
@@ -3675,6 +3681,21 @@ void EnumDecl::setTemplateSpecializationKind(TemplateSpecializationKind TSK,
MSI->setPointOfInstantiation(PointOfInstantiation);
}
+EnumDecl *EnumDecl::getTemplateInstantiationPattern() const {
+ if (MemberSpecializationInfo *MSInfo = getMemberSpecializationInfo()) {
+ if (isTemplateInstantiation(MSInfo->getTemplateSpecializationKind())) {
+ EnumDecl *ED = getInstantiatedFromMemberEnum();
+ while (auto *NewED = ED->getInstantiatedFromMemberEnum())
+ ED = NewED;
+ return ED;
+ }
+ }
+
+ assert(!isTemplateInstantiation(getTemplateSpecializationKind()) &&
+ "couldn't find pattern for enum instantiation");
+ return nullptr;
+}
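+
+// E.g. (sketch): for
+//   template <typename T> struct X { enum E { a }; };
+//   X<int>::E v;
+// the instantiation pattern of the member enum X<int>::E is X<T>::E, found
+// by walking back through any intermediate member instantiations.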
+
EnumDecl *EnumDecl::getInstantiatedFromMemberEnum() const {
if (SpecializationInfo)
return cast<EnumDecl>(SpecializationInfo->getInstantiatedFrom());
@@ -3900,6 +3921,53 @@ TranslationUnitDecl *TranslationUnitDecl::Create(ASTContext &C) {
return new (C, (DeclContext *)nullptr) TranslationUnitDecl(C);
}
+void PragmaCommentDecl::anchor() { }
+
+PragmaCommentDecl *PragmaCommentDecl::Create(const ASTContext &C,
+ TranslationUnitDecl *DC,
+ SourceLocation CommentLoc,
+ PragmaMSCommentKind CommentKind,
+ StringRef Arg) {
+ PragmaCommentDecl *PCD =
+ new (C, DC, additionalSizeToAlloc<char>(Arg.size() + 1))
+ PragmaCommentDecl(DC, CommentLoc, CommentKind);
+ memcpy(PCD->getTrailingObjects<char>(), Arg.data(), Arg.size());
+ PCD->getTrailingObjects<char>()[Arg.size()] = '\0';
+ return PCD;
+}
+
+PragmaCommentDecl *PragmaCommentDecl::CreateDeserialized(ASTContext &C,
+ unsigned ID,
+ unsigned ArgSize) {
+ return new (C, ID, additionalSizeToAlloc<char>(ArgSize + 1))
+ PragmaCommentDecl(nullptr, SourceLocation(), PCK_Unknown);
+}
+
+void PragmaDetectMismatchDecl::anchor() { }
+
+PragmaDetectMismatchDecl *
+PragmaDetectMismatchDecl::Create(const ASTContext &C, TranslationUnitDecl *DC,
+ SourceLocation Loc, StringRef Name,
+ StringRef Value) {
+ size_t ValueStart = Name.size() + 1;
+ PragmaDetectMismatchDecl *PDMD =
+ new (C, DC, additionalSizeToAlloc<char>(ValueStart + Value.size() + 1))
+ PragmaDetectMismatchDecl(DC, Loc, ValueStart);
+ memcpy(PDMD->getTrailingObjects<char>(), Name.data(), Name.size());
+ PDMD->getTrailingObjects<char>()[Name.size()] = '\0';
+ memcpy(PDMD->getTrailingObjects<char>() + ValueStart, Value.data(),
+ Value.size());
+ PDMD->getTrailingObjects<char>()[ValueStart + Value.size()] = '\0';
+ return PDMD;
+}
+
+PragmaDetectMismatchDecl *
+PragmaDetectMismatchDecl::CreateDeserialized(ASTContext &C, unsigned ID,
+ unsigned NameValueSize) {
+ return new (C, ID, additionalSizeToAlloc<char>(NameValueSize + 1))
+ PragmaDetectMismatchDecl(nullptr, SourceLocation(), 0);
+}
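+
+// Sketch of the trailing buffer built by Create above: for
+//   #pragma detect_mismatch("myLib", "v2")
+// the single char array holds "myLib\0v2\0", with ValueStart == 6
+// (Name.size() + 1) marking where the value string begins.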
+
void ExternCContextDecl::anchor() { }
ExternCContextDecl *ExternCContextDecl::Create(const ASTContext &C,
@@ -4026,8 +4094,10 @@ void IndirectFieldDecl::anchor() { }
IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName N,
- QualType T, NamedDecl **CH, unsigned CHS)
- : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH), ChainingSize(CHS) {
+ QualType T,
+ MutableArrayRef<NamedDecl *> CH)
+ : ValueDecl(IndirectField, DC, L, N, T), Chaining(CH.data()),
+ ChainingSize(CH.size()) {
// In C++, indirect field declarations conflict with tag declarations in the
// same scope, so add them to IDNS_Tag so that tag redeclaration finds them.
if (C.getLangOpts().CPlusPlus)
@@ -4036,16 +4106,15 @@ IndirectFieldDecl::IndirectFieldDecl(ASTContext &C, DeclContext *DC,
IndirectFieldDecl *
IndirectFieldDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L,
- IdentifierInfo *Id, QualType T, NamedDecl **CH,
- unsigned CHS) {
- return new (C, DC) IndirectFieldDecl(C, DC, L, Id, T, CH, CHS);
+ IdentifierInfo *Id, QualType T,
+ llvm::MutableArrayRef<NamedDecl *> CH) {
+ return new (C, DC) IndirectFieldDecl(C, DC, L, Id, T, CH);
}
IndirectFieldDecl *IndirectFieldDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) IndirectFieldDecl(C, nullptr, SourceLocation(),
- DeclarationName(), QualType(), nullptr,
- 0);
+ DeclarationName(), QualType(), None);
}
SourceRange EnumConstantDecl::getSourceRange() const {
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp
index 72587e388e47..bfb7d02b2955 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp
@@ -46,7 +46,7 @@ void Decl::updateOutOfDate(IdentifierInfo &II) const {
}
#define DECL(DERIVED, BASE) \
- static_assert(Decl::DeclObjAlignment >= \
+ static_assert(llvm::AlignOf<Decl>::Alignment >= \
llvm::AlignOf<DERIVED##Decl>::Alignment, \
"Alignment sufficient after objects prepended to " #DERIVED);
#define ABSTRACT_DECL(DECL)
@@ -56,7 +56,7 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Context,
unsigned ID, std::size_t Extra) {
// Allocate an extra 8 bytes worth of storage, which ensures that the
// resulting pointer will still be 8-byte aligned.
- static_assert(sizeof(unsigned) * 2 >= DeclObjAlignment,
+ static_assert(sizeof(unsigned) * 2 >= llvm::AlignOf<Decl>::Alignment,
"Decl won't be misaligned");
void *Start = Context.Allocate(Size + Extra + 8);
void *Result = (char*)Start + 8;
@@ -81,7 +81,8 @@ void *Decl::operator new(std::size_t Size, const ASTContext &Ctx,
// Ensure required alignment of the resulting object by adding extra
// padding at the start if required.
size_t ExtraAlign =
- llvm::OffsetToAlignment(sizeof(Module *), DeclObjAlignment);
+ llvm::OffsetToAlignment(sizeof(Module *),
+ llvm::AlignOf<Decl>::Alignment);
char *Buffer = reinterpret_cast<char *>(
::operator new(ExtraAlign + sizeof(Module *) + Size + Extra, Ctx));
Buffer += ExtraAlign;
@@ -196,6 +197,17 @@ bool Decl::isTemplateDecl() const {
return isa<TemplateDecl>(this);
}
+TemplateDecl *Decl::getDescribedTemplate() const {
+ if (auto *FD = dyn_cast<FunctionDecl>(this))
+ return FD->getDescribedFunctionTemplate();
+ else if (auto *RD = dyn_cast<CXXRecordDecl>(this))
+ return RD->getDescribedClassTemplate();
+ else if (auto *VD = dyn_cast<VarDecl>(this))
+ return VD->getDescribedVarTemplate();
+
+ return nullptr;
+}
+
const DeclContext *Decl::getParentFunctionOrMethod() const {
for (const DeclContext *DC = getDeclContext();
DC && !DC->isTranslationUnit() && !DC->isNamespace();
@@ -329,25 +341,29 @@ unsigned Decl::getMaxAlignment() const {
return Align;
}
-bool Decl::isUsed(bool CheckUsedAttr) const {
- if (Used)
+bool Decl::isUsed(bool CheckUsedAttr) const {
+ const Decl *CanonD = getCanonicalDecl();
+ if (CanonD->Used)
return true;
-
+
// Check for used attribute.
- if (CheckUsedAttr && hasAttr<UsedAttr>())
+ // Ask the most recent decl, since attributes accumulate in the redecl chain.
+ if (CheckUsedAttr && getMostRecentDecl()->hasAttr<UsedAttr>())
return true;
- return false;
+ // The information may have not been deserialized yet. Force deserialization
+ // to complete the needed information.
+ return getMostRecentDecl()->getCanonicalDecl()->Used;
}
void Decl::markUsed(ASTContext &C) {
- if (Used)
+ if (isUsed(false))
return;
if (C.getASTMutationListener())
C.getASTMutationListener()->DeclarationMarkedUsed(this);
- Used = true;
+ setIsUsed();
}
bool Decl::isReferenced() const {
@@ -362,6 +378,18 @@ bool Decl::isReferenced() const {
return false;
}
+bool Decl::hasDefiningAttr() const {
+ return hasAttr<AliasAttr>() || hasAttr<IFuncAttr>();
+}
+
+const Attr *Decl::getDefiningAttr() const {
+ if (AliasAttr *AA = getAttr<AliasAttr>())
+ return AA;
+ if (IFuncAttr *IFA = getAttr<IFuncAttr>())
+ return IFA;
+ return nullptr;
+}
+
/// \brief Determine the availability of the given declaration based on
/// the target platform.
///
@@ -432,7 +460,7 @@ static AvailabilityResult CheckAvailability(ASTContext &Context,
<< VTI << HintMessage;
}
- return AR_NotYetIntroduced;
+ return A->getStrict() ? AR_Unavailable : AR_NotYetIntroduced;
}
// Make sure that this declaration hasn't been obsoleted.
@@ -467,6 +495,9 @@ static AvailabilityResult CheckAvailability(ASTContext &Context,
}
AvailabilityResult Decl::getAvailability(std::string *Message) const {
+ if (auto *FTD = dyn_cast<FunctionTemplateDecl>(this))
+ return FTD->getTemplatedDecl()->getAvailability(Message);
+
AvailabilityResult Result = AR_Available;
std::string ResultMessage;
@@ -563,6 +594,7 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) {
case Function:
case CXXMethod:
case CXXConstructor:
+ case ConstructorUsingShadow:
case CXXDestructor:
case CXXConversion:
case EnumConstant:
@@ -630,6 +662,9 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) {
case TemplateTemplateParm:
return IDNS_Ordinary | IDNS_Tag | IDNS_Type;
+ case OMPDeclareReduction:
+ return IDNS_OMPReduction;
+
// Never have names.
case Friend:
case FriendTemplate:
@@ -638,6 +673,8 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) {
case FileScopeAsm:
case StaticAssert:
case ObjCPropertyImpl:
+ case PragmaComment:
+ case PragmaDetectMismatch:
case Block:
case Captured:
case TranslationUnit:
@@ -655,6 +692,7 @@ unsigned Decl::getIdentifierNamespaceForKind(Kind DeclKind) {
case ObjCCategoryImpl:
case Import:
case OMPThreadPrivate:
+ case OMPCapturedExpr:
case Empty:
// Never looked up by name.
return 0;
@@ -957,6 +995,7 @@ DeclContext *DeclContext::getPrimaryContext() {
case Decl::LinkageSpec:
case Decl::Block:
case Decl::Captured:
+ case Decl::OMPDeclareReduction:
// There is only one DeclContext for these entities.
return this;
@@ -1549,9 +1588,12 @@ void DeclContext::makeDeclVisibleInContextWithFlags(NamedDecl *D, bool Internal,
bool Recoverable) {
assert(this == getPrimaryContext() && "expected a primary DC");
- // Skip declarations within functions.
- if (isFunctionOrMethod())
+ if (!isLookupContext()) {
+ if (isTransparentContext())
+ getParent()->getPrimaryContext()
+ ->makeDeclVisibleInContextWithFlags(D, Internal, Recoverable);
return;
+ }
// Skip declarations which should be invisible to name lookup.
if (shouldBeHidden(D))
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp b/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp
index 4f24fdc28f71..81f94148d6ed 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclCXX.cpp
@@ -46,34 +46,33 @@ void LazyASTUnresolvedSet::getFromExternalSource(ASTContext &C) const {
}
CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
- : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0),
- Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false),
- Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true),
- HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false),
- HasMutableFields(false), HasVariantMembers(false), HasOnlyCMembers(true),
- HasInClassInitializer(false), HasUninitializedReferenceMember(false),
- NeedOverloadResolutionForMoveConstructor(false),
- NeedOverloadResolutionForMoveAssignment(false),
- NeedOverloadResolutionForDestructor(false),
- DefaultedMoveConstructorIsDeleted(false),
- DefaultedMoveAssignmentIsDeleted(false),
- DefaultedDestructorIsDeleted(false),
- HasTrivialSpecialMembers(SMF_All),
- DeclaredNonTrivialSpecialMembers(0),
- HasIrrelevantDestructor(true),
- HasConstexprNonCopyMoveConstructor(false),
- DefaultedDefaultConstructorIsConstexpr(true),
- HasConstexprDefaultConstructor(false),
- HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false),
- UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0),
- ImplicitCopyConstructorHasConstParam(true),
- ImplicitCopyAssignmentHasConstParam(true),
- HasDeclaredCopyConstructorWithConstParam(false),
- HasDeclaredCopyAssignmentWithConstParam(false),
- IsLambda(false), IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0),
- Bases(), VBases(),
- Definition(D), FirstFriend() {
-}
+ : UserDeclaredConstructor(false), UserDeclaredSpecialMembers(0),
+ Aggregate(true), PlainOldData(true), Empty(true), Polymorphic(false),
+ Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true),
+ HasPrivateFields(false), HasProtectedFields(false),
+ HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false),
+ HasOnlyCMembers(true), HasInClassInitializer(false),
+ HasUninitializedReferenceMember(false), HasUninitializedFields(false),
+ HasInheritedConstructor(false), HasInheritedAssignment(false),
+ NeedOverloadResolutionForMoveConstructor(false),
+ NeedOverloadResolutionForMoveAssignment(false),
+ NeedOverloadResolutionForDestructor(false),
+ DefaultedMoveConstructorIsDeleted(false),
+ DefaultedMoveAssignmentIsDeleted(false),
+ DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All),
+ DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true),
+ HasConstexprNonCopyMoveConstructor(false),
+ HasDefaultedDefaultConstructor(false),
+ DefaultedDefaultConstructorIsConstexpr(true),
+ HasConstexprDefaultConstructor(false),
+ HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false),
+ UserProvidedDefaultConstructor(false), DeclaredSpecialMembers(0),
+ ImplicitCopyConstructorHasConstParam(true),
+ ImplicitCopyAssignmentHasConstParam(true),
+ HasDeclaredCopyConstructorWithConstParam(false),
+ HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false),
+ IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), Bases(),
+ VBases(), Definition(D), FirstFriend() {}
CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const {
return Bases.get(Definition->getASTContext().getExternalSource());
@@ -89,7 +88,7 @@ CXXRecordDecl::CXXRecordDecl(Kind K, TagKind TK, const ASTContext &C,
CXXRecordDecl *PrevDecl)
: RecordDecl(K, TK, C, DC, StartLoc, IdLoc, Id, PrevDecl),
DefinitionData(PrevDecl ? PrevDecl->DefinitionData
- : DefinitionDataPtr(this)),
+ : nullptr),
TemplateOrInstantiation() {}
CXXRecordDecl *CXXRecordDecl::Create(const ASTContext &C, TagKind TK,
@@ -143,9 +142,11 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
C.Deallocate(data().getBases());
if (NumBases) {
- // C++ [dcl.init.aggr]p1:
- // An aggregate is [...] a class with [...] no base classes [...].
- data().Aggregate = false;
+ if (!C.getLangOpts().CPlusPlus1z) {
+ // C++ [dcl.init.aggr]p1:
+ // An aggregate is [...] a class with [...] no base classes [...].
+ data().Aggregate = false;
+ }
// C++ [class]p4:
// A POD-struct is an aggregate class...
@@ -171,8 +172,6 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
CXXRecordDecl *BaseClassDecl
= cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl());
- // A class with a non-empty base class is not empty.
- // FIXME: Standard ref?
if (!BaseClassDecl->isEmpty()) {
if (!data().Empty) {
// C++0x [class]p7:
@@ -186,10 +185,18 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
data().IsStandardLayout = false;
}
+ // C++14 [meta.unary.prop]p4:
+ // T is a class type [...] with [...] no base class B for which
+ // is_empty<B>::value is false.
data().Empty = false;
data().HasNoNonEmptyBases = false;
}
+ // C++1z [dcl.init.agg]p1:
+ // An aggregate is a class with [...] no private or protected base classes
+ if (Base->getAccessSpecifier() != AS_public)
+ data().Aggregate = false;
+
// C++ [class.virtual]p1:
// A class that declares or inherits a virtual function is called a
// polymorphic class.
@@ -220,6 +227,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
if (CXXRecordDecl *VBaseDecl = VBase.getType()->getAsCXXRecordDecl())
if (!VBaseDecl->hasCopyConstructorWithConstParam())
data().ImplicitCopyConstructorHasConstParam = false;
+
+ // C++1z [dcl.init.agg]p1:
+ // An aggregate is a class with [...] no virtual base classes
+ data().Aggregate = false;
}
}
@@ -228,11 +239,15 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
if (SeenVBaseTypes.insert(C.getCanonicalType(BaseType)).second)
VBases.push_back(Base);
- // C++0x [meta.unary.prop] is_empty:
- // T is a class type, but not a union type, with ... no virtual base
- // classes
+ // C++14 [meta.unary.prop] is_empty:
+ // T is a class type, but not a union type, with ... no virtual base
+ // classes
data().Empty = false;
+ // C++1z [dcl.init.agg]p1:
+ // An aggregate is a class with [...] no virtual base classes
+ data().Aggregate = false;
+
// C++11 [class.ctor]p5, C++11 [class.copy]p12, C++11 [class.copy]p25:
// A [default constructor, copy/move constructor, or copy/move assignment
// operator for a class X] is trivial [...] if:
@@ -332,6 +347,9 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases,
if (BaseClassDecl->hasUninitializedReferenceMember())
data().HasUninitializedReferenceMember = true;
+ if (!BaseClassDecl->allowConstDefaultInit())
+ data().HasUninitializedFields = true;
+
addedClassSubobject(BaseClassDecl);
}
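
Several hunks above implement the C++1z (C++17) aggregate relaxation tracked by data().Aggregate: a base class now disqualifies an aggregate only when it is private, protected, or virtual. A hypothetical example of what the updated flag admits:

    struct B { int x; };
    struct D : B { int y; };   // C++17: D is still an aggregate
    D d{{1}, 2};               // initializes the B subobject, then y

    struct V : virtual B { };  // never an aggregate: virtual base
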
@@ -430,6 +448,15 @@ void CXXRecordDecl::addedMember(Decl *D) {
FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(D);
if (FunTmpl)
D = FunTmpl->getTemplatedDecl();
+
+ // FIXME: Pass NamedDecl* to addedMember?
+ Decl *DUnderlying = D;
+ if (auto *ND = dyn_cast<NamedDecl>(DUnderlying)) {
+ DUnderlying = ND->getUnderlyingDecl();
+ if (FunctionTemplateDecl *UnderlyingFunTmpl =
+ dyn_cast<FunctionTemplateDecl>(DUnderlying))
+ DUnderlying = UnderlyingFunTmpl->getTemplatedDecl();
+ }
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isVirtual()) {
@@ -441,8 +468,8 @@ void CXXRecordDecl::addedMember(Decl *D) {
// A POD-struct is an aggregate class...
data().PlainOldData = false;
- // Virtual functions make the class non-empty.
- // FIXME: Standard ref?
+ // C++14 [meta.unary.prop]p4:
+ // T is a class type [...] with [...] no virtual member functions...
data().Empty = false;
// C++ [class.virtual]p1:
@@ -485,18 +512,15 @@ void CXXRecordDecl::addedMember(Decl *D) {
data().PlainOldData = false;
}
- // Technically, "user-provided" is only defined for special member
- // functions, but the intent of the standard is clearly that it should apply
- // to all functions.
- bool UserProvided = Constructor->isUserProvided();
-
if (Constructor->isDefaultConstructor()) {
SMKind |= SMF_DefaultConstructor;
- if (UserProvided)
+ if (Constructor->isUserProvided())
data().UserProvidedDefaultConstructor = true;
if (Constructor->isConstexpr())
data().HasConstexprDefaultConstructor = true;
+ if (Constructor->isDefaulted())
+ data().HasDefaultedDefaultConstructor = true;
}
if (!FunTmpl) {
@@ -509,9 +533,17 @@ void CXXRecordDecl::addedMember(Decl *D) {
} else if (Constructor->isMoveConstructor())
SMKind |= SMF_MoveConstructor;
}
+ }
+ // Handle constructors, including those inherited from base classes.
+ if (CXXConstructorDecl *Constructor =
+ dyn_cast<CXXConstructorDecl>(DUnderlying)) {
// Record if we see any constexpr constructors which are neither copy
// nor move constructors.
+ // C++1z [basic.types]p10:
+ // [...] has at least one constexpr constructor or constructor template
+ // (possibly inherited from a base class) that is not a copy or move
+ // constructor [...]
if (Constructor->isConstexpr() && !Constructor->isCopyOrMoveConstructor())
data().HasConstexprNonCopyMoveConstructor = true;
@@ -521,8 +553,12 @@ void CXXRecordDecl::addedMember(Decl *D) {
// C++11 [dcl.init.aggr]p1:
// An aggregate is an array or a class with no user-provided
// constructors [...].
+    // Since C++1z, this rule also covers constructors inherited from a base:
+    //   An aggregate is an array or a class with no user-provided
+    //   constructors (including those inherited from a base class) [...].
if (getASTContext().getLangOpts().CPlusPlus11
- ? UserProvided : !Constructor->isImplicit())
+ ? Constructor->isUserProvided()
+ : !Constructor->isImplicit())
data().Aggregate = false;
}
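
The new DUnderlying walk means a constructor brought in by a using-declaration is inspected through its shadow declaration, so the C++1z literal-type rule quoted above can be satisfied by an inherited constructor. A sketch of the case this enables:

    struct B { constexpr B(int) { } };
    struct D : B {
      using B::B;   // the inherited constexpr constructor now sets
    };              // HasConstexprNonCopyMoveConstructor for D
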
@@ -702,6 +738,15 @@ void CXXRecordDecl::addedMember(Decl *D) {
data().IsStandardLayout = false;
}
+ if (!Field->hasInClassInitializer() && !Field->isMutable()) {
+ if (CXXRecordDecl *FieldType = Field->getType()->getAsCXXRecordDecl()) {
+ if (FieldType->hasDefinition() && !FieldType->allowConstDefaultInit())
+ data().HasUninitializedFields = true;
+ } else {
+ data().HasUninitializedFields = true;
+ }
+ }
+
// Record if this field is the first non-literal or volatile field or base.
if (!T->isLiteralType(Context) || T.isVolatileQualified())
data().HasNonLiteralTypeFieldsOrBases = true;
@@ -720,7 +765,7 @@ void CXXRecordDecl::addedMember(Decl *D) {
// An aggregate is a [...] class with [...] no
// brace-or-equal-initializers for non-static data members.
//
- // This rule was removed in C++1y.
+ // This rule was removed in C++14.
if (!getASTContext().getLangOpts().CPlusPlus14)
data().Aggregate = false;
@@ -762,6 +807,17 @@ void CXXRecordDecl::addedMember(Decl *D) {
data().DefaultedDestructorIsDeleted = true;
}
+    // For an anonymous union member, overload resolution for the containing
+    // class must also perform overload resolution over the union's members.
+ if (Field->isAnonymousStructOrUnion()) {
+ data().NeedOverloadResolutionForMoveConstructor |=
+ FieldRec->data().NeedOverloadResolutionForMoveConstructor;
+ data().NeedOverloadResolutionForMoveAssignment |=
+ FieldRec->data().NeedOverloadResolutionForMoveAssignment;
+ data().NeedOverloadResolutionForDestructor |=
+ FieldRec->data().NeedOverloadResolutionForDestructor;
+ }
+
// C++0x [class.ctor]p5:
// A default constructor is trivial [...] if:
// -- for all the non-static data members of its class that are of
@@ -910,7 +966,9 @@ void CXXRecordDecl::addedMember(Decl *D) {
if (!data().HasNoNonEmptyBases)
data().IsStandardLayout = false;
- // If this is not a zero-length bit-field, then the class is not empty.
+ // C++14 [meta.unary.prop]p4:
+ // T is a class type [...] with [...] no non-static data members other
+ // than bit-fields of length 0...
if (data().Empty) {
if (!Field->isBitField() ||
(!Field->getBitWidth()->isTypeDependent() &&
@@ -928,6 +986,15 @@ void CXXRecordDecl::addedMember(Decl *D) {
data().Conversions.get(Ctx).addDecl(Ctx, Shadow, Shadow->getAccess());
}
}
+
+ if (UsingDecl *Using = dyn_cast<UsingDecl>(D)) {
+ if (Using->getDeclName().getNameKind() ==
+ DeclarationName::CXXConstructorName)
+ data().HasInheritedConstructor = true;
+
+ if (Using->getDeclName().getCXXOverloadedOperator() == OO_Equal)
+ data().HasInheritedAssignment = true;
+ }
}
void CXXRecordDecl::finishedDefaultedOrDeletedMember(CXXMethodDecl *D) {
@@ -1595,6 +1662,13 @@ unsigned CXXMethodDecl::size_overridden_methods() const {
return getASTContext().overridden_methods_size(this);
}
+CXXMethodDecl::overridden_method_range
+CXXMethodDecl::overridden_methods() const {
+ if (isa<CXXConstructorDecl>(this))
+ return overridden_method_range(nullptr, nullptr);
+ return getASTContext().overridden_methods(this);
+}
+
QualType CXXMethodDecl::getThisType(ASTContext &C) const {
// C++ 9.3.2p1: The type of this in a member function of a class X is X*.
// If the member function is declared const, the type of this is const X*,
@@ -1606,7 +1680,7 @@ QualType CXXMethodDecl::getThisType(ASTContext &C) const {
QualType ClassTy = C.getTypeDeclType(getParent());
ClassTy = C.getQualifiedType(ClassTy,
- Qualifiers::fromCVRMask(getTypeQualifiers()));
+ Qualifiers::fromCVRUMask(getTypeQualifiers()));
return C.getPointerType(ClassTy);
}
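
The switch from fromCVRMask to fromCVRUMask lets the type of 'this' carry the Microsoft __unaligned qualifier in addition to const/volatile/restrict. The const case is unchanged:

    struct S {
      void f() const;   // inside f, 'this' has type 'const S *'
    };
    // Under -fms-extensions, an __unaligned qualifier on the method would
    // now appear on 'this' as well (illustrative; the syntax is MS-specific).
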
@@ -1744,11 +1818,15 @@ SourceRange CXXCtorInitializer::getSourceRange() const {
void CXXConstructorDecl::anchor() { }
-CXXConstructorDecl *
-CXXConstructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
- return new (C, ID) CXXConstructorDecl(C, nullptr, SourceLocation(),
- DeclarationNameInfo(), QualType(),
- nullptr, false, false, false, false);
+CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C,
+ unsigned ID,
+ bool Inherited) {
+ unsigned Extra = additionalSizeToAlloc<InheritedConstructor>(Inherited);
+ auto *Result = new (C, ID, Extra) CXXConstructorDecl(
+ C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr,
+ false, false, false, false, InheritedConstructor());
+ Result->IsInheritingConstructor = Inherited;
+ return Result;
}
CXXConstructorDecl *
@@ -1757,13 +1835,16 @@ CXXConstructorDecl::Create(ASTContext &C, CXXRecordDecl *RD,
const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo,
bool isExplicit, bool isInline,
- bool isImplicitlyDeclared, bool isConstexpr) {
+ bool isImplicitlyDeclared, bool isConstexpr,
+ InheritedConstructor Inherited) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConstructorName &&
"Name must refer to a constructor");
- return new (C, RD) CXXConstructorDecl(C, RD, StartLoc, NameInfo, T, TInfo,
- isExplicit, isInline,
- isImplicitlyDeclared, isConstexpr);
+ unsigned Extra =
+ additionalSizeToAlloc<InheritedConstructor>(Inherited ? 1 : 0);
+ return new (C, RD, Extra) CXXConstructorDecl(
+ C, RD, StartLoc, NameInfo, T, TInfo, isExplicit, isInline,
+ isImplicitlyDeclared, isConstexpr, Inherited);
}
CXXConstructorDecl::init_const_iterator CXXConstructorDecl::init_begin() const {
@@ -1878,23 +1959,6 @@ bool CXXConstructorDecl::isSpecializationCopyingObject() const {
return true;
}
-const CXXConstructorDecl *CXXConstructorDecl::getInheritedConstructor() const {
- // Hack: we store the inherited constructor in the overridden method table
- method_iterator It = getASTContext().overridden_methods_begin(this);
- if (It == getASTContext().overridden_methods_end(this))
- return nullptr;
-
- return cast<CXXConstructorDecl>(*It);
-}
-
-void
-CXXConstructorDecl::setInheritedConstructor(const CXXConstructorDecl *BaseCtor){
- // Hack: we store the inherited constructor in the overridden method table
- assert(getASTContext().overridden_methods_size(this) == 0 &&
- "Base ctor already set.");
- getASTContext().addOverriddenMethod(this, BaseCtor);
-}
-
void CXXDestructorDecl::anchor() { }
CXXDestructorDecl *
@@ -2090,10 +2154,24 @@ NamespaceAliasDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
void UsingShadowDecl::anchor() { }
+UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, DeclContext *DC,
+ SourceLocation Loc, UsingDecl *Using,
+ NamedDecl *Target)
+ : NamedDecl(K, DC, Loc, Using ? Using->getDeclName() : DeclarationName()),
+ redeclarable_base(C), Underlying(Target),
+ UsingOrNextShadow(cast<NamedDecl>(Using)) {
+ if (Target)
+ IdentifierNamespace = Target->getIdentifierNamespace();
+ setImplicit();
+}
+
+UsingShadowDecl::UsingShadowDecl(Kind K, ASTContext &C, EmptyShell Empty)
+ : NamedDecl(K, nullptr, SourceLocation(), DeclarationName()),
+ redeclarable_base(C), Underlying(), UsingOrNextShadow() {}
+
UsingShadowDecl *
UsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
- return new (C, ID) UsingShadowDecl(C, nullptr, SourceLocation(),
- nullptr, nullptr);
+ return new (C, ID) UsingShadowDecl(UsingShadow, C, EmptyShell());
}
UsingDecl *UsingShadowDecl::getUsingDecl() const {
@@ -2104,6 +2182,25 @@ UsingDecl *UsingShadowDecl::getUsingDecl() const {
return cast<UsingDecl>(Shadow->UsingOrNextShadow);
}
+void ConstructorUsingShadowDecl::anchor() { }
+
+ConstructorUsingShadowDecl *
+ConstructorUsingShadowDecl::Create(ASTContext &C, DeclContext *DC,
+ SourceLocation Loc, UsingDecl *Using,
+ NamedDecl *Target, bool IsVirtual) {
+ return new (C, DC) ConstructorUsingShadowDecl(C, DC, Loc, Using, Target,
+ IsVirtual);
+}
+
+ConstructorUsingShadowDecl *
+ConstructorUsingShadowDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
+ return new (C, ID) ConstructorUsingShadowDecl(C, EmptyShell());
+}
+
+CXXRecordDecl *ConstructorUsingShadowDecl::getNominatedBaseClass() const {
+ return getUsingDecl()->getQualifier()->getAsRecordDecl();
+}
+
void UsingDecl::anchor() { }
void UsingDecl::addShadowDecl(UsingShadowDecl *S) {
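
ConstructorUsingShadowDecl gives inheriting-constructor using-declarations their own node, and getNominatedBaseClass recovers the base named by the qualifier. The source pattern the new class models:

    struct A { A(int); };
    struct B : A {
      using A::A;   // each A constructor gets a ConstructorUsingShadowDecl
    };              // in B; the nominated base class is A
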
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp
index 050a0f53f1e5..d2701211beae 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclObjC.cpp
@@ -152,7 +152,8 @@ bool ObjCContainerDecl::HasUserDeclaredSetterMethod(
ObjCPropertyDecl *
ObjCPropertyDecl::findPropertyDecl(const DeclContext *DC,
- const IdentifierInfo *propertyID) {
+ const IdentifierInfo *propertyID,
+ ObjCPropertyQueryKind queryKind) {
// If this context is a hidden protocol definition, don't find any
// property.
if (const ObjCProtocolDecl *Proto = dyn_cast<ObjCProtocolDecl>(DC)) {
@@ -166,15 +167,33 @@ ObjCPropertyDecl::findPropertyDecl(const DeclContext *DC,
if (auto *IDecl = dyn_cast<ObjCInterfaceDecl>(DC)) {
for (const auto *Ext : IDecl->known_extensions())
if (ObjCPropertyDecl *PD = ObjCPropertyDecl::findPropertyDecl(Ext,
- propertyID))
+ propertyID,
+ queryKind))
return PD;
}
DeclContext::lookup_result R = DC->lookup(propertyID);
+ ObjCPropertyDecl *classProp = nullptr;
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E;
++I)
- if (ObjCPropertyDecl *PD = dyn_cast<ObjCPropertyDecl>(*I))
- return PD;
+ if (ObjCPropertyDecl *PD = dyn_cast<ObjCPropertyDecl>(*I)) {
+ // If queryKind is unknown, we return the instance property if one
+ // exists; otherwise we return the class property.
+ if ((queryKind == ObjCPropertyQueryKind::OBJC_PR_query_unknown &&
+ !PD->isClassProperty()) ||
+ (queryKind == ObjCPropertyQueryKind::OBJC_PR_query_class &&
+ PD->isClassProperty()) ||
+ (queryKind == ObjCPropertyQueryKind::OBJC_PR_query_instance &&
+ !PD->isClassProperty()))
+ return PD;
+
+ if (PD->isClassProperty())
+ classProp = PD;
+ }
+
+ if (queryKind == ObjCPropertyQueryKind::OBJC_PR_query_unknown)
+    // We couldn't find an instance property; return the class property.
+ return classProp;
return nullptr;
}
@@ -192,7 +211,8 @@ ObjCPropertyDecl::getDefaultSynthIvarName(ASTContext &Ctx) const {
/// FindPropertyDeclaration - Finds declaration of the property given its name
/// in 'PropertyId' and returns it. It returns 0, if not found.
ObjCPropertyDecl *ObjCContainerDecl::FindPropertyDeclaration(
- const IdentifierInfo *PropertyId) const {
+ const IdentifierInfo *PropertyId,
+ ObjCPropertyQueryKind QueryKind) const {
// Don't find properties within hidden protocol definitions.
if (const ObjCProtocolDecl *Proto = dyn_cast<ObjCProtocolDecl>(this)) {
if (const ObjCProtocolDecl *Def = Proto->getDefinition())
@@ -204,13 +224,14 @@ ObjCPropertyDecl *ObjCContainerDecl::FindPropertyDeclaration(
// the class itself.
if (const auto *ClassDecl = dyn_cast<ObjCInterfaceDecl>(this)) {
for (const auto *Ext : ClassDecl->visible_extensions()) {
- if (auto *P = Ext->FindPropertyDeclaration(PropertyId))
+ if (auto *P = Ext->FindPropertyDeclaration(PropertyId, QueryKind))
return P;
}
}
if (ObjCPropertyDecl *PD =
- ObjCPropertyDecl::findPropertyDecl(cast<DeclContext>(this), PropertyId))
+ ObjCPropertyDecl::findPropertyDecl(cast<DeclContext>(this), PropertyId,
+ QueryKind))
return PD;
switch (getKind()) {
@@ -219,7 +240,8 @@ ObjCPropertyDecl *ObjCContainerDecl::FindPropertyDeclaration(
case Decl::ObjCProtocol: {
const ObjCProtocolDecl *PID = cast<ObjCProtocolDecl>(this);
for (const auto *I : PID->protocols())
- if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId))
+ if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId,
+ QueryKind))
return P;
break;
}
@@ -228,18 +250,20 @@ ObjCPropertyDecl *ObjCContainerDecl::FindPropertyDeclaration(
// Look through categories (but not extensions; they were handled above).
for (const auto *Cat : OID->visible_categories()) {
if (!Cat->IsClassExtension())
- if (ObjCPropertyDecl *P = Cat->FindPropertyDeclaration(PropertyId))
+ if (ObjCPropertyDecl *P = Cat->FindPropertyDeclaration(
+ PropertyId, QueryKind))
return P;
}
// Look through protocols.
for (const auto *I : OID->all_referenced_protocols())
- if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId))
+ if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId,
+ QueryKind))
return P;
// Finally, check the super class.
if (const ObjCInterfaceDecl *superClass = OID->getSuperClass())
- return superClass->FindPropertyDeclaration(PropertyId);
+ return superClass->FindPropertyDeclaration(PropertyId, QueryKind);
break;
}
case Decl::ObjCCategory: {
@@ -247,7 +271,8 @@ ObjCPropertyDecl *ObjCContainerDecl::FindPropertyDeclaration(
// Look through protocols.
if (!OCD->IsClassExtension())
for (const auto *I : OCD->protocols())
- if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId))
+ if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId,
+ QueryKind))
return P;
break;
}
@@ -319,7 +344,8 @@ SourceLocation ObjCInterfaceDecl::getSuperClassLoc() const {
///
ObjCPropertyDecl *
ObjCInterfaceDecl::FindPropertyVisibleInPrimaryClass(
- IdentifierInfo *PropertyId) const {
+ IdentifierInfo *PropertyId,
+ ObjCPropertyQueryKind QueryKind) const {
// FIXME: Should make sure no callers ever do this.
if (!hasDefinition())
return nullptr;
@@ -328,12 +354,14 @@ ObjCInterfaceDecl::FindPropertyVisibleInPrimaryClass(
LoadExternalDefinition();
if (ObjCPropertyDecl *PD =
- ObjCPropertyDecl::findPropertyDecl(cast<DeclContext>(this), PropertyId))
+ ObjCPropertyDecl::findPropertyDecl(cast<DeclContext>(this), PropertyId,
+ QueryKind))
return PD;
// Look through protocols.
for (const auto *I : all_referenced_protocols())
- if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId))
+ if (ObjCPropertyDecl *P = I->FindPropertyDeclaration(PropertyId,
+ QueryKind))
return P;
return nullptr;
@@ -342,13 +370,13 @@ ObjCInterfaceDecl::FindPropertyVisibleInPrimaryClass(
void ObjCInterfaceDecl::collectPropertiesToImplement(PropertyMap &PM,
PropertyDeclOrder &PO) const {
for (auto *Prop : properties()) {
- PM[Prop->getIdentifier()] = Prop;
+ PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
PO.push_back(Prop);
}
for (const auto *Ext : known_extensions()) {
const ObjCCategoryDecl *ClassExt = Ext;
for (auto *Prop : ClassExt->properties()) {
- PM[Prop->getIdentifier()] = Prop;
+ PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
PO.push_back(Prop);
}
}
@@ -1206,23 +1234,29 @@ ObjCMethodDecl::findPropertyDecl(bool CheckOverrides) const {
if (NumArgs > 1)
return nullptr;
- if (!isInstanceMethod())
- return nullptr;
-
if (isPropertyAccessor()) {
const ObjCContainerDecl *Container = cast<ObjCContainerDecl>(getParent());
bool IsGetter = (NumArgs == 0);
+ bool IsInstance = isInstanceMethod();
/// Local function that attempts to find a matching property within the
/// given Objective-C container.
auto findMatchingProperty =
[&](const ObjCContainerDecl *Container) -> const ObjCPropertyDecl * {
-
- for (const auto *I : Container->properties()) {
- Selector NextSel = IsGetter ? I->getGetterName()
- : I->getSetterName();
- if (NextSel == Sel)
- return I;
+ if (IsInstance) {
+ for (const auto *I : Container->instance_properties()) {
+ Selector NextSel = IsGetter ? I->getGetterName()
+ : I->getSetterName();
+ if (NextSel == Sel)
+ return I;
+ }
+ } else {
+ for (const auto *I : Container->class_properties()) {
+ Selector NextSel = IsGetter ? I->getGetterName()
+ : I->getSetterName();
+ if (NextSel == Sel)
+ return I;
+ }
}
return nullptr;
@@ -1822,7 +1856,9 @@ void ObjCProtocolDecl::collectPropertiesToImplement(PropertyMap &PM,
if (const ObjCProtocolDecl *PDecl = getDefinition()) {
for (auto *Prop : PDecl->properties()) {
// Insert into PM if not there already.
- PM.insert(std::make_pair(Prop->getIdentifier(), Prop));
+ PM.insert(std::make_pair(
+ std::make_pair(Prop->getIdentifier(), Prop->isClassProperty()),
+ Prop));
PO.push_back(Prop);
}
// Scan through protocol's protocols.
@@ -2011,10 +2047,29 @@ FindPropertyImplIvarDecl(IdentifierInfo *ivarId) const {
/// category \@implementation block.
///
ObjCPropertyImplDecl *ObjCImplDecl::
-FindPropertyImplDecl(IdentifierInfo *Id) const {
+FindPropertyImplDecl(IdentifierInfo *Id,
+ ObjCPropertyQueryKind QueryKind) const {
+ ObjCPropertyImplDecl *ClassPropImpl = nullptr;
for (auto *PID : property_impls())
- if (PID->getPropertyDecl()->getIdentifier() == Id)
- return PID;
+    // If QueryKind is unknown, we return the instance property if one
+    // exists; otherwise we return the class property.
+ if (PID->getPropertyDecl()->getIdentifier() == Id) {
+ if ((QueryKind == ObjCPropertyQueryKind::OBJC_PR_query_unknown &&
+ !PID->getPropertyDecl()->isClassProperty()) ||
+ (QueryKind == ObjCPropertyQueryKind::OBJC_PR_query_class &&
+ PID->getPropertyDecl()->isClassProperty()) ||
+ (QueryKind == ObjCPropertyQueryKind::OBJC_PR_query_instance &&
+ !PID->getPropertyDecl()->isClassProperty()))
+ return PID;
+
+ if (PID->getPropertyDecl()->isClassProperty())
+ ClassPropImpl = PID;
+ }
+
+ if (QueryKind == ObjCPropertyQueryKind::OBJC_PR_query_unknown)
+    // We couldn't find an instance property; return the class property.
+ return ClassPropImpl;
+
return nullptr;
}
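
The ObjCPropertyQueryKind parameter threaded through these lookups exists because a class property and an instance property may now share a name; 'unknown' prefers the instance property and falls back to the class one. A hedged usage sketch against the signature added above (Ctx and Container assumed in scope; "count" is invented):

    ObjCPropertyDecl *PD = Container->FindPropertyDeclaration(
        &Ctx.Idents.get("count"),
        ObjCPropertyQueryKind::OBJC_PR_query_unknown);   // instance, else class
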
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclOpenMP.cpp b/contrib/llvm/tools/clang/lib/AST/DeclOpenMP.cpp
index 493e2cd41226..5b06ce0778a7 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclOpenMP.cpp
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// \brief This file implements OMPThreadPrivateDecl class.
+/// \brief This file implements the OMPThreadPrivateDecl,
+/// OMPDeclareReductionDecl and OMPCapturedExprDecl classes.
///
//===----------------------------------------------------------------------===//
@@ -52,3 +53,50 @@ void OMPThreadPrivateDecl::setVars(ArrayRef<Expr *> VL) {
std::uninitialized_copy(VL.begin(), VL.end(), getTrailingObjects<Expr *>());
}
+//===----------------------------------------------------------------------===//
+// OMPDeclareReductionDecl Implementation.
+//===----------------------------------------------------------------------===//
+
+void OMPDeclareReductionDecl::anchor() {}
+
+OMPDeclareReductionDecl *OMPDeclareReductionDecl::Create(
+ ASTContext &C, DeclContext *DC, SourceLocation L, DeclarationName Name,
+ QualType T, OMPDeclareReductionDecl *PrevDeclInScope) {
+ return new (C, DC) OMPDeclareReductionDecl(OMPDeclareReduction, DC, L, Name,
+ T, PrevDeclInScope);
+}
+
+OMPDeclareReductionDecl *
+OMPDeclareReductionDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
+ return new (C, ID) OMPDeclareReductionDecl(
+ OMPDeclareReduction, /*DC=*/nullptr, SourceLocation(), DeclarationName(),
+ QualType(), /*PrevDeclInScope=*/nullptr);
+}
+
+OMPDeclareReductionDecl *OMPDeclareReductionDecl::getPrevDeclInScope() {
+ return cast_or_null<OMPDeclareReductionDecl>(
+ PrevDeclInScope.get(getASTContext().getExternalSource()));
+}
+const OMPDeclareReductionDecl *
+OMPDeclareReductionDecl::getPrevDeclInScope() const {
+ return cast_or_null<OMPDeclareReductionDecl>(
+ PrevDeclInScope.get(getASTContext().getExternalSource()));
+}
+
+//===----------------------------------------------------------------------===//
+// OMPCapturedExprDecl Implementation.
+//===----------------------------------------------------------------------===//
+
+void OMPCapturedExprDecl::anchor() {}
+
+OMPCapturedExprDecl *OMPCapturedExprDecl::Create(ASTContext &C, DeclContext *DC,
+ IdentifierInfo *Id,
+ QualType T) {
+ return new (C, DC) OMPCapturedExprDecl(C, DC, Id, T);
+}
+
+OMPCapturedExprDecl *OMPCapturedExprDecl::CreateDeserialized(ASTContext &C,
+ unsigned ID) {
+ return new (C, ID) OMPCapturedExprDecl(C, nullptr, nullptr, QualType());
+}
+
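
OMPDeclareReductionDecl models the OpenMP 'declare reduction' directive, whose combiner and optional initializer are the expressions the printer changes below emit. The source form it represents, with an invented identifier:

    #pragma omp declare reduction(mymax : int : \
        omp_out = omp_in > omp_out ? omp_in : omp_out) \
        initializer(omp_priv = 0)
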
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp
index 5c6002d55c0f..7e786990becb 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclPrinter.cpp
@@ -92,6 +92,8 @@ namespace {
void VisitUsingDecl(UsingDecl *D);
void VisitUsingShadowDecl(UsingShadowDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
+ void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D);
+ void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
void PrintTemplateParameters(const TemplateParameterList *Params,
const TemplateArgumentList *Args = nullptr);
@@ -130,6 +132,8 @@ static QualType GetBaseType(QualType T) {
BaseType = VTy->getElementType();
else if (const ReferenceType *RTy = BaseType->getAs<ReferenceType>())
BaseType = RTy->getPointeeType();
+ else if (const AutoType *ATy = BaseType->getAs<AutoType>())
+ BaseType = ATy->getDeducedType();
else
llvm_unreachable("Unknown declarator!");
}
@@ -158,19 +162,17 @@ void Decl::printGroup(Decl** Begin, unsigned NumDecls,
++Begin;
PrintingPolicy SubPolicy(Policy);
- if (TD && TD->isCompleteDefinition()) {
- TD->print(Out, Policy, Indentation);
- Out << " ";
- SubPolicy.SuppressTag = true;
- }
bool isFirst = true;
for ( ; Begin != End; ++Begin) {
if (isFirst) {
+      if (TD)
+ SubPolicy.IncludeTagDefinition = true;
SubPolicy.SuppressSpecifiers = false;
isFirst = false;
} else {
if (!isFirst) Out << ", ";
+ SubPolicy.IncludeTagDefinition = false;
SubPolicy.SuppressSpecifiers = true;
}
@@ -244,7 +246,7 @@ void DeclPrinter::printDeclType(QualType T, StringRef DeclName, bool Pack) {
Pack = true;
T = PET->getPattern();
}
- T.print(Out, Policy, (Pack ? "..." : "") + DeclName);
+ T.print(Out, Policy, (Pack ? "..." : "") + DeclName, Indentation);
}
void DeclPrinter::ProcessDeclGroup(SmallVectorImpl<Decl*>& Decls) {
@@ -333,7 +335,7 @@ void DeclPrinter::VisitDeclContext(DeclContext *DC, bool Indent) {
// FIXME: Need to be able to tell the DeclPrinter when
const char *Terminator = nullptr;
- if (isa<OMPThreadPrivateDecl>(*D))
+ if (isa<OMPThreadPrivateDecl>(*D) || isa<OMPDeclareReductionDecl>(*D))
Terminator = nullptr;
else if (isa<FunctionDecl>(*D) &&
cast<FunctionDecl>(*D)->isThisDeclarationADefinition())
@@ -358,6 +360,11 @@ void DeclPrinter::VisitDeclContext(DeclContext *DC, bool Indent) {
if (Terminator)
Out << Terminator;
Out << "\n";
+
+    // The 'declare target' attribute is special: the natural spelling of the
+    // pragma assumes an "end" construct, so print it here.
+ if (D->hasAttr<OMPDeclareTargetDeclAttr>())
+ Out << "#pragma omp end declare target\n";
}
if (!Decls.empty())
@@ -378,7 +385,8 @@ void DeclPrinter::VisitTypedefDecl(TypedefDecl *D) {
if (D->isModulePrivate())
Out << "__module_private__ ";
}
- D->getTypeSourceInfo()->getType().print(Out, Policy, D->getName());
+ QualType Ty = D->getTypeSourceInfo()->getType();
+ Ty.print(Out, Policy, D->getName(), Indentation);
prettyPrintAttributes(D);
}
@@ -683,7 +691,7 @@ void DeclPrinter::VisitFieldDecl(FieldDecl *D) {
Out << "__module_private__ ";
Out << D->getASTContext().getUnqualifiedObjCPointerType(D->getType()).
- stream(Policy, D->getName());
+ stream(Policy, D->getName(), Indentation);
if (D->isBitField()) {
Out << " : ";
@@ -707,6 +715,11 @@ void DeclPrinter::VisitLabelDecl(LabelDecl *D) {
void DeclPrinter::VisitVarDecl(VarDecl *D) {
prettyPrintPragmas(D);
+
+ QualType T = D->getTypeSourceInfo()
+ ? D->getTypeSourceInfo()->getType()
+ : D->getASTContext().getUnqualifiedObjCPointerType(D->getType());
+
if (!Policy.SuppressSpecifiers) {
StorageClass SC = D->getStorageClass();
if (SC != SC_None)
@@ -728,11 +741,13 @@ void DeclPrinter::VisitVarDecl(VarDecl *D) {
if (D->isModulePrivate())
Out << "__module_private__ ";
+
+ if (D->isConstexpr()) {
+ Out << "constexpr ";
+ T.removeLocalConst();
+ }
}
- QualType T = D->getTypeSourceInfo()
- ? D->getTypeSourceInfo()->getType()
- : D->getASTContext().getUnqualifiedObjCPointerType(D->getType());
printDeclType(T, D->getName());
Expr *Init = D->getInit();
if (!Policy.SuppressInitializers && Init) {
@@ -751,7 +766,10 @@ void DeclPrinter::VisitVarDecl(VarDecl *D) {
else if (D->getInitStyle() == VarDecl::CInit) {
Out << " = ";
}
- Init->printPretty(Out, nullptr, Policy, Indentation);
+ PrintingPolicy SubPolicy(Policy);
+ SubPolicy.SuppressSpecifiers = false;
+ SubPolicy.IncludeTagDefinition = false;
+ Init->printPretty(Out, nullptr, SubPolicy, Indentation);
if ((D->getInitStyle() == VarDecl::CallInit) && !isa<ParenListExpr>(Init))
Out << ")";
}
@@ -1046,7 +1064,7 @@ void DeclPrinter::VisitObjCMethodDecl(ObjCMethodDecl *OMD) {
std::string name = OMD->getSelector().getAsString();
std::string::size_type pos, lastPos = 0;
- for (const auto *PI : OMD->params()) {
+ for (const auto *PI : OMD->parameters()) {
// FIXME: selector is missing here!
pos = name.find_first_of(':', lastPos);
Out << " " << name.substr(lastPos, pos - lastPos) << ':';
@@ -1298,6 +1316,11 @@ void DeclPrinter::VisitObjCPropertyDecl(ObjCPropertyDecl *PDecl) {
}
}
+ if (PDecl->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_class) {
+ Out << (first ? ' ' : ',') << "class";
+ first = false;
+ }
+
(void) first; // Silence dead store warning due to idiomatic code.
Out << " )";
}
@@ -1358,3 +1381,38 @@ void DeclPrinter::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
}
}
+void DeclPrinter::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) {
+ if (!D->isInvalidDecl()) {
+ Out << "#pragma omp declare reduction (";
+ if (D->getDeclName().getNameKind() == DeclarationName::CXXOperatorName) {
+ static const char *const OperatorNames[NUM_OVERLOADED_OPERATORS] = {
+ nullptr,
+#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
+ Spelling,
+#include "clang/Basic/OperatorKinds.def"
+ };
+ const char *OpName =
+ OperatorNames[D->getDeclName().getCXXOverloadedOperator()];
+ assert(OpName && "not an overloaded operator");
+ Out << OpName;
+ } else {
+ assert(D->getDeclName().isIdentifier());
+ D->printName(Out);
+ }
+ Out << " : ";
+ D->getType().print(Out, Policy);
+ Out << " : ";
+ D->getCombiner()->printPretty(Out, nullptr, Policy, 0);
+ Out << ")";
+ if (auto *Init = D->getInitializer()) {
+ Out << " initializer(";
+ Init->printPretty(Out, nullptr, Policy, 0);
+ Out << ")";
+ }
+ }
+}
+
+void DeclPrinter::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
+ D->getInit()->printPretty(Out, nullptr, Policy, Indentation);
+}
+
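
The VisitVarDecl change hoists the type computation so the implicit const of a constexpr variable can be stripped before printing. Roughly, for

    constexpr int x = 5;

the printer now emits 'constexpr int x = 5' rather than 'const int x = 5'.
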
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp b/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp
index de3ebd23ef4f..37943cdd5b7b 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclTemplate.cpp
@@ -65,32 +65,29 @@ TemplateParameterList *TemplateParameterList::Create(
unsigned TemplateParameterList::getMinRequiredArguments() const {
unsigned NumRequiredArgs = 0;
- for (iterator P = const_cast<TemplateParameterList *>(this)->begin(),
- PEnd = const_cast<TemplateParameterList *>(this)->end();
- P != PEnd; ++P) {
- if ((*P)->isTemplateParameterPack()) {
- if (NonTypeTemplateParmDecl *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*P))
+ for (const NamedDecl *P : asArray()) {
+ if (P->isTemplateParameterPack()) {
+ if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P))
if (NTTP->isExpandedParameterPack()) {
NumRequiredArgs += NTTP->getNumExpansionTypes();
continue;
}
-
+
break;
}
-
- if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*P)) {
+
+ if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(P)) {
if (TTP->hasDefaultArgument())
break;
- } else if (NonTypeTemplateParmDecl *NTTP
- = dyn_cast<NonTypeTemplateParmDecl>(*P)) {
+ } else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P)) {
if (NTTP->hasDefaultArgument())
break;
- } else if (cast<TemplateTemplateParmDecl>(*P)->hasDefaultArgument())
+ } else if (cast<TemplateTemplateParmDecl>(P)->hasDefaultArgument())
break;
-
+
++NumRequiredArgs;
}
-
+
return NumRequiredArgs;
}
@@ -111,12 +108,10 @@ unsigned TemplateParameterList::getDepth() const {
static void AdoptTemplateParameterList(TemplateParameterList *Params,
DeclContext *Owner) {
- for (TemplateParameterList::iterator P = Params->begin(),
- PEnd = Params->end();
- P != PEnd; ++P) {
- (*P)->setDeclContext(Owner);
-
- if (TemplateTemplateParmDecl *TTP = dyn_cast<TemplateTemplateParmDecl>(*P))
+ for (NamedDecl *P : *Params) {
+ P->setDeclContext(Owner);
+
+ if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(P))
AdoptTemplateParameterList(TTP->getTemplateParameters(), Owner);
}
}
@@ -157,8 +152,8 @@ RedeclarableTemplateDecl::CommonBase *RedeclarableTemplateDecl::getCommonPtr() c
}
// Update any previous declarations we saw with the common pointer.
- for (unsigned I = 0, N = PrevDecls.size(); I != N; ++I)
- PrevDecls[I]->Common = Common;
+ for (const RedeclarableTemplateDecl *Prev : PrevDecls)
+ Prev->Common = Common;
return Common;
}
@@ -205,44 +200,41 @@ void RedeclarableTemplateDecl::addSpecializationImpl(
/// \brief Generate the injected template arguments for the given template
/// parameter list, e.g., for the injected-class-name of a class template.
static void GenerateInjectedTemplateArgs(ASTContext &Context,
- TemplateParameterList *Params,
+ TemplateParameterList *Params,
TemplateArgument *Args) {
- for (TemplateParameterList::iterator Param = Params->begin(),
- ParamEnd = Params->end();
- Param != ParamEnd; ++Param) {
+ for (NamedDecl *Param : *Params) {
TemplateArgument Arg;
- if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param)) {
+ if (auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
QualType ArgType = Context.getTypeDeclType(TTP);
if (TTP->isParameterPack())
ArgType = Context.getPackExpansionType(ArgType, None);
Arg = TemplateArgument(ArgType);
- } else if (NonTypeTemplateParmDecl *NTTP =
- dyn_cast<NonTypeTemplateParmDecl>(*Param)) {
+ } else if (auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
Expr *E = new (Context) DeclRefExpr(NTTP, /*enclosing*/ false,
NTTP->getType().getNonLValueExprType(Context),
Expr::getValueKindForType(NTTP->getType()),
NTTP->getLocation());
-
+
if (NTTP->isParameterPack())
E = new (Context) PackExpansionExpr(Context.DependentTy, E,
NTTP->getLocation(), None);
Arg = TemplateArgument(E);
} else {
- TemplateTemplateParmDecl *TTP = cast<TemplateTemplateParmDecl>(*Param);
+ auto *TTP = cast<TemplateTemplateParmDecl>(Param);
if (TTP->isParameterPack())
Arg = TemplateArgument(TemplateName(TTP), Optional<unsigned>());
else
Arg = TemplateArgument(TemplateName(TTP));
}
-
- if ((*Param)->isTemplateParameterPack())
+
+ if (Param->isTemplateParameterPack())
Arg = TemplateArgument::CreatePackCopy(Context, Arg);
*Args++ = Arg;
}
}
-
+
//===----------------------------------------------------------------------===//
// FunctionTemplateDecl Implementation
//===----------------------------------------------------------------------===//
@@ -421,23 +413,17 @@ void ClassTemplateDecl::getPartialSpecializations(
= getPartialSpecializations();
PS.clear();
PS.reserve(PartialSpecs.size());
- for (llvm::FoldingSetVector<ClassTemplatePartialSpecializationDecl>::iterator
- P = PartialSpecs.begin(), PEnd = PartialSpecs.end();
- P != PEnd; ++P)
- PS.push_back(P->getMostRecentDecl());
+ for (ClassTemplatePartialSpecializationDecl &P : PartialSpecs)
+ PS.push_back(P.getMostRecentDecl());
}
ClassTemplatePartialSpecializationDecl *
ClassTemplateDecl::findPartialSpecialization(QualType T) {
ASTContext &Context = getASTContext();
- using llvm::FoldingSetVector;
- typedef FoldingSetVector<ClassTemplatePartialSpecializationDecl>::iterator
- partial_spec_iterator;
- for (partial_spec_iterator P = getPartialSpecializations().begin(),
- PEnd = getPartialSpecializations().end();
- P != PEnd; ++P) {
- if (Context.hasSameType(P->getInjectedSpecializationType(), T))
- return P->getMostRecentDecl();
+ for (ClassTemplatePartialSpecializationDecl &P :
+ getPartialSpecializations()) {
+ if (Context.hasSameType(P.getInjectedSpecializationType(), T))
+ return P.getMostRecentDecl();
}
return nullptr;
@@ -447,12 +433,9 @@ ClassTemplatePartialSpecializationDecl *
ClassTemplateDecl::findPartialSpecInstantiatedFromMember(
ClassTemplatePartialSpecializationDecl *D) {
Decl *DCanon = D->getCanonicalDecl();
- for (llvm::FoldingSetVector<ClassTemplatePartialSpecializationDecl>::iterator
- P = getPartialSpecializations().begin(),
- PEnd = getPartialSpecializations().end();
- P != PEnd; ++P) {
- if (P->getInstantiatedFromMember()->getCanonicalDecl() == DCanon)
- return P->getMostRecentDecl();
+ for (ClassTemplatePartialSpecializationDecl &P : getPartialSpecializations()) {
+ if (P.getInstantiatedFromMember()->getCanonicalDecl() == DCanon)
+ return P.getMostRecentDecl();
}
return nullptr;
@@ -478,8 +461,7 @@ ClassTemplateDecl::getInjectedClassNameSpecialization() {
GenerateInjectedTemplateArgs(getASTContext(), Params, TemplateArgs.data());
CommonPtr->InjectedClassNameType
= Context.getTemplateSpecializationType(TemplateName(this),
- &TemplateArgs[0],
- TemplateArgs.size());
+ TemplateArgs);
return CommonPtr->InjectedClassNameType;
}
@@ -535,20 +517,14 @@ bool TemplateTypeParmDecl::isParameterPack() const {
// NonTypeTemplateParmDecl Method Implementations
//===----------------------------------------------------------------------===//
-NonTypeTemplateParmDecl::NonTypeTemplateParmDecl(DeclContext *DC,
- SourceLocation StartLoc,
- SourceLocation IdLoc,
- unsigned D, unsigned P,
- IdentifierInfo *Id,
- QualType T,
- TypeSourceInfo *TInfo,
- const QualType *ExpandedTypes,
- unsigned NumExpandedTypes,
- TypeSourceInfo **ExpandedTInfos)
- : DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
- TemplateParmPosition(D, P), ParameterPack(true),
- ExpandedParameterPack(true), NumExpandedTypes(NumExpandedTypes) {
- if (ExpandedTypes && ExpandedTInfos) {
+NonTypeTemplateParmDecl::NonTypeTemplateParmDecl(
+ DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, unsigned D,
+ unsigned P, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo,
+ ArrayRef<QualType> ExpandedTypes, ArrayRef<TypeSourceInfo *> ExpandedTInfos)
+ : DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
+ TemplateParmPosition(D, P), ParameterPack(true),
+ ExpandedParameterPack(true), NumExpandedTypes(ExpandedTypes.size()) {
+ if (!ExpandedTypes.empty() && !ExpandedTInfos.empty()) {
auto TypesAndInfos =
getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
for (unsigned I = 0; I != NumExpandedTypes; ++I) {
@@ -568,20 +544,16 @@ NonTypeTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
T, ParameterPack, TInfo);
}
-NonTypeTemplateParmDecl *
-NonTypeTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
- SourceLocation StartLoc, SourceLocation IdLoc,
- unsigned D, unsigned P,
- IdentifierInfo *Id, QualType T,
- TypeSourceInfo *TInfo,
- const QualType *ExpandedTypes,
- unsigned NumExpandedTypes,
- TypeSourceInfo **ExpandedTInfos) {
+NonTypeTemplateParmDecl *NonTypeTemplateParmDecl::Create(
+ const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
+ SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id,
+ QualType T, TypeSourceInfo *TInfo, ArrayRef<QualType> ExpandedTypes,
+ ArrayRef<TypeSourceInfo *> ExpandedTInfos) {
return new (C, DC,
additionalSizeToAlloc<std::pair<QualType, TypeSourceInfo *>>(
- NumExpandedTypes))
+ ExpandedTypes.size()))
NonTypeTemplateParmDecl(DC, StartLoc, IdLoc, D, P, Id, T, TInfo,
- ExpandedTypes, NumExpandedTypes, ExpandedTInfos);
+ ExpandedTypes, ExpandedTInfos);
}
NonTypeTemplateParmDecl *
@@ -594,12 +566,14 @@ NonTypeTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
NonTypeTemplateParmDecl *
NonTypeTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpandedTypes) {
- return new (C, ID,
- additionalSizeToAlloc<std::pair<QualType, TypeSourceInfo *>>(
- NumExpandedTypes))
- NonTypeTemplateParmDecl(nullptr, SourceLocation(), SourceLocation(), 0, 0,
- nullptr, QualType(), nullptr, nullptr,
- NumExpandedTypes, nullptr);
+ auto *NTTP =
+ new (C, ID, additionalSizeToAlloc<std::pair<QualType, TypeSourceInfo *>>(
+ NumExpandedTypes))
+ NonTypeTemplateParmDecl(nullptr, SourceLocation(), SourceLocation(),
+ 0, 0, nullptr, QualType(), nullptr, None,
+ None);
+ NTTP->NumExpandedTypes = NumExpandedTypes;
+ return NTTP;
}
SourceRange NonTypeTemplateParmDecl::getSourceRange() const {
@@ -624,12 +598,12 @@ void TemplateTemplateParmDecl::anchor() { }
TemplateTemplateParmDecl::TemplateTemplateParmDecl(
DeclContext *DC, SourceLocation L, unsigned D, unsigned P,
IdentifierInfo *Id, TemplateParameterList *Params,
- unsigned NumExpansions, TemplateParameterList * const *Expansions)
- : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
- TemplateParmPosition(D, P), ParameterPack(true),
- ExpandedParameterPack(true), NumExpandedParams(NumExpansions) {
- if (Expansions)
- std::uninitialized_copy(Expansions, Expansions + NumExpandedParams,
+ ArrayRef<TemplateParameterList *> Expansions)
+ : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
+ TemplateParmPosition(D, P), ParameterPack(true),
+ ExpandedParameterPack(true), NumExpandedParams(Expansions.size()) {
+ if (!Expansions.empty())
+ std::uninitialized_copy(Expansions.begin(), Expansions.end(),
getTrailingObjects<TemplateParameterList *>());
}
@@ -650,8 +624,7 @@ TemplateTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
ArrayRef<TemplateParameterList *> Expansions) {
return new (C, DC,
additionalSizeToAlloc<TemplateParameterList *>(Expansions.size()))
- TemplateTemplateParmDecl(DC, L, D, P, Id, Params, Expansions.size(),
- Expansions.data());
+ TemplateTemplateParmDecl(DC, L, D, P, Id, Params, Expansions);
}
TemplateTemplateParmDecl *
@@ -663,10 +636,12 @@ TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
TemplateTemplateParmDecl *
TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpansions) {
- return new (C, ID,
- additionalSizeToAlloc<TemplateParameterList *>(NumExpansions))
- TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0, nullptr,
- nullptr, NumExpansions, nullptr);
+ auto *TTP =
+ new (C, ID, additionalSizeToAlloc<TemplateParameterList *>(NumExpansions))
+ TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0, nullptr,
+ nullptr, None);
+ TTP->NumExpandedParams = NumExpansions;
+ return TTP;
}
SourceLocation TemplateTemplateParmDecl::getDefaultArgumentLoc() const {
@@ -685,19 +660,18 @@ void TemplateTemplateParmDecl::setDefaultArgument(
//===----------------------------------------------------------------------===//
// TemplateArgumentList Implementation
//===----------------------------------------------------------------------===//
-TemplateArgumentList::TemplateArgumentList(const TemplateArgument *Args,
- unsigned NumArgs)
- : Arguments(getTrailingObjects<TemplateArgument>()), NumArguments(NumArgs) {
- std::uninitialized_copy(Args, Args + NumArgs,
+TemplateArgumentList::TemplateArgumentList(ArrayRef<TemplateArgument> Args)
+ : Arguments(getTrailingObjects<TemplateArgument>()),
+ NumArguments(Args.size()) {
+ std::uninitialized_copy(Args.begin(), Args.end(),
getTrailingObjects<TemplateArgument>());
}
TemplateArgumentList *
TemplateArgumentList::CreateCopy(ASTContext &Context,
- const TemplateArgument *Args,
- unsigned NumArgs) {
- void *Mem = Context.Allocate(totalSizeToAlloc<TemplateArgument>(NumArgs));
- return new (Mem) TemplateArgumentList(Args, NumArgs);
+ ArrayRef<TemplateArgument> Args) {
+ void *Mem = Context.Allocate(totalSizeToAlloc<TemplateArgument>(Args.size()));
+ return new (Mem) TemplateArgumentList(Args);
}
FunctionTemplateSpecializationInfo *
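
A recurring cleanup in this file replaces (pointer, count) parameter pairs with llvm::ArrayRef, which bundles both and accepts containers, C arrays, or brace lists at call sites. Schematically (consume is a placeholder):

    #include "llvm/ADT/ArrayRef.h"

    void use(llvm::ArrayRef<TemplateArgument> Args) {
      for (const TemplateArgument &A : Args)
        consume(A);   // iterate without a separate count parameter
    }
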
@@ -732,15 +706,14 @@ ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc,
ClassTemplateDecl *SpecializedTemplate,
- const TemplateArgument *Args,
- unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
ClassTemplateSpecializationDecl *PrevDecl)
: CXXRecordDecl(DK, TK, Context, DC, StartLoc, IdLoc,
SpecializedTemplate->getIdentifier(),
PrevDecl),
SpecializedTemplate(SpecializedTemplate),
ExplicitInfo(nullptr),
- TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args, NumArgs)),
+ TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
SpecializationKind(TSK_Undeclared) {
}
@@ -756,13 +729,12 @@ ClassTemplateSpecializationDecl::Create(ASTContext &Context, TagKind TK,
SourceLocation StartLoc,
SourceLocation IdLoc,
ClassTemplateDecl *SpecializedTemplate,
- const TemplateArgument *Args,
- unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
ClassTemplateSpecializationDecl *PrevDecl) {
ClassTemplateSpecializationDecl *Result =
new (Context, DC) ClassTemplateSpecializationDecl(
Context, ClassTemplateSpecialization, TK, DC, StartLoc, IdLoc,
- SpecializedTemplate, Args, NumArgs, PrevDecl);
+ SpecializedTemplate, Args, PrevDecl);
Result->MayHaveOutOfDateDef = false;
Context.getTypeDeclType(Result, PrevDecl);
@@ -784,7 +756,7 @@ void ClassTemplateSpecializationDecl::getNameForDiagnostic(
const TemplateArgumentList &TemplateArgs = getTemplateArgs();
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, TemplateArgs.data(), TemplateArgs.size(), Policy);
+ OS, TemplateArgs.asArray(), Policy);
}
ClassTemplateDecl *
@@ -806,7 +778,7 @@ ClassTemplateSpecializationDecl::getSourceRange() const {
getSpecializationKind() == TSK_ExplicitInstantiationDefinition);
if (getExternLoc().isValid())
Begin = getExternLoc();
- SourceLocation End = getRBraceLoc();
+ SourceLocation End = getBraceRange().getEnd();
if (End.isInvalid())
End = getTypeAsWritten()->getTypeLoc().getEndLoc();
return SourceRange(Begin, End);
@@ -846,15 +818,14 @@ ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
SourceLocation IdLoc,
TemplateParameterList *Params,
ClassTemplateDecl *SpecializedTemplate,
- const TemplateArgument *Args,
- unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
const ASTTemplateArgumentListInfo *ArgInfos,
ClassTemplatePartialSpecializationDecl *PrevDecl)
: ClassTemplateSpecializationDecl(Context,
ClassTemplatePartialSpecialization,
TK, DC, StartLoc, IdLoc,
SpecializedTemplate,
- Args, NumArgs, PrevDecl),
+ Args, PrevDecl),
TemplateParams(Params), ArgsAsWritten(ArgInfos),
InstantiatedFromMember(nullptr, false)
{
@@ -867,8 +838,7 @@ Create(ASTContext &Context, TagKind TK,DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
TemplateParameterList *Params,
ClassTemplateDecl *SpecializedTemplate,
- const TemplateArgument *Args,
- unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
const TemplateArgumentListInfo &ArgInfos,
QualType CanonInjectedType,
ClassTemplatePartialSpecializationDecl *PrevDecl) {
@@ -878,7 +848,7 @@ Create(ASTContext &Context, TagKind TK,DeclContext *DC,
ClassTemplatePartialSpecializationDecl *Result = new (Context, DC)
ClassTemplatePartialSpecializationDecl(Context, TK, DC, StartLoc, IdLoc,
Params, SpecializedTemplate, Args,
- NumArgs, ASTArgInfos, PrevDecl);
+ ASTArgInfos, PrevDecl);
Result->setSpecializationKind(TSK_ExplicitSpecialization);
Result->MayHaveOutOfDateDef = false;
@@ -901,15 +871,12 @@ ClassTemplatePartialSpecializationDecl::CreateDeserialized(ASTContext &C,
void FriendTemplateDecl::anchor() { }
-FriendTemplateDecl *FriendTemplateDecl::Create(ASTContext &Context,
- DeclContext *DC,
- SourceLocation L,
- unsigned NParams,
- TemplateParameterList **Params,
- FriendUnion Friend,
- SourceLocation FLoc) {
- return new (Context, DC) FriendTemplateDecl(DC, L, NParams, Params,
- Friend, FLoc);
+FriendTemplateDecl *
+FriendTemplateDecl::Create(ASTContext &Context, DeclContext *DC,
+ SourceLocation L,
+ MutableArrayRef<TemplateParameterList *> Params,
+ FriendUnion Friend, SourceLocation FLoc) {
+ return new (Context, DC) FriendTemplateDecl(DC, L, Params, Friend, FLoc);
}
FriendTemplateDecl *FriendTemplateDecl::CreateDeserialized(ASTContext &C,
@@ -1065,23 +1032,17 @@ void VarTemplateDecl::getPartialSpecializations(
getPartialSpecializations();
PS.clear();
PS.reserve(PartialSpecs.size());
- for (llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl>::iterator
- P = PartialSpecs.begin(),
- PEnd = PartialSpecs.end();
- P != PEnd; ++P)
- PS.push_back(P->getMostRecentDecl());
+ for (VarTemplatePartialSpecializationDecl &P : PartialSpecs)
+ PS.push_back(P.getMostRecentDecl());
}
VarTemplatePartialSpecializationDecl *
VarTemplateDecl::findPartialSpecInstantiatedFromMember(
VarTemplatePartialSpecializationDecl *D) {
Decl *DCanon = D->getCanonicalDecl();
- for (llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl>::iterator
- P = getPartialSpecializations().begin(),
- PEnd = getPartialSpecializations().end();
- P != PEnd; ++P) {
- if (P->getInstantiatedFromMember()->getCanonicalDecl() == DCanon)
- return P->getMostRecentDecl();
+ for (VarTemplatePartialSpecializationDecl &P : getPartialSpecializations()) {
+ if (P.getInstantiatedFromMember()->getCanonicalDecl() == DCanon)
+ return P.getMostRecentDecl();
}
return nullptr;
@@ -1093,12 +1054,11 @@ VarTemplateDecl::findPartialSpecInstantiatedFromMember(
VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(
Kind DK, ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
- TypeSourceInfo *TInfo, StorageClass S, const TemplateArgument *Args,
- unsigned NumArgs)
+ TypeSourceInfo *TInfo, StorageClass S, ArrayRef<TemplateArgument> Args)
: VarDecl(DK, Context, DC, StartLoc, IdLoc,
SpecializedTemplate->getIdentifier(), T, TInfo, S),
SpecializedTemplate(SpecializedTemplate), ExplicitInfo(nullptr),
- TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args, NumArgs)),
+ TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
SpecializationKind(TSK_Undeclared) {}
VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(Kind DK,
@@ -1110,11 +1070,10 @@ VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(Kind DK,
VarTemplateSpecializationDecl *VarTemplateSpecializationDecl::Create(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
- TypeSourceInfo *TInfo, StorageClass S, const TemplateArgument *Args,
- unsigned NumArgs) {
+ TypeSourceInfo *TInfo, StorageClass S, ArrayRef<TemplateArgument> Args) {
return new (Context, DC) VarTemplateSpecializationDecl(
VarTemplateSpecialization, Context, DC, StartLoc, IdLoc,
- SpecializedTemplate, T, TInfo, S, Args, NumArgs);
+ SpecializedTemplate, T, TInfo, S, Args);
}
VarTemplateSpecializationDecl *
@@ -1129,7 +1088,7 @@ void VarTemplateSpecializationDecl::getNameForDiagnostic(
const TemplateArgumentList &TemplateArgs = getTemplateArgs();
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, TemplateArgs.data(), TemplateArgs.size(), Policy);
+ OS, TemplateArgs.asArray(), Policy);
}
VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const {
@@ -1141,11 +1100,10 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const {
void VarTemplateSpecializationDecl::setTemplateArgsInfo(
const TemplateArgumentListInfo &ArgsInfo) {
- unsigned N = ArgsInfo.size();
TemplateArgsInfo.setLAngleLoc(ArgsInfo.getLAngleLoc());
TemplateArgsInfo.setRAngleLoc(ArgsInfo.getRAngleLoc());
- for (unsigned I = 0; I != N; ++I)
- TemplateArgsInfo.addArgument(ArgsInfo[I]);
+ for (const TemplateArgumentLoc &Loc : ArgsInfo.arguments())
+ TemplateArgsInfo.addArgument(Loc);
}
//===----------------------------------------------------------------------===//
@@ -1157,11 +1115,11 @@ VarTemplatePartialSpecializationDecl::VarTemplatePartialSpecializationDecl(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, TemplateParameterList *Params,
VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
- StorageClass S, const TemplateArgument *Args, unsigned NumArgs,
+ StorageClass S, ArrayRef<TemplateArgument> Args,
const ASTTemplateArgumentListInfo *ArgInfos)
: VarTemplateSpecializationDecl(VarTemplatePartialSpecialization, Context,
DC, StartLoc, IdLoc, SpecializedTemplate, T,
- TInfo, S, Args, NumArgs),
+ TInfo, S, Args),
TemplateParams(Params), ArgsAsWritten(ArgInfos),
InstantiatedFromMember(nullptr, false) {
// TODO: The template parameters should be in DC by now. Verify.
@@ -1173,7 +1131,7 @@ VarTemplatePartialSpecializationDecl::Create(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, TemplateParameterList *Params,
VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
- StorageClass S, const TemplateArgument *Args, unsigned NumArgs,
+ StorageClass S, ArrayRef<TemplateArgument> Args,
const TemplateArgumentListInfo &ArgInfos) {
const ASTTemplateArgumentListInfo *ASTArgInfos
= ASTTemplateArgumentListInfo::Create(Context, ArgInfos);
@@ -1181,7 +1139,7 @@ VarTemplatePartialSpecializationDecl::Create(
VarTemplatePartialSpecializationDecl *Result =
new (Context, DC) VarTemplatePartialSpecializationDecl(
Context, DC, StartLoc, IdLoc, Params, SpecializedTemplate, T, TInfo,
- S, Args, NumArgs, ASTArgInfos);
+ S, Args, ASTArgInfos);
Result->setSpecializationKind(TSK_ExplicitSpecialization);
return Result;
}
@@ -1239,11 +1197,34 @@ createMakeIntegerSeqParameterList(const ASTContext &C, DeclContext *DC) {
Params, SourceLocation());
}
+static TemplateParameterList *
+createTypePackElementParameterList(const ASTContext &C, DeclContext *DC) {
+ // std::size_t Index
+ TypeSourceInfo *TInfo = C.getTrivialTypeSourceInfo(C.getSizeType());
+ auto *Index = NonTypeTemplateParmDecl::Create(
+ C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/0,
+ /*Id=*/nullptr, TInfo->getType(), /*ParameterPack=*/false, TInfo);
+
+ // typename ...T
+ auto *Ts = TemplateTypeParmDecl::Create(
+ C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/1,
+ /*Id=*/nullptr, /*Typename=*/true, /*ParameterPack=*/true);
+ Ts->setImplicit(true);
+
+ // template <std::size_t Index, typename ...T>
+ NamedDecl *Params[] = {Index, Ts};
+ return TemplateParameterList::Create(C, SourceLocation(), SourceLocation(),
+ llvm::makeArrayRef(Params),
+ SourceLocation());
+}
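// [Editorial sketch, not part of the commit] The parameter list built above
// gives the builtin the shape `template <std::size_t Index, typename ...T>`.
// Assuming a compiler that provides __type_pack_element, a use would be:
//
//   #include <type_traits>
//   static_assert(std::is_same<__type_pack_element<1, char, int, long>,
//                              int>::value,
//                 "selects the type at index 1 of the pack");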
+
static TemplateParameterList *createBuiltinTemplateParameterList(
const ASTContext &C, DeclContext *DC, BuiltinTemplateKind BTK) {
switch (BTK) {
case BTK__make_integer_seq:
return createMakeIntegerSeqParameterList(C, DC);
+ case BTK__type_pack_element:
+ return createTypePackElementParameterList(C, DC);
}
llvm_unreachable("unhandled BuiltinTemplateKind!");
diff --git a/contrib/llvm/tools/clang/lib/AST/DeclarationName.cpp b/contrib/llvm/tools/clang/lib/AST/DeclarationName.cpp
index b2f27275f49c..2a988e1d22d0 100644
--- a/contrib/llvm/tools/clang/lib/AST/DeclarationName.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/DeclarationName.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
-#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
@@ -133,36 +133,45 @@ int DeclarationName::compare(DeclarationName LHS, DeclarationName RHS) {
llvm_unreachable("Invalid DeclarationName Kind!");
}
-raw_ostream &operator<<(raw_ostream &OS, DeclarationName N) {
+static void printCXXConstructorDestructorName(QualType ClassType,
+ raw_ostream &OS,
+ PrintingPolicy Policy) {
+ // We know we're printing C++ here. Ensure we print types properly.
+ Policy.adjustForCPlusPlus();
+
+ if (const RecordType *ClassRec = ClassType->getAs<RecordType>()) {
+ OS << *ClassRec->getDecl();
+ return;
+ }
+ if (Policy.SuppressTemplateArgsInCXXConstructors) {
+ if (auto *InjTy = ClassType->getAs<InjectedClassNameType>()) {
+ OS << *InjTy->getDecl();
+ return;
+ }
+ }
+ ClassType.print(OS, Policy);
+}
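// [Editorial note, not part of the commit] Per the logic above, a
// constructor named via the injected-class-name prints without template
// arguments when SuppressTemplateArgsInCXXConstructors is set: for
// `template <class T> struct Foo { Foo(); };` the constructor prints as
// `Foo`, not `Foo<T>`.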
+
+void DeclarationName::print(raw_ostream &OS, const PrintingPolicy &Policy) {
+ DeclarationName &N = *this;
switch (N.getNameKind()) {
case DeclarationName::Identifier:
if (const IdentifierInfo *II = N.getAsIdentifierInfo())
OS << II->getName();
- return OS;
+ return;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
N.getObjCSelector().print(OS);
- return OS;
-
- case DeclarationName::CXXConstructorName: {
- QualType ClassType = N.getCXXNameType();
- if (const RecordType *ClassRec = ClassType->getAs<RecordType>())
- return OS << *ClassRec->getDecl();
- LangOptions LO;
- LO.CPlusPlus = true;
- return OS << ClassType.getAsString(PrintingPolicy(LO));
- }
+ return;
+
+ case DeclarationName::CXXConstructorName:
+ return printCXXConstructorDestructorName(N.getCXXNameType(), OS, Policy);
case DeclarationName::CXXDestructorName: {
OS << '~';
- QualType Type = N.getCXXNameType();
- if (const RecordType *Rec = Type->getAs<RecordType>())
- return OS << *Rec->getDecl();
- LangOptions LO;
- LO.CPlusPlus = true;
- return OS << Type.getAsString(PrintingPolicy(LO));
+ return printCXXConstructorDestructorName(N.getCXXNameType(), OS, Policy);
}
case DeclarationName::CXXOperatorName: {
@@ -178,29 +187,41 @@ raw_ostream &operator<<(raw_ostream &OS, DeclarationName N) {
OS << "operator";
if (OpName[0] >= 'a' && OpName[0] <= 'z')
OS << ' ';
- return OS << OpName;
+ OS << OpName;
+ return;
}
case DeclarationName::CXXLiteralOperatorName:
- return OS << "operator\"\"" << N.getCXXLiteralIdentifier()->getName();
+ OS << "operator\"\"" << N.getCXXLiteralIdentifier()->getName();
+ return;
case DeclarationName::CXXConversionFunctionName: {
OS << "operator ";
QualType Type = N.getCXXNameType();
- if (const RecordType *Rec = Type->getAs<RecordType>())
- return OS << *Rec->getDecl();
- LangOptions LO;
- LO.CPlusPlus = true;
- LO.Bool = true;
- return OS << Type.getAsString(PrintingPolicy(LO));
+ if (const RecordType *Rec = Type->getAs<RecordType>()) {
+ OS << *Rec->getDecl();
+ return;
+ }
+ // We know we're printing C++ here; ensure we print 'bool' properly.
+ PrintingPolicy CXXPolicy = Policy;
+ CXXPolicy.adjustForCPlusPlus();
+ Type.print(OS, CXXPolicy);
+ return;
}
case DeclarationName::CXXUsingDirective:
- return OS << "<using-directive>";
+ OS << "<using-directive>";
+ return;
}
llvm_unreachable("Unexpected declaration name kind");
}
+raw_ostream &operator<<(raw_ostream &OS, DeclarationName N) {
+ LangOptions LO;
+ N.print(OS, PrintingPolicy(LO));
+ return OS;
+}
+
} // end namespace clang
DeclarationName::NameKind DeclarationName::getNameKind() const {
@@ -333,7 +354,7 @@ DeclarationName DeclarationName::getUsingDirectiveName() {
return DeclarationName(Ptr);
}
-void DeclarationName::dump() const {
+LLVM_DUMP_METHOD void DeclarationName::dump() const {
llvm::errs() << *this << '\n';
}
diff --git a/contrib/llvm/tools/clang/lib/AST/Expr.cpp b/contrib/llvm/tools/clang/lib/AST/Expr.cpp
index 52f34df43565..091e8787d8b6 100644
--- a/contrib/llvm/tools/clang/lib/AST/Expr.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Expr.cpp
@@ -985,7 +985,7 @@ void StringLiteral::setString(const ASTContext &C, StringRef Str,
break;
}
default:
- assert(false && "unsupported CharByteWidth");
+ llvm_unreachable("unsupported CharByteWidth");
}
}
@@ -1084,20 +1084,8 @@ StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
/// corresponds to, e.g. "sizeof" or "[pre]++".
StringRef UnaryOperator::getOpcodeStr(Opcode Op) {
switch (Op) {
- case UO_PostInc: return "++";
- case UO_PostDec: return "--";
- case UO_PreInc: return "++";
- case UO_PreDec: return "--";
- case UO_AddrOf: return "&";
- case UO_Deref: return "*";
- case UO_Plus: return "+";
- case UO_Minus: return "-";
- case UO_Not: return "~";
- case UO_LNot: return "!";
- case UO_Real: return "__real";
- case UO_Imag: return "__imag";
- case UO_Extension: return "__extension__";
- case UO_Coawait: return "co_await";
+#define UNARY_OPERATION(Name, Spelling) case UO_##Name: return Spelling;
+#include "clang/AST/OperationKinds.def"
}
llvm_unreachable("Unknown unary operator");
}
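// [Editorial sketch, not part of the commit] The X-macro include above
// regenerates the deleted switch: each UNARY_OPERATION(Name, Spelling)
// entry in OperationKinds.def expands to a case, e.g.:
//
//   case UO_PostInc: return "++";
//   case UO_AddrOf:  return "&";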
@@ -1138,28 +1126,23 @@ OverloadedOperatorKind UnaryOperator::getOverloadedOperator(Opcode Opc) {
// Postfix Operators.
//===----------------------------------------------------------------------===//
-CallExpr::CallExpr(const ASTContext& C, StmtClass SC, Expr *fn,
- unsigned NumPreArgs, ArrayRef<Expr*> args, QualType t,
+CallExpr::CallExpr(const ASTContext &C, StmtClass SC, Expr *fn,
+ ArrayRef<Expr *> preargs, ArrayRef<Expr *> args, QualType t,
ExprValueKind VK, SourceLocation rparenloc)
- : Expr(SC, t, VK, OK_Ordinary,
- fn->isTypeDependent(),
- fn->isValueDependent(),
- fn->isInstantiationDependent(),
- fn->containsUnexpandedParameterPack()),
- NumArgs(args.size()) {
-
- SubExprs = new (C) Stmt*[args.size()+PREARGS_START+NumPreArgs];
+ : Expr(SC, t, VK, OK_Ordinary, fn->isTypeDependent(),
+ fn->isValueDependent(), fn->isInstantiationDependent(),
+ fn->containsUnexpandedParameterPack()),
+ NumArgs(args.size()) {
+
+ unsigned NumPreArgs = preargs.size();
+ SubExprs = new (C) Stmt *[args.size()+PREARGS_START+NumPreArgs];
SubExprs[FN] = fn;
+ for (unsigned i = 0; i != NumPreArgs; ++i) {
+ updateDependenciesFromArg(preargs[i]);
+ SubExprs[i+PREARGS_START] = preargs[i];
+ }
for (unsigned i = 0; i != args.size(); ++i) {
- if (args[i]->isTypeDependent())
- ExprBits.TypeDependent = true;
- if (args[i]->isValueDependent())
- ExprBits.ValueDependent = true;
- if (args[i]->isInstantiationDependent())
- ExprBits.InstantiationDependent = true;
- if (args[i]->containsUnexpandedParameterPack())
- ExprBits.ContainsUnexpandedParameterPack = true;
-
+ updateDependenciesFromArg(args[i]);
SubExprs[i+PREARGS_START+NumPreArgs] = args[i];
}
@@ -1167,9 +1150,14 @@ CallExpr::CallExpr(const ASTContext& C, StmtClass SC, Expr *fn,
RParenLoc = rparenloc;
}
+CallExpr::CallExpr(const ASTContext &C, StmtClass SC, Expr *fn,
+ ArrayRef<Expr *> args, QualType t, ExprValueKind VK,
+ SourceLocation rparenloc)
+ : CallExpr(C, SC, fn, ArrayRef<Expr *>(), args, t, VK, rparenloc) {}
+
CallExpr::CallExpr(const ASTContext &C, Expr *fn, ArrayRef<Expr *> args,
QualType t, ExprValueKind VK, SourceLocation rparenloc)
- : CallExpr(C, CallExprClass, fn, /*NumPreArgs=*/0, args, t, VK, rparenloc) {
+ : CallExpr(C, CallExprClass, fn, ArrayRef<Expr *>(), args, t, VK, rparenloc) {
}
CallExpr::CallExpr(const ASTContext &C, StmtClass SC, EmptyShell Empty)
@@ -1179,10 +1167,21 @@ CallExpr::CallExpr(const ASTContext &C, StmtClass SC, unsigned NumPreArgs,
EmptyShell Empty)
: Expr(SC, Empty), SubExprs(nullptr), NumArgs(0) {
// FIXME: Why do we allocate this?
- SubExprs = new (C) Stmt*[PREARGS_START+NumPreArgs];
+ SubExprs = new (C) Stmt*[PREARGS_START+NumPreArgs]();
CallExprBits.NumPreArgs = NumPreArgs;
}
+void CallExpr::updateDependenciesFromArg(Expr *Arg) {
+ if (Arg->isTypeDependent())
+ ExprBits.TypeDependent = true;
+ if (Arg->isValueDependent())
+ ExprBits.ValueDependent = true;
+ if (Arg->isInstantiationDependent())
+ ExprBits.InstantiationDependent = true;
+ if (Arg->containsUnexpandedParameterPack())
+ ExprBits.ContainsUnexpandedParameterPack = true;
+}
+
Decl *CallExpr::getCalleeDecl() {
Expr *CEE = getCallee()->IgnoreParenImpCasts();
@@ -1597,120 +1596,9 @@ bool CastExpr::CastConsistency() const {
const char *CastExpr::getCastKindName() const {
switch (getCastKind()) {
- case CK_Dependent:
- return "Dependent";
- case CK_BitCast:
- return "BitCast";
- case CK_LValueBitCast:
- return "LValueBitCast";
- case CK_LValueToRValue:
- return "LValueToRValue";
- case CK_NoOp:
- return "NoOp";
- case CK_BaseToDerived:
- return "BaseToDerived";
- case CK_DerivedToBase:
- return "DerivedToBase";
- case CK_UncheckedDerivedToBase:
- return "UncheckedDerivedToBase";
- case CK_Dynamic:
- return "Dynamic";
- case CK_ToUnion:
- return "ToUnion";
- case CK_ArrayToPointerDecay:
- return "ArrayToPointerDecay";
- case CK_FunctionToPointerDecay:
- return "FunctionToPointerDecay";
- case CK_NullToMemberPointer:
- return "NullToMemberPointer";
- case CK_NullToPointer:
- return "NullToPointer";
- case CK_BaseToDerivedMemberPointer:
- return "BaseToDerivedMemberPointer";
- case CK_DerivedToBaseMemberPointer:
- return "DerivedToBaseMemberPointer";
- case CK_ReinterpretMemberPointer:
- return "ReinterpretMemberPointer";
- case CK_UserDefinedConversion:
- return "UserDefinedConversion";
- case CK_ConstructorConversion:
- return "ConstructorConversion";
- case CK_IntegralToPointer:
- return "IntegralToPointer";
- case CK_PointerToIntegral:
- return "PointerToIntegral";
- case CK_PointerToBoolean:
- return "PointerToBoolean";
- case CK_ToVoid:
- return "ToVoid";
- case CK_VectorSplat:
- return "VectorSplat";
- case CK_IntegralCast:
- return "IntegralCast";
- case CK_BooleanToSignedIntegral:
- return "BooleanToSignedIntegral";
- case CK_IntegralToBoolean:
- return "IntegralToBoolean";
- case CK_IntegralToFloating:
- return "IntegralToFloating";
- case CK_FloatingToIntegral:
- return "FloatingToIntegral";
- case CK_FloatingCast:
- return "FloatingCast";
- case CK_FloatingToBoolean:
- return "FloatingToBoolean";
- case CK_MemberPointerToBoolean:
- return "MemberPointerToBoolean";
- case CK_CPointerToObjCPointerCast:
- return "CPointerToObjCPointerCast";
- case CK_BlockPointerToObjCPointerCast:
- return "BlockPointerToObjCPointerCast";
- case CK_AnyPointerToBlockPointerCast:
- return "AnyPointerToBlockPointerCast";
- case CK_ObjCObjectLValueCast:
- return "ObjCObjectLValueCast";
- case CK_FloatingRealToComplex:
- return "FloatingRealToComplex";
- case CK_FloatingComplexToReal:
- return "FloatingComplexToReal";
- case CK_FloatingComplexToBoolean:
- return "FloatingComplexToBoolean";
- case CK_FloatingComplexCast:
- return "FloatingComplexCast";
- case CK_FloatingComplexToIntegralComplex:
- return "FloatingComplexToIntegralComplex";
- case CK_IntegralRealToComplex:
- return "IntegralRealToComplex";
- case CK_IntegralComplexToReal:
- return "IntegralComplexToReal";
- case CK_IntegralComplexToBoolean:
- return "IntegralComplexToBoolean";
- case CK_IntegralComplexCast:
- return "IntegralComplexCast";
- case CK_IntegralComplexToFloatingComplex:
- return "IntegralComplexToFloatingComplex";
- case CK_ARCConsumeObject:
- return "ARCConsumeObject";
- case CK_ARCProduceObject:
- return "ARCProduceObject";
- case CK_ARCReclaimReturnedObject:
- return "ARCReclaimReturnedObject";
- case CK_ARCExtendBlockObject:
- return "ARCExtendBlockObject";
- case CK_AtomicToNonAtomic:
- return "AtomicToNonAtomic";
- case CK_NonAtomicToAtomic:
- return "NonAtomicToAtomic";
- case CK_CopyAndAutoreleaseBlockObject:
- return "CopyAndAutoreleaseBlockObject";
- case CK_BuiltinFnToFnPtr:
- return "BuiltinFnToFnPtr";
- case CK_ZeroToOCLEvent:
- return "ZeroToOCLEvent";
- case CK_AddressSpaceConversion:
- return "AddressSpaceConversion";
+#define CAST_OPERATION(Name) case CK_##Name: return #Name;
+#include "clang/AST/OperationKinds.def"
}
-
llvm_unreachable("Unhandled cast kind!");
}
@@ -1733,8 +1621,13 @@ Expr *CastExpr::getSubExprAsWritten() {
// subexpression describing the call; strip it off.
if (E->getCastKind() == CK_ConstructorConversion)
SubExpr = cast<CXXConstructExpr>(SubExpr)->getArg(0);
- else if (E->getCastKind() == CK_UserDefinedConversion)
- SubExpr = cast<CXXMemberCallExpr>(SubExpr)->getImplicitObjectArgument();
+ else if (E->getCastKind() == CK_UserDefinedConversion) {
+ assert((isa<CXXMemberCallExpr>(SubExpr) ||
+ isa<BlockExpr>(SubExpr)) &&
+ "Unexpected SubExpr for CK_UserDefinedConversion.");
+ if (isa<CXXMemberCallExpr>(SubExpr))
+ SubExpr = cast<CXXMemberCallExpr>(SubExpr)->getImplicitObjectArgument();
+ }
// If the subexpression we're left with is an implicit cast, look
// through that, too.
@@ -1802,40 +1695,9 @@ CStyleCastExpr *CStyleCastExpr::CreateEmpty(const ASTContext &C,
/// corresponds to, e.g. "<<=".
StringRef BinaryOperator::getOpcodeStr(Opcode Op) {
switch (Op) {
- case BO_PtrMemD: return ".*";
- case BO_PtrMemI: return "->*";
- case BO_Mul: return "*";
- case BO_Div: return "/";
- case BO_Rem: return "%";
- case BO_Add: return "+";
- case BO_Sub: return "-";
- case BO_Shl: return "<<";
- case BO_Shr: return ">>";
- case BO_LT: return "<";
- case BO_GT: return ">";
- case BO_LE: return "<=";
- case BO_GE: return ">=";
- case BO_EQ: return "==";
- case BO_NE: return "!=";
- case BO_And: return "&";
- case BO_Xor: return "^";
- case BO_Or: return "|";
- case BO_LAnd: return "&&";
- case BO_LOr: return "||";
- case BO_Assign: return "=";
- case BO_MulAssign: return "*=";
- case BO_DivAssign: return "/=";
- case BO_RemAssign: return "%=";
- case BO_AddAssign: return "+=";
- case BO_SubAssign: return "-=";
- case BO_ShlAssign: return "<<=";
- case BO_ShrAssign: return ">>=";
- case BO_AndAssign: return "&=";
- case BO_XorAssign: return "^=";
- case BO_OrAssign: return "|=";
- case BO_Comma: return ",";
+#define BINARY_OPERATION(Name, Spelling) case BO_##Name: return Spelling;
+#include "clang/AST/OperationKinds.def"
}
-
llvm_unreachable("Invalid OpCode!");
}
@@ -2994,6 +2856,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case ObjCStringLiteralClass:
case ObjCEncodeExprClass:
case ObjCBoolLiteralExprClass:
+ case ObjCAvailabilityCheckExprClass:
case CXXUuidofExprClass:
case OpaqueValueExprClass:
// These never have a side-effect.
@@ -3028,7 +2891,6 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
case CXXThrowExprClass:
case CXXNewExprClass:
case CXXDeleteExprClass:
- case ExprWithCleanupsClass:
case CoawaitExprClass:
case CoyieldExprClass:
// These always have a side-effect.
@@ -3041,6 +2903,12 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
return Finder.hasSideEffects();
}
+ case ExprWithCleanupsClass:
+ if (IncludePossibleEffects)
+ if (cast<ExprWithCleanups>(this)->cleanupsHaveSideEffects())
+ return true;
+ break;
+
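// [Editorial example, an assumed illustration] A full-expression that
// creates a temporary with a non-trivial destructor, e.g. `S{}` for
// `struct S { ~S(); };`, is wrapped in an ExprWithCleanups whose cleanups
// may have side effects; it now counts as a side effect only when
// IncludePossibleEffects is set.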
case ParenExprClass:
case ArraySubscriptExprClass:
case OMPArraySectionExprClass:
@@ -3141,6 +3009,13 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
break;
}
+ case CXXInheritedCtorInitExprClass: {
+ const auto *ICIE = cast<CXXInheritedCtorInitExpr>(this);
+ if (!ICIE->getConstructor()->isTrivial() && IncludePossibleEffects)
+ return true;
+ break;
+ }
+
case LambdaExprClass: {
const LambdaExpr *LE = cast<LambdaExpr>(this);
for (LambdaExpr::capture_iterator I = LE->capture_begin(),
@@ -3643,8 +3518,7 @@ IdentifierInfo *DesignatedInitExpr::Designator::getFieldName() const {
}
DesignatedInitExpr::DesignatedInitExpr(const ASTContext &C, QualType Ty,
- unsigned NumDesignators,
- const Designator *Designators,
+ llvm::ArrayRef<Designator> Designators,
SourceLocation EqualOrColonLoc,
bool GNUSyntax,
ArrayRef<Expr*> IndexExprs,
@@ -3655,7 +3529,7 @@ DesignatedInitExpr::DesignatedInitExpr(const ASTContext &C, QualType Ty,
Init->isInstantiationDependent(),
Init->containsUnexpandedParameterPack()),
EqualOrColonLoc(EqualOrColonLoc), GNUSyntax(GNUSyntax),
- NumDesignators(NumDesignators), NumSubExprs(IndexExprs.size() + 1) {
+ NumDesignators(Designators.size()), NumSubExprs(IndexExprs.size() + 1) {
this->Designators = new (C) Designator[NumDesignators];
// Record the initializer itself.
@@ -3709,14 +3583,14 @@ DesignatedInitExpr::DesignatedInitExpr(const ASTContext &C, QualType Ty,
}
DesignatedInitExpr *
-DesignatedInitExpr::Create(const ASTContext &C, Designator *Designators,
- unsigned NumDesignators,
+DesignatedInitExpr::Create(const ASTContext &C,
+ llvm::ArrayRef<Designator> Designators,
ArrayRef<Expr*> IndexExprs,
SourceLocation ColonOrEqualLoc,
bool UsesColonSyntax, Expr *Init) {
void *Mem = C.Allocate(totalSizeToAlloc<Stmt *>(IndexExprs.size() + 1),
llvm::alignOf<DesignatedInitExpr>());
- return new (Mem) DesignatedInitExpr(C, C.VoidTy, NumDesignators, Designators,
+ return new (Mem) DesignatedInitExpr(C, C.VoidTy, Designators,
ColonOrEqualLoc, UsesColonSyntax,
IndexExprs, Init);
}
@@ -3747,8 +3621,8 @@ SourceRange DesignatedInitExpr::getDesignatorsSourceRange() const {
SourceLocation DesignatedInitExpr::getLocStart() const {
SourceLocation StartLoc;
- Designator &First =
- *const_cast<DesignatedInitExpr*>(this)->designators_begin();
+ auto *DIE = const_cast<DesignatedInitExpr *>(this);
+ Designator &First = *DIE->getDesignator(0);
if (First.isFieldDesignator()) {
if (GNUSyntax)
StartLoc = SourceLocation::getFromRawEncoding(First.Field.FieldLoc);
@@ -4010,16 +3884,18 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
llvm_unreachable("unknown atomic op");
}
-QualType OMPArraySectionExpr::getBaseOriginalType(Expr *Base) {
+QualType OMPArraySectionExpr::getBaseOriginalType(const Expr *Base) {
unsigned ArraySectionCount = 0;
while (auto *OASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParens())) {
Base = OASE->getBase();
++ArraySectionCount;
}
- while (auto *ASE = dyn_cast<ArraySubscriptExpr>(Base->IgnoreParens())) {
+ while (auto *ASE =
+ dyn_cast<ArraySubscriptExpr>(Base->IgnoreParenImpCasts())) {
Base = ASE->getBase();
++ArraySectionCount;
}
+ Base = Base->IgnoreParenImpCasts();
auto OriginalTy = Base->getType();
if (auto *DRE = dyn_cast<DeclRefExpr>(Base))
if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl()))
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp
index ea983340a293..a13033d47467 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp
@@ -54,79 +54,6 @@ QualType CXXUuidofExpr::getTypeOperand(ASTContext &Context) const {
Operand.get<TypeSourceInfo *>()->getType().getNonReferenceType(), Quals);
}
-// static
-const UuidAttr *CXXUuidofExpr::GetUuidAttrOfType(QualType QT,
- bool *RDHasMultipleGUIDsPtr) {
- // Optionally remove one level of pointer, reference or array indirection.
- const Type *Ty = QT.getTypePtr();
- if (QT->isPointerType() || QT->isReferenceType())
- Ty = QT->getPointeeType().getTypePtr();
- else if (QT->isArrayType())
- Ty = Ty->getBaseElementTypeUnsafe();
-
- const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
- if (!RD)
- return nullptr;
-
- if (const UuidAttr *Uuid = RD->getMostRecentDecl()->getAttr<UuidAttr>())
- return Uuid;
-
- // __uuidof can grab UUIDs from template arguments.
- if (const ClassTemplateSpecializationDecl *CTSD =
- dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
- const TemplateArgumentList &TAL = CTSD->getTemplateArgs();
- const UuidAttr *UuidForRD = nullptr;
-
- for (const TemplateArgument &TA : TAL.asArray()) {
- bool SeenMultipleGUIDs = false;
-
- const UuidAttr *UuidForTA = nullptr;
- if (TA.getKind() == TemplateArgument::Type)
- UuidForTA = GetUuidAttrOfType(TA.getAsType(), &SeenMultipleGUIDs);
- else if (TA.getKind() == TemplateArgument::Declaration)
- UuidForTA =
- GetUuidAttrOfType(TA.getAsDecl()->getType(), &SeenMultipleGUIDs);
-
- // If the template argument has a UUID, there are three cases:
- // - This is the first UUID seen for this RecordDecl.
- // - This is a different UUID than previously seen for this RecordDecl.
- // - This is the same UUID as previously seen for this RecordDecl.
- if (UuidForTA) {
- if (!UuidForRD)
- UuidForRD = UuidForTA;
- else if (UuidForRD != UuidForTA)
- SeenMultipleGUIDs = true;
- }
-
- // Seeing multiple UUIDs means that we couldn't find a UUID
- if (SeenMultipleGUIDs) {
- if (RDHasMultipleGUIDsPtr)
- *RDHasMultipleGUIDsPtr = true;
- return nullptr;
- }
- }
-
- return UuidForRD;
- }
-
- return nullptr;
-}
-
-StringRef CXXUuidofExpr::getUuidAsStringRef(ASTContext &Context) const {
- StringRef Uuid;
- if (isTypeOperand())
- Uuid = CXXUuidofExpr::GetUuidAttrOfType(getTypeOperand(Context))->getGuid();
- else {
- // Special case: __uuidof(0) means an all-zero GUID.
- Expr *Op = getExprOperand();
- if (!Op->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull))
- Uuid = CXXUuidofExpr::GetUuidAttrOfType(Op->getType())->getGuid();
- else
- Uuid = "00000000-0000-0000-0000-000000000000";
- }
- return Uuid;
-}
-
// CXXScalarValueInitExpr
SourceLocation CXXScalarValueInitExpr::getLocStart() const {
return TypeInfo ? TypeInfo->getTypeLoc().getBeginLoc() : RParenLoc;
@@ -823,7 +750,8 @@ SourceLocation CXXTemporaryObjectExpr::getLocEnd() const {
CXXConstructExpr *CXXConstructExpr::Create(const ASTContext &C, QualType T,
SourceLocation Loc,
- CXXConstructorDecl *D, bool Elidable,
+ CXXConstructorDecl *Ctor,
+ bool Elidable,
ArrayRef<Expr*> Args,
bool HadMultipleCandidates,
bool ListInitialization,
@@ -831,8 +759,8 @@ CXXConstructExpr *CXXConstructExpr::Create(const ASTContext &C, QualType T,
bool ZeroInitialization,
ConstructionKind ConstructKind,
SourceRange ParenOrBraceRange) {
- return new (C) CXXConstructExpr(C, CXXConstructExprClass, T, Loc, D,
- Elidable, Args,
+ return new (C) CXXConstructExpr(C, CXXConstructExprClass, T, Loc,
+ Ctor, Elidable, Args,
HadMultipleCandidates, ListInitialization,
StdInitListInitialization,
ZeroInitialization, ConstructKind,
@@ -841,8 +769,9 @@ CXXConstructExpr *CXXConstructExpr::Create(const ASTContext &C, QualType T,
CXXConstructExpr::CXXConstructExpr(const ASTContext &C, StmtClass SC,
QualType T, SourceLocation Loc,
- CXXConstructorDecl *D, bool elidable,
- ArrayRef<Expr*> args,
+ CXXConstructorDecl *Ctor,
+ bool Elidable,
+ ArrayRef<Expr*> Args,
bool HadMultipleCandidates,
bool ListInitialization,
bool StdInitListInitialization,
@@ -853,28 +782,28 @@ CXXConstructExpr::CXXConstructExpr(const ASTContext &C, StmtClass SC,
T->isDependentType(), T->isDependentType(),
T->isInstantiationDependentType(),
T->containsUnexpandedParameterPack()),
- Constructor(D), Loc(Loc), ParenOrBraceRange(ParenOrBraceRange),
- NumArgs(args.size()),
- Elidable(elidable), HadMultipleCandidates(HadMultipleCandidates),
+ Constructor(Ctor), Loc(Loc), ParenOrBraceRange(ParenOrBraceRange),
+ NumArgs(Args.size()),
+ Elidable(Elidable), HadMultipleCandidates(HadMultipleCandidates),
ListInitialization(ListInitialization),
StdInitListInitialization(StdInitListInitialization),
ZeroInitialization(ZeroInitialization),
ConstructKind(ConstructKind), Args(nullptr)
{
if (NumArgs) {
- Args = new (C) Stmt*[args.size()];
+ this->Args = new (C) Stmt*[Args.size()];
- for (unsigned i = 0; i != args.size(); ++i) {
- assert(args[i] && "NULL argument in CXXConstructExpr");
+ for (unsigned i = 0; i != Args.size(); ++i) {
+ assert(Args[i] && "NULL argument in CXXConstructExpr");
- if (args[i]->isValueDependent())
+ if (Args[i]->isValueDependent())
ExprBits.ValueDependent = true;
- if (args[i]->isInstantiationDependent())
+ if (Args[i]->isInstantiationDependent())
ExprBits.InstantiationDependent = true;
- if (args[i]->containsUnexpandedParameterPack())
+ if (Args[i]->containsUnexpandedParameterPack())
ExprBits.ContainsUnexpandedParameterPack = true;
- Args[i] = args[i];
+ this->Args[i] = Args[i];
}
}
}
@@ -889,8 +818,12 @@ LambdaCapture::LambdaCapture(SourceLocation Loc, bool Implicit,
Bits |= Capture_Implicit;
switch (Kind) {
+ case LCK_StarThis:
+ Bits |= Capture_ByCopy;
+ // Fall through
case LCK_This:
assert(!Var && "'this' capture cannot have a variable!");
+ Bits |= Capture_This;
break;
case LCK_ByCopy:
@@ -901,18 +834,17 @@ LambdaCapture::LambdaCapture(SourceLocation Loc, bool Implicit,
break;
case LCK_VLAType:
assert(!Var && "VLA type capture cannot have a variable!");
- Bits |= Capture_ByCopy;
break;
}
DeclAndBits.setInt(Bits);
}
LambdaCaptureKind LambdaCapture::getCaptureKind() const {
- Decl *D = DeclAndBits.getPointer();
+ if (capturesVLAType())
+ return LCK_VLAType;
bool CapByCopy = DeclAndBits.getInt() & Capture_ByCopy;
- if (!D)
- return CapByCopy ? LCK_VLAType : LCK_This;
-
+ if (capturesThis())
+ return CapByCopy ? LCK_StarThis : LCK_This;
return CapByCopy ? LCK_ByCopy : LCK_ByRef;
}
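// [Editorial summary] With the code above, the two flag bits decode as:
//   Capture_This                  -> LCK_This
//   Capture_This | Capture_ByCopy -> LCK_StarThis  (C++1z `[*this]`)
//   Capture_ByCopy                -> LCK_ByCopy
//   neither                       -> LCK_ByRef
// VLA-type captures are recognized separately via capturesVLAType().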
@@ -1091,6 +1023,7 @@ bool LambdaExpr::isMutable() const {
}
ExprWithCleanups::ExprWithCleanups(Expr *subexpr,
+ bool CleanupsHaveSideEffects,
ArrayRef<CleanupObject> objects)
: Expr(ExprWithCleanupsClass, subexpr->getType(),
subexpr->getValueKind(), subexpr->getObjectKind(),
@@ -1098,16 +1031,19 @@ ExprWithCleanups::ExprWithCleanups(Expr *subexpr,
subexpr->isInstantiationDependent(),
subexpr->containsUnexpandedParameterPack()),
SubExpr(subexpr) {
+ ExprWithCleanupsBits.CleanupsHaveSideEffects = CleanupsHaveSideEffects;
ExprWithCleanupsBits.NumObjects = objects.size();
for (unsigned i = 0, e = objects.size(); i != e; ++i)
getTrailingObjects<CleanupObject>()[i] = objects[i];
}
ExprWithCleanups *ExprWithCleanups::Create(const ASTContext &C, Expr *subexpr,
+ bool CleanupsHaveSideEffects,
ArrayRef<CleanupObject> objects) {
void *buffer = C.Allocate(totalSizeToAlloc<CleanupObject>(objects.size()),
llvm::alignOf<ExprWithCleanups>());
- return new (buffer) ExprWithCleanups(subexpr, objects);
+ return new (buffer)
+ ExprWithCleanups(subexpr, CleanupsHaveSideEffects, objects);
}
ExprWithCleanups::ExprWithCleanups(EmptyShell empty, unsigned numObjects)
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp b/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp
index a47b03c0afba..89cc9bc18ef0 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprClassification.cpp
@@ -178,6 +178,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
case Expr::ObjCArrayLiteralClass:
case Expr::ObjCDictionaryLiteralClass:
case Expr::ObjCBoolLiteralExprClass:
+ case Expr::ObjCAvailabilityCheckExprClass:
case Expr::ParenListExprClass:
case Expr::SizeOfPackExprClass:
case Expr::SubstNonTypeTemplateParmPackExprClass:
@@ -360,6 +361,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) {
// Some C++ expressions are always class temporaries.
case Expr::CXXConstructExprClass:
+ case Expr::CXXInheritedCtorInitExprClass:
case Expr::CXXTemporaryObjectExprClass:
case Expr::LambdaExprClass:
case Expr::CXXStdInitializerListExprClass:
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
index fa652ba1b0ab..df944e8f25f2 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
@@ -157,13 +157,13 @@ namespace {
/// True if the subobject was named in a manner not supported by C++11. Such
/// lvalues can still be folded, but they are not core constant expressions
/// and we cannot perform lvalue-to-rvalue conversions on them.
- bool Invalid : 1;
+ unsigned Invalid : 1;
/// Is this a pointer one past the end of an object?
- bool IsOnePastTheEnd : 1;
+ unsigned IsOnePastTheEnd : 1;
/// Indicator of whether the most-derived object is an array element.
- bool MostDerivedIsArrayElement : 1;
+ unsigned MostDerivedIsArrayElement : 1;
/// The length of the path to the most-derived object of which this is a
/// subobject.
@@ -477,6 +477,9 @@ namespace {
/// fold (not just why it's not strictly a constant expression)?
bool HasFoldFailureDiagnostic;
+ /// \brief Whether or not we're currently speculatively evaluating.
+ bool IsSpeculativelyEvaluating;
+
enum EvaluationMode {
/// Evaluate as a constant expression. Stop if we find that the expression
/// is not a constant expression.
@@ -541,7 +544,8 @@ namespace {
BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr),
EvaluatingDecl((const ValueDecl *)nullptr),
EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false),
- HasFoldFailureDiagnostic(false), EvalMode(Mode) {}
+ HasFoldFailureDiagnostic(false), IsSpeculativelyEvaluating(false),
+ EvalMode(Mode) {}
void setEvaluatingDecl(APValue::LValueBase Base, APValue &Value) {
EvaluatingDecl = Base;
@@ -557,12 +561,12 @@ namespace {
return false;
if (NextCallIndex == 0) {
// NextCallIndex has wrapped around.
- Diag(Loc, diag::note_constexpr_call_limit_exceeded);
+ FFDiag(Loc, diag::note_constexpr_call_limit_exceeded);
return false;
}
if (CallStackDepth <= getLangOpts().ConstexprCallDepth)
return true;
- Diag(Loc, diag::note_constexpr_depth_limit_exceeded)
+ FFDiag(Loc, diag::note_constexpr_depth_limit_exceeded)
<< getLangOpts().ConstexprCallDepth;
return false;
}
@@ -579,7 +583,7 @@ namespace {
bool nextStep(const Stmt *S) {
if (!StepsLeft) {
- Diag(S->getLocStart(), diag::note_constexpr_step_limit_exceeded);
+ FFDiag(S->getLocStart(), diag::note_constexpr_step_limit_exceeded);
return false;
}
--StepsLeft;
@@ -597,11 +601,10 @@ namespace {
/// Add notes containing a call stack to the current point of evaluation.
void addCallStack(unsigned Limit);
- public:
- /// Diagnose that the evaluation cannot be folded.
- OptionalDiagnostic Diag(SourceLocation Loc, diag::kind DiagId
- = diag::note_invalid_subexpr_in_const_expr,
- unsigned ExtraNotes = 0, bool IsCCEDiag = false) {
+ private:
+ OptionalDiagnostic Diag(SourceLocation Loc, diag::kind DiagId,
+ unsigned ExtraNotes, bool IsCCEDiag) {
+
if (EvalStatus.Diag) {
// If we have a prior diagnostic, it will be noting that the expression
// isn't a constant expression. This diagnostic is more important,
@@ -646,12 +649,20 @@ namespace {
HasActiveDiagnostic = false;
return OptionalDiagnostic();
}
-
- OptionalDiagnostic Diag(const Expr *E, diag::kind DiagId
+ public:
+ // Diagnose that the evaluation could not be folded (FF => FoldFailure)
+ OptionalDiagnostic
+ FFDiag(SourceLocation Loc,
+ diag::kind DiagId = diag::note_invalid_subexpr_in_const_expr,
+ unsigned ExtraNotes = 0) {
+ return Diag(Loc, DiagId, ExtraNotes, false);
+ }
+
+ OptionalDiagnostic FFDiag(const Expr *E, diag::kind DiagId
= diag::note_invalid_subexpr_in_const_expr,
- unsigned ExtraNotes = 0, bool IsCCEDiag = false) {
+ unsigned ExtraNotes = 0) {
if (EvalStatus.Diag)
- return Diag(E->getExprLoc(), DiagId, ExtraNotes, IsCCEDiag);
+ return Diag(E->getExprLoc(), DiagId, ExtraNotes, /*IsCCEDiag*/false);
HasActiveDiagnostic = false;
return OptionalDiagnostic();
}
@@ -661,8 +672,7 @@ namespace {
///
/// FIXME: Stop evaluating if we're in EM_ConstantExpression or
/// EM_PotentialConstantExpression mode and we produce one of these.
- template<typename LocArg>
- OptionalDiagnostic CCEDiag(LocArg Loc, diag::kind DiagId
+ OptionalDiagnostic CCEDiag(SourceLocation Loc, diag::kind DiagId
= diag::note_invalid_subexpr_in_const_expr,
unsigned ExtraNotes = 0) {
// Don't override a previous diagnostic. Don't bother collecting
@@ -673,7 +683,11 @@ namespace {
}
return Diag(Loc, DiagId, ExtraNotes, true);
}
-
+ OptionalDiagnostic CCEDiag(const Expr *E, diag::kind DiagId
+ = diag::note_invalid_subexpr_in_const_expr,
+ unsigned ExtraNotes = 0) {
+ return CCEDiag(E->getExprLoc(), DiagId, ExtraNotes);
+ }
/// Add a note to a prior diagnostic.
OptionalDiagnostic Note(SourceLocation Loc, diag::kind DiagId) {
if (!HasActiveDiagnostic)
@@ -763,6 +777,29 @@ namespace {
llvm_unreachable("Missed EvalMode case");
}
+ /// Notes that we failed to evaluate an expression that other expressions
+ /// directly depend on, and determines if we should keep evaluating. This
+ /// should only be called if we actually intend to keep evaluating.
+ ///
+ /// Call noteSideEffect() instead if we may be able to ignore the value that
+ /// we failed to evaluate, e.g. if we failed to evaluate Foo() in:
+ ///
+ /// (Foo(), 1) // use noteSideEffect
+ /// (Foo() || true) // use noteSideEffect
+ /// Foo() + 1 // use noteFailure
+ LLVM_ATTRIBUTE_UNUSED_RESULT bool noteFailure() {
+ // Failure when evaluating some expression often means there is some
+ // subexpression whose evaluation was skipped. Therefore, (because we
+ // don't track whether we skipped an expression when unwinding after an
+ // evaluation failure) every evaluation failure that bubbles up from a
+ // subexpression implies that a side-effect has potentially happened. We
+ // skip setting the HasSideEffects flag to true until we decide to
+ // continue evaluating after that point, which happens here.
+ bool KeepGoing = keepEvaluatingAfterFailure();
+ EvalStatus.HasSideEffects |= KeepGoing;
+ return KeepGoing;
+ }
+
bool allowInvalidBaseExpr() const {
return EvalMode == EM_DesignatorFold;
}
@@ -811,24 +848,52 @@ namespace {
~FoldOffsetRAII() { Info.EvalMode = OldMode; }
};
- /// RAII object used to suppress diagnostics and side-effects from a
- /// speculative evaluation.
+ /// RAII object used to optionally suppress diagnostics and side-effects from
+ /// a speculative evaluation.
class SpeculativeEvaluationRAII {
- EvalInfo &Info;
+ /// Pair of EvalInfo, and a bit that stores whether or not we were
+ /// speculatively evaluating when we created this RAII.
+ llvm::PointerIntPair<EvalInfo *, 1, bool> InfoAndOldSpecEval;
Expr::EvalStatus Old;
+ void moveFromAndCancel(SpeculativeEvaluationRAII &&Other) {
+ InfoAndOldSpecEval = Other.InfoAndOldSpecEval;
+ Old = Other.Old;
+ Other.InfoAndOldSpecEval.setPointer(nullptr);
+ }
+
+ void maybeRestoreState() {
+ EvalInfo *Info = InfoAndOldSpecEval.getPointer();
+ if (!Info)
+ return;
+
+ Info->EvalStatus = Old;
+ Info->IsSpeculativelyEvaluating = InfoAndOldSpecEval.getInt();
+ }
+
public:
- SpeculativeEvaluationRAII(EvalInfo &Info,
- SmallVectorImpl<PartialDiagnosticAt> *NewDiag = nullptr)
- : Info(Info), Old(Info.EvalStatus) {
+ SpeculativeEvaluationRAII() = default;
+
+ SpeculativeEvaluationRAII(
+ EvalInfo &Info, SmallVectorImpl<PartialDiagnosticAt> *NewDiag = nullptr)
+ : InfoAndOldSpecEval(&Info, Info.IsSpeculativelyEvaluating),
+ Old(Info.EvalStatus) {
Info.EvalStatus.Diag = NewDiag;
- // If we're speculatively evaluating, we may have skipped over some
- // evaluations and missed out a side effect.
- Info.EvalStatus.HasSideEffects = true;
+ Info.IsSpeculativelyEvaluating = true;
}
- ~SpeculativeEvaluationRAII() {
- Info.EvalStatus = Old;
+
+ SpeculativeEvaluationRAII(const SpeculativeEvaluationRAII &Other) = delete;
+ SpeculativeEvaluationRAII(SpeculativeEvaluationRAII &&Other) {
+ moveFromAndCancel(std::move(Other));
}
+
+ SpeculativeEvaluationRAII &operator=(SpeculativeEvaluationRAII &&Other) {
+ maybeRestoreState();
+ moveFromAndCancel(std::move(Other));
+ return *this;
+ }
+
+ ~SpeculativeEvaluationRAII() { maybeRestoreState(); }
};
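// [Editorial sketch, assumed usage] The new move support lets a caller
// begin speculation conditionally and keep the guard alive past the
// branch that created it:
//
//   SpeculativeEvaluationRAII Guard;            // inert: no EvalInfo bound
//   if (NeedSpeculation)
//     Guard = SpeculativeEvaluationRAII(Info);  // start suppressing state
//   ...                                         // prior EvalStatus restored
//                                               // when Guard is destroyed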
/// RAII object wrapping a full-expression or block scope, and handling
@@ -941,6 +1006,16 @@ void EvalInfo::addCallStack(unsigned Limit) {
continue;
}
+ // Use a different note for an inheriting constructor, because from the
+ // user's perspective it's not really a function at all.
+ if (auto *CD = dyn_cast_or_null<CXXConstructorDecl>(Frame->Callee)) {
+ if (CD->isInheritingConstructor()) {
+ addDiag(Frame->CallLoc, diag::note_constexpr_inherited_ctor_call_here)
+ << CD->getParent();
+ continue;
+ }
+ }
+
SmallVector<char, 128> Buffer;
llvm::raw_svector_ostream Out(Buffer);
describeCall(Frame, Out);
@@ -992,7 +1067,7 @@ namespace {
struct LValue {
APValue::LValueBase Base;
CharUnits Offset;
- bool InvalidBase : 1;
+ unsigned InvalidBase : 1;
unsigned CallIndex : 31;
SubobjectDesignator Designator;
@@ -1335,12 +1410,12 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc,
if (!IsGlobalLValue(Base)) {
if (Info.getLangOpts().CPlusPlus11) {
const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>();
- Info.Diag(Loc, diag::note_constexpr_non_global, 1)
+ Info.FFDiag(Loc, diag::note_constexpr_non_global, 1)
<< IsReferenceType << !Designator.Entries.empty()
<< !!VD << VD;
NoteLValueLocation(Info, Base);
} else {
- Info.Diag(Loc);
+ Info.FFDiag(Loc);
}
// Don't allow references to temporaries to escape.
return false;
@@ -1390,7 +1465,7 @@ static bool CheckLValueConstantExpression(EvalInfo &Info, SourceLocation Loc,
// Does this refer one past the end of some object?
if (!Designator.Invalid && Designator.isOnePastTheEnd()) {
const ValueDecl *VD = Base.dyn_cast<const ValueDecl*>();
- Info.Diag(Loc, diag::note_constexpr_past_end, 1)
+ Info.FFDiag(Loc, diag::note_constexpr_past_end, 1)
<< !Designator.Entries.empty() << !!VD << VD;
NoteLValueLocation(Info, Base);
}
@@ -1414,10 +1489,10 @@ static bool CheckLiteralType(EvalInfo &Info, const Expr *E,
// Prvalue constant expressions must be of literal types.
if (Info.getLangOpts().CPlusPlus11)
- Info.Diag(E, diag::note_constexpr_nonliteral)
+ Info.FFDiag(E, diag::note_constexpr_nonliteral)
<< E->getType();
else
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -1427,7 +1502,7 @@ static bool CheckLiteralType(EvalInfo &Info, const Expr *E,
static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc,
QualType Type, const APValue &Value) {
if (Value.isUninit()) {
- Info.Diag(DiagLoc, diag::note_constexpr_uninitialized)
+ Info.FFDiag(DiagLoc, diag::note_constexpr_uninitialized)
<< true << Type;
return false;
}
@@ -1637,7 +1712,7 @@ static bool truncateBitfieldValue(EvalInfo &Info, const Expr *E,
// FIXME: In this case, we should provide the diagnostic for casting
// a pointer to an integer.
assert(Value.isLValue() && "integral value neither int nor lvalue?");
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -1679,7 +1754,7 @@ static bool EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E,
} else {
// Don't try to handle vectors of anything other than int or float
// (not sure if it's possible to hit this case).
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
unsigned BaseEltSize = EltAsInt.getBitWidth();
@@ -1692,7 +1767,7 @@ static bool EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E,
}
// Give up if the input isn't an int, float, or vector. For example, we
// reject "(v4i16)(intptr_t)&a".
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -1728,7 +1803,7 @@ static bool handleIntIntBinOp(EvalInfo &Info, const Expr *E, const APSInt &LHS,
APSInt &Result) {
switch (Opcode) {
default:
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
case BO_Mul:
return CheckedIntArithmetic(Info, E, LHS, RHS, LHS.getBitWidth() * 2,
@@ -1745,7 +1820,7 @@ static bool handleIntIntBinOp(EvalInfo &Info, const Expr *E, const APSInt &LHS,
case BO_Div:
case BO_Rem:
if (RHS == 0) {
- Info.Diag(E, diag::note_expr_divide_by_zero);
+ Info.FFDiag(E, diag::note_expr_divide_by_zero);
return false;
}
Result = (Opcode == BO_Rem ? LHS % RHS : LHS / RHS);
@@ -1826,7 +1901,7 @@ static bool handleFloatFloatBinOp(EvalInfo &Info, const Expr *E,
const APFloat &RHS) {
switch (Opcode) {
default:
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
case BO_Mul:
LHS.multiply(RHS, APFloat::rmNearestTiesToEven);
@@ -1968,10 +2043,15 @@ static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
return true;
}
+ if (Type->isDependentType()) {
+ Info.FFDiag(Loc);
+ return false;
+ }
+
if (!Type->isConstantSizeType()) {
// sizeof(vla) is not a constant expression: C99 6.5.3.4p2.
// FIXME: Better diagnostic.
- Info.Diag(Loc);
+ Info.FFDiag(Loc);
return false;
}
@@ -2035,7 +2115,7 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
if (Info.checkingPotentialConstantExpression())
return false;
if (!Frame || !Frame->Arguments) {
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
Result = &Frame->Arguments[PVD->getFunctionScopeIndex()];
@@ -2055,7 +2135,7 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
// If we're checking a potential constant expression, the variable could be
// initialized later.
if (!Info.checkingPotentialConstantExpression())
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -2069,7 +2149,7 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
// Never evaluate the initializer of a weak variable. We can't be sure that
// this is the definition which will be used.
if (VD->isWeak()) {
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -2077,7 +2157,7 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E,
// this in the cases where it matters for conformance.
SmallVector<PartialDiagnosticAt, 8> Notes;
if (!VD->evaluateValue(Notes)) {
- Info.Diag(E, diag::note_constexpr_var_init_non_constant,
+ Info.FFDiag(E, diag::note_constexpr_var_init_non_constant,
Notes.size() + 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
Info.addNotes(Notes);
@@ -2218,7 +2298,7 @@ static bool diagnoseUnreadableFields(EvalInfo &Info, const Expr *E,
// FIXME: Add core issue number for the union case.
if (Field->isMutable() &&
(RD->isUnion() || isReadByLvalueToRvalueConversion(Field->getType()))) {
- Info.Diag(E, diag::note_constexpr_ltor_mutable, 1) << Field;
+ Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1) << Field;
Info.Note(Field->getLocation(), diag::note_declared_at);
return true;
}
@@ -2272,10 +2352,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
return handler.failed();
if (Sub.isOnePastTheEnd()) {
if (Info.getLangOpts().CPlusPlus11)
- Info.Diag(E, diag::note_constexpr_access_past_end)
+ Info.FFDiag(E, diag::note_constexpr_access_past_end)
<< handler.AccessKind;
else
- Info.Diag(E);
+ Info.FFDiag(E);
return handler.failed();
}
@@ -2287,7 +2367,7 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
for (unsigned I = 0, N = Sub.Entries.size(); /**/; ++I) {
if (O->isUninit()) {
if (!Info.checkingPotentialConstantExpression())
- Info.Diag(E, diag::note_constexpr_access_uninit) << handler.AccessKind;
+ Info.FFDiag(E, diag::note_constexpr_access_uninit) << handler.AccessKind;
return handler.failed();
}
@@ -2322,10 +2402,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
// Note, it should not be possible to form a pointer with a valid
// designator which points more than one past the end of the array.
if (Info.getLangOpts().CPlusPlus11)
- Info.Diag(E, diag::note_constexpr_access_past_end)
+ Info.FFDiag(E, diag::note_constexpr_access_past_end)
<< handler.AccessKind;
else
- Info.Diag(E);
+ Info.FFDiag(E);
return handler.failed();
}
@@ -2355,10 +2435,10 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
uint64_t Index = Sub.Entries[I].ArrayIndex;
if (Index > 1) {
if (Info.getLangOpts().CPlusPlus11)
- Info.Diag(E, diag::note_constexpr_access_past_end)
+ Info.FFDiag(E, diag::note_constexpr_access_past_end)
<< handler.AccessKind;
else
- Info.Diag(E);
+ Info.FFDiag(E);
return handler.failed();
}
@@ -2378,7 +2458,7 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
}
} else if (const FieldDecl *Field = getAsField(Sub.Entries[I])) {
if (Field->isMutable() && handler.AccessKind == AK_Read) {
- Info.Diag(E, diag::note_constexpr_ltor_mutable, 1)
+ Info.FFDiag(E, diag::note_constexpr_ltor_mutable, 1)
<< Field;
Info.Note(Field->getLocation(), diag::note_declared_at);
return handler.failed();
@@ -2390,7 +2470,7 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
const FieldDecl *UnionField = O->getUnionField();
if (!UnionField ||
UnionField->getCanonicalDecl() != Field->getCanonicalDecl()) {
- Info.Diag(E, diag::note_constexpr_access_inactive_union_member)
+ Info.FFDiag(E, diag::note_constexpr_access_inactive_union_member)
<< handler.AccessKind << Field << !UnionField << UnionField;
return handler.failed();
}
@@ -2406,11 +2486,11 @@ findSubobject(EvalInfo &Info, const Expr *E, const CompleteObject &Obj,
if (ObjType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus) {
// FIXME: Include a description of the path to the volatile subobject.
- Info.Diag(E, diag::note_constexpr_access_volatile_obj, 1)
+ Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1)
<< handler.AccessKind << 2 << Field;
Info.Note(Field->getLocation(), diag::note_declared_at);
} else {
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
}
return handler.failed();
}
@@ -2482,7 +2562,7 @@ struct ModifySubobjectHandler {
bool checkConst(QualType QT) {
// Assigning to a const object has undefined behavior.
if (QT.isConstQualified()) {
- Info.Diag(E, diag::note_constexpr_modify_const_type) << QT;
+ Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT;
return false;
}
return true;
@@ -2501,7 +2581,7 @@ struct ModifySubobjectHandler {
return false;
if (!NewVal.isInt()) {
// Maybe trying to write a cast pointer value into a complex?
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
Value = NewVal.getInt();
@@ -2592,7 +2672,7 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
AccessKinds AK, const LValue &LVal,
QualType LValType) {
if (!LVal.Base) {
- Info.Diag(E, diag::note_constexpr_access_null) << AK;
+ Info.FFDiag(E, diag::note_constexpr_access_null) << AK;
return CompleteObject();
}
@@ -2600,7 +2680,7 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
if (LVal.CallIndex) {
Frame = Info.getCallFrame(LVal.CallIndex);
if (!Frame) {
- Info.Diag(E, diag::note_constexpr_lifetime_ended, 1)
+ Info.FFDiag(E, diag::note_constexpr_lifetime_ended, 1)
<< AK << LVal.Base.is<const ValueDecl*>();
NoteLValueLocation(Info, LVal.Base);
return CompleteObject();
@@ -2613,10 +2693,10 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
// semantics.
if (LValType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus)
- Info.Diag(E, diag::note_constexpr_access_volatile_type)
+ Info.FFDiag(E, diag::note_constexpr_access_volatile_type)
<< AK << LValType;
else
- Info.Diag(E);
+ Info.FFDiag(E);
return CompleteObject();
}
@@ -2638,18 +2718,18 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
VD = VDef;
}
if (!VD || VD->isInvalidDecl()) {
- Info.Diag(E);
+ Info.FFDiag(E);
return CompleteObject();
}
// Accesses of volatile-qualified objects are not allowed.
if (BaseType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus) {
- Info.Diag(E, diag::note_constexpr_access_volatile_obj, 1)
+ Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1)
<< AK << 1 << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
- Info.Diag(E);
+ Info.FFDiag(E);
}
return CompleteObject();
}
@@ -2664,17 +2744,20 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
// evaluation.
} else if (AK != AK_Read) {
// All the remaining cases only permit reading.
- Info.Diag(E, diag::note_constexpr_modify_global);
+ Info.FFDiag(E, diag::note_constexpr_modify_global);
return CompleteObject();
} else if (VD->isConstexpr()) {
// OK, we can read this variable.
} else if (BaseType->isIntegralOrEnumerationType()) {
- if (!BaseType.isConstQualified()) {
+ // In OpenCL, if a variable is in the constant address space, it is a const value.
+ if (!(BaseType.isConstQualified() ||
+ (Info.getLangOpts().OpenCL &&
+ BaseType.getAddressSpace() == LangAS::opencl_constant))) {
if (Info.getLangOpts().CPlusPlus) {
- Info.Diag(E, diag::note_constexpr_ltor_non_const_int, 1) << VD;
+ Info.FFDiag(E, diag::note_constexpr_ltor_non_const_int, 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
- Info.Diag(E);
+ Info.FFDiag(E);
}
return CompleteObject();
}
@@ -2690,11 +2773,15 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
}
} else {
// FIXME: Allow folding of values of any literal type in all languages.
- if (Info.getLangOpts().CPlusPlus11) {
- Info.Diag(E, diag::note_constexpr_ltor_non_constexpr, 1) << VD;
+ if (Info.checkingPotentialConstantExpression() &&
+ VD->getType().isConstQualified() && !VD->hasDefinition(Info.Ctx)) {
+ // The definition of this variable could be constexpr. We can't
+ // access it right now, but may be able to in the future.
+ } else if (Info.getLangOpts().CPlusPlus11) {
+ Info.FFDiag(E, diag::note_constexpr_ltor_non_constexpr, 1) << VD;
Info.Note(VD->getLocation(), diag::note_declared_at);
} else {
- Info.Diag(E);
+ Info.FFDiag(E);
}
return CompleteObject();
}
@@ -2730,7 +2817,7 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
if (!(BaseType.isConstQualified() &&
BaseType->isIntegralOrEnumerationType()) &&
!(VD && VD->getCanonicalDecl() == ED->getCanonicalDecl())) {
- Info.Diag(E, diag::note_constexpr_access_static_temporary, 1) << AK;
+ Info.FFDiag(E, diag::note_constexpr_access_static_temporary, 1) << AK;
Info.Note(MTE->getExprLoc(), diag::note_constexpr_temporary_here);
return CompleteObject();
}
@@ -2738,7 +2825,7 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
BaseVal = Info.Ctx.getMaterializedTemporaryValue(MTE, false);
assert(BaseVal && "got reference to unevaluated temporary");
} else {
- Info.Diag(E);
+ Info.FFDiag(E);
return CompleteObject();
}
} else {
@@ -2749,11 +2836,11 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
// Volatile temporary objects cannot be accessed in constant expressions.
if (BaseType.isVolatileQualified()) {
if (Info.getLangOpts().CPlusPlus) {
- Info.Diag(E, diag::note_constexpr_access_volatile_obj, 1)
+ Info.FFDiag(E, diag::note_constexpr_access_volatile_obj, 1)
<< AK << 0;
Info.Note(Base->getExprLoc(), diag::note_constexpr_temporary_here);
} else {
- Info.Diag(E);
+ Info.FFDiag(E);
}
return CompleteObject();
}
@@ -2769,12 +2856,13 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
}
// In C++1y, we can't safely access any mutable state when we might be
- // evaluating after an unmodeled side effect or an evaluation failure.
+ // evaluating after an unmodeled side effect.
//
// FIXME: Not all local state is mutable. Allow local constant subobjects
// to be read here (but take care with 'mutable' fields).
- if (Frame && Info.getLangOpts().CPlusPlus14 &&
- (Info.EvalStatus.HasSideEffects || Info.keepEvaluatingAfterFailure()))
+ if ((Frame && Info.getLangOpts().CPlusPlus14 &&
+ Info.EvalStatus.HasSideEffects) ||
+ (AK != AK_Read && Info.IsSpeculativelyEvaluating))
return CompleteObject();
return CompleteObject(BaseVal, BaseType);
@@ -2806,7 +2894,7 @@ static bool handleLValueToRValueConversion(EvalInfo &Info, const Expr *Conv,
// an ICE in C, so this only matters for fold.
assert(!Info.getLangOpts().CPlusPlus && "lvalue compound literal in c++?");
if (Type.isVolatileQualified()) {
- Info.Diag(Conv);
+ Info.FFDiag(Conv);
return false;
}
APValue Lit;
@@ -2835,7 +2923,7 @@ static bool handleAssignment(EvalInfo &Info, const Expr *E, const LValue &LVal,
return false;
if (!Info.getLangOpts().CPlusPlus14) {
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -2863,7 +2951,7 @@ struct CompoundAssignSubobjectHandler {
bool checkConst(QualType QT) {
// Assigning to a const object has undefined behavior.
if (QT.isConstQualified()) {
- Info.Diag(E, diag::note_constexpr_modify_const_type) << QT;
+ Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT;
return false;
}
return true;
@@ -2879,13 +2967,13 @@ struct CompoundAssignSubobjectHandler {
case APValue::ComplexInt:
case APValue::ComplexFloat:
// FIXME: Implement complex compound assignment.
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
case APValue::LValue:
return foundPointer(Subobj, SubobjType);
default:
// FIXME: can this happen?
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
}
@@ -2896,7 +2984,7 @@ struct CompoundAssignSubobjectHandler {
if (!SubobjType->isIntegerType() || !RHS.isInt()) {
// We don't support compound assignment on integer-cast-to-pointer
// values.
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -2924,7 +3012,7 @@ struct CompoundAssignSubobjectHandler {
if (PointeeType.isNull() || !RHS.isInt() ||
(Opcode != BO_Add && Opcode != BO_Sub)) {
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -2956,7 +3044,7 @@ static bool handleCompoundAssignment(
return false;
if (!Info.getLangOpts().CPlusPlus14) {
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -2978,7 +3066,7 @@ struct IncDecSubobjectHandler {
bool checkConst(QualType QT) {
// Assigning to a const object has undefined behavior.
if (QT.isConstQualified()) {
- Info.Diag(E, diag::note_constexpr_modify_const_type) << QT;
+ Info.FFDiag(E, diag::note_constexpr_modify_const_type) << QT;
return false;
}
return true;
@@ -3010,7 +3098,7 @@ struct IncDecSubobjectHandler {
return foundPointer(Subobj, SubobjType);
default:
// FIXME: can this happen?
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
}
@@ -3021,7 +3109,7 @@ struct IncDecSubobjectHandler {
if (!SubobjType->isIntegerType()) {
// We don't support increment / decrement on integer-cast-to-pointer
// values.
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -3080,7 +3168,7 @@ struct IncDecSubobjectHandler {
if (const PointerType *PT = SubobjType->getAs<PointerType>())
PointeeType = PT->getPointeeType();
else {
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -3105,7 +3193,7 @@ static bool handleIncDec(EvalInfo &Info, const Expr *E, const LValue &LVal,
return false;
if (!Info.getLangOpts().CPlusPlus14) {
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
@@ -3127,7 +3215,7 @@ static bool EvaluateObjectArgument(EvalInfo &Info, const Expr *Object,
if (Object->getType()->isLiteralType(Info.Ctx))
return EvaluateTemporary(Object, This, Info);
- Info.Diag(Object, diag::note_constexpr_nonliteral) << Object->getType();
+ Info.FFDiag(Object, diag::note_constexpr_nonliteral) << Object->getType();
return false;
}
@@ -3155,7 +3243,7 @@ static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
// member value, the behavior is undefined.
if (!MemPtr.getDecl()) {
// FIXME: Specific diagnostic.
- Info.Diag(RHS);
+ Info.FFDiag(RHS);
return nullptr;
}
@@ -3165,7 +3253,7 @@ static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
// derived-to-base path for the member pointer.
if (LV.Designator.MostDerivedPathLength + MemPtr.Path.size() >
LV.Designator.Entries.size()) {
- Info.Diag(RHS);
+ Info.FFDiag(RHS);
return nullptr;
}
unsigned PathLengthToMember =
@@ -3175,7 +3263,7 @@ static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
LV.Designator.Entries[PathLengthToMember + I]);
const CXXRecordDecl *MPDecl = MemPtr.Path[I];
if (LVDecl->getCanonicalDecl() != MPDecl->getCanonicalDecl()) {
- Info.Diag(RHS);
+ Info.FFDiag(RHS);
return nullptr;
}
}
@@ -3231,7 +3319,7 @@ static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info,
assert(BO->getOpcode() == BO_PtrMemD || BO->getOpcode() == BO_PtrMemI);
if (!EvaluateObjectArgument(Info, BO->getLHS(), LV)) {
- if (Info.keepEvaluatingAfterFailure()) {
+ if (Info.noteFailure()) {
MemberPtr MemPtr;
EvaluateMemberPointer(BO->getRHS(), MemPtr, Info);
}
@@ -3309,7 +3397,7 @@ static bool EvaluateDecl(EvalInfo &Info, const Decl *D) {
const Expr *InitE = VD->getInit();
if (!InitE) {
- Info.Diag(D->getLocStart(), diag::note_constexpr_uninitialized)
+ Info.FFDiag(D->getLocStart(), diag::note_constexpr_uninitialized)
<< false << VD->getType();
Val = APValue();
return false;
@@ -3338,6 +3426,7 @@ static bool EvaluateCond(EvalInfo &Info, const VarDecl *CondDecl,
return EvaluateAsBooleanCondition(Cond, Result, Info);
}
+namespace {
/// \brief A location where the result (returned value) of evaluating a
/// statement should be stored.
struct StmtResult {
@@ -3346,6 +3435,7 @@ struct StmtResult {
/// The location containing the result, if any (used to support RVO).
const LValue *Slot;
};
+}
static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
const Stmt *S,
@@ -3379,6 +3469,11 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info,
APSInt Value;
{
FullExpressionRAII Scope(Info);
+ if (const Stmt *Init = SS->getInit()) {
+ EvalStmtResult ESR = EvaluateStmt(Result, Info, Init);
+ if (ESR != ESR_Succeeded)
+ return ESR;
+ }
if (SS->getConditionVariable() &&
!EvaluateDecl(Info, SS->getConditionVariable()))
return ESR_Failed;
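A minimal sketch of the C++1z switch init-statement this hunk begins evaluating (the analogous change for if statements appears in a later hunk):

    constexpr int classify(int v) {
      switch (int r = v % 3; r) {  // init-statement runs before the condition
      case 0: return 0;
      default: return r;
      }
    }
    static_assert(classify(7) == 1, "");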
@@ -3421,7 +3516,7 @@ static EvalStmtResult EvaluateSwitch(StmtResult &Result, EvalInfo &Info,
case ESR_CaseNotFound:
// This can only happen if the switch case is nested within a statement
// expression. We have no intention of supporting that.
- Info.Diag(Found->getLocStart(), diag::note_constexpr_stmt_expr_unsupported);
+ Info.FFDiag(Found->getLocStart(), diag::note_constexpr_stmt_expr_unsupported);
return ESR_Failed;
}
llvm_unreachable("Invalid EvalStmtResult!");
@@ -3512,7 +3607,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
return ESR_Succeeded;
}
- Info.Diag(S->getLocStart());
+ Info.FFDiag(S->getLocStart());
return ESR_Failed;
case Stmt::NullStmtClass:
@@ -3525,7 +3620,7 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
// FIXME: This isn't quite right; if we're performing aggregate
// initialization, each braced subexpression is its own full-expression.
FullExpressionRAII Scope(Info);
- if (!EvaluateDecl(Info, DclIt) && !Info.keepEvaluatingAfterFailure())
+ if (!EvaluateDecl(Info, DclIt) && !Info.noteFailure())
return ESR_Failed;
}
return ESR_Succeeded;
@@ -3561,6 +3656,11 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
// Evaluate the condition, as either a var decl or as an expression.
BlockScopeRAII Scope(Info);
+ if (const Stmt *Init = IS->getInit()) {
+ EvalStmtResult ESR = EvaluateStmt(Result, Info, Init);
+ if (ESR != ESR_Succeeded)
+ return ESR;
+ }
bool Cond;
if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(), Cond))
return ESR_Failed;
@@ -3647,7 +3747,10 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
return ESR;
// Create the __begin and __end iterators.
- ESR = EvaluateStmt(Result, Info, FS->getBeginEndStmt());
+ ESR = EvaluateStmt(Result, Info, FS->getBeginStmt());
+ if (ESR != ESR_Succeeded)
+ return ESR;
+ ESR = EvaluateStmt(Result, Info, FS->getEndStmt());
if (ESR != ESR_Succeeded)
return ESR;
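The AST now models the range-for boilerplate as two separate declarations, so __begin and __end are evaluated as distinct statements. A sketch of a loop exercising this path:

    constexpr int arr[3] = {1, 2, 3};
    constexpr int sum() {
      int s = 0;
      for (int v : arr)  // __begin and __end now come from two statements
        s += v;
      return s;
    }
    static_assert(sum() == 6, "");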
@@ -3736,7 +3839,8 @@ static bool CheckTrivialDefaultConstructor(EvalInfo &Info, SourceLocation Loc,
/// expression.
static bool CheckConstexprFunction(EvalInfo &Info, SourceLocation CallLoc,
const FunctionDecl *Declaration,
- const FunctionDecl *Definition) {
+ const FunctionDecl *Definition,
+ const Stmt *Body) {
// Potential constant expressions can contain calls to declared, but not yet
// defined, constexpr functions.
if (Info.checkingPotentialConstantExpression() && !Definition &&
@@ -3749,19 +3853,34 @@ static bool CheckConstexprFunction(EvalInfo &Info, SourceLocation CallLoc,
return false;
// Can we evaluate this function call?
- if (Definition && Definition->isConstexpr() && !Definition->isInvalidDecl())
+ if (Definition && Definition->isConstexpr() &&
+ !Definition->isInvalidDecl() && Body)
return true;
if (Info.getLangOpts().CPlusPlus11) {
const FunctionDecl *DiagDecl = Definition ? Definition : Declaration;
- // FIXME: If DiagDecl is an implicitly-declared special member function, we
- // should be much more explicit about why it's not constexpr.
- Info.Diag(CallLoc, diag::note_constexpr_invalid_function, 1)
- << DiagDecl->isConstexpr() << isa<CXXConstructorDecl>(DiagDecl)
- << DiagDecl;
+
+ // If this function is not constexpr because it is an inherited
+ // non-constexpr constructor, diagnose that directly.
+ auto *CD = dyn_cast<CXXConstructorDecl>(DiagDecl);
+ if (CD && CD->isInheritingConstructor()) {
+ auto *Inherited = CD->getInheritedConstructor().getConstructor();
+ if (!Inherited->isConstexpr())
+ DiagDecl = CD = Inherited;
+ }
+
+ // FIXME: If DiagDecl is an implicitly-declared special member function
+ // or an inheriting constructor, we should be much more explicit about why
+ // it's not constexpr.
+ if (CD && CD->isInheritingConstructor())
+ Info.FFDiag(CallLoc, diag::note_constexpr_invalid_inhctor, 1)
+ << CD->getInheritedConstructor().getConstructor()->getParent();
+ else
+ Info.FFDiag(CallLoc, diag::note_constexpr_invalid_function, 1)
+ << DiagDecl->isConstexpr() << (bool)CD << DiagDecl;
Info.Note(DiagDecl->getLocation(), diag::note_declared_at);
} else {
- Info.Diag(CallLoc, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(CallLoc, diag::note_invalid_subexpr_in_const_expr);
}
return false;
}
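A sketch of the case the new note targets; the diagnostic can now name the base class whose constructor was inherited instead of pointing at a synthesized declaration:

    struct B { B(int) {} };        // not constexpr
    struct D : B { using B::B; };  // D(int) is an inheriting constructor
    constexpr D d(0);              // error: the note now refers to 'B'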
@@ -3795,7 +3914,7 @@ static bool EvaluateArgs(ArrayRef<const Expr*> Args, ArgVector &ArgValues,
if (!Evaluate(ArgValues[I - Args.begin()], Info, *I)) {
// If we're checking for a potential constant expression, evaluate all
// initializers even if some of them fail.
- if (!Info.keepEvaluatingAfterFailure())
+ if (!Info.noteFailure())
return false;
Success = false;
}
@@ -3848,37 +3967,34 @@ static bool HandleFunctionCall(SourceLocation CallLoc,
if (ESR == ESR_Succeeded) {
if (Callee->getReturnType()->isVoidType())
return true;
- Info.Diag(Callee->getLocEnd(), diag::note_constexpr_no_return);
+ Info.FFDiag(Callee->getLocEnd(), diag::note_constexpr_no_return);
}
return ESR == ESR_Returned;
}
/// Evaluate a constructor call.
-static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This,
- ArrayRef<const Expr*> Args,
+static bool HandleConstructorCall(const Expr *E, const LValue &This,
+ APValue *ArgValues,
const CXXConstructorDecl *Definition,
EvalInfo &Info, APValue &Result) {
- ArgVector ArgValues(Args.size());
- if (!EvaluateArgs(Args, ArgValues, Info))
- return false;
-
+ SourceLocation CallLoc = E->getExprLoc();
if (!Info.CheckCallLimit(CallLoc))
return false;
const CXXRecordDecl *RD = Definition->getParent();
if (RD->getNumVBases()) {
- Info.Diag(CallLoc, diag::note_constexpr_virtual_base) << RD;
+ Info.FFDiag(CallLoc, diag::note_constexpr_virtual_base) << RD;
return false;
}
- CallStackFrame Frame(Info, CallLoc, Definition, &This, ArgValues.data());
+ CallStackFrame Frame(Info, CallLoc, Definition, &This, ArgValues);
// FIXME: Creating an APValue just to hold a nonexistent return value is
// wasteful.
APValue RetVal;
StmtResult Ret = {RetVal, nullptr};
- // If it's a delegating constructor, just delegate.
+ // If it's a delegating constructor, delegate.
if (Definition->isDelegatingConstructor()) {
CXXConstructorDecl::init_const_iterator I = Definition->init_begin();
{
@@ -3902,8 +4018,9 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This,
(Definition->isTrivial() && hasFields(Definition->getParent())))) {
LValue RHS;
RHS.setFrom(Info.Ctx, ArgValues[0]);
- return handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(),
- RHS, Result);
+ return handleLValueToRValueConversion(
+ Info, E, Definition->getParamDecl(0)->getType().getNonReferenceType(),
+ RHS, Result);
}
// Reserve space for the struct members.
@@ -3987,7 +4104,7 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This,
*Value, FD))) {
// If we're checking for a potential constant expression, evaluate all
// initializers even if some of them fail.
- if (!Info.keepEvaluatingAfterFailure())
+ if (!Info.noteFailure())
return false;
Success = false;
}
@@ -3997,6 +4114,18 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This,
EvaluateStmt(Ret, Info, Definition->getBody()) != ESR_Failed;
}
+static bool HandleConstructorCall(const Expr *E, const LValue &This,
+ ArrayRef<const Expr*> Args,
+ const CXXConstructorDecl *Definition,
+ EvalInfo &Info, APValue &Result) {
+ ArgVector ArgValues(Args.size());
+ if (!EvaluateArgs(Args, ArgValues, Info))
+ return false;
+
+ return HandleConstructorCall(E, This, ArgValues.data(), Definition,
+ Info, Result);
+}
+
//===----------------------------------------------------------------------===//
// Generic Evaluation
//===----------------------------------------------------------------------===//
@@ -4022,14 +4151,16 @@ private:
assert(Info.checkingPotentialConstantExpression());
// Speculatively evaluate both arms.
+ SmallVector<PartialDiagnosticAt, 8> Diag;
{
- SmallVector<PartialDiagnosticAt, 8> Diag;
SpeculativeEvaluationRAII Speculate(Info, &Diag);
-
StmtVisitorTy::Visit(E->getFalseExpr());
if (Diag.empty())
return;
+ }
+ {
+ SpeculativeEvaluationRAII Speculate(Info, &Diag);
Diag.clear();
StmtVisitorTy::Visit(E->getTrueExpr());
if (Diag.empty())
@@ -4044,7 +4175,7 @@ private:
bool HandleConditionalOperator(const ConditionalOperator *E) {
bool BoolResult;
if (!EvaluateAsBooleanCondition(E->getCond(), BoolResult, Info)) {
- if (Info.checkingPotentialConstantExpression())
+ if (Info.checkingPotentialConstantExpression() && Info.noteFailure())
CheckPotentialConstantConditional(E);
return false;
}
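Both arms must be tried speculatively because a conditional can be potentially constant even when one arm can never fold; the classic sketch:

    constexpr int f(bool b) { return b ? 1 : throw 0; }
    static_assert(f(true) == 1, "");  // the throw arm fails only speculatively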
@@ -4072,7 +4203,7 @@ public:
/// Report an evaluation error. This should only be called when an error is
/// first discovered. When propagating an error, just return false.
bool Error(const Expr *E, diag::kind D) {
- Info.Diag(E, D);
+ Info.FFDiag(E, D);
return false;
}
bool Error(const Expr *E) {
@@ -4275,7 +4406,7 @@ public:
const FunctionDecl *Definition = nullptr;
Stmt *Body = FD->getBody(Definition);
- if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition) ||
+ if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body) ||
!HandleFunctionCall(E->getExprLoc(), Definition, This, Args, Body, Info,
Result, ResultSlot))
return false;
@@ -4397,7 +4528,7 @@ public:
if (BI + 1 == BE) {
const Expr *FinalExpr = dyn_cast<Expr>(*BI);
if (!FinalExpr) {
- Info.Diag((*BI)->getLocStart(),
+ Info.FFDiag((*BI)->getLocStart(),
diag::note_constexpr_stmt_expr_unsupported);
return false;
}
@@ -4412,7 +4543,7 @@ public:
// 'break', or 'continue', it would be nice to propagate that to
// the outer statement evaluation rather than bailing out.
if (ESR != ESR_Failed)
- Info.Diag((*BI)->getLocStart(),
+ Info.FFDiag((*BI)->getLocStart(),
diag::note_constexpr_stmt_expr_unsupported);
return false;
}
@@ -4425,6 +4556,15 @@ public:
void VisitIgnoredValue(const Expr *E) {
EvaluateIgnoredValue(Info, E);
}
+
+ /// Potentially visit a MemberExpr's base expression.
+ void VisitIgnoredBaseExpression(const Expr *E) {
+ // While MSVC doesn't evaluate the base expression, it does diagnose the
+ // presence of side-effecting behavior.
+ if (Info.getLangOpts().MSVCCompat && !E->HasSideEffects(Info.Ctx))
+ return;
+ VisitIgnoredValue(E);
+ }
};
}
@@ -4651,7 +4791,7 @@ bool LValueExprEvaluator::VisitVarDecl(const Expr *E, const VarDecl *VD) {
return false;
if (V->isUninit()) {
if (!Info.checkingPotentialConstantExpression())
- Info.Diag(E, diag::note_constexpr_use_uninit_reference);
+ Info.FFDiag(E, diag::note_constexpr_use_uninit_reference);
return false;
}
return Success(*V, E);
@@ -4735,7 +4875,7 @@ bool LValueExprEvaluator::VisitCXXTypeidExpr(const CXXTypeidExpr *E) {
if (!E->isPotentiallyEvaluated())
return Success(E);
- Info.Diag(E, diag::note_constexpr_typeid_polymorphic)
+ Info.FFDiag(E, diag::note_constexpr_typeid_polymorphic)
<< E->getExprOperand()->getType()
<< E->getExprOperand()->getSourceRange();
return false;
@@ -4748,14 +4888,14 @@ bool LValueExprEvaluator::VisitCXXUuidofExpr(const CXXUuidofExpr *E) {
bool LValueExprEvaluator::VisitMemberExpr(const MemberExpr *E) {
// Handle static data members.
if (const VarDecl *VD = dyn_cast<VarDecl>(E->getMemberDecl())) {
- VisitIgnoredValue(E->getBase());
+ VisitIgnoredBaseExpression(E->getBase());
return VisitVarDecl(E, VD);
}
// Handle static member functions.
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(E->getMemberDecl())) {
if (MD->isStatic()) {
- VisitIgnoredValue(E->getBase());
+ VisitIgnoredBaseExpression(E->getBase());
return Success(MD);
}
}
@@ -4823,7 +4963,7 @@ bool LValueExprEvaluator::VisitCompoundAssignOperator(
// The overall lvalue result is the result of evaluating the LHS.
if (!this->Visit(CAO->getLHS())) {
- if (Info.keepEvaluatingAfterFailure())
+ if (Info.noteFailure())
Evaluate(RHS, this->Info, CAO->getRHS());
return false;
}
@@ -4844,7 +4984,7 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) {
APValue NewVal;
if (!this->Visit(E->getLHS())) {
- if (Info.keepEvaluatingAfterFailure())
+ if (Info.noteFailure())
Evaluate(NewVal, this->Info, E->getRHS());
return false;
}
@@ -4903,9 +5043,9 @@ public:
return false;
if (!Info.CurrentCall->This) {
if (Info.getLangOpts().CPlusPlus11)
- Info.Diag(E, diag::note_constexpr_this) << E->isImplicit();
+ Info.FFDiag(E, diag::note_constexpr_this) << E->isImplicit();
else
- Info.Diag(E);
+ Info.FFDiag(E);
return false;
}
Result = *Info.CurrentCall->This;
@@ -4932,7 +5072,7 @@ bool PointerExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
std::swap(PExp, IExp);
bool EvalPtrOK = EvaluatePointer(PExp, Result, Info);
- if (!EvalPtrOK && !Info.keepEvaluatingAfterFailure())
+ if (!EvalPtrOK && !Info.noteFailure())
return false;
llvm::APSInt Offset;
@@ -5135,7 +5275,7 @@ bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
// The offset must also have the correct alignment.
- if (OffsetResult.Offset.RoundUpToAlignment(Align) != OffsetResult.Offset) {
+ if (OffsetResult.Offset.alignTo(Align) != OffsetResult.Offset) {
Result.Designator.setInvalid();
APSInt Offset(64, false);
Offset = OffsetResult.Offset.getQuantity();
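RoundUpToAlignment was renamed to alignTo in LLVM with unchanged semantics: round the offset up to the next multiple of the alignment. Assuming CharUnits values:

    CharUnits Offset = CharUnits::fromQuantity(13);
    CharUnits Align  = CharUnits::fromQuantity(8);
    bool Misaligned = Offset.alignTo(Align) != Offset;  // 16 != 13, so true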
@@ -5265,14 +5405,21 @@ namespace {
Result = V;
return true;
}
- bool ZeroInitialization(const Expr *E);
+ bool ZeroInitialization(const Expr *E) {
+ return ZeroInitialization(E, E->getType());
+ }
+ bool ZeroInitialization(const Expr *E, QualType T);
bool VisitCallExpr(const CallExpr *E) {
return handleCallExpr(E, Result, &This);
}
bool VisitCastExpr(const CastExpr *E);
bool VisitInitListExpr(const InitListExpr *E);
- bool VisitCXXConstructExpr(const CXXConstructExpr *E);
+ bool VisitCXXConstructExpr(const CXXConstructExpr *E) {
+ return VisitCXXConstructExpr(E, E->getType());
+ }
+ bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
+ bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T);
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
};
}
@@ -5327,8 +5474,8 @@ static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E,
return true;
}
-bool RecordExprEvaluator::ZeroInitialization(const Expr *E) {
- const RecordDecl *RD = E->getType()->castAs<RecordType>()->getDecl();
+bool RecordExprEvaluator::ZeroInitialization(const Expr *E, QualType T) {
+ const RecordDecl *RD = T->castAs<RecordType>()->getDecl();
if (RD->isInvalidDecl()) return false;
if (RD->isUnion()) {
// C++11 [dcl.init]p5: If T is a (possibly cv-qualified) union type, the
@@ -5348,7 +5495,7 @@ bool RecordExprEvaluator::ZeroInitialization(const Expr *E) {
}
if (isa<CXXRecordDecl>(RD) && cast<CXXRecordDecl>(RD)->getNumVBases()) {
- Info.Diag(E, diag::note_constexpr_virtual_base) << RD;
+ Info.FFDiag(E, diag::note_constexpr_virtual_base) << RD;
return false;
}
@@ -5417,12 +5564,34 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, InitExpr);
}
- assert((!isa<CXXRecordDecl>(RD) || !cast<CXXRecordDecl>(RD)->getNumBases()) &&
- "initializer list for class with base classes");
- Result = APValue(APValue::UninitStruct(), 0,
- std::distance(RD->field_begin(), RD->field_end()));
+ auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
+ if (Result.isUninit())
+ Result = APValue(APValue::UninitStruct(), CXXRD ? CXXRD->getNumBases() : 0,
+ std::distance(RD->field_begin(), RD->field_end()));
unsigned ElementNo = 0;
bool Success = true;
+
+ // Initialize base classes.
+ if (CXXRD) {
+ for (const auto &Base : CXXRD->bases()) {
+ assert(ElementNo < E->getNumInits() && "missing init for base class");
+ const Expr *Init = E->getInit(ElementNo);
+
+ LValue Subobject = This;
+ if (!HandleLValueBase(Info, Init, Subobject, CXXRD, &Base))
+ return false;
+
+ APValue &FieldVal = Result.getStructBase(ElementNo);
+ if (!EvaluateInPlace(FieldVal, Info, Subobject, Init)) {
+ if (!Info.noteFailure())
+ return false;
+ Success = false;
+ }
+ ++ElementNo;
+ }
+ }
+
+ // Initialize members.
for (const auto *Field : RD->fields()) {
// Anonymous bit-fields are not considered members of the class for
// purposes of aggregate initialization.
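The new base-class loop above handles the C++1z extension of aggregate initialization to classes with bases; a sketch of what now constant-evaluates:

    struct A { int a; };
    struct B : A { int b; };   // aggregate in C++1z despite the base
    constexpr B b = {{1}, 2};  // element 0 initializes the A subobject
    static_assert(b.a == 1 && b.b == 2, "");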
@@ -5452,7 +5621,7 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
if (!EvaluateInPlace(FieldVal, Info, Subobject, Init) ||
(Field->isBitField() && !truncateBitfieldValue(Info, Init,
FieldVal, Field))) {
- if (!Info.keepEvaluatingAfterFailure())
+ if (!Info.noteFailure())
return false;
Success = false;
}
@@ -5461,7 +5630,10 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
return Success;
}
-bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
+bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E,
+ QualType T) {
+ // Note that E's type is not necessarily the type of our class here; we might
+ // be initializing an array element instead.
const CXXConstructorDecl *FD = E->getConstructor();
if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false;
@@ -5479,13 +5651,13 @@ bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
// lifetimes of all the base subobjects (there can be no data member
// subobjects in this case) per [basic.life]p1.
// Either way, ZeroInitialization is appropriate.
- return ZeroInitialization(E);
+ return ZeroInitialization(E, T);
}
const FunctionDecl *Definition = nullptr;
- FD->getBody(Definition);
+ auto Body = FD->getBody(Definition);
- if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition))
+ if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body))
return false;
// Avoid materializing a temporary for an elidable copy/move constructor.
@@ -5494,11 +5666,33 @@ bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
= dyn_cast<MaterializeTemporaryExpr>(E->getArg(0)))
return Visit(ME->GetTemporaryExpr());
- if (ZeroInit && !ZeroInitialization(E))
+ if (ZeroInit && !ZeroInitialization(E, T))
return false;
auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs());
- return HandleConstructorCall(E->getExprLoc(), This, Args,
+ return HandleConstructorCall(E, This, Args,
+ cast<CXXConstructorDecl>(Definition), Info,
+ Result);
+}
+
+bool RecordExprEvaluator::VisitCXXInheritedCtorInitExpr(
+ const CXXInheritedCtorInitExpr *E) {
+ if (!Info.CurrentCall) {
+ assert(Info.checkingPotentialConstantExpression());
+ return false;
+ }
+
+ const CXXConstructorDecl *FD = E->getConstructor();
+ if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl())
+ return false;
+
+ const FunctionDecl *Definition = nullptr;
+ auto Body = FD->getBody(Definition);
+
+ if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition, Body))
+ return false;
+
+ return HandleConstructorCall(E, This, Info.CurrentCall->Arguments,
cast<CXXConstructorDecl>(Definition), Info,
Result);
}
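A CXXInheritedCtorInitExpr carries no argument expressions of its own, which is why the APValue* overload of HandleConstructorCall above is needed: the inherited constructor simply reuses the current frame's already-evaluated arguments. A sketch of the construct being evaluated:

    struct B { constexpr B(int x) : v(x) {} int v; };
    struct D : B { using B::B; };  // D(int) forwards its argument to B(int)
    static_assert(D(3).v == 3, "");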
@@ -5902,7 +6096,7 @@ bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
Info, Subobject, Init) ||
!HandleLValueArrayAdjustment(Info, Init, Subobject,
CAT->getElementType(), 1)) {
- if (!Info.keepEvaluatingAfterFailure())
+ if (!Info.noteFailure())
return false;
Success = false;
}
@@ -5958,34 +6152,8 @@ bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E,
if (!Type->isRecordType())
return Error(E);
- const CXXConstructorDecl *FD = E->getConstructor();
-
- bool ZeroInit = E->requiresZeroInitialization();
- if (CheckTrivialDefaultConstructor(Info, E->getExprLoc(), FD, ZeroInit)) {
- if (HadZeroInit)
- return true;
-
- // See RecordExprEvaluator::VisitCXXConstructExpr for explanation.
- ImplicitValueInitExpr VIE(Type);
- return EvaluateInPlace(*Value, Info, Subobject, &VIE);
- }
-
- const FunctionDecl *Definition = nullptr;
- FD->getBody(Definition);
-
- if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition))
- return false;
-
- if (ZeroInit && !HadZeroInit) {
- ImplicitValueInitExpr VIE(Type);
- if (!EvaluateInPlace(*Value, Info, Subobject, &VIE))
- return false;
- }
-
- auto Args = llvm::makeArrayRef(E->getArgs(), E->getNumArgs());
- return HandleConstructorCall(E->getExprLoc(), Subobject, Args,
- cast<CXXConstructorDecl>(Definition),
- Info, *Value);
+ return RecordExprEvaluator(Info, Subobject, *Value)
+ .VisitCXXConstructExpr(E, Type);
}
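Array-element construction now funnels through RecordExprEvaluator instead of duplicating the trivial-constructor and zero-initialization logic; for example (a sketch):

    struct P { constexpr P() : x(7) {} int x; };
    constexpr P ps[2] = {};  // each element takes the shared record path
    static_assert(ps[0].x == 7 && ps[1].x == 7, "");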
//===----------------------------------------------------------------------===//
@@ -6076,7 +6244,7 @@ public:
}
bool VisitMemberExpr(const MemberExpr *E) {
if (CheckReferencedDecl(E, E->getMemberDecl())) {
- VisitIgnoredValue(E->getBase());
+ VisitIgnoredBaseExpression(E->getBase());
return true;
}
@@ -6149,7 +6317,7 @@ static bool EvaluateInteger(const Expr *E, APSInt &Result, EvalInfo &Info) {
if (!Val.isInt()) {
// FIXME: It would be better to produce the diagnostic for casting
// a pointer to an integer.
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
Result = Val.getInt();
@@ -6185,7 +6353,8 @@ bool IntExprEvaluator::CheckReferencedDecl(const Expr* E, const Decl* D) {
/// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way
/// as GCC.
-static int EvaluateBuiltinClassifyType(const CallExpr *E) {
+static int EvaluateBuiltinClassifyType(const CallExpr *E,
+ const LangOptions &LangOpts) {
// The following enum mimics the values returned by GCC.
// FIXME: Does GCC differ between lvalue and rvalue references here?
enum gcc_type_class {
@@ -6205,37 +6374,123 @@ static int EvaluateBuiltinClassifyType(const CallExpr *E) {
if (E->getNumArgs() == 0)
return no_type_class;
- QualType ArgTy = E->getArg(0)->getType();
- if (ArgTy->isVoidType())
- return void_type_class;
- else if (ArgTy->isEnumeralType())
- return enumeral_type_class;
- else if (ArgTy->isBooleanType())
- return boolean_type_class;
- else if (ArgTy->isCharType())
- return string_type_class; // gcc doesn't appear to use char_type_class
- else if (ArgTy->isIntegerType())
- return integer_type_class;
- else if (ArgTy->isPointerType())
+ QualType CanTy = E->getArg(0)->getType().getCanonicalType();
+ const BuiltinType *BT = dyn_cast<BuiltinType>(CanTy);
+
+ switch (CanTy->getTypeClass()) {
+#define TYPE(ID, BASE)
+#define DEPENDENT_TYPE(ID, BASE) case Type::ID:
+#define NON_CANONICAL_TYPE(ID, BASE) case Type::ID:
+#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(ID, BASE) case Type::ID:
+#include "clang/AST/TypeNodes.def"
+ llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
+
+ case Type::Builtin:
+ switch (BT->getKind()) {
+#define BUILTIN_TYPE(ID, SINGLETON_ID)
+#define SIGNED_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: return integer_type_class;
+#define FLOATING_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: return real_type_class;
+#define PLACEHOLDER_TYPE(ID, SINGLETON_ID) case BuiltinType::ID: break;
+#include "clang/AST/BuiltinTypes.def"
+ case BuiltinType::Void:
+ return void_type_class;
+
+ case BuiltinType::Bool:
+ return boolean_type_class;
+
+ case BuiltinType::Char_U: // gcc doesn't appear to use char_type_class
+ case BuiltinType::UChar:
+ case BuiltinType::UShort:
+ case BuiltinType::UInt:
+ case BuiltinType::ULong:
+ case BuiltinType::ULongLong:
+ case BuiltinType::UInt128:
+ return integer_type_class;
+
+ case BuiltinType::NullPtr:
+ return pointer_type_class;
+
+ case BuiltinType::WChar_U:
+ case BuiltinType::Char16:
+ case BuiltinType::Char32:
+ case BuiltinType::ObjCId:
+ case BuiltinType::ObjCClass:
+ case BuiltinType::ObjCSel:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
+ case BuiltinType::OCLSampler:
+ case BuiltinType::OCLEvent:
+ case BuiltinType::OCLClkEvent:
+ case BuiltinType::OCLQueue:
+ case BuiltinType::OCLNDRange:
+ case BuiltinType::OCLReserveID:
+ case BuiltinType::Dependent:
+ llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
+ };
+
+ case Type::Enum:
+ return LangOpts.CPlusPlus ? enumeral_type_class : integer_type_class;
+ break;
+
+ case Type::Pointer:
return pointer_type_class;
- else if (ArgTy->isReferenceType())
- return reference_type_class;
- else if (ArgTy->isRealType())
- return real_type_class;
- else if (ArgTy->isComplexType())
+ break;
+
+ case Type::MemberPointer:
+ if (CanTy->isMemberDataPointerType())
+ return offset_type_class;
+ else {
+ // We expect member pointers to be either data or function pointers,
+ // nothing else.
+ assert(CanTy->isMemberFunctionPointerType());
+ return method_type_class;
+ }
+
+ case Type::Complex:
return complex_type_class;
- else if (ArgTy->isFunctionType())
- return function_type_class;
- else if (ArgTy->isStructureOrClassType())
- return record_type_class;
- else if (ArgTy->isUnionType())
- return union_type_class;
- else if (ArgTy->isArrayType())
- return array_type_class;
- else if (ArgTy->isUnionType())
- return union_type_class;
- else // FIXME: offset_type_class, method_type_class, & lang_type_class?
+
+ case Type::FunctionNoProto:
+ case Type::FunctionProto:
+ return LangOpts.CPlusPlus ? function_type_class : pointer_type_class;
+
+ case Type::Record:
+ if (const RecordType *RT = CanTy->getAs<RecordType>()) {
+ switch (RT->getDecl()->getTagKind()) {
+ case TagTypeKind::TTK_Struct:
+ case TagTypeKind::TTK_Class:
+ case TagTypeKind::TTK_Interface:
+ return record_type_class;
+
+ case TagTypeKind::TTK_Enum:
+ return LangOpts.CPlusPlus ? enumeral_type_class : integer_type_class;
+
+ case TagTypeKind::TTK_Union:
+ return union_type_class;
+ }
+ }
llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
+
+ case Type::ConstantArray:
+ case Type::VariableArray:
+ case Type::IncompleteArray:
+ return LangOpts.CPlusPlus ? array_type_class : pointer_type_class;
+
+ case Type::BlockPointer:
+ case Type::LValueReference:
+ case Type::RValueReference:
+ case Type::Vector:
+ case Type::ExtVector:
+ case Type::Auto:
+ case Type::ObjCObject:
+ case Type::ObjCInterface:
+ case Type::ObjCObjectPointer:
+ case Type::Pipe:
+ case Type::Atomic:
+ llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
+ }
+
+ llvm_unreachable("CallExpr::isBuiltinClassifyType(): unimplemented type");
}
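The rewrite makes the classification language-dependent, following GCC: enums, arrays, and functions classify differently in C and C++. A sketch (exact class values left out):

    enum E { e };
    E x = e;
    int k = __builtin_classify_type(x);  // enumeral_type_class in C++;
                                         // GCC's C mode reports integer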
/// EvaluateBuiltinConstantPForLValue - Determine the result of
@@ -6346,25 +6601,32 @@ static const Expr *ignorePointerCastsAndParens(const Expr *E) {
///
/// Please note: this function is specialized for how __builtin_object_size
/// views "objects".
+///
+/// If this encounters an invalid RecordDecl, it will always return true.
static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) {
assert(!LVal.Designator.Invalid);
- auto IsLastFieldDecl = [&Ctx](const FieldDecl *FD) {
- if (FD->getParent()->isUnion())
+ auto IsLastOrInvalidFieldDecl = [&Ctx](const FieldDecl *FD, bool &Invalid) {
+ const RecordDecl *Parent = FD->getParent();
+ Invalid = Parent->isInvalidDecl();
+ if (Invalid || Parent->isUnion())
return true;
- const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(FD->getParent());
+ const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(Parent);
return FD->getFieldIndex() + 1 == Layout.getFieldCount();
};
auto &Base = LVal.getLValueBase();
if (auto *ME = dyn_cast_or_null<MemberExpr>(Base.dyn_cast<const Expr *>())) {
if (auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
- if (!IsLastFieldDecl(FD))
- return false;
+ bool Invalid;
+ if (!IsLastOrInvalidFieldDecl(FD, Invalid))
+ return Invalid;
} else if (auto *IFD = dyn_cast<IndirectFieldDecl>(ME->getMemberDecl())) {
- for (auto *FD : IFD->chain())
- if (!IsLastFieldDecl(cast<FieldDecl>(FD)))
- return false;
+ for (auto *FD : IFD->chain()) {
+ bool Invalid;
+ if (!IsLastOrInvalidFieldDecl(cast<FieldDecl>(FD), Invalid))
+ return Invalid;
+ }
}
}
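This helper asks whether the designator names the last field at every level of the access path, which matters for idioms that address storage past a trailing member, e.g. (a sketch):

    struct Str { int n; char tail[1]; };  // trailing member used as a buffer
    unsigned long size1(Str *p) {
      // Whether 'tail' is the last field decides how a type-1
      // __builtin_object_size may treat accesses through it.
      return __builtin_object_size(p->tail, 1);
    }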
@@ -6387,8 +6649,9 @@ static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) {
return false;
BaseType = CT->getElementType();
} else if (auto *FD = getAsField(LVal.Designator.Entries[I])) {
- if (!IsLastFieldDecl(FD))
- return false;
+ bool Invalid;
+ if (!IsLastOrInvalidFieldDecl(FD, Invalid))
+ return Invalid;
BaseType = FD->getType();
} else {
assert(getAsBaseClass(LVal.Designator.Entries[I]) != nullptr &&
@@ -6607,7 +6870,7 @@ bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
case Builtin::BI__builtin_classify_type:
- return Success(EvaluateBuiltinClassifyType(E), E);
+ return Success(EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E);
// FIXME: BI__builtin_clrsb
// FIXME: BI__builtin_clrsbl
@@ -6899,23 +7162,14 @@ class DataRecursiveIntBinOpEvaluator {
Job() = default;
Job(Job &&J)
: E(J.E), LHSResult(J.LHSResult), Kind(J.Kind),
- StoredInfo(J.StoredInfo), OldEvalStatus(J.OldEvalStatus) {
- J.StoredInfo = nullptr;
- }
+ SpecEvalRAII(std::move(J.SpecEvalRAII)) {}
void startSpeculativeEval(EvalInfo &Info) {
- OldEvalStatus = Info.EvalStatus;
- Info.EvalStatus.Diag = nullptr;
- StoredInfo = &Info;
- }
- ~Job() {
- if (StoredInfo) {
- StoredInfo->EvalStatus = OldEvalStatus;
- }
+ SpecEvalRAII = SpeculativeEvaluationRAII(Info);
}
+
private:
- EvalInfo *StoredInfo = nullptr; // non-null if status changed.
- Expr::EvalStatus OldEvalStatus;
+ SpeculativeEvaluationRAII SpecEvalRAII;
};
SmallVector<Job, 16> Queue;
@@ -6935,7 +7189,9 @@ public:
static bool shouldEnqueue(const BinaryOperator *E) {
return E->getOpcode() == BO_Comma ||
E->isLogicalOp() ||
- (E->getLHS()->getType()->isIntegralOrEnumerationType() &&
+ (E->isRValue() &&
+ E->getType()->isIntegralOrEnumerationType() &&
+ E->getLHS()->getType()->isIntegralOrEnumerationType() &&
E->getRHS()->getType()->isIntegralOrEnumerationType());
}
@@ -7017,7 +7273,7 @@ bool DataRecursiveIntBinOpEvaluator::
LHSResult.Failed = true;
// Since we weren't able to evaluate the left hand side, it
- // must have had side effects.
+ // might have had side effects.
if (!Info.noteSideEffect())
return false;
@@ -7033,7 +7289,7 @@ bool DataRecursiveIntBinOpEvaluator::
assert(E->getLHS()->getType()->isIntegralOrEnumerationType() &&
E->getRHS()->getType()->isIntegralOrEnumerationType());
- if (LHSResult.Failed && !Info.keepEvaluatingAfterFailure())
+ if (LHSResult.Failed && !Info.noteFailure())
return false; // Ignore RHS;
return true;
@@ -7185,10 +7441,34 @@ void DataRecursiveIntBinOpEvaluator::process(EvalResult &Result) {
llvm_unreachable("Invalid Job::Kind!");
}
+namespace {
+/// Used when we determine that we should fail, but can keep evaluating prior to
+/// noting that we had a failure.
+class DelayedNoteFailureRAII {
+ EvalInfo &Info;
+ bool NoteFailure;
+
+public:
+ DelayedNoteFailureRAII(EvalInfo &Info, bool NoteFailure = true)
+ : Info(Info), NoteFailure(NoteFailure) {}
+ ~DelayedNoteFailureRAII() {
+ if (NoteFailure) {
+ bool ContinueAfterFailure = Info.noteFailure();
+ (void)ContinueAfterFailure;
+ assert(ContinueAfterFailure &&
+ "Shouldn't have kept evaluating on failure.");
+ }
+ }
+};
+}
+
bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
+ // We don't call noteFailure immediately because the assignment happens after
+ // we evaluate LHS and RHS.
if (!Info.keepEvaluatingAfterFailure() && E->isAssignmentOp())
return Error(E);
+ DelayedNoteFailureRAII MaybeNoteFailureLater(Info, E->isAssignmentOp());
if (DataRecursiveIntBinOpEvaluator::shouldEnqueue(E))
return DataRecursiveIntBinOpEvaluator(*this, Result).Traverse(E);
@@ -7211,7 +7491,7 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
} else {
LHSOK = EvaluateComplex(E->getLHS(), LHS, Info);
}
- if (!LHSOK && !Info.keepEvaluatingAfterFailure())
+ if (!LHSOK && !Info.noteFailure())
return false;
if (E->getRHS()->getType()->isRealFloatingType()) {
@@ -7259,7 +7539,7 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
APFloat RHS(0.0), LHS(0.0);
bool LHSOK = EvaluateFloat(E->getRHS(), RHS, Info);
- if (!LHSOK && !Info.keepEvaluatingAfterFailure())
+ if (!LHSOK && !Info.noteFailure())
return false;
if (!EvaluateFloat(E->getLHS(), LHS, Info) || !LHSOK)
@@ -7293,7 +7573,7 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
LValue LHSValue, RHSValue;
bool LHSOK = EvaluatePointer(E->getLHS(), LHSValue, Info);
- if (!LHSOK && !Info.keepEvaluatingAfterFailure())
+ if (!LHSOK && !Info.noteFailure())
return false;
if (!EvaluatePointer(E->getRHS(), RHSValue, Info) || !LHSOK)
@@ -7387,7 +7667,7 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
// C, array of zero length). Pointer subtraction in such cases has
// undefined behavior, so is not constant.
if (ElementSize.isZero()) {
- Info.Diag(E, diag::note_constexpr_pointer_subtraction_zero_size)
+ Info.FFDiag(E, diag::note_constexpr_pointer_subtraction_zero_size)
<< ElementType;
return false;
}
@@ -7510,7 +7790,7 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
MemberPtr LHSValue, RHSValue;
bool LHSOK = EvaluateMemberPointer(E->getLHS(), LHSValue, Info);
- if (!LHSOK && Info.keepEvaluatingAfterFailure())
+ if (!LHSOK && !Info.noteFailure())
return false;
if (!EvaluateMemberPointer(E->getRHS(), RHSValue, Info) || !LHSOK)
@@ -8082,7 +8362,7 @@ bool FloatExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
APFloat RHS(0.0);
bool LHSOK = EvaluateFloat(E->getLHS(), Result, Info);
- if (!LHSOK && !Info.keepEvaluatingAfterFailure())
+ if (!LHSOK && !Info.noteFailure())
return false;
return EvaluateFloat(E->getRHS(), RHS, Info) && LHSOK &&
handleFloatFloatBinOp(Info, E, Result, E->getOpcode(), RHS);
@@ -8359,7 +8639,7 @@ bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
} else {
LHSOK = Visit(E->getLHS());
}
- if (!LHSOK && !Info.keepEvaluatingAfterFailure())
+ if (!LHSOK && !Info.noteFailure())
return false;
ComplexValue RHS;
@@ -8513,12 +8793,14 @@ bool ComplexExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
APFloat MaxCD = maxnum(abs(C), abs(D));
if (MaxCD.isFinite()) {
DenomLogB = ilogb(MaxCD);
- C = scalbn(C, -DenomLogB);
- D = scalbn(D, -DenomLogB);
+ C = scalbn(C, -DenomLogB, APFloat::rmNearestTiesToEven);
+ D = scalbn(D, -DenomLogB, APFloat::rmNearestTiesToEven);
}
APFloat Denom = C * C + D * D;
- ResR = scalbn((A * C + B * D) / Denom, -DenomLogB);
- ResI = scalbn((B * C - A * D) / Denom, -DenomLogB);
+ ResR = scalbn((A * C + B * D) / Denom, -DenomLogB,
+ APFloat::rmNearestTiesToEven);
+ ResI = scalbn((B * C - A * D) / Denom, -DenomLogB,
+ APFloat::rmNearestTiesToEven);
if (ResR.isNaN() && ResI.isNaN()) {
if (Denom.isPosZero() && (!A.isNaN() || !B.isNaN())) {
ResR = APFloat::getInf(ResR.getSemantics(), C.isNegative()) * A;
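The LLVM scalbn API now requires an explicit rounding mode. Multiplying by a power of two is exact for finite values, so rmNearestTiesToEven cannot change the result here; for instance:

    APFloat V(3.5);
    V = scalbn(V, -2, APFloat::rmNearestTiesToEven);  // exactly 0.875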
@@ -8758,10 +9040,10 @@ static bool Evaluate(APValue &Result, EvalInfo &Info, const Expr *E) {
if (!EvaluateAtomic(E, Result, Info))
return false;
} else if (Info.getLangOpts().CPlusPlus11) {
- Info.Diag(E, diag::note_constexpr_nonliteral) << E->getType();
+ Info.FFDiag(E, diag::note_constexpr_nonliteral) << E->getType();
return false;
} else {
- Info.Diag(E, diag::note_invalid_subexpr_in_const_expr);
+ Info.FFDiag(E, diag::note_invalid_subexpr_in_const_expr);
return false;
}
@@ -8885,6 +9167,20 @@ bool Expr::EvaluateAsInt(APSInt &Result, const ASTContext &Ctx,
return true;
}
+bool Expr::EvaluateAsFloat(APFloat &Result, const ASTContext &Ctx,
+ SideEffectsKind AllowSideEffects) const {
+ if (!getType()->isRealFloatingType())
+ return false;
+
+ EvalResult ExprResult;
+ if (!EvaluateAsRValue(ExprResult, Ctx) || !ExprResult.Val.isFloat() ||
+ hasUnacceptableSideEffect(ExprResult, AllowSideEffects))
+ return false;
+
+ Result = ExprResult.Val.getFloat();
+ return true;
+}
+
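A sketch of the intended call pattern for the new entry point (caller names hypothetical):

    llvm::APFloat V(0.0);
    if (E->EvaluateAsFloat(V, Ctx))  // E: const Expr *, Ctx: ASTContext &
      foldToConstant(V);             // hypothetical consumer of the value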
bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx) const {
EvalInfo Info(Ctx, Result, EvalInfo::EM_ConstantFold);
@@ -9070,6 +9366,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
case Expr::TypoExprClass:
case Expr::DependentScopeDeclRefExprClass:
case Expr::CXXConstructExprClass:
+ case Expr::CXXInheritedCtorInitExprClass:
case Expr::CXXStdInitializerListExprClass:
case Expr::CXXBindTemporaryExprClass:
case Expr::ExprWithCleanupsClass:
@@ -9089,6 +9386,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
case Expr::ObjCPropertyRefExprClass:
case Expr::ObjCSubscriptRefExprClass:
case Expr::ObjCIsaExprClass:
+ case Expr::ObjCAvailabilityCheckExprClass:
case Expr::ShuffleVectorExprClass:
case Expr::ConvertVectorExprClass:
case Expr::BlockExprClass:
@@ -9533,17 +9831,17 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD,
ArrayRef<const Expr*> Args;
- SourceLocation Loc = FD->getLocation();
-
APValue Scratch;
if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) {
// Evaluate the call as a constant initializer, to allow the construction
// of objects of non-literal types.
Info.setEvaluatingDecl(This.getLValueBase(), Scratch);
- HandleConstructorCall(Loc, This, Args, CD, Info, Scratch);
- } else
+ HandleConstructorCall(&VIE, This, Args, CD, Info, Scratch);
+ } else {
+ SourceLocation Loc = FD->getLocation();
HandleFunctionCall(Loc, FD, (MD && MD->isInstance()) ? &This : nullptr,
Args, FD->getBody(), Info, Scratch, nullptr);
+ }
return Diags.empty();
}
diff --git a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp
index 3f6b682f238f..694fde317542 100644
--- a/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ItaniumMangle.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
@@ -66,8 +67,9 @@ static const DeclContext *getEffectiveDeclContext(const Decl *D) {
}
const DeclContext *DC = D->getDeclContext();
- if (const CapturedDecl *CD = dyn_cast<CapturedDecl>(DC))
- return getEffectiveDeclContext(CD);
+ if (isa<CapturedDecl>(DC) || isa<OMPDeclareReductionDecl>(DC)) {
+ return getEffectiveDeclContext(cast<Decl>(DC));
+ }
if (const auto *VD = dyn_cast<VarDecl>(D))
if (VD->isExternC())
@@ -77,7 +79,7 @@ static const DeclContext *getEffectiveDeclContext(const Decl *D) {
if (FD->isExternC())
return FD->getASTContext().getTranslationUnitDecl();
- return DC;
+ return DC->getRedeclContext();
}
static const DeclContext *getEffectiveParentContext(const DeclContext *DC) {
@@ -212,6 +214,12 @@ public:
class CXXNameMangler {
ItaniumMangleContextImpl &Context;
raw_ostream &Out;
+ bool NullOut = false;
+ /// In "DisableDerivedAbiTags" mode, derived ABI tags are not calculated.
+ /// This mode is used when the mangler creates another mangler recursively
+ /// to calculate ABI tags for the function return value or the variable
+ /// type. It is also required to avoid infinite recursion in some cases.
+ bool DisableDerivedAbiTags = false;
/// The "structor" is the top-level declaration being mangled, if
/// that's not a template specialization; otherwise it's the pattern
@@ -261,15 +269,126 @@ class CXXNameMangler {
} FunctionTypeDepth;
+ // abi_tag is a GCC attribute that takes one or more strings called "tags".
+ // The goal is to record against which version of a library an object was
+ // built, making backwards compatibility possible ("dual ABI").
+ // For more information see docs/ItaniumMangleAbiTags.rst.
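  // Illustration (a sketch, not part of this patch): given
  //   struct [[gnu::abi_tag("cxx11")]] S {};
  //   void f(S);
  // each tag is written after the <source-name> as B<length><tag>, so f
  // mangles roughly as _Z1f1SB5cxx11.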
+ typedef SmallVector<StringRef, 4> AbiTagList;
+
+ // State to gather all implicit and explicit tags used in a mangled name.
+ // An instance must always be alive while emitting any name so that the
+ // tags are tracked.
+ class AbiTagState final {
+ public:
+ explicit AbiTagState(AbiTagState *&Head) : LinkHead(Head) {
+ Parent = LinkHead;
+ LinkHead = this;
+ }
+
+ // No copy, no move.
+ AbiTagState(const AbiTagState &) = delete;
+ AbiTagState &operator=(const AbiTagState &) = delete;
+
+ ~AbiTagState() { pop(); }
+
+ void write(raw_ostream &Out, const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags) {
+ ND = cast<NamedDecl>(ND->getCanonicalDecl());
+ if (!isa<FunctionDecl>(ND) && !isa<VarDecl>(ND)) {
+ assert(
+ !AdditionalAbiTags &&
+ "only function and variables need a list of additional abi tags");
+ if (const auto *NS = dyn_cast<NamespaceDecl>(ND)) {
+ if (const auto *AbiTag = NS->getAttr<AbiTagAttr>()) {
+ UsedAbiTags.insert(UsedAbiTags.end(), AbiTag->tags().begin(),
+ AbiTag->tags().end());
+ }
+ // Don't emit abi tags for namespaces.
+ return;
+ }
+ }
+
+ AbiTagList TagList;
+ if (const auto *AbiTag = ND->getAttr<AbiTagAttr>()) {
+ UsedAbiTags.insert(UsedAbiTags.end(), AbiTag->tags().begin(),
+ AbiTag->tags().end());
+ TagList.insert(TagList.end(), AbiTag->tags().begin(),
+ AbiTag->tags().end());
+ }
+
+ if (AdditionalAbiTags) {
+ UsedAbiTags.insert(UsedAbiTags.end(), AdditionalAbiTags->begin(),
+ AdditionalAbiTags->end());
+ TagList.insert(TagList.end(), AdditionalAbiTags->begin(),
+ AdditionalAbiTags->end());
+ }
+
+ std::sort(TagList.begin(), TagList.end());
+ TagList.erase(std::unique(TagList.begin(), TagList.end()), TagList.end());
+
+ writeSortedUniqueAbiTags(Out, TagList);
+ }
+
+ const AbiTagList &getUsedAbiTags() const { return UsedAbiTags; }
+ void setUsedAbiTags(const AbiTagList &AbiTags) {
+ UsedAbiTags = AbiTags;
+ }
+
+ const AbiTagList &getEmittedAbiTags() const {
+ return EmittedAbiTags;
+ }
+
+ const AbiTagList &getSortedUniqueUsedAbiTags() {
+ std::sort(UsedAbiTags.begin(), UsedAbiTags.end());
+ UsedAbiTags.erase(std::unique(UsedAbiTags.begin(), UsedAbiTags.end()),
+ UsedAbiTags.end());
+ return UsedAbiTags;
+ }
+
+ private:
+ //! All abi tags used implicitly or explicitly.
+ AbiTagList UsedAbiTags;
+ //! All explicit abi tags (i.e. not from namespace).
+ AbiTagList EmittedAbiTags;
+
+ AbiTagState *&LinkHead;
+ AbiTagState *Parent = nullptr;
+
+ void pop() {
+ assert(LinkHead == this &&
+ "abi tag link head must point to us on destruction");
+ if (Parent) {
+ Parent->UsedAbiTags.insert(Parent->UsedAbiTags.end(),
+ UsedAbiTags.begin(), UsedAbiTags.end());
+ Parent->EmittedAbiTags.insert(Parent->EmittedAbiTags.end(),
+ EmittedAbiTags.begin(),
+ EmittedAbiTags.end());
+ }
+ LinkHead = Parent;
+ }
+
+ void writeSortedUniqueAbiTags(raw_ostream &Out, const AbiTagList &AbiTags) {
+ for (const auto &Tag : AbiTags) {
+ EmittedAbiTags.push_back(Tag);
+ Out << "B";
+ Out << Tag.size();
+ Out << Tag;
+ }
+ }
+ };
+
+ AbiTagState *AbiTags = nullptr;
+ AbiTagState AbiTagsRoot;
+
llvm::DenseMap<uintptr_t, unsigned> Substitutions;
ASTContext &getASTContext() const { return Context.getASTContext(); }
public:
CXXNameMangler(ItaniumMangleContextImpl &C, raw_ostream &Out_,
- const NamedDecl *D = nullptr)
- : Context(C), Out(Out_), Structor(getStructor(D)), StructorType(0),
- SeqID(0) {
+ const NamedDecl *D = nullptr, bool NullOut_ = false)
+ : Context(C), Out(Out_), NullOut(NullOut_), Structor(getStructor(D)),
+ StructorType(0), SeqID(0), AbiTagsRoot(AbiTags) {
// These can't be mangled without a ctor type or dtor type.
assert(!D || (!isa<CXXDestructorDecl>(D) &&
!isa<CXXConstructorDecl>(D)));
@@ -277,11 +396,21 @@ public:
CXXNameMangler(ItaniumMangleContextImpl &C, raw_ostream &Out_,
const CXXConstructorDecl *D, CXXCtorType Type)
: Context(C), Out(Out_), Structor(getStructor(D)), StructorType(Type),
- SeqID(0) { }
+ SeqID(0), AbiTagsRoot(AbiTags) { }
CXXNameMangler(ItaniumMangleContextImpl &C, raw_ostream &Out_,
const CXXDestructorDecl *D, CXXDtorType Type)
: Context(C), Out(Out_), Structor(getStructor(D)), StructorType(Type),
- SeqID(0) { }
+ SeqID(0), AbiTagsRoot(AbiTags) { }
+
+ CXXNameMangler(CXXNameMangler &Outer, raw_ostream &Out_)
+ : Context(Outer.Context), Out(Out_), NullOut(false),
+ Structor(Outer.Structor), StructorType(Outer.StructorType),
+ SeqID(Outer.SeqID), AbiTagsRoot(AbiTags) {}
+
+ CXXNameMangler(CXXNameMangler &Outer, llvm::raw_null_ostream &Out_)
+ : Context(Outer.Context), Out(Out_), NullOut(true),
+ Structor(Outer.Structor), StructorType(Outer.StructorType),
+ SeqID(Outer.SeqID), AbiTagsRoot(AbiTags) {}
#if MANGLE_CHECKER
~CXXNameMangler() {
@@ -296,6 +425,9 @@ public:
#endif
raw_ostream &getStream() { return Out; }
+ void disableDerivedAbiTags() { DisableDerivedAbiTags = true; }
+ static bool shouldHaveAbiTags(ItaniumMangleContextImpl &C, const VarDecl *VD);
+
void mangle(const NamedDecl *D);
void mangleCallOffset(int64_t NonVirtual, int64_t Virtual);
void mangleNumber(const llvm::APSInt &I);
@@ -314,7 +446,6 @@ private:
bool mangleSubstitution(TemplateName Template);
bool mangleSubstitution(uintptr_t Ptr);
- void mangleExistingSubstitution(QualType type);
void mangleExistingSubstitution(TemplateName name);
bool mangleStandardSubstitution(const NamedDecl *ND);
@@ -334,23 +465,37 @@ private:
DeclarationName name,
unsigned KnownArity = UnknownArity);
- void mangleName(const TemplateDecl *TD,
- const TemplateArgument *TemplateArgs,
- unsigned NumTemplateArgs);
- void mangleUnqualifiedName(const NamedDecl *ND) {
- mangleUnqualifiedName(ND, ND->getDeclName(), UnknownArity);
+ void mangleFunctionEncodingBareType(const FunctionDecl *FD);
+
+ void mangleNameWithAbiTags(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags);
+ void mangleTemplateName(const TemplateDecl *TD,
+ const TemplateArgument *TemplateArgs,
+ unsigned NumTemplateArgs);
+ void mangleUnqualifiedName(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags) {
+ mangleUnqualifiedName(ND, ND->getDeclName(), UnknownArity,
+ AdditionalAbiTags);
}
void mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name,
- unsigned KnownArity);
- void mangleUnscopedName(const NamedDecl *ND);
- void mangleUnscopedTemplateName(const TemplateDecl *ND);
- void mangleUnscopedTemplateName(TemplateName);
+ unsigned KnownArity,
+ const AbiTagList *AdditionalAbiTags);
+ void mangleUnscopedName(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags);
+ void mangleUnscopedTemplateName(const TemplateDecl *ND,
+ const AbiTagList *AdditionalAbiTags);
+ void mangleUnscopedTemplateName(TemplateName,
+ const AbiTagList *AdditionalAbiTags);
void mangleSourceName(const IdentifierInfo *II);
- void mangleLocalName(const Decl *D);
+ void mangleSourceNameWithAbiTags(
+ const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
+ void mangleLocalName(const Decl *D,
+ const AbiTagList *AdditionalAbiTags);
void mangleBlockForPrefix(const BlockDecl *Block);
void mangleUnqualifiedBlock(const BlockDecl *Block);
void mangleLambda(const CXXRecordDecl *Lambda);
void mangleNestedName(const NamedDecl *ND, const DeclContext *DC,
+ const AbiTagList *AdditionalAbiTags,
bool NoFunction=false);
void mangleNestedName(const TemplateDecl *TD,
const TemplateArgument *TemplateArgs,
@@ -364,6 +509,7 @@ private:
StringRef Prefix = "");
void mangleOperatorName(DeclarationName Name, unsigned Arity);
void mangleOperatorName(OverloadedOperatorKind OO, unsigned Arity);
+ void mangleVendorQualifier(StringRef qualifier);
void mangleQualifiers(Qualifiers Quals);
void mangleRefQualifier(RefQualifierKind RefQualifier);
@@ -377,7 +523,10 @@ private:
void mangleType(const TagType*);
void mangleType(TemplateName);
- void mangleBareFunctionType(const FunctionType *T, bool MangleReturnType,
+ static StringRef getCallingConvQualifierName(CallingConv CC);
+ void mangleExtParameterInfo(FunctionProtoType::ExtParameterInfo info);
+ void mangleExtFunctionInfo(const FunctionType *T);
+ void mangleBareFunctionType(const FunctionProtoType *T, bool MangleReturnType,
const FunctionDecl *FD = nullptr);
void mangleNeonVectorType(const VectorType *T);
void mangleAArch64NeonVectorType(const VectorType *T);
@@ -392,7 +541,7 @@ private:
void mangleCastExpression(const Expr *E, StringRef CastEncoding);
void mangleInitListElements(const InitListExpr *InitList);
void mangleExpression(const Expr *E, unsigned Arity = UnknownArity);
- void mangleCXXCtorType(CXXCtorType T);
+ void mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom);
void mangleCXXDtorType(CXXDtorType T);
void mangleTemplateArgs(const TemplateArgumentLoc *TemplateArgs,
@@ -405,6 +554,14 @@ private:
void mangleTemplateParameter(unsigned Index);
void mangleFunctionParam(const ParmVarDecl *parm);
+
+ void writeAbiTags(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags);
+
+ // Returns sorted unique list of ABI tags.
+ AbiTagList makeFunctionReturnTypeTags(const FunctionDecl *FD);
+ // Returns sorted unique list of ABI tags.
+ AbiTagList makeVariableTypeTags(const VarDecl *VD);
};
}
@@ -448,6 +605,7 @@ bool ItaniumMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
while (!DC->isNamespace() && !DC->isTranslationUnit())
DC = getEffectiveParentContext(DC);
if (DC->isTranslationUnit() && D->getFormalLinkage() != InternalLinkage &&
+ !CXXNameMangler::shouldHaveAbiTags(*this, VD) &&
!isa<VarTemplateSpecializationDecl>(D))
return false;
}
@@ -455,6 +613,18 @@ bool ItaniumMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
return true;
}
+void CXXNameMangler::writeAbiTags(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags) {
+ assert(AbiTags && "require AbiTagState");
+ AbiTags->write(Out, ND, DisableDerivedAbiTags ? nullptr : AdditionalAbiTags);
+}
+
+void CXXNameMangler::mangleSourceNameWithAbiTags(
+ const NamedDecl *ND, const AbiTagList *AdditionalAbiTags) {
+ mangleSourceName(ND->getIdentifier());
+ writeAbiTags(ND, AdditionalAbiTags);
+}
+
void CXXNameMangler::mangle(const NamedDecl *D) {
// <mangled-name> ::= _Z <encoding>
// ::= <data name>
@@ -472,12 +642,52 @@ void CXXNameMangler::mangle(const NamedDecl *D) {
void CXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) {
// <encoding> ::= <function name> <bare-function-type>
- mangleName(FD);
// Don't mangle in the type if this isn't a decl we should typically mangle.
- if (!Context.shouldMangleDeclName(FD))
+ if (!Context.shouldMangleDeclName(FD)) {
+ mangleName(FD);
+ return;
+ }
+
+ AbiTagList ReturnTypeAbiTags = makeFunctionReturnTypeTags(FD);
+ if (ReturnTypeAbiTags.empty()) {
+ // There are no tags for return type, the simplest case.
+ mangleName(FD);
+ mangleFunctionEncodingBareType(FD);
return;
+ }
+ // Mangle the function name and encoding to a temporary buffer.
+ // Name and encoding have to go through the same mangler so that its
+ // substitutions match those of the final mangling.
+ SmallString<256> FunctionEncodingBuf;
+ llvm::raw_svector_ostream FunctionEncodingStream(FunctionEncodingBuf);
+ CXXNameMangler FunctionEncodingMangler(*this, FunctionEncodingStream);
+ // Output name of the function.
+ FunctionEncodingMangler.disableDerivedAbiTags();
+ FunctionEncodingMangler.mangleNameWithAbiTags(FD, nullptr);
+
+ // Remember the length of the function name in the buffer.
+ size_t EncodingPositionStart = FunctionEncodingStream.str().size();
+ FunctionEncodingMangler.mangleFunctionEncodingBareType(FD);
+
+ // Get the tags from the return type that are not already present in the
+ // function name or encoding.
+ const AbiTagList &UsedAbiTags =
+ FunctionEncodingMangler.AbiTagsRoot.getSortedUniqueUsedAbiTags();
+ AbiTagList AdditionalAbiTags(ReturnTypeAbiTags.size());
+ AdditionalAbiTags.erase(
+ std::set_difference(ReturnTypeAbiTags.begin(), ReturnTypeAbiTags.end(),
+ UsedAbiTags.begin(), UsedAbiTags.end(),
+ AdditionalAbiTags.begin()),
+ AdditionalAbiTags.end());
+
+ // Output the name with the implicit tags and the function encoding from
+ // the temporary buffer.
+ mangleNameWithAbiTags(FD, &AdditionalAbiTags);
+ Out << FunctionEncodingStream.str().substr(EncodingPositionStart);
+}
+
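The two-pass scheme exists because tags that appear only in the return type must still be attached to the function name itself; a sketch:

    struct [[gnu::abi_tag("v2")]] Ret {};
    Ret make();  // "v2" occurs nowhere else in the mangling, so it becomes
                 // an implicit tag on the name: roughly _Z4makeB2v2v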
+void CXXNameMangler::mangleFunctionEncodingBareType(const FunctionDecl *FD) {
if (FD->hasAttr<EnableIfAttr>()) {
FunctionTypeDepthState Saved = FunctionTypeDepth.push();
Out << "Ua9enable_ifI";
@@ -497,6 +707,12 @@ void CXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) {
FunctionTypeDepth.pop(Saved);
}
+ // When mangling an inheriting constructor, the bare function type used is
+ // that of the inherited constructor.
+ if (auto *CD = dyn_cast<CXXConstructorDecl>(FD))
+ if (auto Inherited = CD->getInheritedConstructor())
+ FD = Inherited.getConstructor();
+
// Whether the mangling of a function type includes the return type depends on
// the context and the nature of the function. The rules for deciding whether
// the return type is included are:
@@ -523,7 +739,7 @@ void CXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) {
FD = PrimaryTemplate->getTemplatedDecl();
}
- mangleBareFunctionType(FD->getType()->getAs<FunctionType>(),
+ mangleBareFunctionType(FD->getType()->castAs<FunctionProtoType>(),
MangleReturnType, FD);
}
@@ -557,7 +773,7 @@ static bool isStdNamespace(const DeclContext *DC) {
static const TemplateDecl *
isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) {
// Check if we have a function template.
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)){
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
if (const TemplateDecl *TD = FD->getPrimaryTemplate()) {
TemplateArgs = FD->getTemplateSpecializationArgs();
return TD;
@@ -582,6 +798,40 @@ isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) {
}
void CXXNameMangler::mangleName(const NamedDecl *ND) {
+ if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
+ // Variables should have implicit tags from its type.
+ AbiTagList VariableTypeAbiTags = makeVariableTypeTags(VD);
+ if (VariableTypeAbiTags.empty()) {
+ // Simple case: no variable type tags.
+ mangleNameWithAbiTags(VD, nullptr);
+ return;
+ }
+
+ // Mangle the variable name to a null stream, just to collect the tags.
+ llvm::raw_null_ostream NullOutStream;
+ CXXNameMangler VariableNameMangler(*this, NullOutStream);
+ VariableNameMangler.disableDerivedAbiTags();
+ VariableNameMangler.mangleNameWithAbiTags(VD, nullptr);
+
+ // Get the tags from the variable's type that are not present in its name.
+ const AbiTagList &UsedAbiTags =
+ VariableNameMangler.AbiTagsRoot.getSortedUniqueUsedAbiTags();
+ AbiTagList AdditionalAbiTags(VariableTypeAbiTags.size());
+ AdditionalAbiTags.erase(
+ std::set_difference(VariableTypeAbiTags.begin(),
+ VariableTypeAbiTags.end(), UsedAbiTags.begin(),
+ UsedAbiTags.end(), AdditionalAbiTags.begin()),
+ AdditionalAbiTags.end());
+
+ // Output name with implicit tags.
+ mangleNameWithAbiTags(VD, &AdditionalAbiTags);
+ } else {
+ mangleNameWithAbiTags(ND, nullptr);
+ }
+}
+
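Variables get the analogous treatment: tags carried only by the variable's type become implicit tags on its name, which is also why shouldMangleCXXName above now mangles otherwise-unmangled globals that carry such tags. A sketch:

    struct [[gnu::abi_tag("v2")]] T {};
    T global;  // mangled roughly as _Z6globalB2v2 rather than plain 'global'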
+void CXXNameMangler::mangleNameWithAbiTags(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags) {
// <name> ::= <nested-name>
// ::= <unscoped-name>
// ::= <unscoped-template-name> <template-args>
@@ -597,7 +847,7 @@ void CXXNameMangler::mangleName(const NamedDecl *ND) {
while (!DC->isNamespace() && !DC->isTranslationUnit())
DC = getEffectiveParentContext(DC);
else if (GetLocalClassDecl(ND)) {
- mangleLocalName(ND);
+ mangleLocalName(ND, AdditionalAbiTags);
return;
}
@@ -607,76 +857,88 @@ void CXXNameMangler::mangleName(const NamedDecl *ND) {
// Check if we have a template.
const TemplateArgumentList *TemplateArgs = nullptr;
if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) {
- mangleUnscopedTemplateName(TD);
+ mangleUnscopedTemplateName(TD, AdditionalAbiTags);
mangleTemplateArgs(*TemplateArgs);
return;
}
- mangleUnscopedName(ND);
+ mangleUnscopedName(ND, AdditionalAbiTags);
return;
}
if (isLocalContainerContext(DC)) {
- mangleLocalName(ND);
+ mangleLocalName(ND, AdditionalAbiTags);
return;
}
- mangleNestedName(ND, DC);
+ mangleNestedName(ND, DC, AdditionalAbiTags);
}
-void CXXNameMangler::mangleName(const TemplateDecl *TD,
- const TemplateArgument *TemplateArgs,
- unsigned NumTemplateArgs) {
+
+void CXXNameMangler::mangleTemplateName(const TemplateDecl *TD,
+ const TemplateArgument *TemplateArgs,
+ unsigned NumTemplateArgs) {
const DeclContext *DC = IgnoreLinkageSpecDecls(getEffectiveDeclContext(TD));
if (DC->isTranslationUnit() || isStdNamespace(DC)) {
- mangleUnscopedTemplateName(TD);
+ mangleUnscopedTemplateName(TD, nullptr);
mangleTemplateArgs(TemplateArgs, NumTemplateArgs);
} else {
mangleNestedName(TD, TemplateArgs, NumTemplateArgs);
}
}
-void CXXNameMangler::mangleUnscopedName(const NamedDecl *ND) {
+void CXXNameMangler::mangleUnscopedName(const NamedDecl *ND,
+ const AbiTagList *AdditionalAbiTags) {
// <unscoped-name> ::= <unqualified-name>
// ::= St <unqualified-name> # ::std::
if (isStdNamespace(IgnoreLinkageSpecDecls(getEffectiveDeclContext(ND))))
Out << "St";
- mangleUnqualifiedName(ND);
+ mangleUnqualifiedName(ND, AdditionalAbiTags);
}
-void CXXNameMangler::mangleUnscopedTemplateName(const TemplateDecl *ND) {
+void CXXNameMangler::mangleUnscopedTemplateName(
+ const TemplateDecl *ND, const AbiTagList *AdditionalAbiTags) {
// <unscoped-template-name> ::= <unscoped-name>
// ::= <substitution>
if (mangleSubstitution(ND))
return;
// <template-template-param> ::= <template-param>
- if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(ND))
+ if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(ND)) {
+ assert(!AdditionalAbiTags &&
+ "template template param cannot have abi tags");
mangleTemplateParameter(TTP->getIndex());
- else
- mangleUnscopedName(ND->getTemplatedDecl());
+ } else if (isa<BuiltinTemplateDecl>(ND)) {
+ mangleUnscopedName(ND, AdditionalAbiTags);
+ } else {
+ mangleUnscopedName(ND->getTemplatedDecl(), AdditionalAbiTags);
+ }
addSubstitution(ND);
}
-void CXXNameMangler::mangleUnscopedTemplateName(TemplateName Template) {
+void CXXNameMangler::mangleUnscopedTemplateName(
+ TemplateName Template, const AbiTagList *AdditionalAbiTags) {
// <unscoped-template-name> ::= <unscoped-name>
// ::= <substitution>
if (TemplateDecl *TD = Template.getAsTemplateDecl())
- return mangleUnscopedTemplateName(TD);
+ return mangleUnscopedTemplateName(TD, AdditionalAbiTags);
if (mangleSubstitution(Template))
return;
+ assert(!AdditionalAbiTags &&
+ "dependent template name cannot have abi tags");
+
DependentTemplateName *Dependent = Template.getAsDependentTemplateName();
assert(Dependent && "Not a dependent template name?");
if (const IdentifierInfo *Id = Dependent->getIdentifier())
mangleSourceName(Id);
else
mangleOperatorName(Dependent->getOperator(), UnknownArity);
-
+
addSubstitution(Template);
}
@@ -834,7 +1096,7 @@ void CXXNameMangler::mangleUnresolvedPrefix(NestedNameSpecifier *qualifier,
/*recursive*/ true);
else
Out << "sr";
- mangleSourceName(qualifier->getAsNamespace()->getIdentifier());
+ mangleSourceNameWithAbiTags(qualifier->getAsNamespace());
break;
case NestedNameSpecifier::NamespaceAlias:
if (qualifier->getPrefix())
@@ -842,7 +1104,7 @@ void CXXNameMangler::mangleUnresolvedPrefix(NestedNameSpecifier *qualifier,
/*recursive*/ true);
else
Out << "sr";
- mangleSourceName(qualifier->getAsNamespaceAlias()->getIdentifier());
+ mangleSourceNameWithAbiTags(qualifier->getAsNamespaceAlias());
break;
case NestedNameSpecifier::TypeSpec:
@@ -877,6 +1139,7 @@ void CXXNameMangler::mangleUnresolvedPrefix(NestedNameSpecifier *qualifier,
Out << "sr";
mangleSourceName(qualifier->getAsIdentifier());
+ // An Identifier has no type information, so we can't emit abi tags for it.
break;
}
@@ -922,7 +1185,8 @@ void CXXNameMangler::mangleUnresolvedName(NestedNameSpecifier *qualifier,
void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
DeclarationName Name,
- unsigned KnownArity) {
+ unsigned KnownArity,
+ const AbiTagList *AdditionalAbiTags) {
unsigned Arity = KnownArity;
// <unqualified-name> ::= <operator-name>
// ::= <ctor-dtor-name>
@@ -941,6 +1205,7 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
Out << 'L';
mangleSourceName(II);
+ writeAbiTags(ND, AdditionalAbiTags);
break;
}
@@ -980,6 +1245,7 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
assert(FD->getIdentifier() && "Data member name isn't an identifier!");
mangleSourceName(FD->getIdentifier());
+ // Not emitting abi tags: this is an internal name anyway.
break;
}
@@ -1000,6 +1266,10 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
assert(D->getDeclName().getAsIdentifierInfo() &&
"Typedef was not named!");
mangleSourceName(D->getDeclName().getAsIdentifierInfo());
+ assert(!AdditionalAbiTags && "Type cannot have additional abi tags");
+ // Explicit abi tags are still possible; take them from the underlying
+ // type, not from the typedef.
+ writeAbiTags(TD, nullptr);
break;
}
@@ -1009,6 +1279,8 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
// <lambda-sig> ::= <parameter-type>+ # Parameter types or 'v' for 'void'.
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(TD)) {
if (Record->isLambda() && Record->getLambdaManglingNumber()) {
+ assert(!AdditionalAbiTags &&
+ "Lambda type cannot have additional abi tags");
mangleLambda(Record);
break;
}
@@ -1020,11 +1292,13 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
if (UnnamedMangle > 1)
Out << UnnamedMangle - 2;
Out << '_';
+ writeAbiTags(TD, AdditionalAbiTags);
break;
}
- // Get a unique id for the anonymous struct.
- unsigned AnonStructId = Context.getAnonymousStructId(TD);
+ // Get a unique id for the anonymous struct. If this is not a real
+ // output, the ID doesn't matter, so use a fake one.
+ unsigned AnonStructId = NullOut ? 0 : Context.getAnonymousStructId(TD);
// Mangle it as a source name in the form
// [n] $_<id>
@@ -1043,16 +1317,33 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
case DeclarationName::ObjCMultiArgSelector:
llvm_unreachable("Can't mangle Objective-C selector names here!");
- case DeclarationName::CXXConstructorName:
+ case DeclarationName::CXXConstructorName: {
+ const CXXRecordDecl *InheritedFrom = nullptr;
+ const TemplateArgumentList *InheritedTemplateArgs = nullptr;
+ if (auto Inherited =
+ cast<CXXConstructorDecl>(ND)->getInheritedConstructor()) {
+ InheritedFrom = Inherited.getConstructor()->getParent();
+ InheritedTemplateArgs =
+ Inherited.getConstructor()->getTemplateSpecializationArgs();
+ }
+
if (ND == Structor)
// If the named decl is the C++ constructor we're mangling, use the type
// we were given.
- mangleCXXCtorType(static_cast<CXXCtorType>(StructorType));
+ mangleCXXCtorType(static_cast<CXXCtorType>(StructorType), InheritedFrom);
else
// Otherwise, use the complete constructor name. This is relevant if a
// class with a constructor is declared within a constructor.
- mangleCXXCtorType(Ctor_Complete);
+ mangleCXXCtorType(Ctor_Complete, InheritedFrom);
+
+ // FIXME: The template arguments are part of the enclosing prefix or
+ // nested-name, but it's more convenient to mangle them here.
+ if (InheritedTemplateArgs)
+ mangleTemplateArgs(*InheritedTemplateArgs);
+
+ writeAbiTags(ND, AdditionalAbiTags);
break;
+ }
case DeclarationName::CXXDestructorName:
if (ND == Structor)
@@ -1063,6 +1354,7 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
// Otherwise, use the complete destructor name. This is relevant if a
// class with a destructor is declared within a destructor.
mangleCXXDtorType(Dtor_Complete);
+ writeAbiTags(ND, AdditionalAbiTags);
break;
case DeclarationName::CXXOperatorName:
@@ -1078,6 +1370,7 @@ void CXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
case DeclarationName::CXXConversionFunctionName:
case DeclarationName::CXXLiteralOperatorName:
mangleOperatorName(Name, Arity);
+ writeAbiTags(ND, AdditionalAbiTags);
break;
case DeclarationName::CXXUsingDirective:
@@ -1094,6 +1387,7 @@ void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
void CXXNameMangler::mangleNestedName(const NamedDecl *ND,
const DeclContext *DC,
+ const AbiTagList *AdditionalAbiTags,
bool NoFunction) {
// <nested-name>
// ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix> <unqualified-name> E
@@ -1119,7 +1413,7 @@ void CXXNameMangler::mangleNestedName(const NamedDecl *ND,
}
else {
manglePrefix(DC, NoFunction);
- mangleUnqualifiedName(ND);
+ mangleUnqualifiedName(ND, AdditionalAbiTags);
}
Out << 'E';
@@ -1137,7 +1431,8 @@ void CXXNameMangler::mangleNestedName(const TemplateDecl *TD,
Out << 'E';
}
-void CXXNameMangler::mangleLocalName(const Decl *D) {
+void CXXNameMangler::mangleLocalName(const Decl *D,
+ const AbiTagList *AdditionalAbiTags) {
// <local-name> := Z <function encoding> E <entity name> [<discriminator>]
// := Z <function encoding> E s [<discriminator>]
// <local-name> := Z <function encoding> E d [ <parameter number> ]
@@ -1149,15 +1444,26 @@ void CXXNameMangler::mangleLocalName(const Decl *D) {
Out << 'Z';
- if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(DC))
- mangleObjCMethodName(MD);
- else if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC))
- mangleBlockForPrefix(BD);
- else
- mangleFunctionEncoding(cast<FunctionDecl>(DC));
+ {
+ AbiTagState LocalAbiTags(AbiTags);
+
+ if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(DC))
+ mangleObjCMethodName(MD);
+ else if (const BlockDecl *BD = dyn_cast<BlockDecl>(DC))
+ mangleBlockForPrefix(BD);
+ else
+ mangleFunctionEncoding(cast<FunctionDecl>(DC));
+
+ // Implicit ABI tags (from the namespace) are not available in the following
+ // entity; reset to the tags that were actually emitted, which are available.
+ LocalAbiTags.setUsedAbiTags(LocalAbiTags.getEmittedAbiTags());
+ }
Out << 'E';
+ // GCC 5.3.0 doesn't emit derived ABI tags for local names, but that appears
+ // to be a bug that is fixed in trunk.
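+ // e.g., a static local 's' in 'void f()' gets the local-name Z1fvE1s,
+ // i.e. it mangles as _ZZ1fvE1s.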
+
if (RD) {
// The parameter number is omitted for the last parameter, 0 for the
// second-to-last parameter, 1 for the third-to-last parameter, etc. The
@@ -1182,13 +1488,15 @@ void CXXNameMangler::mangleLocalName(const Decl *D) {
// Mangle the name relative to the closest enclosing function.
// equality ok because RD derived from ND above
if (D == RD) {
- mangleUnqualifiedName(RD);
+ mangleUnqualifiedName(RD, AdditionalAbiTags);
} else if (const BlockDecl *BD = dyn_cast<BlockDecl>(D)) {
manglePrefix(getEffectiveDeclContext(BD), true /*NoFunction*/);
+ assert(!AdditionalAbiTags && "Block cannot have additional abi tags");
mangleUnqualifiedBlock(BD);
} else {
const NamedDecl *ND = cast<NamedDecl>(D);
- mangleNestedName(ND, getEffectiveDeclContext(ND), true /*NoFunction*/);
+ mangleNestedName(ND, getEffectiveDeclContext(ND), AdditionalAbiTags,
+ true /*NoFunction*/);
}
} else if (const BlockDecl *BD = dyn_cast<BlockDecl>(D)) {
// Mangle a block in a default parameter; see above explanation for
@@ -1205,9 +1513,10 @@ void CXXNameMangler::mangleLocalName(const Decl *D) {
}
}
+ assert(!AdditionalAbiTags && "Block cannot have additional abi tags");
mangleUnqualifiedBlock(BD);
} else {
- mangleUnqualifiedName(cast<NamedDecl>(D));
+ mangleUnqualifiedName(cast<NamedDecl>(D), AdditionalAbiTags);
}
if (const NamedDecl *ND = dyn_cast<NamedDecl>(RD ? RD : D)) {
@@ -1223,12 +1532,12 @@ void CXXNameMangler::mangleLocalName(const Decl *D) {
void CXXNameMangler::mangleBlockForPrefix(const BlockDecl *Block) {
if (GetLocalClassDecl(Block)) {
- mangleLocalName(Block);
+ mangleLocalName(Block, /* AdditionalAbiTags */ nullptr);
return;
}
const DeclContext *DC = getEffectiveDeclContext(Block);
if (isLocalContainerContext(DC)) {
- mangleLocalName(Block);
+ mangleLocalName(Block, /* AdditionalAbiTags */ nullptr);
return;
}
manglePrefix(getEffectiveDeclContext(Block));
@@ -1239,10 +1548,10 @@ void CXXNameMangler::mangleUnqualifiedBlock(const BlockDecl *Block) {
if (Decl *Context = Block->getBlockManglingContextDecl()) {
if ((isa<VarDecl>(Context) || isa<FieldDecl>(Context)) &&
Context->getDeclContext()->isRecord()) {
- if (const IdentifierInfo *Name
- = cast<NamedDecl>(Context)->getIdentifier()) {
- mangleSourceName(Name);
- Out << 'M';
+ const auto *ND = cast<NamedDecl>(Context);
+ if (ND->getIdentifier()) {
+ mangleSourceNameWithAbiTags(ND);
+ Out << 'M';
}
}
}
@@ -1275,7 +1584,7 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) {
if (const IdentifierInfo *Name
= cast<NamedDecl>(Context)->getIdentifier()) {
mangleSourceName(Name);
- Out << 'M';
+ Out << 'M';
}
}
}
@@ -1362,7 +1671,7 @@ void CXXNameMangler::manglePrefix(const DeclContext *DC, bool NoFunction) {
mangleTemplateArgs(*TemplateArgs);
} else {
manglePrefix(getEffectiveDeclContext(ND), NoFunction);
- mangleUnqualifiedName(ND);
+ mangleUnqualifiedName(ND, nullptr);
}
addSubstitution(ND);
@@ -1377,19 +1686,19 @@ void CXXNameMangler::mangleTemplatePrefix(TemplateName Template) {
if (QualifiedTemplateName *Qualified = Template.getAsQualifiedTemplateName())
manglePrefix(Qualified->getQualifier());
-
+
if (OverloadedTemplateStorage *Overloaded
= Template.getAsOverloadedTemplate()) {
mangleUnqualifiedName(nullptr, (*Overloaded->begin())->getDeclName(),
- UnknownArity);
+ UnknownArity, nullptr);
return;
}
-
+
DependentTemplateName *Dependent = Template.getAsDependentTemplateName();
assert(Dependent && "Unknown template name kind?");
if (NestedNameSpecifier *Qualifier = Dependent->getQualifier())
manglePrefix(Qualifier);
- mangleUnscopedTemplateName(Template);
+ mangleUnscopedTemplateName(Template, /* AdditionalAbiTags */ nullptr);
}
void CXXNameMangler::mangleTemplatePrefix(const TemplateDecl *ND,
@@ -1408,7 +1717,10 @@ void CXXNameMangler::mangleTemplatePrefix(const TemplateDecl *ND,
mangleTemplateParameter(TTP->getIndex());
} else {
manglePrefix(getEffectiveDeclContext(ND), NoFunction);
- mangleUnqualifiedName(ND->getTemplatedDecl());
+ if (isa<BuiltinTemplateDecl>(ND))
+ mangleUnqualifiedName(ND, nullptr);
+ else
+ mangleUnqualifiedName(ND->getTemplatedDecl(), nullptr);
}
addSubstitution(ND);
@@ -1543,17 +1855,17 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
return true;
case Type::Typedef:
- mangleSourceName(cast<TypedefType>(Ty)->getDecl()->getIdentifier());
+ mangleSourceNameWithAbiTags(cast<TypedefType>(Ty)->getDecl());
break;
case Type::UnresolvedUsing:
- mangleSourceName(
- cast<UnresolvedUsingType>(Ty)->getDecl()->getIdentifier());
+ mangleSourceNameWithAbiTags(
+ cast<UnresolvedUsingType>(Ty)->getDecl());
break;
case Type::Enum:
case Type::Record:
- mangleSourceName(cast<TagType>(Ty)->getDecl()->getIdentifier());
+ mangleSourceNameWithAbiTags(cast<TagType>(Ty)->getDecl());
break;
case Type::TemplateSpecialization: {
@@ -1571,7 +1883,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
if (isa<TemplateTemplateParmDecl>(TD))
goto unresolvedType;
- mangleSourceName(TD->getIdentifier());
+ mangleSourceNameWithAbiTags(TD);
break;
}
@@ -1601,8 +1913,8 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty,
}
case Type::InjectedClassName:
- mangleSourceName(
- cast<InjectedClassNameType>(Ty)->getDecl()->getIdentifier());
+ mangleSourceNameWithAbiTags(
+ cast<InjectedClassNameType>(Ty)->getDecl());
break;
case Type::DependentName:
@@ -1653,8 +1965,6 @@ void CXXNameMangler::mangleOperatorName(DeclarationName Name, unsigned Arity) {
}
}
-
-
void
CXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO, unsigned Arity) {
switch (OO) {
@@ -1767,14 +2077,9 @@ CXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO, unsigned Arity) {
}
void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
- // <CV-qualifiers> ::= [r] [V] [K] # restrict (C99), volatile, const
- if (Quals.hasRestrict())
- Out << 'r';
- if (Quals.hasVolatile())
- Out << 'V';
- if (Quals.hasConst())
- Out << 'K';
+ // Vendor qualifiers come first.
+ // Address space qualifiers start with an ordinary letter.
if (Quals.hasAddressSpace()) {
// Address space extension:
//
@@ -1788,7 +2093,7 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
if (Context.getASTContext().addressSpaceMapManglingFor(AS)) {
// <target-addrspace> ::= "AS" <address-space-number>
unsigned TargetAS = Context.getASTContext().getTargetAddressSpace(AS);
- ASString = "AS" + llvm::utostr_32(TargetAS);
+ ASString = "AS" + llvm::utostr(TargetAS);
} else {
switch (AS) {
default: llvm_unreachable("Not a language specific address space");
@@ -1802,10 +2107,10 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
case LangAS::cuda_shared: ASString = "CUshared"; break;
}
}
- Out << 'U' << ASString.size() << ASString;
+ mangleVendorQualifier(ASString);
}
-
- StringRef LifetimeName;
+
+ // The ARC ownership qualifiers start with underscores.
switch (Quals.getObjCLifetime()) {
// Objective-C ARC Extension:
//
@@ -1816,15 +2121,15 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
break;
case Qualifiers::OCL_Weak:
- LifetimeName = "__weak";
+ mangleVendorQualifier("__weak");
break;
case Qualifiers::OCL_Strong:
- LifetimeName = "__strong";
+ mangleVendorQualifier("__strong");
break;
case Qualifiers::OCL_Autoreleasing:
- LifetimeName = "__autoreleasing";
+ mangleVendorQualifier("__autoreleasing");
break;
case Qualifiers::OCL_ExplicitNone:
@@ -1837,8 +2142,18 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals) {
// in any type signatures that need to be mangled.
break;
}
- if (!LifetimeName.empty())
- Out << 'U' << LifetimeName.size() << LifetimeName;
+
+ // <CV-qualifiers> ::= [r] [V] [K] # restrict (C99), volatile, const
+ if (Quals.hasRestrict())
+ Out << 'r';
+ if (Quals.hasVolatile())
+ Out << 'V';
+ if (Quals.hasConst())
+ Out << 'K';
+}
+
+void CXXNameMangler::mangleVendorQualifier(StringRef name) {
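+ // e.g., mangleVendorQualifier("__weak") emits "U6__weak".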
+ Out << 'U' << name.size() << name;
}
void CXXNameMangler::mangleRefQualifier(RefQualifierKind RefQualifier) {
@@ -1981,7 +2296,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
// ::= f # float
// ::= d # double
// ::= e # long double, __float80
- // UNSUPPORTED: ::= g # __float128
+ // ::= g # __float128
// UNSUPPORTED: ::= Dd # IEEE 754r decimal floating point (64 bits)
// UNSUPPORTED: ::= De # IEEE 754r decimal floating point (128 bits)
// UNSUPPORTED: ::= Df # IEEE 754r decimal floating point (32 bits)
@@ -1990,6 +2305,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
// ::= Ds # char16_t
// ::= Dn # std::nullptr_t (i.e., decltype(nullptr))
// ::= u <source-name> # vendor extended type
+ std::string type_name;
switch (T->getKind()) {
case BuiltinType::Void:
Out << 'v';
@@ -2061,6 +2377,12 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
? 'g'
: 'e');
break;
+ case BuiltinType::Float128:
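+ // On targets where long double already takes the 'g' mangling (e.g.
+ // PowerPC), emit GCC's vendor-extended name instead.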
+ if (getASTContext().getTargetInfo().useFloat128ManglingForLongDouble())
+ Out << "U10__float128"; // Match the GCC mangling
+ else
+ Out << 'g';
+ break;
case BuiltinType::NullPtr:
Out << "Dn";
break;
@@ -2070,7 +2392,9 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
case BuiltinType::Dependent:
- llvm_unreachable("mangling a placeholder type");
+ if (!NullOut)
+ llvm_unreachable("mangling a placeholder type");
+ break;
case BuiltinType::ObjCId:
Out << "11objc_object";
break;
@@ -2080,42 +2404,12 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
case BuiltinType::ObjCSel:
Out << "13objc_selector";
break;
- case BuiltinType::OCLImage1d:
- Out << "11ocl_image1d";
- break;
- case BuiltinType::OCLImage1dArray:
- Out << "16ocl_image1darray";
- break;
- case BuiltinType::OCLImage1dBuffer:
- Out << "17ocl_image1dbuffer";
- break;
- case BuiltinType::OCLImage2d:
- Out << "11ocl_image2d";
- break;
- case BuiltinType::OCLImage2dArray:
- Out << "16ocl_image2darray";
- break;
- case BuiltinType::OCLImage2dDepth:
- Out << "16ocl_image2ddepth";
- break;
- case BuiltinType::OCLImage2dArrayDepth:
- Out << "21ocl_image2darraydepth";
- break;
- case BuiltinType::OCLImage2dMSAA:
- Out << "15ocl_image2dmsaa";
- break;
- case BuiltinType::OCLImage2dArrayMSAA:
- Out << "20ocl_image2darraymsaa";
- break;
- case BuiltinType::OCLImage2dMSAADepth:
- Out << "20ocl_image2dmsaadepth";
- break;
- case BuiltinType::OCLImage2dArrayMSAADepth:
- Out << "35ocl_image2darraymsaadepth";
- break;
- case BuiltinType::OCLImage3d:
- Out << "11ocl_image3d";
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ type_name = "ocl_" #ImgType "_" #Suffix; \
+ Out << type_name.size() << type_name; \
break;
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
Out << "11ocl_sampler";
break;
@@ -2137,10 +2431,80 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
}
}
+StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
+ switch (CC) {
+ case CC_C:
+ return "";
+
+ case CC_X86StdCall:
+ case CC_X86FastCall:
+ case CC_X86ThisCall:
+ case CC_X86VectorCall:
+ case CC_X86Pascal:
+ case CC_X86_64Win64:
+ case CC_X86_64SysV:
+ case CC_AAPCS:
+ case CC_AAPCS_VFP:
+ case CC_IntelOclBicc:
+ case CC_SpirFunction:
+ case CC_OpenCLKernel:
+ case CC_PreserveMost:
+ case CC_PreserveAll:
+ // FIXME: we should be mangling all of the above.
+ return "";
+
+ case CC_Swift:
+ return "swiftcall";
+ }
+ llvm_unreachable("bad calling convention");
+}
+
+void CXXNameMangler::mangleExtFunctionInfo(const FunctionType *T) {
+ // Fast path.
+ if (T->getExtInfo() == FunctionType::ExtInfo())
+ return;
+
+ // Vendor-specific qualifiers are emitted in reverse alphabetical order.
+ // This will get more complicated in the future if we mangle other
+ // things here; but for now, since we mangle ns_returns_retained as
+ // a qualifier on the result type, we can get away with this:
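+ // e.g., a swiftcall function type gains a U9swiftcall vendor qualifier,
+ // so "void (*)() __attribute__((swiftcall))" mangles as PU9swiftcallFvvE
+ // (illustrative).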
+ StringRef CCQualifier = getCallingConvQualifierName(T->getExtInfo().getCC());
+ if (!CCQualifier.empty())
+ mangleVendorQualifier(CCQualifier);
+
+ // FIXME: regparm
+ // FIXME: noreturn
+}
+
+void
+CXXNameMangler::mangleExtParameterInfo(FunctionProtoType::ExtParameterInfo PI) {
+ // Vendor-specific qualifiers are emitted in reverse alphabetical order.
+
+ // Note that these are *not* substitution candidates. Demanglers might
+ // have trouble with this if the parameter type is fully substituted.
+
+ switch (PI.getABI()) {
+ case ParameterABI::Ordinary:
+ break;
+
+ // All of these start with "swift", so they come before "ns_consumed".
+ case ParameterABI::SwiftContext:
+ case ParameterABI::SwiftErrorResult:
+ case ParameterABI::SwiftIndirectResult:
+ mangleVendorQualifier(getParameterABISpelling(PI.getABI()));
+ break;
+ }
+
+ if (PI.isConsumed())
+ mangleVendorQualifier("ns_consumed");
+}
+
// <type> ::= <function-type>
// <function-type> ::= [<CV-qualifiers>] F [Y]
// <bare-function-type> [<ref-qualifier>] E
void CXXNameMangler::mangleType(const FunctionProtoType *T) {
+ mangleExtFunctionInfo(T);
+
// Mangle CV-qualifiers, if present. These are 'this' qualifiers,
// e.g. "const" in "int (A::*)() const".
mangleQualifiers(Qualifiers::fromCVRMask(T->getTypeQuals()));
@@ -2173,12 +2537,9 @@ void CXXNameMangler::mangleType(const FunctionNoProtoType *T) {
Out << 'E';
}
-void CXXNameMangler::mangleBareFunctionType(const FunctionType *T,
+void CXXNameMangler::mangleBareFunctionType(const FunctionProtoType *Proto,
bool MangleReturnType,
const FunctionDecl *FD) {
- // We should never be mangling something without a prototype.
- const FunctionProtoType *Proto = cast<FunctionProtoType>(T);
-
// Record that we're in a function type. See mangleFunctionParam
// for details on what we're trying to achieve here.
FunctionTypeDepthState saved = FunctionTypeDepth.push();
@@ -2186,7 +2547,20 @@ void CXXNameMangler::mangleBareFunctionType(const FunctionType *T,
// <bare-function-type> ::= <signature type>+
if (MangleReturnType) {
FunctionTypeDepth.enterResultType();
- mangleType(Proto->getReturnType());
+
+ // Mangle ns_returns_retained as an order-sensitive qualifier here.
+ if (Proto->getExtInfo().getProducesResult() && FD == nullptr)
+ mangleVendorQualifier("ns_returns_retained");
+
+ // Mangle the return type without any direct ARC ownership qualifiers.
+ QualType ReturnTy = Proto->getReturnType();
+ if (ReturnTy.getObjCLifetime()) {
+ auto SplitReturnTy = ReturnTy.split();
+ SplitReturnTy.Quals.removeObjCLifetime();
+ ReturnTy = getASTContext().getQualifiedType(SplitReturnTy);
+ }
+ mangleType(ReturnTy);
+
FunctionTypeDepth.leaveResultType();
}
@@ -2200,7 +2574,13 @@ void CXXNameMangler::mangleBareFunctionType(const FunctionType *T,
assert(!FD || FD->getNumParams() == Proto->getNumParams());
for (unsigned I = 0, E = Proto->getNumParams(); I != E; ++I) {
- const auto &ParamTy = Proto->getParamType(I);
+ // Mangle extended parameter info as order-sensitive qualifiers here.
+ if (Proto->hasExtParameterInfos() && FD == nullptr) {
+ mangleExtParameterInfo(Proto->getExtParameterInfo(I));
+ }
+
+ // Mangle the type.
+ QualType ParamTy = Proto->getParamType(I);
mangleType(Context.getASTContext().getSignatureParameterType(ParamTy));
if (FD) {
@@ -2546,7 +2926,7 @@ void CXXNameMangler::mangleType(const InjectedClassNameType *T) {
void CXXNameMangler::mangleType(const TemplateSpecializationType *T) {
if (TemplateDecl *TD = T->getTemplateName().getAsTemplateDecl()) {
- mangleName(TD, T->getArgs(), T->getNumArgs());
+ mangleTemplateName(TD, T->getArgs(), T->getNumArgs());
} else {
if (mangleSubstitution(QualType(T, 0)))
return;
@@ -2662,7 +3042,7 @@ void CXXNameMangler::mangleType(const UnaryTransformType *T) {
}
}
- mangleType(T->getUnderlyingType());
+ mangleType(T->getBaseType());
}
void CXXNameMangler::mangleType(const AutoType *T) {
@@ -2836,6 +3216,7 @@ recurse:
case Expr::MSPropertySubscriptExprClass:
case Expr::TypoExprClass: // This should no longer exist in the AST by now.
case Expr::OMPArraySectionExprClass:
+ case Expr::CXXInheritedCtorInitExprClass:
llvm_unreachable("unexpected statement kind");
// FIXME: invent manglings for all these.
@@ -2858,6 +3239,7 @@ recurse:
case Expr::ObjCDictionaryLiteralClass:
case Expr::ObjCSubscriptRefExprClass:
case Expr::ObjCIndirectCopyRestoreExprClass:
+ case Expr::ObjCAvailabilityCheckExprClass:
case Expr::OffsetOfExprClass:
case Expr::PredefinedExprClass:
case Expr::ShuffleVectorExprClass:
@@ -2872,12 +3254,14 @@ recurse:
case Expr::PseudoObjectExprClass:
case Expr::AtomicExprClass:
{
- // As bad as this diagnostic is, it's better than crashing.
- DiagnosticsEngine &Diags = Context.getDiags();
- unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
- "cannot yet mangle expression type %0");
- Diags.Report(E->getExprLoc(), DiagID)
- << E->getStmtClassName() << E->getSourceRange();
+ if (!NullOut) {
+ // As bad as this diagnostic is, it's better than crashing.
+ DiagnosticsEngine &Diags = Context.getDiags();
+ unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
+ "cannot yet mangle expression type %0");
+ Diags.Report(E->getExprLoc(), DiagID)
+ << E->getStmtClassName() << E->getSourceRange();
+ }
break;
}
@@ -3615,25 +3999,33 @@ void CXXNameMangler::mangleFunctionParam(const ParmVarDecl *parm) {
Out << '_';
}
-void CXXNameMangler::mangleCXXCtorType(CXXCtorType T) {
+void CXXNameMangler::mangleCXXCtorType(CXXCtorType T,
+ const CXXRecordDecl *InheritedFrom) {
// <ctor-dtor-name> ::= C1 # complete object constructor
// ::= C2 # base object constructor
+ // ::= CI1 <type> # complete inheriting constructor
+ // ::= CI2 <type> # base inheriting constructor
//
// In addition, C5 is a comdat name with C1 and C2 in it.
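+ // e.g., with "struct D : B { using B::B; };", D's complete inheriting
+ // constructor from B(int) mangles as _ZN1DCI11BEi (illustrative).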
+ Out << 'C';
+ if (InheritedFrom)
+ Out << 'I';
switch (T) {
case Ctor_Complete:
- Out << "C1";
+ Out << '1';
break;
case Ctor_Base:
- Out << "C2";
+ Out << '2';
break;
case Ctor_Comdat:
- Out << "C5";
+ Out << '5';
break;
case Ctor_DefaultClosure:
case Ctor_CopyingClosure:
llvm_unreachable("closure constructors don't exist for the Itanium ABI!");
}
+ if (InheritedFrom)
+ mangleName(InheritedFrom);
}
void CXXNameMangler::mangleCXXDtorType(CXXDtorType T) {
@@ -3745,7 +4137,7 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A) {
Out << 'L';
// References to external entities use the mangled name; if the name would
- // not normally be manged then mangle it as unqualified.
+ // not normally be mangled then mangle it as unqualified.
mangle(D);
Out << 'E';
@@ -3801,12 +4193,6 @@ void CXXNameMangler::mangleSeqID(unsigned SeqID) {
Out << '_';
}
-void CXXNameMangler::mangleExistingSubstitution(QualType type) {
- bool result = mangleSubstitution(type);
- assert(result && "no existing substitution for type");
- (void) result;
-}
-
void CXXNameMangler::mangleExistingSubstitution(TemplateName tname) {
bool result = mangleSubstitution(tname);
assert(result && "no existing substitution for template name");
@@ -4020,6 +4406,48 @@ void CXXNameMangler::addSubstitution(uintptr_t Ptr) {
Substitutions[Ptr] = SeqID++;
}
+CXXNameMangler::AbiTagList
+CXXNameMangler::makeFunctionReturnTypeTags(const FunctionDecl *FD) {
+ // When derived abi tags are disabled, there is no need to build any list.
+ if (DisableDerivedAbiTags)
+ return AbiTagList();
+
+ llvm::raw_null_ostream NullOutStream;
+ CXXNameMangler TrackReturnTypeTags(*this, NullOutStream);
+ TrackReturnTypeTags.disableDerivedAbiTags();
+
+ const FunctionProtoType *Proto =
+ cast<FunctionProtoType>(FD->getType()->getAs<FunctionType>());
+ TrackReturnTypeTags.FunctionTypeDepth.enterResultType();
+ TrackReturnTypeTags.mangleType(Proto->getReturnType());
+ TrackReturnTypeTags.FunctionTypeDepth.leaveResultType();
+
+ return TrackReturnTypeTags.AbiTagsRoot.getSortedUniqueUsedAbiTags();
+}
+
+CXXNameMangler::AbiTagList
+CXXNameMangler::makeVariableTypeTags(const VarDecl *VD) {
+ // When derived abi tags are disabled, there is no need to build any list.
+ if (DisableDerivedAbiTags)
+ return AbiTagList();
+
+ llvm::raw_null_ostream NullOutStream;
+ CXXNameMangler TrackVariableType(*this, NullOutStream);
+ TrackVariableType.disableDerivedAbiTags();
+
+ TrackVariableType.mangleType(VD->getType());
+
+ return TrackVariableType.AbiTagsRoot.getSortedUniqueUsedAbiTags();
+}
+
+bool CXXNameMangler::shouldHaveAbiTags(ItaniumMangleContextImpl &C,
+ const VarDecl *VD) {
+ llvm::raw_null_ostream NullOutStream;
+ CXXNameMangler TrackAbiTags(C, NullOutStream, nullptr, true);
+ TrackAbiTags.mangle(VD);
+ return TrackAbiTags.AbiTagsRoot.getUsedAbiTags().size();
+}
+
//
/// Mangles the name of the declaration D and emits that name to the given
@@ -4121,6 +4549,8 @@ void ItaniumMangleContextImpl::mangleStaticGuardVariable(const VarDecl *D,
// <special-name> ::= GV <object name> # Guard variable for one-time
// # initialization
CXXNameMangler Mangler(*this, Out);
+ // GCC 5.3.0 doesn't emit derived ABI tags for local names, but that appears
+ // to be a bug that is fixed in trunk.
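+ // e.g., the guard for a static local 's' in 'void f()' is _ZGVZ1fvE1s.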
Mangler.getStream() << "_ZGV";
Mangler.mangleName(D);
}
diff --git a/contrib/llvm/tools/clang/lib/AST/Mangle.cpp b/contrib/llvm/tools/clang/lib/AST/Mangle.cpp
index 014338f0490f..ee241732e8ad 100644
--- a/contrib/llvm/tools/clang/lib/AST/Mangle.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Mangle.cpp
@@ -126,9 +126,9 @@ void MangleContext::mangleName(const NamedDecl *D, raw_ostream &Out) {
// llvm mangler on ELF is a nop, so we can just avoid adding the \01
// marker. We also avoid adding the marker if this is an alias for an
// LLVM intrinsic.
- StringRef UserLabelPrefix =
- getASTContext().getTargetInfo().getUserLabelPrefix();
- if (!UserLabelPrefix.empty() && !ALA->getLabel().startswith("llvm."))
+ char GlobalPrefix =
+ getASTContext().getTargetInfo().getDataLayout().getGlobalPrefix();
+ if (GlobalPrefix && !ALA->getLabel().startswith("llvm."))
Out << '\01'; // LLVM IR Marker for __asm("foo")
Out << ALA->getLabel();
@@ -177,9 +177,9 @@ void MangleContext::mangleName(const NamedDecl *D, raw_ostream &Out) {
++ArgWords;
for (const auto &AT : Proto->param_types())
// Size should be aligned to pointer size.
- ArgWords += llvm::RoundUpToAlignment(ASTContext.getTypeSize(AT),
- TI.getPointerWidth(0)) /
- TI.getPointerWidth(0);
+ ArgWords +=
+ llvm::alignTo(ASTContext.getTypeSize(AT), TI.getPointerWidth(0)) /
+ TI.getPointerWidth(0);
Out << ((TI.getPointerWidth(0) / 8) * ArgWords);
}
@@ -254,11 +254,8 @@ void MangleContext::mangleBlock(const DeclContext *DC, const BlockDecl *BD,
mangleFunctionBlock(*this, Buffer, BD, Out);
}
-void MangleContext::mangleObjCMethodName(const ObjCMethodDecl *MD,
- raw_ostream &Out) {
- SmallString<64> Name;
- llvm::raw_svector_ostream OS(Name);
-
+void MangleContext::mangleObjCMethodNameWithoutSize(const ObjCMethodDecl *MD,
+ raw_ostream &OS) {
const ObjCContainerDecl *CD =
dyn_cast<ObjCContainerDecl>(MD->getDeclContext());
assert (CD && "Missing container decl in GetNameForMethod");
@@ -268,6 +265,13 @@ void MangleContext::mangleObjCMethodName(const ObjCMethodDecl *MD,
OS << ' ';
MD->getSelector().print(OS);
OS << ']';
-
+}
+
+void MangleContext::mangleObjCMethodName(const ObjCMethodDecl *MD,
+ raw_ostream &Out) {
+ SmallString<64> Name;
+ llvm::raw_svector_ostream OS(Name);
+
+ mangleObjCMethodNameWithoutSize(MD, OS);
Out << OS.str().size() << OS.str();
}
diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp
index 6ba31ccf1e37..3ae04538d626 100644
--- a/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp
@@ -262,7 +262,7 @@ std::pair<uint64_t, unsigned> MicrosoftCXXABI::getMemberPointerWidthAndAlign(
Align = Target.getIntAlign();
if (Target.getTriple().isArch64Bit())
- Width = llvm::RoundUpToAlignment(Width, Align);
+ Width = llvm::alignTo(Width, Align);
return std::make_pair(Width, Align);
}
diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
index 4a45f9e4051f..351997e02a9d 100644
--- a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
@@ -27,13 +28,44 @@
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/MathExtras.h"
using namespace clang;
namespace {
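+// Buffers a mangled name and, if it exceeds 4096 bytes, replaces it with
+// "??@<32-hex-digit MD5>@" (preserving any leading \01 escape) to stay
+// within MSVC's name-length limits.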
+struct msvc_hashing_ostream : public llvm::raw_svector_ostream {
+ raw_ostream &OS;
+ llvm::SmallString<64> Buffer;
+
+ msvc_hashing_ostream(raw_ostream &OS)
+ : llvm::raw_svector_ostream(Buffer), OS(OS) {}
+ ~msvc_hashing_ostream() override {
+ StringRef MangledName = str();
+ bool StartsWithEscape = MangledName.startswith("\01");
+ if (StartsWithEscape)
+ MangledName = MangledName.drop_front(1);
+ if (MangledName.size() <= 4096) {
+ OS << str();
+ return;
+ }
+
+ llvm::MD5 Hasher;
+ llvm::MD5::MD5Result Hash;
+ Hasher.update(MangledName);
+ Hasher.final(Hash);
+
+ SmallString<32> HexString;
+ llvm::MD5::stringifyResult(Hash, HexString);
+
+ if (StartsWithEscape)
+ OS << '\01';
+ OS << "??@" << HexString << '@';
+ }
+};
+
/// \brief Retrieve the declaration context that should be used when mangling
/// the given declaration.
static const DeclContext *getEffectiveDeclContext(const Decl *D) {
@@ -58,10 +90,11 @@ static const DeclContext *getEffectiveDeclContext(const Decl *D) {
}
const DeclContext *DC = D->getDeclContext();
- if (const CapturedDecl *CD = dyn_cast<CapturedDecl>(DC))
- return getEffectiveDeclContext(CD);
+ if (isa<CapturedDecl>(DC) || isa<OMPDeclareReductionDecl>(DC)) {
+ return getEffectiveDeclContext(cast<Decl>(DC));
+ }
- return DC;
+ return DC->getRedeclContext();
}
static const DeclContext *getEffectiveParentContext(const DeclContext *DC) {
@@ -120,7 +153,8 @@ public:
const CXXRecordDecl *DstRD,
raw_ostream &Out) override;
void mangleCXXThrowInfo(QualType T, bool IsConst, bool IsVolatile,
- uint32_t NumEntries, raw_ostream &Out) override;
+ bool IsUnaligned, uint32_t NumEntries,
+ raw_ostream &Out) override;
void mangleCXXCatchableTypeArray(QualType T, uint32_t NumEntries,
raw_ostream &Out) override;
void mangleCXXCatchableType(QualType T, const CXXConstructorDecl *CD,
@@ -160,14 +194,17 @@ public:
raw_ostream &Out) override;
void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) override;
bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) {
- // Lambda closure types are already numbered.
- if (isLambda(ND))
- return false;
-
const DeclContext *DC = getEffectiveDeclContext(ND);
if (!DC->isFunctionOrMethod())
return false;
+ // Lambda closure types are already numbered; give out a phony number so
+ // that they demangle nicely.
+ if (isLambda(ND)) {
+ disc = 1;
+ return true;
+ }
+
// Use the canonical number for externally visible decls.
if (ND->isExternallyVisible()) {
disc = getASTContext().getManglingNumber(ND);
@@ -201,7 +238,7 @@ public:
}
private:
- void mangleInitFiniStub(const VarDecl *D, raw_ostream &Out, char CharCode);
+ void mangleInitFiniStub(const VarDecl *D, char CharCode, raw_ostream &Out);
};
/// MicrosoftCXXNameMangler - Manage the mangling of a single name for the
@@ -1150,7 +1187,7 @@ void MicrosoftCXXNameMangler::mangleExpression(const Expr *E) {
// This CXXUuidofExpr is mangled as-if it were actually a VarDecl from
// const __s_GUID _GUID_{lower case UUID with underscores}
- StringRef Uuid = UE->getUuidAsStringRef(Context.getASTContext());
+ StringRef Uuid = UE->getUuidStr();
std::string Name = "_GUID_" + Uuid.lower();
std::replace(Name.begin(), Name.end(), '-', '_');
@@ -1410,6 +1447,10 @@ void MicrosoftCXXNameMangler::manglePointerExtQualifiers(Qualifiers Quals,
if (HasRestrict)
Out << 'I';
+
+ if (Quals.hasUnaligned() ||
+ (!PointeeType.isNull() && PointeeType.getLocalQualifiers().hasUnaligned()))
+ Out << 'F';
}
void MicrosoftCXXNameMangler::manglePointerCVQualifiers(Qualifiers Quals) {
@@ -1541,6 +1582,8 @@ void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range,
}
break;
case QMM_Result:
+ // The presence of the __unaligned qualifier shouldn't affect mangling here.
+ Quals.removeUnaligned();
if ((!IsPointer && Quals) || isa<TagType>(T)) {
Out << '?';
mangleQualifiers(Quals, false);
@@ -1681,54 +1724,11 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
mangleArtificalTagType(TTK_Struct, "objc_selector");
break;
- case BuiltinType::OCLImage1d:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image1d");
- break;
- case BuiltinType::OCLImage1dArray:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image1darray");
- break;
- case BuiltinType::OCLImage1dBuffer:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image1dbuffer");
- break;
- case BuiltinType::OCLImage2d:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2d");
- break;
- case BuiltinType::OCLImage2dArray:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2darray");
- break;
- case BuiltinType::OCLImage2dDepth:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2ddepth");
- break;
- case BuiltinType::OCLImage2dArrayDepth:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2darraydepth");
- break;
- case BuiltinType::OCLImage2dMSAA:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2dmsaa");
- break;
- case BuiltinType::OCLImage2dArrayMSAA:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2darraymsaa");
- break;
- case BuiltinType::OCLImage2dMSAADepth:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2dmsaadepth");
- break;
- case BuiltinType::OCLImage2dArrayMSAADepth:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image2darraymsaadepth");
- break;
- case BuiltinType::OCLImage3d:
- Out << "PA";
- mangleArtificalTagType(TTK_Struct, "ocl_image3d");
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ Out << "PAUocl_" #ImgType "_" #Suffix "@@"; \
break;
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
Out << "PA";
mangleArtificalTagType(TTK_Struct, "ocl_sampler");
@@ -1758,6 +1758,7 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
Out << "$$T";
break;
+ case BuiltinType::Float128:
case BuiltinType::Half: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
@@ -1799,9 +1800,12 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
SourceRange Range;
if (D) Range = D->getSourceRange();
+ bool IsInLambda = false;
bool IsStructor = false, HasThisQuals = ForceThisQuals, IsCtorClosure = false;
CallingConv CC = T->getCallConv();
if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(D)) {
+ if (MD->getParent()->isLambda())
+ IsInLambda = true;
if (MD->isInstance())
HasThisQuals = true;
if (isa<CXXDestructorDecl>(MD)) {
@@ -1820,7 +1824,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
// If this is a C++ instance method, mangle the CVR qualifiers for the
// this pointer.
if (HasThisQuals) {
- Qualifiers Quals = Qualifiers::fromCVRMask(Proto->getTypeQuals());
+ Qualifiers Quals = Qualifiers::fromCVRUMask(Proto->getTypeQuals());
manglePointerExtQualifiers(Quals, /*PointeeType=*/QualType());
mangleRefQualifier(Proto->getRefQualifier());
mangleQualifiers(Quals, /*IsMember=*/false);
@@ -1875,6 +1879,8 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
"shouldn't need to mangle __auto_type!");
mangleSourceName(AT->isDecltypeAuto() ? "<decltype-auto>" : "<auto>");
Out << '@';
+ } else if (IsInLambda) {
+ Out << '@';
} else {
if (ResultType->isVoidType())
ResultType = ResultType.getUnqualifiedType();
@@ -2448,7 +2454,8 @@ void MicrosoftMangleContextImpl::mangleCXXName(const NamedDecl *D,
getASTContext().getSourceManager(),
"Mangling declaration");
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
return Mangler.mangle(D);
}
@@ -2548,7 +2555,8 @@ MicrosoftMangleContextImpl::mangleVirtualMemPtrThunk(const CXXMethodDecl *MD,
const MicrosoftVTableContext::MethodVFTableLocation &ML =
VTContext->getMethodVFTableLocation(GlobalDecl(MD));
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01?";
Mangler.mangleVirtualMemPtrThunk(MD, ML);
}
@@ -2556,10 +2564,11 @@ MicrosoftMangleContextImpl::mangleVirtualMemPtrThunk(const CXXMethodDecl *MD,
void MicrosoftMangleContextImpl::mangleThunk(const CXXMethodDecl *MD,
const ThunkInfo &Thunk,
raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
- Out << "\01?";
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
+ Mangler.getStream() << "\01?";
Mangler.mangleName(MD);
- mangleThunkThisAdjustment(MD, Thunk.This, Mangler, Out);
+ mangleThunkThisAdjustment(MD, Thunk.This, Mangler, MHO);
if (!Thunk.Return.isEmpty())
assert(Thunk.Method != nullptr &&
"Thunk info should hold the overridee decl");
@@ -2576,10 +2585,11 @@ void MicrosoftMangleContextImpl::mangleCXXDtorThunk(
// dtors rather than scalar deleting dtors. Just use the vector deleting dtor
// mangling manually until we support both deleting dtor types.
assert(Type == Dtor_Deleting);
- MicrosoftCXXNameMangler Mangler(*this, Out, DD, Type);
- Out << "\01??_E";
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO, DD, Type);
+ Mangler.getStream() << "\01??_E";
Mangler.mangleName(DD->getParent());
- mangleThunkThisAdjustment(DD, Adjustment, Mangler, Out);
+ mangleThunkThisAdjustment(DD, Adjustment, Mangler, MHO);
Mangler.mangleFunctionType(DD->getType()->castAs<FunctionProtoType>(), DD);
}
@@ -2590,8 +2600,12 @@ void MicrosoftMangleContextImpl::mangleCXXVFTable(
// <cvr-qualifiers> [<name>] @
// NOTE: <cvr-qualifiers> here is always 'B' (const). <storage-class>
// is always '6' for vftables.
- MicrosoftCXXNameMangler Mangler(*this, Out);
- Mangler.getStream() << "\01??_7";
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
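+ // dllimport classes get the '??_S' (local vftable) name so the locally
+ // emitted copy doesn't collide with the imported '??_7' vftable.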
+ if (Derived->hasAttr<DLLImportAttr>())
+ Mangler.getStream() << "\01??_S";
+ else
+ Mangler.getStream() << "\01??_7";
Mangler.mangleName(Derived);
Mangler.getStream() << "6B"; // '6' for vftable, 'B' for const.
for (const CXXRecordDecl *RD : BasePath)
@@ -2606,7 +2620,8 @@ void MicrosoftMangleContextImpl::mangleCXXVBTable(
// <cvr-qualifiers> [<name>] @
// NOTE: <cvr-qualifiers> here is always 'B' (const). <storage-class>
// is always '7' for vbtables.
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??_8";
Mangler.mangleName(Derived);
Mangler.getStream() << "7B"; // '7' for vbtable, 'B' for const.
@@ -2616,7 +2631,8 @@ void MicrosoftMangleContextImpl::mangleCXXVBTable(
}
void MicrosoftMangleContextImpl::mangleCXXRTTI(QualType T, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??_R0";
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
Mangler.getStream() << "@8";
@@ -2631,31 +2647,36 @@ void MicrosoftMangleContextImpl::mangleCXXRTTIName(QualType T,
void MicrosoftMangleContextImpl::mangleCXXVirtualDisplacementMap(
const CXXRecordDecl *SrcRD, const CXXRecordDecl *DstRD, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??_K";
Mangler.mangleName(SrcRD);
Mangler.getStream() << "$C";
Mangler.mangleName(DstRD);
}
-void MicrosoftMangleContextImpl::mangleCXXThrowInfo(QualType T,
- bool IsConst,
+void MicrosoftMangleContextImpl::mangleCXXThrowInfo(QualType T, bool IsConst,
bool IsVolatile,
+ bool IsUnaligned,
uint32_t NumEntries,
raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "_TI";
if (IsConst)
Mangler.getStream() << 'C';
if (IsVolatile)
Mangler.getStream() << 'V';
+ if (IsUnaligned)
+ Mangler.getStream() << 'U';
Mangler.getStream() << NumEntries;
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
}
void MicrosoftMangleContextImpl::mangleCXXCatchableTypeArray(
QualType T, uint32_t NumEntries, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "_CTA";
Mangler.getStream() << NumEntries;
Mangler.mangleType(T, SourceRange(), MicrosoftCXXNameMangler::QMM_Result);
@@ -2671,17 +2692,20 @@ void MicrosoftMangleContextImpl::mangleCXXCatchableType(
llvm::SmallString<64> RTTIMangling;
{
llvm::raw_svector_ostream Stream(RTTIMangling);
- mangleCXXRTTI(T, Stream);
+ msvc_hashing_ostream MHO(Stream);
+ mangleCXXRTTI(T, MHO);
}
Mangler.getStream() << RTTIMangling.substr(1);
// VS2015 CTP6 omits the copy-constructor in the mangled name. This name is,
// in fact, superfluous but I'm not sure the change was made consciously.
- // TODO: Revisit this when VS2015 gets released.
llvm::SmallString<64> CopyCtorMangling;
- if (CD) {
+ if (!getASTContext().getLangOpts().isCompatibleWithMSVC(
+ LangOptions::MSVC2015) &&
+ CD) {
llvm::raw_svector_ostream Stream(CopyCtorMangling);
- mangleCXXCtor(CD, CT, Stream);
+ msvc_hashing_ostream MHO(Stream);
+ mangleCXXCtor(CD, CT, MHO);
}
Mangler.getStream() << CopyCtorMangling.substr(1);
@@ -2700,7 +2724,8 @@ void MicrosoftMangleContextImpl::mangleCXXCatchableType(
void MicrosoftMangleContextImpl::mangleCXXRTTIBaseClassDescriptor(
const CXXRecordDecl *Derived, uint32_t NVOffset, int32_t VBPtrOffset,
uint32_t VBTableOffset, uint32_t Flags, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??_R1";
Mangler.mangleNumber(NVOffset);
Mangler.mangleNumber(VBPtrOffset);
@@ -2712,7 +2737,8 @@ void MicrosoftMangleContextImpl::mangleCXXRTTIBaseClassDescriptor(
void MicrosoftMangleContextImpl::mangleCXXRTTIBaseClassArray(
const CXXRecordDecl *Derived, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??_R2";
Mangler.mangleName(Derived);
Mangler.getStream() << "8";
@@ -2720,7 +2746,8 @@ void MicrosoftMangleContextImpl::mangleCXXRTTIBaseClassArray(
void MicrosoftMangleContextImpl::mangleCXXRTTIClassHierarchyDescriptor(
const CXXRecordDecl *Derived, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??_R3";
Mangler.mangleName(Derived);
Mangler.getStream() << "8";
@@ -2733,18 +2760,26 @@ void MicrosoftMangleContextImpl::mangleCXXRTTICompleteObjectLocator(
// <cvr-qualifiers> [<name>] @
// NOTE: <cvr-qualifiers> here is always 'B' (const). <storage-class>
// is always '6' for vftables.
- MicrosoftCXXNameMangler Mangler(*this, Out);
- Mangler.getStream() << "\01??_R4";
- Mangler.mangleName(Derived);
- Mangler.getStream() << "6B"; // '6' for vftable, 'B' for const.
- for (const CXXRecordDecl *RD : BasePath)
- Mangler.mangleName(RD);
- Mangler.getStream() << '@';
+ llvm::SmallString<64> VFTableMangling;
+ llvm::raw_svector_ostream Stream(VFTableMangling);
+ mangleCXXVFTable(Derived, BasePath, Stream);
+
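+ // Derive the COL name from the vftable name: for an MD5-hashed name,
+ // append "??_R4@"; otherwise swap the "??_7"/"??_S" prefix for "??_R4".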
+ if (VFTableMangling.startswith("\01??@")) {
+ assert(VFTableMangling.endswith("@"));
+ Out << VFTableMangling << "??_R4@";
+ return;
+ }
+
+ assert(VFTableMangling.startswith("\01??_7") ||
+ VFTableMangling.startswith("\01??_S"));
+
+ Out << "\01??_R4" << StringRef(VFTableMangling).drop_front(5);
}
void MicrosoftMangleContextImpl::mangleSEHFilterExpression(
const NamedDecl *EnclosingDecl, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
// The function body is in the same comdat as the function with the handler,
// so the numbering here doesn't have to be the same across TUs.
//
@@ -2755,7 +2790,8 @@ void MicrosoftMangleContextImpl::mangleSEHFilterExpression(
void MicrosoftMangleContextImpl::mangleSEHFinallyBlock(
const NamedDecl *EnclosingDecl, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
// The function body is in the same comdat as the function with the handler,
// so the numbering here doesn't have to be the same across TUs.
//
@@ -2775,20 +2811,23 @@ void MicrosoftMangleContextImpl::mangleTypeName(QualType T, raw_ostream &Out) {
void MicrosoftMangleContextImpl::mangleCXXCtor(const CXXConstructorDecl *D,
CXXCtorType Type,
raw_ostream &Out) {
- MicrosoftCXXNameMangler mangler(*this, Out, D, Type);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler mangler(*this, MHO, D, Type);
mangler.mangle(D);
}
void MicrosoftMangleContextImpl::mangleCXXDtor(const CXXDestructorDecl *D,
CXXDtorType Type,
raw_ostream &Out) {
- MicrosoftCXXNameMangler mangler(*this, Out, D, Type);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler mangler(*this, MHO, D, Type);
mangler.mangle(D);
}
void MicrosoftMangleContextImpl::mangleReferenceTemporary(
const VarDecl *VD, unsigned ManglingNumber, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01?$RT" << ManglingNumber << '@';
Mangler.mangle(VD, "");
@@ -2796,10 +2835,12 @@ void MicrosoftMangleContextImpl::mangleReferenceTemporary(
void MicrosoftMangleContextImpl::mangleThreadSafeStaticGuardVariable(
const VarDecl *VD, unsigned GuardNum, raw_ostream &Out) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01?$TSS" << GuardNum << '@';
Mangler.mangleNestedName(VD);
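+ // "@4HA" encodes a function-local static ('4') int ('H') with no
+ // cv-qualifiers ('A'), matching MSVC's TSS guard variables.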
+ Mangler.getStream() << "@4HA";
}
void MicrosoftMangleContextImpl::mangleStaticGuardVariable(const VarDecl *VD,
@@ -2814,7 +2855,8 @@ void MicrosoftMangleContextImpl::mangleStaticGuardVariable(const VarDecl *VD,
// than 32 static locals. We don't fully implement the second mangling
// because those guards are not externally visible, and instead use LLVM's
// default renaming when creating a new guard variable.
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
bool Visible = VD->isExternallyVisible();
if (Visible) {
@@ -2836,9 +2878,10 @@ void MicrosoftMangleContextImpl::mangleStaticGuardVariable(const VarDecl *VD,
}
void MicrosoftMangleContextImpl::mangleInitFiniStub(const VarDecl *D,
- raw_ostream &Out,
- char CharCode) {
- MicrosoftCXXNameMangler Mangler(*this, Out);
+ char CharCode,
+ raw_ostream &Out) {
+ msvc_hashing_ostream MHO(Out);
+ MicrosoftCXXNameMangler Mangler(*this, MHO);
Mangler.getStream() << "\01??__" << CharCode;
Mangler.mangleName(D);
if (D->isStaticDataMember()) {
@@ -2853,14 +2896,14 @@ void MicrosoftMangleContextImpl::mangleInitFiniStub(const VarDecl *D,
void MicrosoftMangleContextImpl::mangleDynamicInitializer(const VarDecl *D,
raw_ostream &Out) {
// <initializer-name> ::= ?__E <name> YAXXZ
- mangleInitFiniStub(D, Out, 'E');
+ mangleInitFiniStub(D, 'E', Out);
}
void
MicrosoftMangleContextImpl::mangleDynamicAtExitDestructor(const VarDecl *D,
raw_ostream &Out) {
// <destructor-name> ::= ?__F <name> YAXXZ
- mangleInitFiniStub(D, Out, 'F');
+ mangleInitFiniStub(D, 'F', Out);
}
void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
diff --git a/contrib/llvm/tools/clang/lib/AST/NSAPI.cpp b/contrib/llvm/tools/clang/lib/AST/NSAPI.cpp
index c562dae63231..ac2a8d354247 100644
--- a/contrib/llvm/tools/clang/lib/AST/NSAPI.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/NSAPI.cpp
@@ -441,22 +441,14 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const {
case BuiltinType::Int128:
case BuiltinType::LongDouble:
case BuiltinType::UInt128:
+ case BuiltinType::Float128:
case BuiltinType::NullPtr:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCId:
case BuiltinType::ObjCSel:
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
diff --git a/contrib/llvm/tools/clang/lib/AST/NestedNameSpecifier.cpp b/contrib/llvm/tools/clang/lib/AST/NestedNameSpecifier.cpp
index d2370c88b9c5..82809d7ea7b5 100644
--- a/contrib/llvm/tools/clang/lib/AST/NestedNameSpecifier.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/NestedNameSpecifier.cpp
@@ -171,10 +171,19 @@ NamespaceAliasDecl *NestedNameSpecifier::getAsNamespaceAlias() const {
/// \brief Retrieve the record declaration stored in this nested name specifier.
CXXRecordDecl *NestedNameSpecifier::getAsRecordDecl() const {
- if (Prefix.getInt() == StoredDecl)
+ switch (Prefix.getInt()) {
+ case StoredIdentifier:
+ return nullptr;
+
+ case StoredDecl:
return dyn_cast<CXXRecordDecl>(static_cast<NamedDecl *>(Specifier));
- return nullptr;
+ case StoredTypeSpec:
+ case StoredTypeSpecWithTemplate:
+ return getAsType()->getAsCXXRecordDecl();
+ }
+
+ llvm_unreachable("Invalid NNS Kind!");
}
/// \brief Whether this nested name specifier refers to a dependent
@@ -306,7 +315,7 @@ NestedNameSpecifier::print(raw_ostream &OS,
// Print the template argument list.
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, SpecType->getArgs(), SpecType->getNumArgs(), InnerPolicy);
+ OS, SpecType->template_arguments(), InnerPolicy);
} else {
// Print the type normally
QualType(T, 0).print(OS, InnerPolicy);
@@ -322,7 +331,7 @@ void NestedNameSpecifier::dump(const LangOptions &LO) const {
print(llvm::errs(), PrintingPolicy(LO));
}
-void NestedNameSpecifier::dump() const {
+LLVM_DUMP_METHOD void NestedNameSpecifier::dump() const {
LangOptions LO;
print(llvm::errs(), PrintingPolicy(LO));
}
diff --git a/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp b/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp
index 1ef43f7694cf..d04ba727bb05 100644
--- a/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/OpenMPClause.cpp
@@ -29,6 +29,139 @@ OMPClause::child_range OMPClause::children() {
llvm_unreachable("unknown OMPClause");
}
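+// Helpers that view a clause as one carrying a pre-init statement (code that
+// must be emitted before the construct), for the clause kinds that allow it.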
+OMPClauseWithPreInit *OMPClauseWithPreInit::get(OMPClause *C) {
+ auto *Res = OMPClauseWithPreInit::get(const_cast<const OMPClause *>(C));
+ return Res ? const_cast<OMPClauseWithPreInit *>(Res) : nullptr;
+}
+
+const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
+ switch (C->getClauseKind()) {
+ case OMPC_schedule:
+ return static_cast<const OMPScheduleClause *>(C);
+ case OMPC_dist_schedule:
+ return static_cast<const OMPDistScheduleClause *>(C);
+ case OMPC_firstprivate:
+ return static_cast<const OMPFirstprivateClause *>(C);
+ case OMPC_lastprivate:
+ return static_cast<const OMPLastprivateClause *>(C);
+ case OMPC_reduction:
+ return static_cast<const OMPReductionClause *>(C);
+ case OMPC_linear:
+ return static_cast<const OMPLinearClause *>(C);
+ case OMPC_default:
+ case OMPC_proc_bind:
+ case OMPC_if:
+ case OMPC_final:
+ case OMPC_num_threads:
+ case OMPC_safelen:
+ case OMPC_simdlen:
+ case OMPC_collapse:
+ case OMPC_private:
+ case OMPC_shared:
+ case OMPC_aligned:
+ case OMPC_copyin:
+ case OMPC_copyprivate:
+ case OMPC_ordered:
+ case OMPC_nowait:
+ case OMPC_untied:
+ case OMPC_mergeable:
+ case OMPC_threadprivate:
+ case OMPC_flush:
+ case OMPC_read:
+ case OMPC_write:
+ case OMPC_update:
+ case OMPC_capture:
+ case OMPC_seq_cst:
+ case OMPC_depend:
+ case OMPC_device:
+ case OMPC_threads:
+ case OMPC_simd:
+ case OMPC_map:
+ case OMPC_num_teams:
+ case OMPC_thread_limit:
+ case OMPC_priority:
+ case OMPC_grainsize:
+ case OMPC_nogroup:
+ case OMPC_num_tasks:
+ case OMPC_hint:
+ case OMPC_defaultmap:
+ case OMPC_unknown:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
+ break;
+ }
+
+ return nullptr;
+}
+
+OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(OMPClause *C) {
+ auto *Res = OMPClauseWithPostUpdate::get(const_cast<const OMPClause *>(C));
+ return Res ? const_cast<OMPClauseWithPostUpdate *>(Res) : nullptr;
+}
+
+const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) {
+ switch (C->getClauseKind()) {
+ case OMPC_lastprivate:
+ return static_cast<const OMPLastprivateClause *>(C);
+ case OMPC_reduction:
+ return static_cast<const OMPReductionClause *>(C);
+ case OMPC_linear:
+ return static_cast<const OMPLinearClause *>(C);
+ case OMPC_schedule:
+ case OMPC_dist_schedule:
+ case OMPC_firstprivate:
+ case OMPC_default:
+ case OMPC_proc_bind:
+ case OMPC_if:
+ case OMPC_final:
+ case OMPC_num_threads:
+ case OMPC_safelen:
+ case OMPC_simdlen:
+ case OMPC_collapse:
+ case OMPC_private:
+ case OMPC_shared:
+ case OMPC_aligned:
+ case OMPC_copyin:
+ case OMPC_copyprivate:
+ case OMPC_ordered:
+ case OMPC_nowait:
+ case OMPC_untied:
+ case OMPC_mergeable:
+ case OMPC_threadprivate:
+ case OMPC_flush:
+ case OMPC_read:
+ case OMPC_write:
+ case OMPC_update:
+ case OMPC_capture:
+ case OMPC_seq_cst:
+ case OMPC_depend:
+ case OMPC_device:
+ case OMPC_threads:
+ case OMPC_simd:
+ case OMPC_map:
+ case OMPC_num_teams:
+ case OMPC_thread_limit:
+ case OMPC_priority:
+ case OMPC_grainsize:
+ case OMPC_nogroup:
+ case OMPC_num_tasks:
+ case OMPC_hint:
+ case OMPC_defaultmap:
+ case OMPC_unknown:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
+ break;
+ }
+
+ return nullptr;
+}
+
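
Both new get() pairs follow one pattern: the kind dispatch is written once, in the const overload, and the non-const overload forwards to it and casts constness back off the result. The idiom reduced to a sketch (types invented):

    struct Base { int Kind = 0; };
    struct Derived : Base {};

    const Derived *lookup(const Base *B) {
      // The real dispatch logic lives only here.
      return B->Kind == 1 ? static_cast<const Derived *>(B) : nullptr;
    }

    Derived *lookup(Base *B) {
      // Forward to the const overload, then strip const off the result.
      auto *Res = lookup(const_cast<const Base *>(B));
      return Res ? const_cast<Derived *>(Res) : nullptr;
    }

    int main() {
      Derived D;
      D.Kind = 1;
      return lookup(&D) == &D ? 0 : 1;
    }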
void OMPPrivateClause::setPrivateCopies(ArrayRef<Expr *> VL) {
assert(VL.size() == varlist_size() &&
"Number of private copies is not the same as the preallocated buffer");
@@ -70,13 +203,14 @@ OMPFirstprivateClause *
OMPFirstprivateClause::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation LParenLoc, SourceLocation EndLoc,
ArrayRef<Expr *> VL, ArrayRef<Expr *> PrivateVL,
- ArrayRef<Expr *> InitVL) {
+ ArrayRef<Expr *> InitVL, Stmt *PreInit) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(3 * VL.size()));
OMPFirstprivateClause *Clause =
new (Mem) OMPFirstprivateClause(StartLoc, LParenLoc, EndLoc, VL.size());
Clause->setVarRefs(VL);
Clause->setPrivateCopies(PrivateVL);
Clause->setInits(InitVL);
+ Clause->setPreInitStmt(PreInit);
return Clause;
}
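
The totalSizeToAlloc<Expr *>(3 * VL.size()) request reserves three Expr* arrays (variables, private copies, inits) directly behind the clause object, in llvm::TrailingObjects fashion. A plain-C++ stand-in for that tail-allocated layout (it ignores the per-array alignment bookkeeping TrailingObjects handles):

    #include <new>

    struct Expr; // opaque in this sketch

    struct Clause {
      unsigned NumVars;
      explicit Clause(unsigned N) : NumVars(N) {}
      // The arrays start right after the object; element i of list L is
      // slots()[L * NumVars + i].
      Expr **slots() { return reinterpret_cast<Expr **>(this + 1); }
    };

    Clause *createClause(unsigned N) {
      // One allocation covers the header plus all three variable-length
      // arrays, mirroring C.Allocate(totalSizeToAlloc<Expr *>(3 * N)).
      void *Mem = ::operator new(sizeof(Clause) + 3 * N * sizeof(Expr *));
      return new (Mem) Clause(N);
    }

    int main() {
      Clause *C = createClause(4);
      C->slots()[0] = nullptr;
      ::operator delete(C); // the real clauses are arena-allocated instead
    }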
@@ -117,7 +251,8 @@ void OMPLastprivateClause::setAssignmentOps(ArrayRef<Expr *> AssignmentOps) {
OMPLastprivateClause *OMPLastprivateClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation EndLoc, ArrayRef<Expr *> VL, ArrayRef<Expr *> SrcExprs,
- ArrayRef<Expr *> DstExprs, ArrayRef<Expr *> AssignmentOps) {
+ ArrayRef<Expr *> DstExprs, ArrayRef<Expr *> AssignmentOps, Stmt *PreInit,
+ Expr *PostUpdate) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size()));
OMPLastprivateClause *Clause =
new (Mem) OMPLastprivateClause(StartLoc, LParenLoc, EndLoc, VL.size());
@@ -125,6 +260,8 @@ OMPLastprivateClause *OMPLastprivateClause::Create(
Clause->setSourceExprs(SrcExprs);
Clause->setDestinationExprs(DstExprs);
Clause->setAssignmentOps(AssignmentOps);
+ Clause->setPreInitStmt(PreInit);
+ Clause->setPostUpdateExpr(PostUpdate);
return Clause;
}
@@ -179,7 +316,8 @@ OMPLinearClause *OMPLinearClause::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc,
SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef<Expr *> VL,
- ArrayRef<Expr *> PL, ArrayRef<Expr *> IL, Expr *Step, Expr *CalcStep) {
+ ArrayRef<Expr *> PL, ArrayRef<Expr *> IL, Expr *Step, Expr *CalcStep,
+ Stmt *PreInit, Expr *PostUpdate) {
// Allocate space for 5 lists (Vars, Privates, Inits, Updates, Finals) and 2
// expressions (Step and CalcStep).
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size() + 2));
@@ -196,6 +334,8 @@ OMPLinearClause *OMPLinearClause::Create(
nullptr);
Clause->setStep(Step);
Clause->setCalcStep(CalcStep);
+ Clause->setPreInitStmt(PreInit);
+ Clause->setPostUpdateExpr(PostUpdate);
return Clause;
}
@@ -340,7 +480,8 @@ OMPReductionClause *OMPReductionClause::Create(
SourceLocation EndLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL,
NestedNameSpecifierLoc QualifierLoc, const DeclarationNameInfo &NameInfo,
ArrayRef<Expr *> Privates, ArrayRef<Expr *> LHSExprs,
- ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps) {
+ ArrayRef<Expr *> RHSExprs, ArrayRef<Expr *> ReductionOps, Stmt *PreInit,
+ Expr *PostUpdate) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(5 * VL.size()));
OMPReductionClause *Clause = new (Mem) OMPReductionClause(
StartLoc, LParenLoc, EndLoc, ColonLoc, VL.size(), QualifierLoc, NameInfo);
@@ -349,6 +490,8 @@ OMPReductionClause *OMPReductionClause::Create(
Clause->setLHSExprs(LHSExprs);
Clause->setRHSExprs(RHSExprs);
Clause->setReductionOps(ReductionOps);
+ Clause->setPreInitStmt(PreInit);
+ Clause->setPostUpdateExpr(PostUpdate);
return Clause;
}
@@ -363,7 +506,7 @@ OMPFlushClause *OMPFlushClause::Create(const ASTContext &C,
SourceLocation LParenLoc,
SourceLocation EndLoc,
ArrayRef<Expr *> VL) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size() + 1));
OMPFlushClause *Clause =
new (Mem) OMPFlushClause(StartLoc, LParenLoc, EndLoc, VL.size());
Clause->setVarRefs(VL);
@@ -375,43 +518,252 @@ OMPFlushClause *OMPFlushClause::CreateEmpty(const ASTContext &C, unsigned N) {
return new (Mem) OMPFlushClause(N);
}
-OMPDependClause *
-OMPDependClause::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation LParenLoc, SourceLocation EndLoc,
- OpenMPDependClauseKind DepKind, SourceLocation DepLoc,
- SourceLocation ColonLoc, ArrayRef<Expr *> VL) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+OMPDependClause *OMPDependClause::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation EndLoc, OpenMPDependClauseKind DepKind,
+ SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size() + 1));
OMPDependClause *Clause =
new (Mem) OMPDependClause(StartLoc, LParenLoc, EndLoc, VL.size());
Clause->setVarRefs(VL);
Clause->setDependencyKind(DepKind);
Clause->setDependencyLoc(DepLoc);
Clause->setColonLoc(ColonLoc);
+ Clause->setCounterValue(nullptr);
return Clause;
}
OMPDependClause *OMPDependClause::CreateEmpty(const ASTContext &C, unsigned N) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N + 1));
return new (Mem) OMPDependClause(N);
}
-OMPMapClause *OMPMapClause::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation LParenLoc,
- SourceLocation EndLoc, ArrayRef<Expr *> VL,
- OpenMPMapClauseKind TypeModifier,
- OpenMPMapClauseKind Type,
- SourceLocation TypeLoc) {
- void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+void OMPDependClause::setCounterValue(Expr *V) {
+ assert(getDependencyKind() == OMPC_DEPEND_sink ||
+ getDependencyKind() == OMPC_DEPEND_source || V == nullptr);
+ *getVarRefs().end() = V;
+}
+
+const Expr *OMPDependClause::getCounterValue() const {
+ auto *V = *getVarRefs().end();
+ assert(getDependencyKind() == OMPC_DEPEND_sink ||
+ getDependencyKind() == OMPC_DEPEND_source || V == nullptr);
+ return V;
+}
+
+Expr *OMPDependClause::getCounterValue() {
+ auto *V = *getVarRefs().end();
+ assert(getDependencyKind() == OMPC_DEPEND_sink ||
+ getDependencyKind() == OMPC_DEPEND_source || V == nullptr);
+ return V;
+}
+
+unsigned OMPClauseMappableExprCommon::getComponentsTotalNumber(
+ MappableExprComponentListsRef ComponentLists) {
+ unsigned TotalNum = 0u;
+ for (auto &C : ComponentLists)
+ TotalNum += C.size();
+ return TotalNum;
+}
+
+unsigned OMPClauseMappableExprCommon::getUniqueDeclarationsTotalNumber(
+ ArrayRef<ValueDecl *> Declarations) {
+ unsigned TotalNum = 0u;
+ llvm::SmallPtrSet<const ValueDecl *, 8> Cache;
+ for (auto *D : Declarations) {
+ const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ if (Cache.count(VD))
+ continue;
+ ++TotalNum;
+ Cache.insert(VD);
+ }
+ return TotalNum;
+}
+
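
One small tightening available in the loop above: SmallPtrSet::insert returns an {iterator, bool} pair whose bool is true only for a newly inserted element, so the count()/insert() pair can collapse into a single call. The fused form, shown with a portable stand-in container:

    #include <unordered_set>
    #include <vector>

    unsigned countUnique(const std::vector<const void *> &Decls) {
      std::unordered_set<const void *> Cache;
      unsigned TotalNum = 0;
      for (const void *D : Decls)
        if (Cache.insert(D).second) // true only when D was not seen before
          ++TotalNum;
      return TotalNum;
    }

    int main() {
      int A = 0, B = 0;
      return countUnique({&A, &B, &A}) == 2 ? 0 : 1;
    }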
+OMPMapClause *
+OMPMapClause::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc,
+ ArrayRef<Expr *> Vars, ArrayRef<ValueDecl *> Declarations,
+ MappableExprComponentListsRef ComponentLists,
+ OpenMPMapClauseKind TypeModifier, OpenMPMapClauseKind Type,
+ bool TypeIsImplicit, SourceLocation TypeLoc) {
+
+ unsigned NumVars = Vars.size();
+ unsigned NumUniqueDeclarations =
+ getUniqueDeclarationsTotalNumber(Declarations);
+ unsigned NumComponentLists = ComponentLists.size();
+ unsigned NumComponents = getComponentsTotalNumber(ComponentLists);
+
+ // We need to allocate:
+ // NumVars x Expr* - we have an original list expression for each clause list
+ // entry.
+ // NumUniqueDeclarations x ValueDecl* - unique base declarations associated
+ // with each component list.
+ // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the
+ // number of lists for each unique declaration and the size of each component
+ // list.
+ // NumComponents x MappableComponent - the total of all the components in all
+ // the lists.
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
OMPMapClause *Clause = new (Mem) OMPMapClause(
- TypeModifier, Type, TypeLoc, StartLoc, LParenLoc, EndLoc, VL.size());
- Clause->setVarRefs(VL);
+ TypeModifier, Type, TypeIsImplicit, TypeLoc, StartLoc, LParenLoc, EndLoc,
+ NumVars, NumUniqueDeclarations, NumComponentLists, NumComponents);
+
+ Clause->setVarRefs(Vars);
+ Clause->setClauseInfo(Declarations, ComponentLists);
Clause->setMapTypeModifier(TypeModifier);
Clause->setMapType(Type);
Clause->setMapLoc(TypeLoc);
return Clause;
}
-OMPMapClause *OMPMapClause::CreateEmpty(const ASTContext &C, unsigned N) {
+OMPMapClause *OMPMapClause::CreateEmpty(const ASTContext &C, unsigned NumVars,
+ unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists,
+ unsigned NumComponents) {
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+ return new (Mem) OMPMapClause(NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
+}
+
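
OMPMapClause above and the OMPToClause/OMPFromClause pairs below all size the same four tail arrays. A back-of-the-envelope version of that computation (it ignores the inter-array padding totalSizeToAlloc accounts for, and the component type is a stand-in):

    #include <cstdio>

    struct MappableComponent { // stand-in; the real one pairs an Expr*
      void *AssocExpr;         // with a ValueDecl*
      void *AssocDecl;
    };

    unsigned long tailBytes(unsigned long NumVars,
                            unsigned long NumUniqueDecls,
                            unsigned long NumComponentLists,
                            unsigned long NumComponents) {
      return NumVars * sizeof(void *) +        // original list expressions
             NumUniqueDecls * sizeof(void *) + // unique base declarations
             (NumUniqueDecls + NumComponentLists) *
                 sizeof(unsigned) +            // per-decl list counts plus
                                               // per-list component sizes
             NumComponents * sizeof(MappableComponent);
    }

    int main() {
      // e.g. map(a, b[0:n]): 2 vars, 2 unique decls, 2 lists, 3 components.
      std::printf("%lu bytes of trailing storage\n", tailBytes(2, 2, 2, 3));
    }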
+OMPToClause *OMPToClause::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc, ArrayRef<Expr *> Vars,
+ ArrayRef<ValueDecl *> Declarations,
+ MappableExprComponentListsRef ComponentLists) {
+ unsigned NumVars = Vars.size();
+ unsigned NumUniqueDeclarations =
+ getUniqueDeclarationsTotalNumber(Declarations);
+ unsigned NumComponentLists = ComponentLists.size();
+ unsigned NumComponents = getComponentsTotalNumber(ComponentLists);
+
+ // We need to allocate:
+ // NumVars x Expr* - we have an original list expression for each clause list
+ // entry.
+ // NumUniqueDeclarations x ValueDecl* - unique base declarations associated
+ // with each component list.
+ // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the
+ // number of lists for each unique declaration and the size of each component
+ // list.
+ // NumComponents x MappableComponent - the total of all the components in all
+ // the lists.
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+
+ OMPToClause *Clause = new (Mem)
+ OMPToClause(StartLoc, LParenLoc, EndLoc, NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
+
+ Clause->setVarRefs(Vars);
+ Clause->setClauseInfo(Declarations, ComponentLists);
+ return Clause;
+}
+
+OMPToClause *OMPToClause::CreateEmpty(const ASTContext &C, unsigned NumVars,
+ unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists,
+ unsigned NumComponents) {
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+ return new (Mem) OMPToClause(NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
+}
+
+OMPFromClause *
+OMPFromClause::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc,
+ ArrayRef<Expr *> Vars, ArrayRef<ValueDecl *> Declarations,
+ MappableExprComponentListsRef ComponentLists) {
+ unsigned NumVars = Vars.size();
+ unsigned NumUniqueDeclarations =
+ getUniqueDeclarationsTotalNumber(Declarations);
+ unsigned NumComponentLists = ComponentLists.size();
+ unsigned NumComponents = getComponentsTotalNumber(ComponentLists);
+
+ // We need to allocate:
+ // NumVars x Expr* - we have an original list expression for each clause list
+ // entry.
+ // NumUniqueDeclarations x ValueDecl* - unique base declarations associated
+ // with each component list.
+ // (NumUniqueDeclarations + NumComponentLists) x unsigned - we specify the
+ // number of lists for each unique declaration and the size of each component
+ // list.
+ // NumComponents x MappableComponent - the total of all the components in all
+ // the lists.
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+
+ OMPFromClause *Clause = new (Mem)
+ OMPFromClause(StartLoc, LParenLoc, EndLoc, NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
+
+ Clause->setVarRefs(Vars);
+ Clause->setClauseInfo(Declarations, ComponentLists);
+ return Clause;
+}
+
+OMPFromClause *OMPFromClause::CreateEmpty(const ASTContext &C, unsigned NumVars,
+ unsigned NumUniqueDeclarations,
+ unsigned NumComponentLists,
+ unsigned NumComponents) {
+ void *Mem = C.Allocate(
+ totalSizeToAlloc<Expr *, ValueDecl *, unsigned,
+ OMPClauseMappableExprCommon::MappableComponent>(
+ NumVars, NumUniqueDeclarations,
+ NumUniqueDeclarations + NumComponentLists, NumComponents));
+ return new (Mem) OMPFromClause(NumVars, NumUniqueDeclarations,
+ NumComponentLists, NumComponents);
+}
+
+OMPUseDevicePtrClause *OMPUseDevicePtrClause::Create(const ASTContext &C,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc,
+ ArrayRef<Expr *> VL) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+ OMPUseDevicePtrClause *Clause =
+ new (Mem) OMPUseDevicePtrClause(StartLoc, LParenLoc, EndLoc, VL.size());
+ Clause->setVarRefs(VL);
+ return Clause;
+}
+
+OMPUseDevicePtrClause *OMPUseDevicePtrClause::CreateEmpty(const ASTContext &C,
+ unsigned N) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
+ return new (Mem) OMPUseDevicePtrClause(N);
+}
+
+OMPIsDevicePtrClause *OMPIsDevicePtrClause::Create(const ASTContext &C,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc,
+ ArrayRef<Expr *> VL) {
+ void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(VL.size()));
+ OMPIsDevicePtrClause *Clause =
+ new (Mem) OMPIsDevicePtrClause(StartLoc, LParenLoc, EndLoc, VL.size());
+ Clause->setVarRefs(VL);
+ return Clause;
+}
+
+OMPIsDevicePtrClause *OMPIsDevicePtrClause::CreateEmpty(const ASTContext &C,
+ unsigned N) {
void *Mem = C.Allocate(totalSizeToAlloc<Expr *>(N));
- return new (Mem) OMPMapClause(N);
+ return new (Mem) OMPIsDevicePtrClause(N);
}
diff --git a/contrib/llvm/tools/clang/lib/AST/ParentMap.cpp b/contrib/llvm/tools/clang/lib/AST/ParentMap.cpp
index d7d5f9c69205..d8882c9030b2 100644
--- a/contrib/llvm/tools/clang/lib/AST/ParentMap.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ParentMap.cpp
@@ -28,6 +28,8 @@ enum OpaqueValueMode {
static void BuildParentMap(MapTy& M, Stmt* S,
OpaqueValueMode OVMode = OV_Transparent) {
+ if (!S)
+ return;
switch (S->getStmtClass()) {
case Stmt::PseudoObjectExprClass: {
diff --git a/contrib/llvm/tools/clang/lib/AST/RecordLayout.cpp b/contrib/llvm/tools/clang/lib/AST/RecordLayout.cpp
index b2c244e3790e..299fd111bf6a 100644
--- a/contrib/llvm/tools/clang/lib/AST/RecordLayout.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/RecordLayout.cpp
@@ -18,8 +18,6 @@
using namespace clang;
void ASTRecordLayout::Destroy(ASTContext &Ctx) {
- if (FieldOffsets)
- Ctx.Deallocate(FieldOffsets);
if (CXXInfo) {
CXXInfo->~CXXRecordLayoutInfo();
Ctx.Deallocate(CXXInfo);
@@ -29,18 +27,13 @@ void ASTRecordLayout::Destroy(ASTContext &Ctx) {
}
ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size,
- CharUnits alignment,
+ CharUnits alignment,
CharUnits requiredAlignment,
CharUnits datasize,
- const uint64_t *fieldoffsets,
- unsigned fieldcount)
- : Size(size), DataSize(datasize), Alignment(alignment),
- RequiredAlignment(requiredAlignment), FieldOffsets(nullptr),
- FieldCount(fieldcount), CXXInfo(nullptr) {
- if (FieldCount > 0) {
- FieldOffsets = new (Ctx) uint64_t[FieldCount];
- memcpy(FieldOffsets, fieldoffsets, FieldCount * sizeof(*FieldOffsets));
- }
+ ArrayRef<uint64_t> fieldoffsets)
+ : Size(size), DataSize(datasize), Alignment(alignment),
+ RequiredAlignment(requiredAlignment), CXXInfo(nullptr) {
+ FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
}
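
FieldOffsets changes from a manually new'd and memcpy'd array (freed in Destroy above) to a container that copies the incoming ArrayRef into storage drawn from the ASTContext allocator, which is why the explicit deallocation disappears. The arena-append idiom in miniature:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct Arena { // stand-in for the ASTContext bump allocator
      std::vector<char *> Blocks;
      void *allocate(std::size_t N) {
        char *P = new char[N];
        Blocks.push_back(P);
        return P;
      }
      ~Arena() {
        for (char *P : Blocks)
          delete[] P; // everything is released with the context, at once
      }
    };

    struct OffsetVector {
      std::uint64_t *Data = nullptr;
      std::size_t Size = 0;
      void append(Arena &A, const std::uint64_t *B, const std::uint64_t *E) {
        Size = static_cast<std::size_t>(E - B);
        Data = static_cast<std::uint64_t *>(
            A.allocate(Size * sizeof(std::uint64_t)));
        std::memcpy(Data, B, Size * sizeof(std::uint64_t));
      }
    };

    int main() {
      Arena Ctx;
      const std::uint64_t Offs[] = {0, 32, 64};
      OffsetVector V;
      V.append(Ctx, Offs, Offs + 3);
      return V.Size == 3 ? 0 : 1;
    }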
// Constructor for C++ records.
@@ -50,26 +43,21 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx,
bool hasOwnVFPtr, bool hasExtendableVFPtr,
CharUnits vbptroffset,
CharUnits datasize,
- const uint64_t *fieldoffsets,
- unsigned fieldcount,
+ ArrayRef<uint64_t> fieldoffsets,
CharUnits nonvirtualsize,
CharUnits nonvirtualalignment,
CharUnits SizeOfLargestEmptySubobject,
const CXXRecordDecl *PrimaryBase,
bool IsPrimaryBaseVirtual,
const CXXRecordDecl *BaseSharingVBPtr,
- bool HasZeroSizedSubObject,
+ bool EndsWithZeroSizedObject,
bool LeadsWithZeroSizedBase,
const BaseOffsetsMapTy& BaseOffsets,
const VBaseOffsetsMapTy& VBaseOffsets)
: Size(size), DataSize(datasize), Alignment(alignment),
- RequiredAlignment(requiredAlignment), FieldOffsets(nullptr),
- FieldCount(fieldcount), CXXInfo(new (Ctx) CXXRecordLayoutInfo)
+ RequiredAlignment(requiredAlignment), CXXInfo(new (Ctx) CXXRecordLayoutInfo)
{
- if (FieldCount > 0) {
- FieldOffsets = new (Ctx) uint64_t[FieldCount];
- memcpy(FieldOffsets, fieldoffsets, FieldCount * sizeof(*FieldOffsets));
- }
+ FieldOffsets.append(Ctx, fieldoffsets.begin(), fieldoffsets.end());
CXXInfo->PrimaryBase.setPointer(PrimaryBase);
CXXInfo->PrimaryBase.setInt(IsPrimaryBaseVirtual);
@@ -82,7 +70,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx,
CXXInfo->VBPtrOffset = vbptroffset;
CXXInfo->HasExtendableVFPtr = hasExtendableVFPtr;
CXXInfo->BaseSharingVBPtr = BaseSharingVBPtr;
- CXXInfo->HasZeroSizedSubObject = HasZeroSizedSubObject;
+ CXXInfo->EndsWithZeroSizedObject = EndsWithZeroSizedObject;
CXXInfo->LeadsWithZeroSizedBase = LeadsWithZeroSizedBase;
diff --git a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp
index bc5ae0ffc469..cf981be0a4dd 100644
--- a/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -973,7 +973,7 @@ void ItaniumRecordLayoutBuilder::EnsureVTablePointerAlignment(
}
// Round up the current record size to pointer alignment.
- setSize(getSize().RoundUpToAlignment(BaseAlign));
+ setSize(getSize().alignTo(BaseAlign));
setDataSize(getSize());
// Update the alignment.
@@ -1194,7 +1194,7 @@ ItaniumRecordLayoutBuilder::LayoutBase(const BaseSubobjectInfo *Base) {
if (!HasExternalLayout) {
// Round up the current record size to the base's alignment boundary.
- Offset = getDataSize().RoundUpToAlignment(BaseAlign);
+ Offset = getDataSize().alignTo(BaseAlign);
// Try to place the base.
while (!EmptySubobjects->CanPlaceBaseAtOffset(Base, Offset))
@@ -1204,7 +1204,7 @@ ItaniumRecordLayoutBuilder::LayoutBase(const BaseSubobjectInfo *Base) {
(void)Allowed;
assert(Allowed && "Base subobject externally placed at overlapping offset");
- if (InferAlignment && Offset < getDataSize().RoundUpToAlignment(BaseAlign)){
+ if (InferAlignment && Offset < getDataSize().alignTo(BaseAlign)) {
// The externally-supplied base offset is before the base offset we
// computed. Assume that the structure is packed.
Alignment = CharUnits::One();
@@ -1292,8 +1292,7 @@ void ItaniumRecordLayoutBuilder::Layout(const CXXRecordDecl *RD) {
LayoutFields(RD);
NonVirtualSize = Context.toCharUnitsFromBits(
- llvm::RoundUpToAlignment(getSizeInBits(),
- Context.getTargetInfo().getCharAlign()));
+ llvm::alignTo(getSizeInBits(), Context.getTargetInfo().getCharAlign()));
NonVirtualAlignment = Alignment;
// Lay out the virtual bases and add the primary virtual base offsets.
@@ -1364,7 +1363,7 @@ static uint64_t
roundUpSizeToCharAlignment(uint64_t Size,
const ASTContext &Context) {
uint64_t CharAlignment = Context.getTargetInfo().getCharAlign();
- return llvm::RoundUpToAlignment(Size, CharAlignment);
+ return llvm::alignTo(Size, CharAlignment);
}
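
Every RoundUpToAlignment call in this file is renamed to llvm::alignTo with unchanged semantics: round the value up to the next multiple of a non-zero alignment. A reference implementation with worked cases:

    #include <cassert>
    #include <cstdint>

    std::uint64_t alignToRef(std::uint64_t Value, std::uint64_t Align) {
      return (Value + Align - 1) / Align * Align; // Align must be non-zero
    }

    int main() {
      assert(alignToRef(37, 8) == 40); // rounds up to the next multiple
      assert(alignToRef(40, 8) == 40); // aligned values pass through
      assert(alignToRef(0, 8) == 0);
      return 0;
    }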
void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize,
@@ -1411,13 +1410,12 @@ void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize,
} else {
// The bitfield is allocated starting at the next offset aligned
// appropriately for T', with length n bits.
- FieldOffset = llvm::RoundUpToAlignment(getDataSizeInBits(),
- Context.toBits(TypeAlign));
+ FieldOffset = llvm::alignTo(getDataSizeInBits(), Context.toBits(TypeAlign));
uint64_t NewSizeInBits = FieldOffset + FieldSize;
- setDataSize(llvm::RoundUpToAlignment(NewSizeInBits,
- Context.getTargetInfo().getCharAlign()));
+ setDataSize(
+ llvm::alignTo(NewSizeInBits, Context.getTargetInfo().getCharAlign()));
UnfilledBitsInLastUnit = getDataSizeInBits() - NewSizeInBits;
}
@@ -1560,10 +1558,13 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
// But, if there's a #pragma pack in play, that takes precedent over
// even the 'aligned' attribute, for non-zero-width bitfields.
+ unsigned MaxFieldAlignmentInBits = Context.toBits(MaxFieldAlignment);
if (!MaxFieldAlignment.isZero() && FieldSize) {
- unsigned MaxFieldAlignmentInBits = Context.toBits(MaxFieldAlignment);
- FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits);
UnpackedFieldAlign = std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits);
+ if (FieldPacked)
+ FieldAlign = UnpackedFieldAlign;
+ else
+ FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits);
}
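
The reworked branch changes how a packed bit-field interacts with #pragma pack: rather than always taking min(FieldAlign, MaxFieldAlignmentInBits), a field marked packed now adopts the pack-capped UnpackedFieldAlign. A source pattern this affects; offsets are target-ABI dependent, so none are asserted:

    #pragma pack(push, 2)
    struct S {
      char c;
      // Placement of this bit-field is governed by the new FieldPacked
      // branch when the attribute and the pragma are both in play.
      __attribute__((packed)) unsigned bf : 17;
    };
    #pragma pack(pop)

    int main() { return sizeof(S) != 0 ? 0 : 1; }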
// But, ms_struct just ignores all of that in unions, even explicit
@@ -1587,9 +1588,9 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
// start a new storage unit), just do so, regardless of any other
// other consideration. Otherwise, round up to the right alignment.
if (FieldSize == 0 || FieldSize > UnfilledBitsInLastUnit) {
- FieldOffset = llvm::RoundUpToAlignment(FieldOffset, FieldAlign);
- UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset,
- UnpackedFieldAlign);
+ FieldOffset = llvm::alignTo(FieldOffset, FieldAlign);
+ UnpackedFieldOffset =
+ llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign);
UnfilledBitsInLastUnit = 0;
}
@@ -1601,22 +1602,28 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
if (FieldSize == 0 ||
(AllowPadding &&
(FieldOffset & (FieldAlign-1)) + FieldSize > TypeSize)) {
- FieldOffset = llvm::RoundUpToAlignment(FieldOffset, FieldAlign);
- } else if (ExplicitFieldAlign) {
+ FieldOffset = llvm::alignTo(FieldOffset, FieldAlign);
+ } else if (ExplicitFieldAlign &&
+ (MaxFieldAlignmentInBits == 0 ||
+ ExplicitFieldAlign <= MaxFieldAlignmentInBits) &&
+ Context.getTargetInfo().useExplicitBitFieldAlignment()) {
// TODO: figure it out what needs to be done on targets that don't honor
// bit-field type alignment like ARM APCS ABI.
- FieldOffset = llvm::RoundUpToAlignment(FieldOffset, ExplicitFieldAlign);
+ FieldOffset = llvm::alignTo(FieldOffset, ExplicitFieldAlign);
}
// Repeat the computation for diagnostic purposes.
if (FieldSize == 0 ||
(AllowPadding &&
(UnpackedFieldOffset & (UnpackedFieldAlign-1)) + FieldSize > TypeSize))
- UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset,
- UnpackedFieldAlign);
- else if (ExplicitFieldAlign)
- UnpackedFieldOffset = llvm::RoundUpToAlignment(UnpackedFieldOffset,
- ExplicitFieldAlign);
+ UnpackedFieldOffset =
+ llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign);
+ else if (ExplicitFieldAlign &&
+ (MaxFieldAlignmentInBits == 0 ||
+ ExplicitFieldAlign <= MaxFieldAlignmentInBits) &&
+ Context.getTargetInfo().useExplicitBitFieldAlignment())
+ UnpackedFieldOffset =
+ llvm::alignTo(UnpackedFieldOffset, ExplicitFieldAlign);
}
// If we're using external layout, give the external layout a chance
@@ -1677,7 +1684,7 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
} else {
uint64_t NewSizeInBits = FieldOffset + FieldSize;
uint64_t CharAlignment = Context.getTargetInfo().getCharAlign();
- setDataSize(llvm::RoundUpToAlignment(NewSizeInBits, CharAlignment));
+ setDataSize(llvm::alignTo(NewSizeInBits, CharAlignment));
UnfilledBitsInLastUnit = getDataSizeInBits() - NewSizeInBits;
// The only time we can get here for an ms_struct is if this is a
@@ -1767,9 +1774,8 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
}
// Round up the current record size to the field's alignment boundary.
- FieldOffset = FieldOffset.RoundUpToAlignment(FieldAlign);
- UnpackedFieldOffset =
- UnpackedFieldOffset.RoundUpToAlignment(UnpackedFieldAlign);
+ FieldOffset = FieldOffset.alignTo(FieldAlign);
+ UnpackedFieldOffset = UnpackedFieldOffset.alignTo(UnpackedFieldAlign);
if (UseExternalLayout) {
FieldOffset = Context.toCharUnitsFromBits(
@@ -1840,11 +1846,10 @@ void ItaniumRecordLayoutBuilder::FinishLayout(const NamedDecl *D) {
// record itself.
uint64_t UnpaddedSize = getSizeInBits() - UnfilledBitsInLastUnit;
uint64_t UnpackedSizeInBits =
- llvm::RoundUpToAlignment(getSizeInBits(),
- Context.toBits(UnpackedAlignment));
+ llvm::alignTo(getSizeInBits(), Context.toBits(UnpackedAlignment));
CharUnits UnpackedSize = Context.toCharUnitsFromBits(UnpackedSizeInBits);
- uint64_t RoundedSize
- = llvm::RoundUpToAlignment(getSizeInBits(), Context.toBits(Alignment));
+ uint64_t RoundedSize =
+ llvm::alignTo(getSizeInBits(), Context.toBits(Alignment));
if (UseExternalLayout) {
// If we're inferring alignment, and the external size is smaller than
@@ -2127,7 +2132,7 @@ static bool isMsLayout(const ASTContext &Context) {
// function pointer) and a vbptr (virtual base pointer). They can each be
// shared with a non-virtual base. These bases need not be the same. vfptrs
// always occur at offset 0. vbptrs can occur at an arbitrary offset and are
-// placed after the lexiographically last non-virtual base. This placement
+// placed after the lexicographically last non-virtual base. This placement
// is always before fields but can be in the middle of the non-virtual bases
// due to the two-pass layout scheme for non-virtual-bases.
// * Virtual bases sometimes require a 'vtordisp' field that is laid out before
@@ -2148,7 +2153,7 @@ static bool isMsLayout(const ASTContext &Context) {
// pushes all bases and fields back by the alignment imposed by those bases
// and fields. This can potentially add a significant amount of padding.
// vbptrs are injected immediately after the last non-virtual base as
-// lexiographically ordered in the code. If this site isn't pointer aligned
+// lexicographically ordered in the code. If this site isn't pointer aligned
// the vbptr is placed at the next properly aligned location. Enough padding
// is added to guarantee a fit.
// * The last zero sized non-virtual base can be placed at the end of the
@@ -2223,7 +2228,8 @@ public:
/// laid out.
void initializeCXXLayout(const CXXRecordDecl *RD);
void layoutNonVirtualBases(const CXXRecordDecl *RD);
- void layoutNonVirtualBase(const CXXRecordDecl *BaseDecl,
+ void layoutNonVirtualBase(const CXXRecordDecl *RD,
+ const CXXRecordDecl *BaseDecl,
const ASTRecordLayout &BaseLayout,
const ASTRecordLayout *&PreviousBaseLayout);
void injectVFPtr(const CXXRecordDecl *RD);
@@ -2329,7 +2335,7 @@ MicrosoftRecordLayoutBuilder::getAdjustedElementInfo(
if (!MaxFieldAlignment.isZero())
Info.Alignment = std::min(Info.Alignment, MaxFieldAlignment);
// Track zero-sized subobjects here where it's already available.
- EndsWithZeroSizedObject = Layout.hasZeroSizedSubObject();
+ EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject();
// Respect required alignment; this is necessary because we may have adjusted
// the alignment in the case of pragma pack. Note that the required alignment
// doesn't actually apply to the struct alignment at this point.
@@ -2364,7 +2370,7 @@ MicrosoftRecordLayoutBuilder::getAdjustedElementInfo(
if (auto RT =
FD->getType()->getBaseElementTypeUnsafe()->getAs<RecordType>()) {
auto const &Layout = Context.getASTRecordLayout(RT->getDecl());
- EndsWithZeroSizedObject = Layout.hasZeroSizedSubObject();
+ EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject();
FieldRequiredAlignment = std::max(FieldRequiredAlignment,
Layout.getRequiredAlignment());
}
@@ -2385,7 +2391,7 @@ void MicrosoftRecordLayoutBuilder::layout(const RecordDecl *RD) {
MinEmptyStructSize = CharUnits::fromQuantity(4);
initializeLayout(RD);
layoutFields(RD);
- DataSize = Size = Size.RoundUpToAlignment(Alignment);
+ DataSize = Size = Size.alignTo(Alignment);
RequiredAlignment = std::max(
RequiredAlignment, Context.toCharUnitsFromBits(RD->getMaxAlignment()));
finalizeLayout(RD);
@@ -2405,7 +2411,7 @@ void MicrosoftRecordLayoutBuilder::cxxLayout(const CXXRecordDecl *RD) {
auto RoundingAlignment = Alignment;
if (!MaxFieldAlignment.isZero())
RoundingAlignment = std::min(RoundingAlignment, MaxFieldAlignment);
- NonVirtualSize = Size = Size.RoundUpToAlignment(RoundingAlignment);
+ NonVirtualSize = Size = Size.alignTo(RoundingAlignment);
RequiredAlignment = std::max(
RequiredAlignment, Context.toCharUnitsFromBits(RD->getMaxAlignment()));
layoutVirtualBases(RD);
@@ -2471,7 +2477,7 @@ MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) {
// out any bases that do not contain vfptrs. We implement this as two passes
// over the bases. This approach guarantees that the primary base is laid out
// first. We use these passes to calculate some additional aggregated
- // information about the bases, such as reqruied alignment and the presence of
+ // information about the bases, such as required alignment and the presence of
// zero sized members.
const ASTRecordLayout *PreviousBaseLayout = nullptr;
// Iterate through the bases and lay out the non-virtual ones.
@@ -2483,7 +2489,7 @@ MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) {
HasVBPtr = true;
continue;
}
- // Check fo a base to share a VBPtr with.
+ // Check for a base to share a VBPtr with.
if (!SharedVBPtrBase && BaseLayout.hasVBPtr()) {
SharedVBPtrBase = BaseDecl;
HasVBPtr = true;
@@ -2497,7 +2503,7 @@ MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) {
LeadsWithZeroSizedBase = BaseLayout.leadsWithZeroSizedBase();
}
// Lay out the base.
- layoutNonVirtualBase(BaseDecl, BaseLayout, PreviousBaseLayout);
+ layoutNonVirtualBase(RD, BaseDecl, BaseLayout, PreviousBaseLayout);
}
// Figure out if we need a fresh VFPtr for this class.
if (!PrimaryBase && RD->isDynamicClass())
@@ -2526,7 +2532,7 @@ MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) {
LeadsWithZeroSizedBase = BaseLayout.leadsWithZeroSizedBase();
}
// Lay out the base.
- layoutNonVirtualBase(BaseDecl, BaseLayout, PreviousBaseLayout);
+ layoutNonVirtualBase(RD, BaseDecl, BaseLayout, PreviousBaseLayout);
VBPtrOffset = Bases[BaseDecl] + BaseLayout.getNonVirtualSize();
}
// Set our VBPtroffset if we know it at this point.
@@ -2538,15 +2544,32 @@ MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) {
}
}
+static bool recordUsesEBO(const RecordDecl *RD) {
+ if (!isa<CXXRecordDecl>(RD))
+ return false;
+ if (RD->hasAttr<EmptyBasesAttr>())
+ return true;
+ if (auto *LVA = RD->getAttr<LayoutVersionAttr>())
+ // TODO: Double check with the next version of MSVC.
+ if (LVA->getVersion() <= LangOptions::MSVC2015)
+ return false;
+ // TODO: Some later version of MSVC will change the default behavior of the
+ // compiler to enable EBO by default. When this happens, we will need an
+ // additional isCompatibleWithMSVC check.
+ return false;
+}
+
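
recordUsesEBO keys Microsoft-ABI empty-base optimization off the new __declspec(empty_bases) attribute (and, per the TODOs, perhaps off a future compatibility version). Roughly what that means on the user side; sizes here are illustrative, not guaranteed:

    // MSVC-targeted sketch (requires Microsoft extensions to compile).
    struct Empty1 {};
    struct Empty2 {};

    struct __declspec(empty_bases) WithEBO : Empty1, Empty2 {
      int x; // empty bases may share offset 0, so sizeof can equal
             // sizeof(int)
    };

    struct WithoutEBO : Empty1, Empty2 {
      int x; // 2015-era MSVC layout gives the empty bases their own bytes
    };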
void MicrosoftRecordLayoutBuilder::layoutNonVirtualBase(
+ const CXXRecordDecl *RD,
const CXXRecordDecl *BaseDecl,
const ASTRecordLayout &BaseLayout,
const ASTRecordLayout *&PreviousBaseLayout) {
// Insert padding between two bases if the first one is zero sized or ends
// with a zero sized subobject and the second one is zero sized or leads
// with a zero sized base.
- if (PreviousBaseLayout && PreviousBaseLayout->hasZeroSizedSubObject() &&
- BaseLayout.leadsWithZeroSizedBase())
+ bool MDCUsesEBO = recordUsesEBO(RD);
+ if (PreviousBaseLayout && PreviousBaseLayout->endsWithZeroSizedObject() &&
+ BaseLayout.leadsWithZeroSizedBase() && !MDCUsesEBO)
Size++;
ElementInfo Info = getAdjustedElementInfo(BaseLayout);
CharUnits BaseOffset;
@@ -2555,14 +2578,23 @@ void MicrosoftRecordLayoutBuilder::layoutNonVirtualBase(
bool FoundBase = false;
if (UseExternalLayout) {
FoundBase = External.getExternalNVBaseOffset(BaseDecl, BaseOffset);
- if (FoundBase)
+ if (FoundBase) {
assert(BaseOffset >= Size && "base offset already allocated");
+ Size = BaseOffset;
+ }
}
- if (!FoundBase)
- BaseOffset = Size.RoundUpToAlignment(Info.Alignment);
+ if (!FoundBase) {
+ if (MDCUsesEBO && BaseDecl->isEmpty() &&
+ BaseLayout.getNonVirtualSize() == CharUnits::Zero()) {
+ BaseOffset = CharUnits::Zero();
+ } else {
+ // Otherwise, lay the base out at the end of the MDC.
+ BaseOffset = Size = Size.alignTo(Info.Alignment);
+ }
+ }
Bases.insert(std::make_pair(BaseDecl, BaseOffset));
- Size = BaseOffset + BaseLayout.getNonVirtualSize();
+ Size += BaseLayout.getNonVirtualSize();
PreviousBaseLayout = &BaseLayout;
}
@@ -2590,7 +2622,7 @@ void MicrosoftRecordLayoutBuilder::layoutField(const FieldDecl *FD) {
Context.toCharUnitsFromBits(External.getExternalFieldOffset(FD));
assert(FieldOffset >= Size && "field offset already allocated");
} else {
- FieldOffset = Size.RoundUpToAlignment(Info.Alignment);
+ FieldOffset = Size.alignTo(Info.Alignment);
}
placeFieldAtOffset(FieldOffset);
Size = FieldOffset + Info.Size;
@@ -2625,7 +2657,7 @@ void MicrosoftRecordLayoutBuilder::layoutBitField(const FieldDecl *FD) {
// TODO: Add a Sema warning that MS ignores bitfield alignment in unions.
} else {
// Allocate a new block of memory and place the bitfield in it.
- CharUnits FieldOffset = Size.RoundUpToAlignment(Info.Alignment);
+ CharUnits FieldOffset = Size.alignTo(Info.Alignment);
placeFieldAtOffset(FieldOffset);
Size = FieldOffset + Info.Size;
Alignment = std::max(Alignment, Info.Alignment);
@@ -2651,7 +2683,7 @@ MicrosoftRecordLayoutBuilder::layoutZeroWidthBitField(const FieldDecl *FD) {
// TODO: Add a Sema warning that MS ignores bitfield alignment in unions.
} else {
// Round up the current record size to the field's alignment boundary.
- CharUnits FieldOffset = Size.RoundUpToAlignment(Info.Alignment);
+ CharUnits FieldOffset = Size.alignTo(Info.Alignment);
placeFieldAtOffset(FieldOffset);
Size = FieldOffset;
Alignment = std::max(Alignment, Info.Alignment);
@@ -2664,7 +2696,7 @@ void MicrosoftRecordLayoutBuilder::injectVBPtr(const CXXRecordDecl *RD) {
// Inject the VBPointer at the injection site.
CharUnits InjectionSite = VBPtrOffset;
// But before we do, make sure it's properly aligned.
- VBPtrOffset = VBPtrOffset.RoundUpToAlignment(PointerInfo.Alignment);
+ VBPtrOffset = VBPtrOffset.alignTo(PointerInfo.Alignment);
// Shift everything after the vbptr down, unless we're using an external
// layout.
if (UseExternalLayout)
@@ -2673,8 +2705,8 @@ void MicrosoftRecordLayoutBuilder::injectVBPtr(const CXXRecordDecl *RD) {
CharUnits FieldStart = VBPtrOffset + PointerInfo.Size;
// Make sure that the amount we push the fields back by is a multiple of the
// alignment.
- CharUnits Offset = (FieldStart - InjectionSite).RoundUpToAlignment(
- std::max(RequiredAlignment, Alignment));
+ CharUnits Offset = (FieldStart - InjectionSite)
+ .alignTo(std::max(RequiredAlignment, Alignment));
Size += Offset;
for (uint64_t &FieldOffset : FieldOffsets)
FieldOffset += Context.toBits(Offset);
@@ -2688,8 +2720,8 @@ void MicrosoftRecordLayoutBuilder::injectVFPtr(const CXXRecordDecl *RD) {
return;
// Make sure that the amount we push the struct back by is a multiple of the
// alignment.
- CharUnits Offset = PointerInfo.Size.RoundUpToAlignment(
- std::max(RequiredAlignment, Alignment));
+ CharUnits Offset =
+ PointerInfo.Size.alignTo(std::max(RequiredAlignment, Alignment));
// Push back the vbptr, but increase the size of the object and push back
// regular fields by the offset only if not using external record layout.
if (HasVBPtr)
@@ -2741,9 +2773,10 @@ void MicrosoftRecordLayoutBuilder::layoutVirtualBases(const CXXRecordDecl *RD) {
// with a zero sized base. The padding between virtual bases is 4
// bytes (in both 32- and 64-bit modes) and always involves rounding up to
// the required alignment; we don't know why.
- if ((PreviousBaseLayout && PreviousBaseLayout->hasZeroSizedSubObject() &&
- BaseLayout.leadsWithZeroSizedBase()) || HasVtordisp) {
- Size = Size.RoundUpToAlignment(VtorDispAlignment) + VtorDispSize;
+ if ((PreviousBaseLayout && PreviousBaseLayout->endsWithZeroSizedObject() &&
+ BaseLayout.leadsWithZeroSizedBase() && !recordUsesEBO(RD)) ||
+ HasVtordisp) {
+ Size = Size.alignTo(VtorDispAlignment) + VtorDispSize;
Alignment = std::max(VtorDispAlignment, Alignment);
}
// Insert the virtual base.
@@ -2758,7 +2791,7 @@ void MicrosoftRecordLayoutBuilder::layoutVirtualBases(const CXXRecordDecl *RD) {
assert(BaseOffset >= Size && "base offset already allocated");
}
if (!FoundBase)
- BaseOffset = Size.RoundUpToAlignment(Info.Alignment);
+ BaseOffset = Size.alignTo(Info.Alignment);
VBases.insert(std::make_pair(BaseDecl,
ASTRecordLayout::VBaseInfo(BaseOffset, HasVtordisp)));
@@ -2777,11 +2810,13 @@ void MicrosoftRecordLayoutBuilder::finalizeLayout(const RecordDecl *RD) {
if (!MaxFieldAlignment.isZero())
RoundingAlignment = std::min(RoundingAlignment, MaxFieldAlignment);
RoundingAlignment = std::max(RoundingAlignment, RequiredAlignment);
- Size = Size.RoundUpToAlignment(RoundingAlignment);
+ Size = Size.alignTo(RoundingAlignment);
}
if (Size.isZero()) {
- EndsWithZeroSizedObject = true;
- LeadsWithZeroSizedBase = true;
+ if (!recordUsesEBO(RD) || !cast<CXXRecordDecl>(RD)->isEmpty()) {
+ EndsWithZeroSizedObject = true;
+ LeadsWithZeroSizedBase = true;
+ }
// Zero-sized structures have size equal to their alignment if a
// __declspec(align) came into play.
if (RequiredAlignment >= MinEmptyStructSize)
@@ -2914,8 +2949,7 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
NewEntry = new (*this) ASTRecordLayout(
*this, Builder.Size, Builder.Alignment, Builder.RequiredAlignment,
Builder.HasOwnVFPtr, Builder.HasOwnVFPtr || Builder.PrimaryBase,
- Builder.VBPtrOffset, Builder.NonVirtualSize,
- Builder.FieldOffsets.data(), Builder.FieldOffsets.size(),
+ Builder.VBPtrOffset, Builder.DataSize, Builder.FieldOffsets,
Builder.NonVirtualSize, Builder.Alignment, CharUnits::Zero(),
Builder.PrimaryBase, false, Builder.SharedVBPtrBase,
Builder.EndsWithZeroSizedObject, Builder.LeadsWithZeroSizedBase,
@@ -2924,8 +2958,7 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
Builder.layout(D);
NewEntry = new (*this) ASTRecordLayout(
*this, Builder.Size, Builder.Alignment, Builder.RequiredAlignment,
- Builder.Size, Builder.FieldOffsets.data(),
- Builder.FieldOffsets.size());
+ Builder.Size, Builder.FieldOffsets);
}
} else {
if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
@@ -2948,9 +2981,8 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
*this, Builder.getSize(), Builder.Alignment,
/*RequiredAlignment : used by MS-ABI*/
Builder.Alignment, Builder.HasOwnVFPtr, RD->isDynamicClass(),
- CharUnits::fromQuantity(-1), DataSize, Builder.FieldOffsets.data(),
- Builder.FieldOffsets.size(), NonVirtualSize,
- Builder.NonVirtualAlignment,
+ CharUnits::fromQuantity(-1), DataSize, Builder.FieldOffsets,
+ NonVirtualSize, Builder.NonVirtualAlignment,
EmptySubobjects.SizeOfLargestEmptySubobject, Builder.PrimaryBase,
Builder.PrimaryBaseIsVirtual, nullptr, false, false, Builder.Bases,
Builder.VBases);
@@ -2961,8 +2993,7 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const {
NewEntry = new (*this) ASTRecordLayout(
*this, Builder.getSize(), Builder.Alignment,
/*RequiredAlignment : used by MS-ABI*/
- Builder.Alignment, Builder.getSize(), Builder.FieldOffsets.data(),
- Builder.FieldOffsets.size());
+ Builder.Alignment, Builder.getSize(), Builder.FieldOffsets);
}
}
@@ -3065,7 +3096,7 @@ ASTContext::getObjCLayout(const ObjCInterfaceDecl *D,
// Add in synthesized ivar count if laying out an implementation.
if (Impl) {
unsigned SynthCount = CountNonClassIvars(D);
- // If there aren't any sythesized ivars then reuse the interface
+ // If there aren't any synthesized ivars then reuse the interface
// entry. Note we can't cache this because we simply free all
// entries later; however we shouldn't look up implementations
// frequently.
@@ -3077,13 +3108,12 @@ ASTContext::getObjCLayout(const ObjCInterfaceDecl *D,
Builder.Layout(D);
const ASTRecordLayout *NewEntry =
- new (*this) ASTRecordLayout(*this, Builder.getSize(),
+ new (*this) ASTRecordLayout(*this, Builder.getSize(),
Builder.Alignment,
/*RequiredAlignment : used by MS-ABI*/
Builder.Alignment,
Builder.getDataSize(),
- Builder.FieldOffsets.data(),
- Builder.FieldOffsets.size());
+ Builder.FieldOffsets);
ObjCLayouts[Key] = NewEntry;
diff --git a/contrib/llvm/tools/clang/lib/AST/Stmt.cpp b/contrib/llvm/tools/clang/lib/AST/Stmt.cpp
index ca63d8486d82..75c076399511 100644
--- a/contrib/llvm/tools/clang/lib/AST/Stmt.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Stmt.cpp
@@ -503,6 +503,9 @@ unsigned GCCAsmStmt::AnalyzeAsmString(SmallVectorImpl<AsmStringPiece>&Pieces,
bool HasVariants = !C.getTargetInfo().hasNoAsmVariants();
+ unsigned LastAsmStringToken = 0;
+ unsigned LastAsmStringOffset = 0;
+
while (1) {
// Done with the string?
if (CurPtr == StrEnd) {
@@ -589,10 +592,12 @@ unsigned GCCAsmStmt::AnalyzeAsmString(SmallVectorImpl<AsmStringPiece>&Pieces,
// (BeginLoc, EndLoc) represents the range of the operand we are currently
// processing. Unlike Str, the range includes the leading '%'.
- SourceLocation BeginLoc =
- getAsmString()->getLocationOfByte(Percent - StrStart, SM, LO, TI);
- SourceLocation EndLoc =
- getAsmString()->getLocationOfByte(CurPtr - StrStart, SM, LO, TI);
+ SourceLocation BeginLoc = getAsmString()->getLocationOfByte(
+ Percent - StrStart, SM, LO, TI, &LastAsmStringToken,
+ &LastAsmStringOffset);
+ SourceLocation EndLoc = getAsmString()->getLocationOfByte(
+ CurPtr - StrStart, SM, LO, TI, &LastAsmStringToken,
+ &LastAsmStringOffset);
Pieces.emplace_back(N, std::move(Str), BeginLoc, EndLoc);
continue;
@@ -623,10 +628,12 @@ unsigned GCCAsmStmt::AnalyzeAsmString(SmallVectorImpl<AsmStringPiece>&Pieces,
// (BeginLoc, EndLoc) represents the range of the operand we are currently
// processing. Unlike Str, the range includes the leading '%'.
- SourceLocation BeginLoc =
- getAsmString()->getLocationOfByte(Percent - StrStart, SM, LO, TI);
- SourceLocation EndLoc =
- getAsmString()->getLocationOfByte(NameEnd + 1 - StrStart, SM, LO, TI);
+ SourceLocation BeginLoc = getAsmString()->getLocationOfByte(
+ Percent - StrStart, SM, LO, TI, &LastAsmStringToken,
+ &LastAsmStringOffset);
+ SourceLocation EndLoc = getAsmString()->getLocationOfByte(
+ NameEnd + 1 - StrStart, SM, LO, TI, &LastAsmStringToken,
+ &LastAsmStringOffset);
Pieces.emplace_back(N, std::move(Str), BeginLoc, EndLoc);
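
getLocationOfByte is now handed two cursors, the last scanned token and byte offset, that persist across loop iterations, so each operand lookup resumes where the previous one stopped instead of rescanning the asm string from the start. The resumable-scan idiom in miniature:

    #include <cstddef>
    #include <string>
    #include <vector>

    // A cursor carried across calls lets each query continue from the
    // previous stopping point, turning repeated scans into one pass.
    std::size_t findFrom(const std::string &S, char C, std::size_t *Cursor) {
      std::size_t I = *Cursor; // resume from the cached position
      while (I < S.size() && S[I] != C)
        ++I;
      *Cursor = I;             // remember progress for the next query
      return I;
    }

    int main() {
      std::string Asm = "mov %0, %1";
      std::size_t Cursor = 0;
      std::vector<std::size_t> Percents;
      while (true) {
        std::size_t P = findFrom(Asm, '%', &Cursor);
        if (P == Asm.size())
          break;
        Percents.push_back(P);
        ++Cursor; // step past the match before resuming
      }
      return Percents.size() == 2 ? 0 : 1;
    }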
@@ -756,11 +763,13 @@ void MSAsmStmt::initialize(const ASTContext &C, StringRef asmstr,
});
}
-IfStmt::IfStmt(const ASTContext &C, SourceLocation IL, VarDecl *var, Expr *cond,
- Stmt *then, SourceLocation EL, Stmt *elsev)
- : Stmt(IfStmtClass), IfLoc(IL), ElseLoc(EL)
-{
+IfStmt::IfStmt(const ASTContext &C, SourceLocation IL, bool IsConstexpr,
+ Stmt *init, VarDecl *var, Expr *cond, Stmt *then,
+ SourceLocation EL, Stmt *elsev)
+ : Stmt(IfStmtClass), IfLoc(IL), ElseLoc(EL) {
+ setConstexpr(IsConstexpr);
setConditionVariable(C, var);
+ SubExprs[INIT] = init;
SubExprs[COND] = cond;
SubExprs[THEN] = then;
SubExprs[ELSE] = elsev;
@@ -816,9 +825,11 @@ void ForStmt::setConditionVariable(const ASTContext &C, VarDecl *V) {
VarRange.getEnd());
}
-SwitchStmt::SwitchStmt(const ASTContext &C, VarDecl *Var, Expr *cond)
+SwitchStmt::SwitchStmt(const ASTContext &C, Stmt *init, VarDecl *Var,
+ Expr *cond)
: Stmt(SwitchStmtClass), FirstCase(nullptr, false) {
setConditionVariable(C, Var);
+ SubExprs[INIT] = init;
SubExprs[COND] = cond;
SubExprs[BODY] = nullptr;
}
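
The parameters added to IfStmt and SwitchStmt model two C++1z features: constexpr if and init-statements in if/switch conditions. For instance (built with -std=c++1z in this era):

    #include <map>
    #include <string>
    #include <type_traits>

    template <typename T>
    int describe(const std::map<std::string, T> &M) {
      if constexpr (std::is_integral_v<T>) {        // IsConstexpr == true
        if (auto It = M.find("k"); It != M.end())   // IfStmt SubExprs[INIT]
          return static_cast<int>(It->second);
      }
      switch (int N = static_cast<int>(M.size()); N) { // SwitchStmt INIT
      case 0:
        return -1;
      default:
        return N;
      }
    }

    int main() {
      std::map<std::string, int> M{{"k", 7}};
      return describe(M) == 7 ? 0 : 1;
    }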
@@ -987,8 +998,7 @@ CapturedStmt::Capture *CapturedStmt::getStoredCaptures() const {
unsigned Size = sizeof(CapturedStmt) + sizeof(Stmt *) * (NumCaptures + 1);
// Offset of the first Capture object.
- unsigned FirstCaptureOffset =
- llvm::RoundUpToAlignment(Size, llvm::alignOf<Capture>());
+ unsigned FirstCaptureOffset = llvm::alignTo(Size, llvm::alignOf<Capture>());
return reinterpret_cast<Capture *>(
reinterpret_cast<char *>(const_cast<CapturedStmt *>(this))
@@ -1045,7 +1055,7 @@ CapturedStmt *CapturedStmt::Create(const ASTContext &Context, Stmt *S,
unsigned Size = sizeof(CapturedStmt) + sizeof(Stmt *) * (Captures.size() + 1);
if (!Captures.empty()) {
// Realign for the following Capture array.
- Size = llvm::RoundUpToAlignment(Size, llvm::alignOf<Capture>());
+ Size = llvm::alignTo(Size, llvm::alignOf<Capture>());
Size += sizeof(Capture) * Captures.size();
}
@@ -1058,7 +1068,7 @@ CapturedStmt *CapturedStmt::CreateDeserialized(const ASTContext &Context,
unsigned Size = sizeof(CapturedStmt) + sizeof(Stmt *) * (NumCaptures + 1);
if (NumCaptures > 0) {
// Realign for the following Capture array.
- Size = llvm::RoundUpToAlignment(Size, llvm::alignOf<Capture>());
+ Size = llvm::alignTo(Size, llvm::alignOf<Capture>());
Size += sizeof(Capture) * NumCaptures;
}
diff --git a/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp b/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp
index e39a01daf96c..4692db84b505 100644
--- a/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp
@@ -49,7 +49,8 @@ CXXTryStmt::CXXTryStmt(SourceLocation tryLoc, Stmt *tryBlock,
std::copy(handlers.begin(), handlers.end(), Stmts + 1);
}
-CXXForRangeStmt::CXXForRangeStmt(DeclStmt *Range, DeclStmt *BeginEndStmt,
+CXXForRangeStmt::CXXForRangeStmt(DeclStmt *Range,
+ DeclStmt *BeginStmt, DeclStmt *EndStmt,
Expr *Cond, Expr *Inc, DeclStmt *LoopVar,
Stmt *Body, SourceLocation FL,
SourceLocation CAL, SourceLocation CL,
@@ -57,7 +58,8 @@ CXXForRangeStmt::CXXForRangeStmt(DeclStmt *Range, DeclStmt *BeginEndStmt,
: Stmt(CXXForRangeStmtClass), ForLoc(FL), CoawaitLoc(CAL), ColonLoc(CL),
RParenLoc(RPL) {
SubExprs[RANGE] = Range;
- SubExprs[BEGINEND] = BeginEndStmt;
+ SubExprs[BEGINSTMT] = BeginStmt;
+ SubExprs[ENDSTMT] = EndStmt;
SubExprs[COND] = Cond;
SubExprs[INC] = Inc;
SubExprs[LOOPVAR] = LoopVar;
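
Splitting BEGINEND into BEGINSTMT and ENDSTMT tracks the C++1z change (P0184) that lets a range-based for use different types for the begin iterator and the end sentinel, so the two declarations can no longer share one DeclStmt. A minimal sentinel range that depends on this:

    #include <cstddef>

    struct Sentinel {};
    struct CStrIter {
      const char *P;
      char operator*() const { return *P; }
      CStrIter &operator++() { ++P; return *this; }
      bool operator!=(Sentinel) const { return *P != '\0'; }
    };
    struct CStrRange {
      const char *S;
      CStrIter begin() const { return {S}; }
      Sentinel end() const { return {}; }  // a different type than begin()
    };

    int main() {
      std::size_t N = 0;
      for (char C : CStrRange{"abc"})      // needs separate begin/end decls
        N += C != '\0';
      return N == 3 ? 0 : 1;
    }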
diff --git a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp
index 7f923d8a8251..f1ddedb2b0f2 100644
--- a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp
@@ -57,8 +57,8 @@ void OMPLoopDirective::setFinals(ArrayRef<Expr *> A) {
OMPParallelDirective *OMPParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPParallelDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPParallelDirective *Dir =
@@ -72,8 +72,8 @@ OMPParallelDirective *OMPParallelDirective::Create(
OMPParallelDirective *OMPParallelDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPParallelDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPParallelDirective(NumClauses);
@@ -84,8 +84,8 @@ OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSimdDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_simd));
@@ -105,6 +105,7 @@ OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
return Dir;
}
@@ -112,8 +113,8 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
unsigned CollapsedNum,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSimdDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_simd));
@@ -125,8 +126,8 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPForDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for));
@@ -148,11 +149,15 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -161,8 +166,8 @@ OMPForDirective *OMPForDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
unsigned CollapsedNum,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPForDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for));
@@ -174,8 +179,8 @@ OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPForSimdDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for_simd));
@@ -197,11 +202,15 @@ OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
return Dir;
}
@@ -209,8 +218,8 @@ OMPForSimdDirective *OMPForSimdDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
unsigned CollapsedNum,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPForSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPForSimdDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_for_simd));
@@ -220,8 +229,8 @@ OMPForSimdDirective *OMPForSimdDirective::CreateEmpty(const ASTContext &C,
OMPSectionsDirective *OMPSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSectionsDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSectionsDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPSectionsDirective *Dir =
@@ -235,8 +244,8 @@ OMPSectionsDirective *OMPSectionsDirective::Create(
OMPSectionsDirective *OMPSectionsDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSectionsDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSectionsDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPSectionsDirective(NumClauses);
@@ -247,8 +256,8 @@ OMPSectionDirective *OMPSectionDirective::Create(const ASTContext &C,
SourceLocation EndLoc,
Stmt *AssociatedStmt,
bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSectionDirective),
- llvm::alignOf<Stmt *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSectionDirective), llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
OMPSectionDirective *Dir = new (Mem) OMPSectionDirective(StartLoc, EndLoc);
Dir->setAssociatedStmt(AssociatedStmt);
@@ -258,8 +267,8 @@ OMPSectionDirective *OMPSectionDirective::Create(const ASTContext &C,
OMPSectionDirective *OMPSectionDirective::CreateEmpty(const ASTContext &C,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSectionDirective),
- llvm::alignOf<Stmt *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSectionDirective), llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
return new (Mem) OMPSectionDirective();
}
@@ -269,8 +278,8 @@ OMPSingleDirective *OMPSingleDirective::Create(const ASTContext &C,
SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSingleDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSingleDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPSingleDirective *Dir =
@@ -283,8 +292,8 @@ OMPSingleDirective *OMPSingleDirective::Create(const ASTContext &C,
OMPSingleDirective *OMPSingleDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPSingleDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPSingleDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPSingleDirective(NumClauses);
@@ -294,8 +303,8 @@ OMPMasterDirective *OMPMasterDirective::Create(const ASTContext &C,
SourceLocation StartLoc,
SourceLocation EndLoc,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPMasterDirective),
- llvm::alignOf<Stmt *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPMasterDirective), llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
OMPMasterDirective *Dir = new (Mem) OMPMasterDirective(StartLoc, EndLoc);
Dir->setAssociatedStmt(AssociatedStmt);
@@ -304,8 +313,8 @@ OMPMasterDirective *OMPMasterDirective::Create(const ASTContext &C,
OMPMasterDirective *OMPMasterDirective::CreateEmpty(const ASTContext &C,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPMasterDirective),
- llvm::alignOf<Stmt *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPMasterDirective), llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
return new (Mem) OMPMasterDirective();
}
@@ -314,8 +323,8 @@ OMPCriticalDirective *OMPCriticalDirective::Create(
const ASTContext &C, const DeclarationNameInfo &Name,
SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPCriticalDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPCriticalDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPCriticalDirective *Dir =
@@ -328,8 +337,8 @@ OMPCriticalDirective *OMPCriticalDirective::Create(
OMPCriticalDirective *OMPCriticalDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPCriticalDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPCriticalDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPCriticalDirective(NumClauses);
@@ -339,8 +348,8 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs, bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPParallelForDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum, OMPD_parallel_for));
@@ -362,11 +371,15 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -374,8 +387,8 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
OMPParallelForDirective *
OMPParallelForDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned CollapsedNum, EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPParallelForDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum, OMPD_parallel_for));
@@ -386,8 +399,8 @@ OMPParallelForSimdDirective *OMPParallelForSimdDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPParallelForSimdDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(
Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_parallel_for_simd));
@@ -409,11 +422,15 @@ OMPParallelForSimdDirective *OMPParallelForSimdDirective::Create(
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
return Dir;
}
@@ -421,8 +438,8 @@ OMPParallelForSimdDirective *
OMPParallelForSimdDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
unsigned CollapsedNum, EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelForSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPParallelForSimdDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(
Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_parallel_for_simd));
@@ -432,8 +449,8 @@ OMPParallelForSimdDirective::CreateEmpty(const ASTContext &C,
OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelSectionsDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPParallelSectionsDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPParallelSectionsDirective *Dir =
@@ -447,8 +464,8 @@ OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
OMPParallelSectionsDirective *
OMPParallelSectionsDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPParallelSectionsDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPParallelSectionsDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPParallelSectionsDirective(NumClauses);
@@ -458,8 +475,8 @@ OMPTaskDirective *
OMPTaskDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, bool HasCancel) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTaskDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPTaskDirective *Dir =
@@ -473,8 +490,8 @@ OMPTaskDirective::Create(const ASTContext &C, SourceLocation StartLoc,
OMPTaskDirective *OMPTaskDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTaskDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPTaskDirective(NumClauses);
@@ -527,8 +544,8 @@ OMPTaskgroupDirective *OMPTaskgroupDirective::Create(const ASTContext &C,
SourceLocation StartLoc,
SourceLocation EndLoc,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskgroupDirective),
- llvm::alignOf<Stmt *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTaskgroupDirective), llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
OMPTaskgroupDirective *Dir =
new (Mem) OMPTaskgroupDirective(StartLoc, EndLoc);
@@ -538,8 +555,8 @@ OMPTaskgroupDirective *OMPTaskgroupDirective::Create(const ASTContext &C,
OMPTaskgroupDirective *OMPTaskgroupDirective::CreateEmpty(const ASTContext &C,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskgroupDirective),
- llvm::alignOf<Stmt *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTaskgroupDirective), llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size + sizeof(Stmt *));
return new (Mem) OMPTaskgroupDirective();
}
@@ -547,8 +564,8 @@ OMPTaskgroupDirective *OMPTaskgroupDirective::CreateEmpty(const ASTContext &C,
OMPCancellationPointDirective *OMPCancellationPointDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
OpenMPDirectiveKind CancelRegion) {
- unsigned Size = llvm::RoundUpToAlignment(
- sizeof(OMPCancellationPointDirective), llvm::alignOf<Stmt *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPCancellationPointDirective),
+ llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size);
OMPCancellationPointDirective *Dir =
new (Mem) OMPCancellationPointDirective(StartLoc, EndLoc);
@@ -558,8 +575,8 @@ OMPCancellationPointDirective *OMPCancellationPointDirective::Create(
OMPCancellationPointDirective *
OMPCancellationPointDirective::CreateEmpty(const ASTContext &C, EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(
- sizeof(OMPCancellationPointDirective), llvm::alignOf<Stmt *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPCancellationPointDirective),
+ llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size);
return new (Mem) OMPCancellationPointDirective();
}
@@ -568,9 +585,9 @@ OMPCancelDirective *
OMPCancelDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses,
OpenMPDirectiveKind CancelRegion) {
- unsigned Size = llvm::RoundUpToAlignment(
- sizeof(OMPCancelDirective) + sizeof(OMPClause *) * Clauses.size(),
- llvm::alignOf<Stmt *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPCancelDirective) +
+ sizeof(OMPClause *) * Clauses.size(),
+ llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size);
OMPCancelDirective *Dir =
new (Mem) OMPCancelDirective(StartLoc, EndLoc, Clauses.size());
@@ -582,9 +599,9 @@ OMPCancelDirective::Create(const ASTContext &C, SourceLocation StartLoc,
OMPCancelDirective *OMPCancelDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPCancelDirective) +
- sizeof(OMPClause *) * NumClauses,
- llvm::alignOf<Stmt *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPCancelDirective) +
+ sizeof(OMPClause *) * NumClauses,
+ llvm::alignOf<Stmt *>());
void *Mem = C.Allocate(Size);
return new (Mem) OMPCancelDirective(NumClauses);
}
@@ -593,8 +610,8 @@ OMPFlushDirective *OMPFlushDirective::Create(const ASTContext &C,
SourceLocation StartLoc,
SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPFlushDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPFlushDirective), llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size());
OMPFlushDirective *Dir =
new (Mem) OMPFlushDirective(StartLoc, EndLoc, Clauses.size());
@@ -605,8 +622,8 @@ OMPFlushDirective *OMPFlushDirective::Create(const ASTContext &C,
OMPFlushDirective *OMPFlushDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPFlushDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPFlushDirective), llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses);
return new (Mem) OMPFlushDirective(NumClauses);
}
@@ -616,8 +633,8 @@ OMPOrderedDirective *OMPOrderedDirective::Create(const ASTContext &C,
SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPOrderedDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPOrderedDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(Stmt *) + sizeof(OMPClause *) * Clauses.size());
OMPOrderedDirective *Dir =
@@ -630,8 +647,8 @@ OMPOrderedDirective *OMPOrderedDirective::Create(const ASTContext &C,
OMPOrderedDirective *OMPOrderedDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPOrderedDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPOrderedDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(Stmt *) + sizeof(OMPClause *) * NumClauses);
return new (Mem) OMPOrderedDirective(NumClauses);
@@ -641,8 +658,8 @@ OMPAtomicDirective *OMPAtomicDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *X, Expr *V,
Expr *E, Expr *UE, bool IsXLHSInRHSPart, bool IsPostfixUpdate) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPAtomicDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPAtomicDirective), llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
5 * sizeof(Stmt *));
OMPAtomicDirective *Dir =
@@ -661,8 +678,8 @@ OMPAtomicDirective *OMPAtomicDirective::Create(
OMPAtomicDirective *OMPAtomicDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPAtomicDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPAtomicDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + 5 * sizeof(Stmt *));
return new (Mem) OMPAtomicDirective(NumClauses);
@@ -673,8 +690,8 @@ OMPTargetDirective *OMPTargetDirective::Create(const ASTContext &C,
SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTargetDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTargetDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPTargetDirective *Dir =
@@ -687,20 +704,95 @@ OMPTargetDirective *OMPTargetDirective::Create(const ASTContext &C,
OMPTargetDirective *OMPTargetDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTargetDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTargetDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPTargetDirective(NumClauses);
}
-OMPTargetDataDirective *OMPTargetDataDirective::Create(
+OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem =
- C.Allocate(llvm::RoundUpToAlignment(sizeof(OMPTargetDataDirective),
- llvm::alignOf<OMPClause *>()) +
- sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
+ C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
+ OMPTargetParallelDirective *Dir =
+ new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size());
+ Dir->setClauses(Clauses);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ return Dir;
+}
+
+OMPTargetParallelDirective *
+OMPTargetParallelDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses, EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem =
+ C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
+ return new (Mem) OMPTargetParallelDirective(NumClauses);
+}
+
+OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs, bool HasCancel) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_target_parallel_for));
+ OMPTargetParallelForDirective *Dir = new (Mem) OMPTargetParallelForDirective(
+ StartLoc, EndLoc, CollapsedNum, Clauses.size());
+ Dir->setClauses(Clauses);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setIterationVariable(Exprs.IterationVarRef);
+ Dir->setLastIteration(Exprs.LastIteration);
+ Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+ Dir->setPreCond(Exprs.PreCond);
+ Dir->setCond(Exprs.Cond);
+ Dir->setInit(Exprs.Init);
+ Dir->setInc(Exprs.Inc);
+ Dir->setIsLastIterVariable(Exprs.IL);
+ Dir->setLowerBoundVariable(Exprs.LB);
+ Dir->setUpperBoundVariable(Exprs.UB);
+ Dir->setStrideVariable(Exprs.ST);
+ Dir->setEnsureUpperBound(Exprs.EUB);
+ Dir->setNextLowerBound(Exprs.NLB);
+ Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
+ Dir->setCounters(Exprs.Counters);
+ Dir->setPrivateCounters(Exprs.PrivateCounters);
+ Dir->setInits(Exprs.Inits);
+ Dir->setUpdates(Exprs.Updates);
+ Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
+ Dir->setHasCancel(HasCancel);
+ return Dir;
+}
+
+OMPTargetParallelForDirective *
+OMPTargetParallelForDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned CollapsedNum, EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_target_parallel_for));
+ return new (Mem) OMPTargetParallelForDirective(CollapsedNum, NumClauses);
+}
+
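
Note: OMPTargetParallelDirective and OMPTargetParallelForDirective are new AST nodes for the OpenMP 4.5 combined constructs; the latter reuses the full loop-directive machinery above (bounds, strides, counters, pre-inits). The source forms they model, for illustration:

    // Combined constructs these nodes represent (OpenMP 4.5):
    #pragma omp target parallel
    { /* offloaded parallel region */ }

    #pragma omp target parallel for
    for (int i = 0; i < n; ++i)
      a[i] = b[i] + c[i];   // offloaded, then worksharing-distributed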
+OMPTargetDataDirective *OMPTargetDataDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
+ void *Mem = C.Allocate(llvm::alignTo(sizeof(OMPTargetDataDirective),
+ llvm::alignOf<OMPClause *>()) +
+ sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPTargetDataDirective *Dir =
new (Mem) OMPTargetDataDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
@@ -711,20 +803,62 @@ OMPTargetDataDirective *OMPTargetDataDirective::Create(
OMPTargetDataDirective *OMPTargetDataDirective::CreateEmpty(const ASTContext &C,
unsigned N,
EmptyShell) {
- void *Mem =
- C.Allocate(llvm::RoundUpToAlignment(sizeof(OMPTargetDataDirective),
- llvm::alignOf<OMPClause *>()) +
- sizeof(OMPClause *) * N + sizeof(Stmt *));
+ void *Mem = C.Allocate(llvm::alignTo(sizeof(OMPTargetDataDirective),
+ llvm::alignOf<OMPClause *>()) +
+ sizeof(OMPClause *) * N + sizeof(Stmt *));
return new (Mem) OMPTargetDataDirective(N);
}
+OMPTargetEnterDataDirective *OMPTargetEnterDataDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses) {
+ void *Mem = C.Allocate(llvm::alignTo(sizeof(OMPTargetEnterDataDirective),
+ llvm::alignOf<OMPClause *>()) +
+ sizeof(OMPClause *) * Clauses.size());
+ OMPTargetEnterDataDirective *Dir =
+ new (Mem) OMPTargetEnterDataDirective(StartLoc, EndLoc, Clauses.size());
+ Dir->setClauses(Clauses);
+ return Dir;
+}
+
+OMPTargetEnterDataDirective *
+OMPTargetEnterDataDirective::CreateEmpty(const ASTContext &C, unsigned N,
+ EmptyShell) {
+ void *Mem = C.Allocate(llvm::alignTo(sizeof(OMPTargetEnterDataDirective),
+ llvm::alignOf<OMPClause *>()) +
+ sizeof(OMPClause *) * N);
+ return new (Mem) OMPTargetEnterDataDirective(N);
+}
+
+OMPTargetExitDataDirective *
+OMPTargetExitDataDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses) {
+ void *Mem = C.Allocate(llvm::alignTo(sizeof(OMPTargetExitDataDirective),
+ llvm::alignOf<OMPClause *>()) +
+ sizeof(OMPClause *) * Clauses.size());
+ OMPTargetExitDataDirective *Dir =
+ new (Mem) OMPTargetExitDataDirective(StartLoc, EndLoc, Clauses.size());
+ Dir->setClauses(Clauses);
+ return Dir;
+}
+
+OMPTargetExitDataDirective *
+OMPTargetExitDataDirective::CreateEmpty(const ASTContext &C, unsigned N,
+ EmptyShell) {
+ void *Mem = C.Allocate(llvm::alignTo(sizeof(OMPTargetExitDataDirective),
+ llvm::alignOf<OMPClause *>()) +
+ sizeof(OMPClause *) * N);
+ return new (Mem) OMPTargetExitDataDirective(N);
+}
+
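
Note: unlike OMPTargetDataDirective just above, the enter/exit data directives are standalone, which is why their allocations reserve no trailing sizeof(Stmt *) slot: there is no associated statement. Typical source forms:

    // Standalone data-mapping directives (no structured block follows):
    #pragma omp target enter data map(to: a[0:n])
    /* ... device copy of a stays live across constructs ... */
    #pragma omp target exit data map(from: a[0:n])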
OMPTeamsDirective *OMPTeamsDirective::Create(const ASTContext &C,
SourceLocation StartLoc,
SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTeamsDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTeamsDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() + sizeof(Stmt *));
OMPTeamsDirective *Dir =
@@ -737,8 +871,8 @@ OMPTeamsDirective *OMPTeamsDirective::Create(const ASTContext &C,
OMPTeamsDirective *OMPTeamsDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTeamsDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTeamsDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses + sizeof(Stmt *));
return new (Mem) OMPTeamsDirective(NumClauses);
@@ -748,8 +882,8 @@ OMPTaskLoopDirective *OMPTaskLoopDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskLoopDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTaskLoopDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_taskloop));
@@ -771,11 +905,15 @@ OMPTaskLoopDirective *OMPTaskLoopDirective::Create(
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
return Dir;
}
@@ -783,8 +921,8 @@ OMPTaskLoopDirective *OMPTaskLoopDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
unsigned CollapsedNum,
EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskLoopDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size =
+ llvm::alignTo(sizeof(OMPTaskLoopDirective), llvm::alignOf<OMPClause *>());
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) * numLoopChildren(CollapsedNum, OMPD_taskloop));
@@ -795,8 +933,8 @@ OMPTaskLoopSimdDirective *OMPTaskLoopSimdDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskLoopSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPTaskLoopSimdDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum, OMPD_taskloop_simd));
@@ -818,19 +956,23 @@ OMPTaskLoopSimdDirective *OMPTaskLoopSimdDirective::Create(
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
return Dir;
}
OMPTaskLoopSimdDirective *
OMPTaskLoopSimdDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned CollapsedNum, EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPTaskLoopSimdDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPTaskLoopSimdDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum, OMPD_taskloop_simd));
@@ -841,8 +983,8 @@ OMPDistributeDirective *OMPDistributeDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPDistributeDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum, OMPD_distribute));
@@ -864,21 +1006,267 @@ OMPDistributeDirective *OMPDistributeDirective::Create(
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
return Dir;
}
OMPDistributeDirective *
OMPDistributeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned CollapsedNum, EmptyShell) {
- unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPDistributeDirective),
- llvm::alignOf<OMPClause *>());
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeDirective),
+ llvm::alignOf<OMPClause *>());
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum, OMPD_distribute));
return new (Mem) OMPDistributeDirective(CollapsedNum, NumClauses);
}
+
+OMPTargetUpdateDirective *
+OMPTargetUpdateDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetUpdateDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size());
+ OMPTargetUpdateDirective *Dir =
+ new (Mem) OMPTargetUpdateDirective(StartLoc, EndLoc, Clauses.size());
+ Dir->setClauses(Clauses);
+ return Dir;
+}
+
+OMPTargetUpdateDirective *
+OMPTargetUpdateDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
+ EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetUpdateDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(Size + sizeof(OMPClause *) * NumClauses);
+ return new (Mem) OMPTargetUpdateDirective(NumClauses);
+}
+
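
Note: OMPTargetUpdateDirective is likewise standalone; its allocation is just the aligned header plus the clause array. Illustrative source forms:

    #pragma omp target update from(a[0:n])   // refresh the host copy
    #pragma omp target update to(b[0:m])     // refresh the device copy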
+OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs) {
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for));
+ OMPDistributeParallelForDirective *Dir =
+ new (Mem) OMPDistributeParallelForDirective(StartLoc, EndLoc,
+ CollapsedNum, Clauses.size());
+ Dir->setClauses(Clauses);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setIterationVariable(Exprs.IterationVarRef);
+ Dir->setLastIteration(Exprs.LastIteration);
+ Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+ Dir->setPreCond(Exprs.PreCond);
+ Dir->setCond(Exprs.Cond);
+ Dir->setInit(Exprs.Init);
+ Dir->setInc(Exprs.Inc);
+ Dir->setIsLastIterVariable(Exprs.IL);
+ Dir->setLowerBoundVariable(Exprs.LB);
+ Dir->setUpperBoundVariable(Exprs.UB);
+ Dir->setStrideVariable(Exprs.ST);
+ Dir->setEnsureUpperBound(Exprs.EUB);
+ Dir->setNextLowerBound(Exprs.NLB);
+ Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
+ Dir->setCounters(Exprs.Counters);
+ Dir->setPrivateCounters(Exprs.PrivateCounters);
+ Dir->setInits(Exprs.Inits);
+ Dir->setUpdates(Exprs.Updates);
+ Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
+ return Dir;
+}
+
+OMPDistributeParallelForDirective *
+OMPDistributeParallelForDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned CollapsedNum,
+ EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for));
+ return new (Mem) OMPDistributeParallelForDirective(CollapsedNum, NumClauses);
+}
+
+OMPDistributeParallelForSimdDirective *
+OMPDistributeParallelForSimdDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs) {
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForSimdDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for_simd));
+ OMPDistributeParallelForSimdDirective *Dir = new (Mem)
+ OMPDistributeParallelForSimdDirective(StartLoc, EndLoc, CollapsedNum,
+ Clauses.size());
+ Dir->setClauses(Clauses);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setIterationVariable(Exprs.IterationVarRef);
+ Dir->setLastIteration(Exprs.LastIteration);
+ Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+ Dir->setPreCond(Exprs.PreCond);
+ Dir->setCond(Exprs.Cond);
+ Dir->setInit(Exprs.Init);
+ Dir->setInc(Exprs.Inc);
+ Dir->setIsLastIterVariable(Exprs.IL);
+ Dir->setLowerBoundVariable(Exprs.LB);
+ Dir->setUpperBoundVariable(Exprs.UB);
+ Dir->setStrideVariable(Exprs.ST);
+ Dir->setEnsureUpperBound(Exprs.EUB);
+ Dir->setNextLowerBound(Exprs.NLB);
+ Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
+ Dir->setCounters(Exprs.Counters);
+ Dir->setPrivateCounters(Exprs.PrivateCounters);
+ Dir->setInits(Exprs.Inits);
+ Dir->setUpdates(Exprs.Updates);
+ Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
+ return Dir;
+}
+
+OMPDistributeParallelForSimdDirective *
+OMPDistributeParallelForSimdDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned CollapsedNum,
+ EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForSimdDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for_simd));
+ return new (Mem)
+ OMPDistributeParallelForSimdDirective(CollapsedNum, NumClauses);
+}
+
+OMPDistributeSimdDirective *OMPDistributeSimdDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs) {
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeSimdDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_distribute_simd));
+ OMPDistributeSimdDirective *Dir = new (Mem) OMPDistributeSimdDirective(
+ StartLoc, EndLoc, CollapsedNum, Clauses.size());
+ Dir->setClauses(Clauses);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setIterationVariable(Exprs.IterationVarRef);
+ Dir->setLastIteration(Exprs.LastIteration);
+ Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+ Dir->setPreCond(Exprs.PreCond);
+ Dir->setCond(Exprs.Cond);
+ Dir->setInit(Exprs.Init);
+ Dir->setInc(Exprs.Inc);
+ Dir->setIsLastIterVariable(Exprs.IL);
+ Dir->setLowerBoundVariable(Exprs.LB);
+ Dir->setUpperBoundVariable(Exprs.UB);
+ Dir->setStrideVariable(Exprs.ST);
+ Dir->setEnsureUpperBound(Exprs.EUB);
+ Dir->setNextLowerBound(Exprs.NLB);
+ Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
+ Dir->setCounters(Exprs.Counters);
+ Dir->setPrivateCounters(Exprs.PrivateCounters);
+ Dir->setInits(Exprs.Inits);
+ Dir->setUpdates(Exprs.Updates);
+ Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
+ return Dir;
+}
+
+OMPDistributeSimdDirective *
+OMPDistributeSimdDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned CollapsedNum, EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPDistributeSimdDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_distribute_simd));
+ return new (Mem) OMPDistributeSimdDirective(CollapsedNum, NumClauses);
+}
+
+OMPTargetParallelForSimdDirective *OMPTargetParallelForSimdDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForSimdDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * Clauses.size() +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_target_parallel_for_simd));
+ OMPTargetParallelForSimdDirective *Dir =
+ new (Mem) OMPTargetParallelForSimdDirective(StartLoc, EndLoc,
+ CollapsedNum, Clauses.size());
+ Dir->setClauses(Clauses);
+ Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setIterationVariable(Exprs.IterationVarRef);
+ Dir->setLastIteration(Exprs.LastIteration);
+ Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+ Dir->setPreCond(Exprs.PreCond);
+ Dir->setCond(Exprs.Cond);
+ Dir->setInit(Exprs.Init);
+ Dir->setInc(Exprs.Inc);
+ Dir->setIsLastIterVariable(Exprs.IL);
+ Dir->setLowerBoundVariable(Exprs.LB);
+ Dir->setUpperBoundVariable(Exprs.UB);
+ Dir->setStrideVariable(Exprs.ST);
+ Dir->setEnsureUpperBound(Exprs.EUB);
+ Dir->setNextLowerBound(Exprs.NLB);
+ Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
+ Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
+ Dir->setCounters(Exprs.Counters);
+ Dir->setPrivateCounters(Exprs.PrivateCounters);
+ Dir->setInits(Exprs.Inits);
+ Dir->setUpdates(Exprs.Updates);
+ Dir->setFinals(Exprs.Finals);
+ Dir->setPreInits(Exprs.PreInits);
+ return Dir;
+}
+
+OMPTargetParallelForSimdDirective *
+OMPTargetParallelForSimdDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned CollapsedNum,
+ EmptyShell) {
+ unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForSimdDirective),
+ llvm::alignOf<OMPClause *>());
+ void *Mem = C.Allocate(
+ Size + sizeof(OMPClause *) * NumClauses +
+ sizeof(Stmt *) *
+ numLoopChildren(CollapsedNum, OMPD_target_parallel_for_simd));
+ return new (Mem) OMPTargetParallelForSimdDirective(CollapsedNum, NumClauses);
+}
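
Note: the three distribute variants and the combined target parallel for simd added above share one recipe: an alignTo-padded header, the trailing clause array, numLoopChildren(CollapsedNum, Kind) child-statement slots, and the full set of HelperExprs setters. Each models an OpenMP 4.5 construct applied to a canonical for loop, e.g.:

    #pragma omp distribute simd
    for (int i = 0; i < n; ++i)
      a[i] += b[i];
    // Likewise: distribute parallel for, distribute parallel for simd,
    // and target parallel for simd.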
diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
index 69f52f52b669..8797a13335c4 100644
--- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp
@@ -16,6 +16,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
@@ -496,6 +497,11 @@ void StmtPrinter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *Node) {
OS << ";\n";
}
+void StmtPrinter::VisitObjCAvailabilityCheckExpr(
+ ObjCAvailabilityCheckExpr *Node) {
+ OS << "@available(...)";
+}
+
void StmtPrinter::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *Node) {
Indent() << "@synchronized (";
PrintExpr(Node->getSynchExpr());
@@ -663,9 +669,9 @@ void OMPClausePrinter::VisitOMPScheduleClause(OMPScheduleClause *Node) {
OS << ": ";
}
OS << getOpenMPSimpleClauseTypeName(OMPC_schedule, Node->getScheduleKind());
- if (Node->getChunkSize()) {
+ if (auto *E = Node->getChunkSize()) {
OS << ", ";
- Node->getChunkSize()->printPretty(OS, nullptr, Policy);
+ E->printPretty(OS, nullptr, Policy);
}
OS << ")";
}
@@ -763,15 +769,16 @@ template<typename T>
void OMPClausePrinter::VisitOMPClauseList(T *Node, char StartSym) {
for (typename T::varlist_iterator I = Node->varlist_begin(),
E = Node->varlist_end();
- I != E; ++I) {
+ I != E; ++I) {
assert(*I && "Expected non-null Stmt");
+ OS << (I == Node->varlist_begin() ? StartSym : ',');
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(*I)) {
- OS << (I == Node->varlist_begin() ? StartSym : ',');
- cast<NamedDecl>(DRE->getDecl())->printQualifiedName(OS);
- } else {
- OS << (I == Node->varlist_begin() ? StartSym : ',');
+ if (isa<OMPCapturedExprDecl>(DRE->getDecl()))
+ DRE->printPretty(OS, nullptr, Policy, 0);
+ else
+ DRE->getDecl()->printQualifiedName(OS);
+ } else
(*I)->printPretty(OS, nullptr, Policy, 0);
- }
}
}
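
Note: the rewrite hoists the separator print above the branch and special-cases compiler-generated captures, so that (combined with the VisitDeclRefExpr change further down) such captures round-trip as their original source expression rather than an internal capture name. Restated as a sketch with explicit braces:

    // Sketch of the rewritten traversal (same logic as the hunk above):
    for (auto I = Node->varlist_begin(), E = Node->varlist_end(); I != E; ++I) {
      OS << (I == Node->varlist_begin() ? StartSym : ',');
      if (auto *DRE = dyn_cast<DeclRefExpr>(*I)) {
        if (isa<OMPCapturedExprDecl>(DRE->getDecl()))
          DRE->printPretty(OS, nullptr, Policy, 0); // print the captured expr
        else
          DRE->getDecl()->printQualifiedName(OS);   // ordinary variable
      } else {
        (*I)->printPretty(OS, nullptr, Policy, 0);  // arbitrary expression
      }
    }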
@@ -909,6 +916,58 @@ void OMPClausePrinter::VisitOMPMapClause(OMPMapClause *Node) {
OS << ")";
}
}
+
+void OMPClausePrinter::VisitOMPToClause(OMPToClause *Node) {
+ if (!Node->varlist_empty()) {
+ OS << "to";
+ VisitOMPClauseList(Node, '(');
+ OS << ")";
+ }
+}
+
+void OMPClausePrinter::VisitOMPFromClause(OMPFromClause *Node) {
+ if (!Node->varlist_empty()) {
+ OS << "from";
+ VisitOMPClauseList(Node, '(');
+ OS << ")";
+ }
+}
+
+void OMPClausePrinter::VisitOMPDistScheduleClause(OMPDistScheduleClause *Node) {
+ OS << "dist_schedule(" << getOpenMPSimpleClauseTypeName(
+ OMPC_dist_schedule, Node->getDistScheduleKind());
+ if (auto *E = Node->getChunkSize()) {
+ OS << ", ";
+ E->printPretty(OS, nullptr, Policy);
+ }
+ OS << ")";
+}
+
+void OMPClausePrinter::VisitOMPDefaultmapClause(OMPDefaultmapClause *Node) {
+ OS << "defaultmap(";
+ OS << getOpenMPSimpleClauseTypeName(OMPC_defaultmap,
+ Node->getDefaultmapModifier());
+ OS << ": ";
+ OS << getOpenMPSimpleClauseTypeName(OMPC_defaultmap,
+ Node->getDefaultmapKind());
+ OS << ")";
+}
+
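
Note: for reference, the clause spellings these two printers produce (OpenMP 4.5 forms, illustrative values):

    dist_schedule(static)          // no chunk expression
    dist_schedule(static, 128)     // with a chunk expression
    defaultmap(tofrom: scalar)     // the only modifier/kind pair in OpenMP 4.5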
+void OMPClausePrinter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *Node) {
+ if (!Node->varlist_empty()) {
+ OS << "use_device_ptr";
+ VisitOMPClauseList(Node, '(');
+ OS << ")";
+ }
+}
+
+void OMPClausePrinter::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *Node) {
+ if (!Node->varlist_empty()) {
+ OS << "is_device_ptr";
+ VisitOMPClauseList(Node, '(');
+ OS << ")";
+ }
+}
}
//===----------------------------------------------------------------------===//
@@ -1051,6 +1110,30 @@ void StmtPrinter::VisitOMPTargetDataDirective(OMPTargetDataDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPTargetEnterDataDirective(
+ OMPTargetEnterDataDirective *Node) {
+ Indent() << "#pragma omp target enter data ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPTargetExitDataDirective(
+ OMPTargetExitDataDirective *Node) {
+ Indent() << "#pragma omp target exit data ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPTargetParallelDirective(
+ OMPTargetParallelDirective *Node) {
+ Indent() << "#pragma omp target parallel ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPTargetParallelForDirective(
+ OMPTargetParallelForDirective *Node) {
+ Indent() << "#pragma omp target parallel for ";
+ PrintOMPExecutableDirective(Node);
+}
+
void StmtPrinter::VisitOMPTeamsDirective(OMPTeamsDirective *Node) {
Indent() << "#pragma omp teams ";
PrintOMPExecutableDirective(Node);
@@ -1085,11 +1168,45 @@ void StmtPrinter::VisitOMPDistributeDirective(OMPDistributeDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPTargetUpdateDirective(
+ OMPTargetUpdateDirective *Node) {
+ Indent() << "#pragma omp target update ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPDistributeParallelForDirective(
+ OMPDistributeParallelForDirective *Node) {
+ Indent() << "#pragma omp distribute parallel for ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPDistributeParallelForSimdDirective(
+ OMPDistributeParallelForSimdDirective *Node) {
+ Indent() << "#pragma omp distribute parallel for simd ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPDistributeSimdDirective(
+ OMPDistributeSimdDirective *Node) {
+ Indent() << "#pragma omp distribute simd ";
+ PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPTargetParallelForSimdDirective(
+ OMPTargetParallelForSimdDirective *Node) {
+ Indent() << "#pragma omp target parallel for simd ";
+ PrintOMPExecutableDirective(Node);
+}
+
//===----------------------------------------------------------------------===//
// Expr printing methods.
//===----------------------------------------------------------------------===//
void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) {
+ if (auto *OCED = dyn_cast<OMPCapturedExprDecl>(Node->getDecl())) {
+ OCED->getInit()->IgnoreImpCasts()->printPretty(OS, nullptr, Policy);
+ return;
+ }
if (NestedNameSpecifier *Qualifier = Node->getQualifier())
Qualifier->print(OS, Policy);
if (Node->hasTemplateKeyword())
@@ -1097,7 +1214,7 @@ void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) {
OS << Node->getNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
+ OS, Node->template_arguments(), Policy);
}
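
Note: another mechanical migration that repeats below. PrintTemplateArgumentList now takes the arguments as an ArrayRef via template_arguments() instead of a raw pointer plus explicit count. The two call shapes side by side:

    // Before: pointer + explicit count.
    TemplateSpecializationType::PrintTemplateArgumentList(
        OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
    // After: the ArrayRef-style accessor carries its own length.
    TemplateSpecializationType::PrintTemplateArgumentList(
        OS, Node->template_arguments(), Policy);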
void StmtPrinter::VisitDependentScopeDeclRefExpr(
@@ -1109,7 +1226,7 @@ void StmtPrinter::VisitDependentScopeDeclRefExpr(
OS << Node->getNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
+ OS, Node->template_arguments(), Policy);
}
void StmtPrinter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *Node) {
@@ -1120,7 +1237,7 @@ void StmtPrinter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *Node) {
OS << Node->getNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
+ OS, Node->template_arguments(), Policy);
}
void StmtPrinter::VisitObjCIvarRefExpr(ObjCIvarRefExpr *Node) {
@@ -1204,6 +1321,12 @@ void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
OS << "'\\v'";
break;
default:
+ // A character literal might be sign-extended, which
+ // would result in an invalid \U escape sequence.
+ // FIXME: multicharacter literals such as '\xFF\xFF\xFF\xFF'
+ // are not correctly handled.
+ if ((value & ~0xFFu) == ~0xFFu && Node->getKind() == CharacterLiteral::Ascii)
+ value &= 0xFFu;
if (value < 256 && isPrintable((unsigned char)value))
OS << "'" << (char)value << "'";
else if (value < 256)
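
Note: the masking guard handles targets where plain char is signed: the value of '\xFF' reaches the printer sign-extended, and without the mask the fall-through would emit an invalid \UFFFFFFFF escape. Worked example:

    // Assuming a signed-char target:
    unsigned value = 0xFFFFFFFFu;            // '\xFF' sign-extended to -1
    if ((value & ~0xFFu) == ~0xFFu)          // all high bits set => extension
      value &= 0xFFu;                        // recover the 8-bit value 0xFF
    // now printed as a \x escape instead of a bogus \U sequence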
@@ -1254,6 +1377,7 @@ static void PrintFloatingLiteral(raw_ostream &OS, FloatingLiteral *Node,
case BuiltinType::Double: break; // no suffix.
case BuiltinType::Float: OS << 'F'; break;
case BuiltinType::LongDouble: OS << 'L'; break;
+ case BuiltinType::Float128: OS << 'Q'; break;
}
}
@@ -1340,9 +1464,9 @@ void StmtPrinter::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node){
OS << "sizeof";
break;
case UETT_AlignOf:
- if (Policy.LangOpts.CPlusPlus)
+ if (Policy.Alignof)
OS << "alignof";
- else if (Policy.LangOpts.C11)
+ else if (Policy.UnderscoreAlignof)
OS << "_Alignof";
else
OS << "__alignof";
@@ -1440,7 +1564,7 @@ void StmtPrinter::VisitMemberExpr(MemberExpr *Node) {
OS << Node->getMemberNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
+ OS, Node->template_arguments(), Policy);
}
void StmtPrinter::VisitObjCIsaExpr(ObjCIsaExpr *Node) {
PrintExpr(Node->getBase());
@@ -1563,26 +1687,24 @@ void StmtPrinter::VisitParenListExpr(ParenListExpr* Node) {
void StmtPrinter::VisitDesignatedInitExpr(DesignatedInitExpr *Node) {
bool NeedsEquals = true;
- for (DesignatedInitExpr::designators_iterator D = Node->designators_begin(),
- DEnd = Node->designators_end();
- D != DEnd; ++D) {
- if (D->isFieldDesignator()) {
- if (D->getDotLoc().isInvalid()) {
- if (IdentifierInfo *II = D->getFieldName()) {
+ for (const DesignatedInitExpr::Designator &D : Node->designators()) {
+ if (D.isFieldDesignator()) {
+ if (D.getDotLoc().isInvalid()) {
+ if (IdentifierInfo *II = D.getFieldName()) {
OS << II->getName() << ":";
NeedsEquals = false;
}
} else {
- OS << "." << D->getFieldName()->getName();
+ OS << "." << D.getFieldName()->getName();
}
} else {
OS << "[";
- if (D->isArrayDesignator()) {
- PrintExpr(Node->getArrayIndex(*D));
+ if (D.isArrayDesignator()) {
+ PrintExpr(Node->getArrayIndex(D));
} else {
- PrintExpr(Node->getArrayRangeStart(*D));
+ PrintExpr(Node->getArrayRangeStart(D));
OS << " ... ";
- PrintExpr(Node->getArrayRangeEnd(*D));
+ PrintExpr(Node->getArrayRangeEnd(D));
}
OS << "]";
}
@@ -1612,7 +1734,7 @@ void StmtPrinter::VisitNoInitExpr(NoInitExpr *Node) {
}
void StmtPrinter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *Node) {
- if (Policy.LangOpts.CPlusPlus) {
+ if (Node->getType()->getAsCXXRecordDecl()) {
OS << "/*implicit*/";
Node->getType().print(OS, Policy);
OS << "()";
@@ -1822,7 +1944,7 @@ void StmtPrinter::VisitUserDefinedLiteral(UserDefinedLiteral *Node) {
if (Args->size() != 1) {
OS << "operator\"\"" << Node->getUDSuffix()->getName();
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Args->data(), Args->size(), Policy);
+ OS, Args->asArray(), Policy);
OS << "()";
return;
}
@@ -1952,7 +2074,9 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) {
case LCK_This:
OS << "this";
break;
-
+ case LCK_StarThis:
+ OS << "*this";
+ break;
case LCK_ByRef:
if (Node->getCaptureDefault() != LCD_ByRef || Node->isInitCapture(C))
OS << '&';
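
Note: LCK_StarThis is the C++17 by-value capture of the enclosing object (P0018); the printer spells it exactly as it appears in source:

    // Source form the new case round-trips (C++17):
    auto f = [*this]() { return member; };   // captures a copy of *this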
@@ -1975,7 +2099,7 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) {
OS << " (";
CXXMethodDecl *Method = Node->getCallOperator();
NeedComma = false;
- for (auto P : Method->params()) {
+ for (auto P : Method->parameters()) {
if (NeedComma) {
OS << ", ";
} else {
@@ -2103,6 +2227,11 @@ void StmtPrinter::VisitCXXConstructExpr(CXXConstructExpr *E) {
OS << "}";
}
+void StmtPrinter::VisitCXXInheritedCtorInitExpr(CXXInheritedCtorInitExpr *E) {
+ // Parens are printed by the surrounding context.
+ OS << "<forwarded>";
+}
+
void StmtPrinter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
PrintExpr(E->getSubExpr());
}
@@ -2140,7 +2269,7 @@ void StmtPrinter::VisitCXXDependentScopeMemberExpr(
OS << Node->getMemberNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
+ OS, Node->template_arguments(), Policy);
}
void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) {
@@ -2155,7 +2284,7 @@ void StmtPrinter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *Node) {
OS << Node->getMemberNameInfo();
if (Node->hasExplicitTemplateArgs())
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Node->getTemplateArgs(), Node->getNumTemplateArgs(), Policy);
+ OS, Node->template_arguments(), Policy);
}
static const char *getTypeTraitName(TypeTrait TT) {
@@ -2424,7 +2553,7 @@ void StmtPrinter::VisitOpaqueValueExpr(OpaqueValueExpr *Node) {
void StmtPrinter::VisitTypoExpr(TypoExpr *Node) {
// TODO: Print something reasonable for a TypoExpr, if necessary.
- assert(false && "Cannot print TypoExpr nodes");
+ llvm_unreachable("Cannot print TypoExpr nodes");
}
void StmtPrinter::VisitAsTypeExpr(AsTypeExpr *Node) {
diff --git a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp
index 175a43abbf61..0a39413853a0 100644
--- a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp
@@ -69,6 +69,7 @@ namespace {
}
void StmtProfiler::VisitStmt(const Stmt *S) {
+ assert(S && "Requires non-null Stmt pointer");
ID.AddInteger(S->getStmtClass());
for (const Stmt *SubStmt : S->children()) {
if (SubStmt)
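
Note: the new assert turns a silent null dereference into a diagnosable failure, and it is why the clause profiler below grows if (E) guards around nearly every VisitStmt call: clause payloads may legitimately be null, for instance in a node rebuilt as an EmptyShell before deserialization fills it in. The resulting caller-side pattern:

    // Callers now filter nulls before profiling (pattern repeated below):
    for (auto *E : C->private_copies())
      if (E)                        // may be null on incomplete/empty clauses
        Profiler->VisitStmt(E);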
@@ -268,8 +269,23 @@ public:
#define OPENMP_CLAUSE(Name, Class) \
void Visit##Class(const Class *C);
#include "clang/Basic/OpenMPKinds.def"
+ void VistOMPClauseWithPreInit(const OMPClauseWithPreInit *C);
+ void VistOMPClauseWithPostUpdate(const OMPClauseWithPostUpdate *C);
};
+void OMPClauseProfiler::VistOMPClauseWithPreInit(
+ const OMPClauseWithPreInit *C) {
+ if (auto *S = C->getPreInitStmt())
+ Profiler->VisitStmt(S);
+}
+
+void OMPClauseProfiler::VistOMPClauseWithPostUpdate(
+ const OMPClauseWithPostUpdate *C) {
+ VistOMPClauseWithPreInit(C);
+ if (auto *E = C->getPostUpdateExpr())
+ Profiler->VisitStmt(E);
+}
+
void OMPClauseProfiler::VisitOMPIfClause(const OMPIfClause *C) {
if (C->getCondition())
Profiler->VisitStmt(C->getCondition());
@@ -305,12 +321,9 @@ void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) { }
void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { }
void OMPClauseProfiler::VisitOMPScheduleClause(const OMPScheduleClause *C) {
- if (C->getChunkSize()) {
- Profiler->VisitStmt(C->getChunkSize());
- if (C->getHelperChunkSize()) {
- Profiler->VisitStmt(C->getChunkSize());
- }
- }
+ VistOMPClauseWithPreInit(C);
+ if (auto *S = C->getChunkSize())
+ Profiler->VisitStmt(S);
}
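
Note: besides routing the pre-init statement through VistOMPClauseWithPreInit (the Vist spelling matches the helper as declared upstream), this hunk fixes a copy-paste bug: the removed code tested getHelperChunkSize() but then profiled getChunkSize() a second time, so the helper expression never reached the profile. The removed shape, for contrast:

    // The removed logic (the inner branch re-visits the same expression):
    if (C->getChunkSize()) {
      Profiler->VisitStmt(C->getChunkSize());
      if (C->getHelperChunkSize())
        Profiler->VisitStmt(C->getChunkSize());  // should have been the helper
    }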
void OMPClauseProfiler::VisitOMPOrderedClause(const OMPOrderedClause *C) {
@@ -343,37 +356,46 @@ void OMPClauseProfiler::VisitOMPNogroupClause(const OMPNogroupClause *) {}
template<typename T>
void OMPClauseProfiler::VisitOMPClauseList(T *Node) {
for (auto *E : Node->varlists()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void OMPClauseProfiler::VisitOMPPrivateClause(const OMPPrivateClause *C) {
VisitOMPClauseList(C);
for (auto *E : C->private_copies()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void
OMPClauseProfiler::VisitOMPFirstprivateClause(const OMPFirstprivateClause *C) {
VisitOMPClauseList(C);
+ VistOMPClauseWithPreInit(C);
for (auto *E : C->private_copies()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->inits()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void
OMPClauseProfiler::VisitOMPLastprivateClause(const OMPLastprivateClause *C) {
VisitOMPClauseList(C);
+ VistOMPClauseWithPostUpdate(C);
for (auto *E : C->source_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->destination_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->assignment_ops()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void OMPClauseProfiler::VisitOMPSharedClause(const OMPSharedClause *C) {
@@ -385,63 +407,82 @@ void OMPClauseProfiler::VisitOMPReductionClause(
C->getQualifierLoc().getNestedNameSpecifier());
Profiler->VisitName(C->getNameInfo().getName());
VisitOMPClauseList(C);
+ VistOMPClauseWithPostUpdate(C);
for (auto *E : C->privates()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->lhs_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->rhs_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->reduction_ops()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void OMPClauseProfiler::VisitOMPLinearClause(const OMPLinearClause *C) {
VisitOMPClauseList(C);
+ VistOMPClauseWithPostUpdate(C);
for (auto *E : C->privates()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->inits()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->updates()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->finals()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
- Profiler->VisitStmt(C->getStep());
- Profiler->VisitStmt(C->getCalcStep());
+ if (C->getStep())
+ Profiler->VisitStmt(C->getStep());
+ if (C->getCalcStep())
+ Profiler->VisitStmt(C->getCalcStep());
}
void OMPClauseProfiler::VisitOMPAlignedClause(const OMPAlignedClause *C) {
VisitOMPClauseList(C);
- Profiler->VisitStmt(C->getAlignment());
+ if (C->getAlignment())
+ Profiler->VisitStmt(C->getAlignment());
}
void OMPClauseProfiler::VisitOMPCopyinClause(const OMPCopyinClause *C) {
VisitOMPClauseList(C);
for (auto *E : C->source_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->destination_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->assignment_ops()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void
OMPClauseProfiler::VisitOMPCopyprivateClause(const OMPCopyprivateClause *C) {
VisitOMPClauseList(C);
for (auto *E : C->source_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->destination_exprs()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
for (auto *E : C->assignment_ops()) {
- Profiler->VisitStmt(E);
+ if (E)
+ Profiler->VisitStmt(E);
}
}
void OMPClauseProfiler::VisitOMPFlushClause(const OMPFlushClause *C) {
@@ -451,29 +492,50 @@ void OMPClauseProfiler::VisitOMPDependClause(const OMPDependClause *C) {
VisitOMPClauseList(C);
}
void OMPClauseProfiler::VisitOMPDeviceClause(const OMPDeviceClause *C) {
- Profiler->VisitStmt(C->getDevice());
+ if (C->getDevice())
+ Profiler->VisitStmt(C->getDevice());
}
void OMPClauseProfiler::VisitOMPMapClause(const OMPMapClause *C) {
VisitOMPClauseList(C);
}
void OMPClauseProfiler::VisitOMPNumTeamsClause(const OMPNumTeamsClause *C) {
- Profiler->VisitStmt(C->getNumTeams());
+ if (C->getNumTeams())
+ Profiler->VisitStmt(C->getNumTeams());
}
void OMPClauseProfiler::VisitOMPThreadLimitClause(
const OMPThreadLimitClause *C) {
- Profiler->VisitStmt(C->getThreadLimit());
+ if (C->getThreadLimit())
+ Profiler->VisitStmt(C->getThreadLimit());
}
void OMPClauseProfiler::VisitOMPPriorityClause(const OMPPriorityClause *C) {
- Profiler->VisitStmt(C->getPriority());
+ if (C->getPriority())
+ Profiler->VisitStmt(C->getPriority());
}
void OMPClauseProfiler::VisitOMPGrainsizeClause(const OMPGrainsizeClause *C) {
- Profiler->VisitStmt(C->getGrainsize());
+ if (C->getGrainsize())
+ Profiler->VisitStmt(C->getGrainsize());
}
void OMPClauseProfiler::VisitOMPNumTasksClause(const OMPNumTasksClause *C) {
- Profiler->VisitStmt(C->getNumTasks());
+ if (C->getNumTasks())
+ Profiler->VisitStmt(C->getNumTasks());
}
void OMPClauseProfiler::VisitOMPHintClause(const OMPHintClause *C) {
- Profiler->VisitStmt(C->getHint());
+ if (C->getHint())
+ Profiler->VisitStmt(C->getHint());
+}
+void OMPClauseProfiler::VisitOMPToClause(const OMPToClause *C) {
+ VisitOMPClauseList(C);
+}
+void OMPClauseProfiler::VisitOMPFromClause(const OMPFromClause *C) {
+ VisitOMPClauseList(C);
+}
+void OMPClauseProfiler::VisitOMPUseDevicePtrClause(
+ const OMPUseDevicePtrClause *C) {
+ VisitOMPClauseList(C);
+}
+void OMPClauseProfiler::VisitOMPIsDevicePtrClause(
+ const OMPIsDevicePtrClause *C) {
+ VisitOMPClauseList(C);
}
}
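The hunk above applies one pattern throughout OMPClauseProfiler: every VisitStmt call on a clause's helper expressions (privates, inits, updates, finals, the step/calc-step helpers, and the single-expression clauses such as device, num_teams, and priority) is now guarded by a null check, so the profiler tolerates entries that have not been materialized. A minimal standalone sketch of that guard shape, with the hypothetical names Profiler and profileExprs standing in for the Clang types:

#include <vector>

struct Stmt {};
struct Profiler {
    void VisitStmt(const Stmt *S) { (void)S; /* hash the statement */ }
};

// Helper-expression lists may contain null entries, so each element is
// checked before being profiled instead of being dereferenced blindly.
void profileExprs(Profiler &P, const std::vector<const Stmt *> &Exprs) {
    for (const Stmt *E : Exprs)
        if (E)
            P.VisitStmt(E);
}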
@@ -584,6 +646,26 @@ void StmtProfiler::VisitOMPTargetDataDirective(const OMPTargetDataDirective *S)
VisitOMPExecutableDirective(S);
}
+void StmtProfiler::VisitOMPTargetEnterDataDirective(
+ const OMPTargetEnterDataDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPTargetExitDataDirective(
+ const OMPTargetExitDataDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPTargetParallelDirective(
+ const OMPTargetParallelDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPTargetParallelForDirective(
+ const OMPTargetParallelForDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
void StmtProfiler::VisitOMPTeamsDirective(const OMPTeamsDirective *S) {
VisitOMPExecutableDirective(S);
}
@@ -611,6 +693,40 @@ void StmtProfiler::VisitOMPDistributeDirective(
VisitOMPLoopDirective(S);
}
+void OMPClauseProfiler::VisitOMPDistScheduleClause(
+ const OMPDistScheduleClause *C) {
+ VistOMPClauseWithPreInit(C);
+ if (auto *S = C->getChunkSize())
+ Profiler->VisitStmt(S);
+}
+
+void OMPClauseProfiler::VisitOMPDefaultmapClause(const OMPDefaultmapClause *) {}
+
+void StmtProfiler::VisitOMPTargetUpdateDirective(
+ const OMPTargetUpdateDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPDistributeParallelForDirective(
+ const OMPDistributeParallelForDirective *S) {
+ VisitOMPLoopDirective(S);
+}
+
+void StmtProfiler::VisitOMPDistributeParallelForSimdDirective(
+ const OMPDistributeParallelForSimdDirective *S) {
+ VisitOMPLoopDirective(S);
+}
+
+void StmtProfiler::VisitOMPDistributeSimdDirective(
+ const OMPDistributeSimdDirective *S) {
+ VisitOMPLoopDirective(S);
+}
+
+void StmtProfiler::VisitOMPTargetParallelForSimdDirective(
+ const OMPTargetParallelForSimdDirective *S) {
+ VisitOMPLoopDirective(S);
+}
+
void StmtProfiler::VisitExpr(const Expr *S) {
VisitStmt(S);
}
@@ -810,22 +926,20 @@ void StmtProfiler::VisitInitListExpr(const InitListExpr *S) {
void StmtProfiler::VisitDesignatedInitExpr(const DesignatedInitExpr *S) {
VisitExpr(S);
ID.AddBoolean(S->usesGNUSyntax());
- for (DesignatedInitExpr::const_designators_iterator D =
- S->designators_begin(), DEnd = S->designators_end();
- D != DEnd; ++D) {
- if (D->isFieldDesignator()) {
+ for (const DesignatedInitExpr::Designator &D : S->designators()) {
+ if (D.isFieldDesignator()) {
ID.AddInteger(0);
- VisitName(D->getFieldName());
+ VisitName(D.getFieldName());
continue;
}
- if (D->isArrayDesignator()) {
+ if (D.isArrayDesignator()) {
ID.AddInteger(1);
} else {
- assert(D->isArrayRangeDesignator());
+ assert(D.isArrayRangeDesignator());
ID.AddInteger(2);
}
- ID.AddInteger(D->getFirstExprIndex());
+ ID.AddInteger(D.getFirstExprIndex());
}
}
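The integers 0, 1, and 2 written into the profile above are discriminators: they keep a field designator, an array designator, and an array-range designator from ever hashing alike even when their payloads coincide. The same tag-then-payload shape as a self-contained sketch, where Hasher is a stand-in for llvm::FoldingSetNodeID:

#include <cstddef>

struct Hasher {                        // stand-in for llvm::FoldingSetNodeID
    std::size_t H = 0;
    void AddInteger(std::size_t V) { H = H * 131 + V; }
};

void profileField(Hasher &ID, std::size_t NameHash) {
    ID.AddInteger(0);                  // tag: .field designator
    ID.AddInteger(NameHash);
}
void profileArray(Hasher &ID, std::size_t ExprIndex) {
    ID.AddInteger(1);                  // tag: [i] designator
    ID.AddInteger(ExprIndex);
}
void profileArrayRange(Hasher &ID, std::size_t FirstExprIndex) {
    ID.AddInteger(2);                  // tag: [i ... j] designator
    ID.AddInteger(FirstExprIndex);
}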
@@ -1196,6 +1310,12 @@ void StmtProfiler::VisitCXXConstructExpr(const CXXConstructExpr *S) {
ID.AddBoolean(S->isElidable());
}
+void StmtProfiler::VisitCXXInheritedCtorInitExpr(
+ const CXXInheritedCtorInitExpr *S) {
+ VisitExpr(S);
+ VisitDecl(S->getConstructor());
+}
+
void StmtProfiler::VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *S) {
VisitExplicitCastExpr(S);
}
@@ -1213,6 +1333,7 @@ StmtProfiler::VisitLambdaExpr(const LambdaExpr *S) {
C != CEnd; ++C) {
ID.AddInteger(C->getCaptureKind());
switch (C->getCaptureKind()) {
+ case LCK_StarThis:
case LCK_This:
break;
case LCK_ByRef:
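LCK_StarThis, added above, is the capture kind for C++17's by-value capture of *this; like LCK_This it carries no capture variable, so profiling it needs no extra data. A small example of the two spellings (compiles as C++17):

struct Counter {
    int n = 0;
    auto snapshot() {
        return [*this] { return n; };  // copies the whole object: LCK_StarThis
    }
    auto live() {
        return [this] { return n; };   // captures the this pointer: LCK_This
    }
};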
@@ -1511,6 +1632,11 @@ void StmtProfiler::VisitObjCBridgedCastExpr(const ObjCBridgedCastExpr *S) {
ID.AddBoolean(S->getBridgeKind());
}
+void StmtProfiler::VisitObjCAvailabilityCheckExpr(
+ const ObjCAvailabilityCheckExpr *S) {
+ VisitExpr(S);
+}
+
void StmtProfiler::VisitDecl(const Decl *D) {
ID.AddInteger(D? D->getKind() : 0);
diff --git a/contrib/llvm/tools/clang/lib/AST/TemplateBase.cpp b/contrib/llvm/tools/clang/lib/AST/TemplateBase.cpp
index e9edb0df66df..b75ede862f7a 100644
--- a/contrib/llvm/tools/clang/lib/AST/TemplateBase.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/TemplateBase.cpp
@@ -415,6 +415,15 @@ void TemplateArgument::print(const PrintingPolicy &Policy,
}
}
+void TemplateArgument::dump(raw_ostream &Out) const {
+ LangOptions LO; // FIXME! see also TemplateName::dump().
+ LO.CPlusPlus = true;
+ LO.Bool = true;
+ print(PrintingPolicy(LO), Out);
+}
+
+LLVM_DUMP_METHOD void TemplateArgument::dump() const { dump(llvm::errs()); }
+
//===----------------------------------------------------------------------===//
// TemplateArgumentLoc Implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/AST/TemplateName.cpp b/contrib/llvm/tools/clang/lib/AST/TemplateName.cpp
index 47e0255d52ef..47a7d47e7a48 100644
--- a/contrib/llvm/tools/clang/lib/AST/TemplateName.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/TemplateName.cpp
@@ -227,6 +227,6 @@ void TemplateName::dump(raw_ostream &OS) const {
print(OS, PrintingPolicy(LO));
}
-void TemplateName::dump() const {
+LLVM_DUMP_METHOD void TemplateName::dump() const {
dump(llvm::errs());
}
diff --git a/contrib/llvm/tools/clang/lib/AST/Type.cpp b/contrib/llvm/tools/clang/lib/AST/Type.cpp
index b467dac66b57..99b024701aa3 100644
--- a/contrib/llvm/tools/clang/lib/AST/Type.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/Type.cpp
@@ -64,7 +64,7 @@ const IdentifierInfo* QualType::getBaseTypeIdentifier() const {
return nullptr;
}
-bool QualType::isConstant(QualType T, ASTContext &Ctx) {
+bool QualType::isConstant(QualType T, const ASTContext &Ctx) {
if (T.isConstQualified())
return true;
@@ -74,7 +74,7 @@ bool QualType::isConstant(QualType T, ASTContext &Ctx) {
return T.getAddressSpace() == LangAS::opencl_constant;
}
-unsigned ConstantArrayType::getNumAddressingBits(ASTContext &Context,
+unsigned ConstantArrayType::getNumAddressingBits(const ASTContext &Context,
QualType ElementType,
const llvm::APInt &NumElements) {
uint64_t ElementSize = Context.getTypeSizeInChars(ElementType).getQuantity();
@@ -109,7 +109,7 @@ unsigned ConstantArrayType::getNumAddressingBits(ASTContext &Context,
return TotalSize.getActiveBits();
}
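getNumAddressingBits answers how many bits are needed to address every byte of a constant array. A standalone check of the arithmetic under an assumed 4-byte element type and 2^30 elements: the total size is 2^32 bytes, whose active-bit count (index of the highest set bit, plus one) is 33:

#include <cassert>
#include <cstdint>

int main() {
    const uint64_t ElementSize = 4;                        // e.g. sizeof(int)
    const uint64_t NumElements = 1ULL << 30;
    const uint64_t TotalSize = ElementSize * NumElements;  // 2^32 bytes
    unsigned ActiveBits = 0;
    for (uint64_t T = TotalSize; T; T >>= 1)
        ++ActiveBits;                  // mirrors APInt::getActiveBits()
    assert(ActiveBits == 33);
    return 0;
}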
-unsigned ConstantArrayType::getMaxSizeBits(ASTContext &Context) {
+unsigned ConstantArrayType::getMaxSizeBits(const ASTContext &Context) {
unsigned Bits = Context.getTypeSize(Context.getSizeType());
// Limit the number of bits in size_t so that maximal bit size fits 64 bit
@@ -1274,6 +1274,12 @@ QualType QualType::stripObjCKindOfType(const ASTContext &constCtx) const {
});
}
+QualType QualType::getAtomicUnqualifiedType() const {
+ if (auto AT = getTypePtr()->getAs<AtomicType>())
+ return AT->getValueType().getUnqualifiedType();
+ return getUnqualifiedType();
+}
+
Optional<ArrayRef<QualType>> Type::getObjCSubstitutions(
const DeclContext *dc) const {
// Look through method scopes.
@@ -1616,7 +1622,7 @@ bool Type::hasIntegerRepresentation() const {
/// \param Ctx The context in which this type occurs.
///
/// \returns true if the type is considered an integral type, false otherwise.
-bool Type::isIntegralType(ASTContext &Ctx) const {
+bool Type::isIntegralType(const ASTContext &Ctx) const {
if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Bool &&
BT->getKind() <= BuiltinType::Int128;
@@ -1777,7 +1783,7 @@ bool Type::hasUnsignedIntegerRepresentation() const {
bool Type::isFloatingType() const {
if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Half &&
- BT->getKind() <= BuiltinType::LongDouble;
+ BT->getKind() <= BuiltinType::Float128;
if (const ComplexType *CT = dyn_cast<ComplexType>(CanonicalType))
return CT->getElementType()->isFloatingType();
return false;
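This hunk and the two below it widen the closed interval of BuiltinType kinds from LongDouble to the new Float128, which works only because the kinds are declared contiguously. A standalone illustration of that interval-check pattern (the enum ordering here is illustrative, not Clang's actual list):

enum class Kind { Bool, Int, Int128, Half, Float, Double, LongDouble,
                  Float128, Pointer };

constexpr bool isFloating(Kind K) {
    return K >= Kind::Half && K <= Kind::Float128;  // closed interval over the enum
}
static_assert(isFloating(Kind::Float128), "a new kind must land inside the interval");
static_assert(!isFloating(Kind::Pointer), "kinds past the interval stay excluded");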
@@ -1799,7 +1805,7 @@ bool Type::isRealFloatingType() const {
bool Type::isRealType() const {
if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Bool &&
- BT->getKind() <= BuiltinType::LongDouble;
+ BT->getKind() <= BuiltinType::Float128;
if (const EnumType *ET = dyn_cast<EnumType>(CanonicalType))
return ET->getDecl()->isComplete() && !ET->getDecl()->isScoped();
return false;
@@ -1808,7 +1814,7 @@ bool Type::isRealType() const {
bool Type::isArithmeticType() const {
if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Bool &&
- BT->getKind() <= BuiltinType::LongDouble;
+ BT->getKind() <= BuiltinType::Float128;
if (const EnumType *ET = dyn_cast<EnumType>(CanonicalType))
// GCC allows forward declaration of enum types (forbidden by C99 6.7.2.3p2).
// If a body isn't seen by the time we get here, return false.
@@ -1958,7 +1964,7 @@ bool Type::isIncompleteType(NamedDecl **Def) const {
}
}
-bool QualType::isPODType(ASTContext &Context) const {
+bool QualType::isPODType(const ASTContext &Context) const {
// C++11 has a more relaxed definition of POD.
if (Context.getLangOpts().CPlusPlus11)
return isCXX11PODType(Context);
@@ -1966,7 +1972,7 @@ bool QualType::isPODType(ASTContext &Context) const {
return isCXX98PODType(Context);
}
-bool QualType::isCXX98PODType(ASTContext &Context) const {
+bool QualType::isCXX98PODType(const ASTContext &Context) const {
// The compiler shouldn't query this for incomplete types, but the user might.
// We return false for that case. Except for incomplete arrays of PODs, which
// are PODs according to the standard.
@@ -2026,7 +2032,7 @@ bool QualType::isCXX98PODType(ASTContext &Context) const {
}
}
-bool QualType::isTrivialType(ASTContext &Context) const {
+bool QualType::isTrivialType(const ASTContext &Context) const {
// The compiler shouldn't query this for incomplete types, but the user might.
// We return false for that case. Except for incomplete arrays of PODs, which
// are PODs according to the standard.
@@ -2089,7 +2095,7 @@ bool QualType::isTrivialType(ASTContext &Context) const {
return false;
}
-bool QualType::isTriviallyCopyableType(ASTContext &Context) const {
+bool QualType::isTriviallyCopyableType(const ASTContext &Context) const {
if ((*this)->isArrayType())
return Context.getBaseElementType(*this).isTriviallyCopyableType(Context);
@@ -2249,7 +2255,7 @@ bool Type::isStandardLayoutType() const {
// This is effectively the intersection of isTrivialType and
// isStandardLayoutType. We implement it directly to avoid redundant
// conversions from a type to a CXXRecordDecl.
-bool QualType::isCXX11PODType(ASTContext &Context) const {
+bool QualType::isCXX11PODType(const ASTContext &Context) const {
const Type *ty = getTypePtr();
if (ty->isDependentType())
return false;
@@ -2454,19 +2460,20 @@ StringRef TypeWithKeyword::getKeywordName(ElaboratedTypeKeyword Keyword) {
DependentTemplateSpecializationType::DependentTemplateSpecializationType(
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *NNS, const IdentifierInfo *Name,
- unsigned NumArgs, const TemplateArgument *Args,
+ ArrayRef<TemplateArgument> Args,
QualType Canon)
: TypeWithKeyword(Keyword, DependentTemplateSpecialization, Canon, true, true,
/*VariablyModified=*/false,
NNS && NNS->containsUnexpandedParameterPack()),
- NNS(NNS), Name(Name), NumArgs(NumArgs) {
+ NNS(NNS), Name(Name), NumArgs(Args.size()) {
assert((!NNS || NNS->isDependent()) &&
"DependentTemplateSpecializatonType requires dependent qualifier");
- for (unsigned I = 0; I != NumArgs; ++I) {
- if (Args[I].containsUnexpandedParameterPack())
+ TemplateArgument *ArgBuffer = getArgBuffer();
+ for (const TemplateArgument &Arg : Args) {
+ if (Arg.containsUnexpandedParameterPack())
setContainsUnexpandedParameterPack();
- new (&getArgBuffer()[I]) TemplateArgument(Args[I]);
+ new (ArgBuffer++) TemplateArgument(Arg);
}
}
@@ -2476,13 +2483,12 @@ DependentTemplateSpecializationType::Profile(llvm::FoldingSetNodeID &ID,
ElaboratedTypeKeyword Keyword,
NestedNameSpecifier *Qualifier,
const IdentifierInfo *Name,
- unsigned NumArgs,
- const TemplateArgument *Args) {
+ ArrayRef<TemplateArgument> Args) {
ID.AddInteger(Keyword);
ID.AddPointer(Qualifier);
ID.AddPointer(Name);
- for (unsigned Idx = 0; Idx < NumArgs; ++Idx)
- Args[Idx].Profile(ID, Context);
+ for (const TemplateArgument &Arg : Args)
+ Arg.Profile(ID, Context);
}
bool Type::isElaboratedTypeSpecifier() const {
@@ -2552,6 +2558,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
return "double";
case LongDouble:
return "long double";
+ case Float128:
+ return "__float128";
case WChar_S:
case WChar_U:
return Policy.MSWChar ? "__wchar_t" : "wchar_t";
@@ -2581,30 +2589,10 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
return "Class";
case ObjCSel:
return "SEL";
- case OCLImage1d:
- return "image1d_t";
- case OCLImage1dArray:
- return "image1d_array_t";
- case OCLImage1dBuffer:
- return "image1d_buffer_t";
- case OCLImage2d:
- return "image2d_t";
- case OCLImage2dArray:
- return "image2d_array_t";
- case OCLImage2dDepth:
- return "image2d_depth_t";
- case OCLImage2dArrayDepth:
- return "image2d_array_depth_t";
- case OCLImage2dMSAA:
- return "image2d_msaa_t";
- case OCLImage2dArrayMSAA:
- return "image2d_array_msaa_t";
- case OCLImage2dMSAADepth:
- return "image2d_msaa_depth_t";
- case OCLImage2dArrayMSAADepth:
- return "image2d_array_msaa_depth_t";
- case OCLImage3d:
- return "image3d_t";
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case Id: \
+ return "__" #Access " " #ImgType "_t";
+#include "clang/Basic/OpenCLImageTypes.def"
case OCLSampler:
return "sampler_t";
case OCLEvent:
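Replacing the twelve hand-written OpenCL image cases with an #include of OpenCLImageTypes.def is the classic X-macro pattern: the .def file is a flat list of IMAGE_TYPE(...) invocations, and each includer defines IMAGE_TYPE to expand that one list however it needs. A self-contained sketch of the technique, where COLOR_LIST stands in for the .def file:

#include <cstdio>

#define COLOR_LIST(X) X(Red) X(Green) X(Blue)   // stand-in for the .def file

enum Color {
#define COLOR(Name) Name,
    COLOR_LIST(COLOR)                           // expands to: Red, Green, Blue,
#undef COLOR
};

const char *colorName(Color C) {
    switch (C) {
#define COLOR(Name) case Name: return #Name;    // same list, different expansion
    COLOR_LIST(COLOR)
#undef COLOR
    }
    return "unknown";
}

int main() { std::printf("%s\n", colorName(Green)); }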
@@ -2654,7 +2642,10 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
case CC_AAPCS_VFP: return "aapcs-vfp";
case CC_IntelOclBicc: return "intel_ocl_bicc";
case CC_SpirFunction: return "spir_function";
- case CC_SpirKernel: return "spir_kernel";
+ case CC_OpenCLKernel: return "opencl_kernel";
+ case CC_Swift: return "swiftcall";
+ case CC_PreserveMost: return "preserve_most";
+ case CC_PreserveAll: return "preserve_all";
}
llvm_unreachable("Invalid calling convention.");
@@ -2671,7 +2662,7 @@ FunctionProtoType::FunctionProtoType(QualType result, ArrayRef<QualType> params,
NumParams(params.size()),
NumExceptions(epi.ExceptionSpec.Exceptions.size()),
ExceptionSpecType(epi.ExceptionSpec.Type),
- HasAnyConsumedParams(epi.ConsumedParameters != nullptr),
+ HasExtParameterInfos(epi.ExtParameterInfos != nullptr),
Variadic(epi.Variadic), HasTrailingReturn(epi.HasTrailingReturn) {
assert(NumParams == params.size() && "function has too many parameters");
@@ -2737,10 +2728,11 @@ FunctionProtoType::FunctionProtoType(QualType result, ArrayRef<QualType> params,
slot[0] = epi.ExceptionSpec.SourceDecl;
}
- if (epi.ConsumedParameters) {
- bool *consumedParams = const_cast<bool *>(getConsumedParamsBuffer());
+ if (epi.ExtParameterInfos) {
+ ExtParameterInfo *extParamInfos =
+ const_cast<ExtParameterInfo *>(getExtParameterInfosBuffer());
for (unsigned i = 0; i != NumParams; ++i)
- consumedParams[i] = epi.ConsumedParameters[i];
+ extParamInfos[i] = epi.ExtParameterInfos[i];
}
}
@@ -2860,9 +2852,9 @@ void FunctionProtoType::Profile(llvm::FoldingSetNodeID &ID, QualType Result,
epi.ExceptionSpec.Type == EST_Unevaluated) {
ID.AddPointer(epi.ExceptionSpec.SourceDecl->getCanonicalDecl());
}
- if (epi.ConsumedParameters) {
+ if (epi.ExtParameterInfos) {
for (unsigned i = 0; i != NumParams; ++i)
- ID.AddBoolean(epi.ConsumedParameters[i]);
+ ID.AddInteger(epi.ExtParameterInfos[i].getOpaqueValue());
}
epi.ExtInfo.Profile(ID);
ID.AddBoolean(epi.HasTrailingReturn);
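The Profile change above follows from the representation change: the old one-bool-per-parameter "consumed" array became an ExtParameterInfo value whose flags live in a small integer, so a single AddInteger of the opaque value hashes everything at once. A sketch of that opaque-value shape (the bit layout here is illustrative, not Clang's actual encoding):

#include <cstdint>

class ParamInfo {
    uint8_t Bits = 0;                  // all per-parameter flags packed together
public:
    ParamInfo withConsumed() const {
        ParamInfo P = *this;
        P.Bits |= 1;                   // illustrative "consumed" bit
        return P;
    }
    bool isConsumed() const { return Bits & 1; }
    uint8_t getOpaqueValue() const { return Bits; }  // one value to hash or compare
};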
@@ -2931,6 +2923,24 @@ void DependentDecltypeType::Profile(llvm::FoldingSetNodeID &ID,
E->Profile(ID, Context, true);
}
+UnaryTransformType::UnaryTransformType(QualType BaseType,
+ QualType UnderlyingType,
+ UTTKind UKind,
+ QualType CanonicalType)
+ : Type(UnaryTransform, CanonicalType, BaseType->isDependentType(),
+ BaseType->isInstantiationDependentType(),
+ BaseType->isVariablyModifiedType(),
+ BaseType->containsUnexpandedParameterPack())
+ , BaseType(BaseType), UnderlyingType(UnderlyingType), UKind(UKind)
+{}
+
+DependentUnaryTransformType::DependentUnaryTransformType(const ASTContext &C,
+ QualType BaseType,
+ UTTKind UKind)
+ : UnaryTransformType(BaseType, C.DependentTy, UKind, QualType())
+{}
+
+
TagType::TagType(TypeClass TC, const TagDecl *D, QualType can)
: Type(TC, can, D->isDependentType(),
/*InstantiationDependent=*/D->isDependentType(),
@@ -2947,17 +2957,6 @@ static TagDecl *getInterestingTagDecl(TagDecl *decl) {
return decl;
}
-UnaryTransformType::UnaryTransformType(QualType BaseType,
- QualType UnderlyingType,
- UTTKind UKind,
- QualType CanonicalType)
- : Type(UnaryTransform, CanonicalType, UnderlyingType->isDependentType(),
- UnderlyingType->isInstantiationDependentType(),
- UnderlyingType->isVariablyModifiedType(),
- BaseType->containsUnexpandedParameterPack())
- , BaseType(BaseType), UnderlyingType(UnderlyingType), UKind(UKind)
-{}
-
TagDecl *TagType::getDecl() const {
return getInterestingTagDecl(decl);
}
@@ -2994,8 +2993,11 @@ bool AttributedType::isQualifier() const {
case AttributedType::attr_stdcall:
case AttributedType::attr_thiscall:
case AttributedType::attr_pascal:
+ case AttributedType::attr_swiftcall:
case AttributedType::attr_vectorcall:
case AttributedType::attr_inteloclbicc:
+ case AttributedType::attr_preserve_most:
+ case AttributedType::attr_preserve_all:
case AttributedType::attr_ms_abi:
case AttributedType::attr_sysv_abi:
case AttributedType::attr_ptr32:
@@ -3047,11 +3049,14 @@ bool AttributedType::isCallingConv() const {
case attr_fastcall:
case attr_stdcall:
case attr_thiscall:
+ case attr_swiftcall:
case attr_vectorcall:
case attr_pascal:
case attr_ms_abi:
case attr_sysv_abi:
case attr_inteloclbicc:
+ case attr_preserve_most:
+ case attr_preserve_all:
return true;
}
llvm_unreachable("invalid attr kind");
@@ -3095,20 +3100,20 @@ void SubstTemplateTypeParmPackType::Profile(llvm::FoldingSetNodeID &ID,
bool TemplateSpecializationType::
anyDependentTemplateArguments(const TemplateArgumentListInfo &Args,
bool &InstantiationDependent) {
- return anyDependentTemplateArguments(Args.getArgumentArray(), Args.size(),
+ return anyDependentTemplateArguments(Args.arguments(),
InstantiationDependent);
}
bool TemplateSpecializationType::
-anyDependentTemplateArguments(const TemplateArgumentLoc *Args, unsigned N,
+anyDependentTemplateArguments(ArrayRef<TemplateArgumentLoc> Args,
bool &InstantiationDependent) {
- for (unsigned i = 0; i != N; ++i) {
- if (Args[i].getArgument().isDependent()) {
+ for (const TemplateArgumentLoc &ArgLoc : Args) {
+ if (ArgLoc.getArgument().isDependent()) {
InstantiationDependent = true;
return true;
}
-
- if (Args[i].getArgument().isInstantiationDependent())
+
+ if (ArgLoc.getArgument().isInstantiationDependent())
InstantiationDependent = true;
}
return false;
@@ -3116,7 +3121,7 @@ anyDependentTemplateArguments(const TemplateArgumentLoc *Args, unsigned N,
TemplateSpecializationType::
TemplateSpecializationType(TemplateName T,
- const TemplateArgument *Args, unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
QualType Canon, QualType AliasedType)
: Type(TemplateSpecialization,
Canon.isNull()? QualType(this, 0) : Canon,
@@ -3124,7 +3129,7 @@ TemplateSpecializationType(TemplateName T,
Canon.isNull()? true : Canon->isInstantiationDependentType(),
false,
T.containsUnexpandedParameterPack()),
- Template(T), NumArgs(NumArgs), TypeAlias(!AliasedType.isNull()) {
+ Template(T), NumArgs(Args.size()), TypeAlias(!AliasedType.isNull()) {
assert(!T.getAsDependentTemplateName() &&
"Use DependentTemplateSpecializationType for dependent template-name");
assert((T.getKind() == TemplateName::Template ||
@@ -3134,7 +3139,7 @@ TemplateSpecializationType(TemplateName T,
TemplateArgument *TemplateArgs
= reinterpret_cast<TemplateArgument *>(this + 1);
- for (unsigned Arg = 0; Arg < NumArgs; ++Arg) {
+ for (const TemplateArgument &Arg : Args) {
// Update instantiation-dependent and variably-modified bits.
// If the canonical type exists and is non-dependent, the template
// specialization type can be non-dependent even if one of the type
@@ -3143,14 +3148,14 @@ TemplateSpecializationType(TemplateName T,
// U<T> is always non-dependent, irrespective of the type T.
// However, U<Ts> contains an unexpanded parameter pack, even though
// its expansion (and thus its desugared type) doesn't.
- if (Args[Arg].isInstantiationDependent())
+ if (Arg.isInstantiationDependent())
setInstantiationDependent();
- if (Args[Arg].getKind() == TemplateArgument::Type &&
- Args[Arg].getAsType()->isVariablyModifiedType())
+ if (Arg.getKind() == TemplateArgument::Type &&
+ Arg.getAsType()->isVariablyModifiedType())
setVariablyModified();
- if (Args[Arg].containsUnexpandedParameterPack())
+ if (Arg.containsUnexpandedParameterPack())
setContainsUnexpandedParameterPack();
- new (&TemplateArgs[Arg]) TemplateArgument(Args[Arg]);
+ new (TemplateArgs++) TemplateArgument(Arg);
}
// Store the aliased type if this is a type alias template specialization.
@@ -3163,12 +3168,11 @@ TemplateSpecializationType(TemplateName T,
void
TemplateSpecializationType::Profile(llvm::FoldingSetNodeID &ID,
TemplateName T,
- const TemplateArgument *Args,
- unsigned NumArgs,
+ ArrayRef<TemplateArgument> Args,
const ASTContext &Context) {
T.Profile(ID);
- for (unsigned Idx = 0; Idx < NumArgs; ++Idx)
- Args[Idx].Profile(ID, Context);
+ for (const TemplateArgument &Arg : Args)
+ Arg.Profile(ID, Context);
}
QualType
@@ -3561,18 +3565,9 @@ bool Type::canHaveNullability() const {
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
diff --git a/contrib/llvm/tools/clang/lib/AST/TypeLoc.cpp b/contrib/llvm/tools/clang/lib/AST/TypeLoc.cpp
index d08b07b2ccd6..78947d18f953 100644
--- a/contrib/llvm/tools/clang/lib/AST/TypeLoc.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/TypeLoc.cpp
@@ -80,11 +80,11 @@ unsigned TypeLoc::getFullDataSizeForType(QualType Ty) {
while (!TyLoc.isNull()) {
unsigned Align = getLocalAlignmentForType(TyLoc.getType());
MaxAlign = std::max(Align, MaxAlign);
- Total = llvm::RoundUpToAlignment(Total, Align);
+ Total = llvm::alignTo(Total, Align);
Total += TypeSizer().Visit(TyLoc);
TyLoc = TyLoc.getNextTypeLoc();
}
- Total = llvm::RoundUpToAlignment(Total, MaxAlign);
+ Total = llvm::alignTo(Total, MaxAlign);
return Total;
}
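The rename from RoundUpToAlignment to llvm::alignTo is mechanical; the semantics are "round Value up to the next multiple of Align". A standalone equivalent, assuming a positive alignment:

#include <cstdint>

constexpr uint64_t alignTo(uint64_t Value, uint64_t Align) {
    return (Value + Align - 1) / Align * Align;   // round up to a multiple of Align
}
static_assert(alignTo(13, 8) == 16, "13 rounds up to the next multiple of 8");
static_assert(alignTo(16, 8) == 16, "already-aligned values are unchanged");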
@@ -149,12 +149,12 @@ void TypeLoc::copy(TypeLoc other) {
// If both data pointers are aligned to the maximum alignment, we
// can memcpy because getFullDataSize() accurately reflects the
// layout of the data.
- if (reinterpret_cast<uintptr_t>(Data)
- == llvm::RoundUpToAlignment(reinterpret_cast<uintptr_t>(Data),
- TypeLocMaxDataAlign) &&
- reinterpret_cast<uintptr_t>(other.Data)
- == llvm::RoundUpToAlignment(reinterpret_cast<uintptr_t>(other.Data),
- TypeLocMaxDataAlign)) {
+ if (reinterpret_cast<uintptr_t>(Data) ==
+ llvm::alignTo(reinterpret_cast<uintptr_t>(Data),
+ TypeLocMaxDataAlign) &&
+ reinterpret_cast<uintptr_t>(other.Data) ==
+ llvm::alignTo(reinterpret_cast<uintptr_t>(other.Data),
+ TypeLocMaxDataAlign)) {
memcpy(Data, other.Data, getFullDataSize());
return;
}
@@ -320,6 +320,7 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const {
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
+ case BuiltinType::Float128:
llvm_unreachable("Builtin type needs extra local data!");
// Fall through, if the impossible happens.
@@ -333,18 +334,9 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const {
case BuiltinType::ObjCId:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCSel:
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
diff --git a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp
index b202523bdaf3..065a2db09141 100644
--- a/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/TypePrinter.cpp
@@ -81,12 +81,14 @@ namespace {
class TypePrinter {
PrintingPolicy Policy;
+ unsigned Indentation;
bool HasEmptyPlaceHolder;
bool InsideCCAttribute;
public:
- explicit TypePrinter(const PrintingPolicy &Policy)
- : Policy(Policy), HasEmptyPlaceHolder(false), InsideCCAttribute(false) { }
+ explicit TypePrinter(const PrintingPolicy &Policy, unsigned Indentation = 0)
+ : Policy(Policy), Indentation(Indentation),
+ HasEmptyPlaceHolder(false), InsideCCAttribute(false) { }
void print(const Type *ty, Qualifiers qs, raw_ostream &OS,
StringRef PlaceHolder);
@@ -110,7 +112,8 @@ namespace {
};
}
-static void AppendTypeQualList(raw_ostream &OS, unsigned TypeQuals, bool C99) {
+static void AppendTypeQualList(raw_ostream &OS, unsigned TypeQuals,
+ bool HasRestrictKeyword) {
bool appendSpace = false;
if (TypeQuals & Qualifiers::Const) {
OS << "const";
@@ -123,7 +126,7 @@ static void AppendTypeQualList(raw_ostream &OS, unsigned TypeQuals, bool C99) {
}
if (TypeQuals & Qualifiers::Restrict) {
if (appendSpace) OS << ' ';
- if (C99) {
+ if (HasRestrictKeyword) {
OS << "restrict";
} else {
OS << "__restrict";
@@ -411,7 +414,7 @@ void TypePrinter::printMemberPointerBefore(const MemberPointerType *T,
OS << '(';
PrintingPolicy InnerPolicy(Policy);
- InnerPolicy.SuppressTag = false;
+ InnerPolicy.IncludeTagDefinition = false;
TypePrinter(InnerPolicy).print(QualType(T->getClass(), 0), OS, StringRef());
OS << "::*";
@@ -437,7 +440,8 @@ void TypePrinter::printConstantArrayAfter(const ConstantArrayType *T,
raw_ostream &OS) {
OS << '[';
if (T->getIndexTypeQualifiers().hasQualifiers()) {
- AppendTypeQualList(OS, T->getIndexTypeCVRQualifiers(), Policy.LangOpts.C99);
+ AppendTypeQualList(OS, T->getIndexTypeCVRQualifiers(),
+ Policy.Restrict);
OS << ' ';
}
@@ -470,7 +474,7 @@ void TypePrinter::printVariableArrayAfter(const VariableArrayType *T,
raw_ostream &OS) {
OS << '[';
if (T->getIndexTypeQualifiers().hasQualifiers()) {
- AppendTypeQualList(OS, T->getIndexTypeCVRQualifiers(), Policy.LangOpts.C99);
+ AppendTypeQualList(OS, T->getIndexTypeCVRQualifiers(), Policy.Restrict);
OS << ' ';
}
@@ -629,6 +633,20 @@ void TypePrinter::printFunctionProtoBefore(const FunctionProtoType *T,
}
}
+llvm::StringRef clang::getParameterABISpelling(ParameterABI ABI) {
+ switch (ABI) {
+ case ParameterABI::Ordinary:
+ llvm_unreachable("asking for spelling of ordinary parameter ABI");
+ case ParameterABI::SwiftContext:
+ return "swift_context";
+ case ParameterABI::SwiftErrorResult:
+ return "swift_error_result";
+ case ParameterABI::SwiftIndirectResult:
+ return "swift_indirect_result";
+ }
+ llvm_unreachable("bad parameter ABI kind");
+}
+
void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
raw_ostream &OS) {
// If needed for precedence reasons, wrap the inner part in grouping parens.
@@ -641,6 +659,13 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
ParamPolicyRAII ParamPolicy(Policy);
for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) {
if (i) OS << ", ";
+
+ auto EPI = T->getExtParameterInfo(i);
+ if (EPI.isConsumed()) OS << "__attribute__((ns_consumed)) ";
+ auto ABI = EPI.getABI();
+ if (ABI != ParameterABI::Ordinary)
+ OS << "__attribute__((" << getParameterABISpelling(ABI) << ")) ";
+
print(T->getParamType(i), OS, StringRef());
}
}
@@ -649,7 +674,7 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
if (T->getNumParams())
OS << ", ";
OS << "...";
- } else if (T->getNumParams() == 0 && !Policy.LangOpts.CPlusPlus) {
+ } else if (T->getNumParams() == 0 && Policy.UseVoidForZeroParams) {
// Do not emit int() if we have a proto, emit 'int(void)'.
OS << "void";
}
@@ -700,9 +725,18 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
OS << " __attribute__((sysv_abi))";
break;
case CC_SpirFunction:
- case CC_SpirKernel:
+ case CC_OpenCLKernel:
// Do nothing. These CCs are not available as attributes.
break;
+ case CC_Swift:
+ OS << " __attribute__((swiftcall))";
+ break;
+ case CC_PreserveMost:
+ OS << " __attribute__((preserve_most))";
+ break;
+ case CC_PreserveAll:
+ OS << " __attribute__((preserve_all))";
+ break;
}
}
@@ -714,7 +748,7 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T,
if (unsigned quals = T->getTypeQuals()) {
OS << ' ';
- AppendTypeQualList(OS, quals, Policy.LangOpts.C99);
+ AppendTypeQualList(OS, quals, Policy.Restrict);
}
switch (T->getRefQualifier()) {
@@ -863,7 +897,8 @@ void TypePrinter::printAtomicAfter(const AtomicType *T, raw_ostream &OS) { }
void TypePrinter::printPipeBefore(const PipeType *T, raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
- OS << "pipe";
+ OS << "pipe ";
+ print(T->getElementType(), OS, StringRef());
spaceBeforePlaceHolder(OS);
}
@@ -888,10 +923,8 @@ void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
OS << Spec->getIdentifier()->getName();
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
- TemplateSpecializationType::PrintTemplateArgumentList(OS,
- TemplateArgs.data(),
- TemplateArgs.size(),
- Policy);
+ TemplateSpecializationType::PrintTemplateArgumentList(
+ OS, TemplateArgs.asArray(), Policy);
OS << "::";
} else if (TagDecl *Tag = dyn_cast<TagDecl>(DC)) {
if (TypedefNameDecl *Typedef = Tag->getTypedefNameForAnonDecl())
@@ -904,18 +937,19 @@ void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) {
}
void TypePrinter::printTag(TagDecl *D, raw_ostream &OS) {
- if (Policy.SuppressTag)
+ if (Policy.IncludeTagDefinition) {
+ PrintingPolicy SubPolicy = Policy;
+ SubPolicy.IncludeTagDefinition = false;
+ D->print(OS, SubPolicy, Indentation);
+ spaceBeforePlaceHolder(OS);
return;
+ }
bool HasKindDecoration = false;
- // bool SuppressTagKeyword
- // = Policy.LangOpts.CPlusPlus || Policy.SuppressTagKeyword;
-
// We don't print tags unless this is an elaborated type.
// In C, we just assume every RecordType is an elaborated type.
- if (!(Policy.LangOpts.CPlusPlus || Policy.SuppressTagKeyword ||
- D->getTypedefNameForAnonDecl())) {
+ if (!Policy.SuppressTagKeyword && !D->getTypedefNameForAnonDecl()) {
HasKindDecoration = true;
OS << D->getKindName();
OS << ' ';
@@ -967,22 +1001,17 @@ void TypePrinter::printTag(TagDecl *D, raw_ostream &OS) {
// arguments.
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(D)) {
- const TemplateArgument *Args;
- unsigned NumArgs;
+ ArrayRef<TemplateArgument> Args;
if (TypeSourceInfo *TAW = Spec->getTypeAsWritten()) {
const TemplateSpecializationType *TST =
cast<TemplateSpecializationType>(TAW->getType());
- Args = TST->getArgs();
- NumArgs = TST->getNumArgs();
+ Args = TST->template_arguments();
} else {
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
- Args = TemplateArgs.data();
- NumArgs = TemplateArgs.size();
+ Args = TemplateArgs.asArray();
}
IncludeStrongLifetimeRAII Strong(Policy);
- TemplateSpecializationType::PrintTemplateArgumentList(OS,
- Args, NumArgs,
- Policy);
+ TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, Policy);
}
spaceBeforePlaceHolder(OS);
@@ -1040,11 +1069,9 @@ void TypePrinter::printTemplateSpecializationBefore(
raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
T->getTemplateName().print(OS, Policy);
-
- TemplateSpecializationType::PrintTemplateArgumentList(OS,
- T->getArgs(),
- T->getNumArgs(),
- Policy);
+
+ TemplateSpecializationType::PrintTemplateArgumentList(
+ OS, T->template_arguments(), Policy);
spaceBeforePlaceHolder(OS);
}
void TypePrinter::printTemplateSpecializationAfter(
@@ -1060,14 +1087,16 @@ void TypePrinter::printInjectedClassNameAfter(const InjectedClassNameType *T,
void TypePrinter::printElaboratedBefore(const ElaboratedType *T,
raw_ostream &OS) {
- if (Policy.SuppressTag && isa<TagType>(T->getNamedType()))
- return;
- OS << TypeWithKeyword::getKeywordName(T->getKeyword());
- if (T->getKeyword() != ETK_None)
- OS << " ";
- NestedNameSpecifier* Qualifier = T->getQualifier();
- if (Qualifier)
- Qualifier->print(OS, Policy);
+ // The tag definition will take care of these.
+ if (!Policy.IncludeTagDefinition)
+ {
+ OS << TypeWithKeyword::getKeywordName(T->getKeyword());
+ if (T->getKeyword() != ETK_None)
+ OS << " ";
+ NestedNameSpecifier* Qualifier = T->getQualifier();
+ if (Qualifier)
+ Qualifier->print(OS, Policy);
+ }
ElaboratedTypePolicyRAII PolicyRAII(Policy);
printBefore(T->getNamedType(), OS);
@@ -1119,8 +1148,7 @@ void TypePrinter::printDependentTemplateSpecializationBefore(
T->getQualifier()->print(OS, Policy);
OS << T->getIdentifier()->getName();
TemplateSpecializationType::PrintTemplateArgumentList(OS,
- T->getArgs(),
- T->getNumArgs(),
+ T->template_arguments(),
Policy);
spaceBeforePlaceHolder(OS);
}
@@ -1305,6 +1333,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
case AttributedType::attr_fastcall: OS << "fastcall"; break;
case AttributedType::attr_stdcall: OS << "stdcall"; break;
case AttributedType::attr_thiscall: OS << "thiscall"; break;
+ case AttributedType::attr_swiftcall: OS << "swiftcall"; break;
case AttributedType::attr_vectorcall: OS << "vectorcall"; break;
case AttributedType::attr_pascal: OS << "pascal"; break;
case AttributedType::attr_ms_abi: OS << "ms_abi"; break;
@@ -1321,6 +1350,12 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
break;
}
case AttributedType::attr_inteloclbicc: OS << "inteloclbicc"; break;
+ case AttributedType::attr_preserve_most:
+ OS << "preserve_most";
+ break;
+ case AttributedType::attr_preserve_all:
+ OS << "preserve_all";
+ break;
}
OS << "))";
}
@@ -1400,50 +1435,46 @@ void TemplateSpecializationType::
const TemplateArgumentListInfo &Args,
const PrintingPolicy &Policy) {
return PrintTemplateArgumentList(OS,
- Args.getArgumentArray(),
- Args.size(),
+ Args.arguments(),
Policy);
}
-void
-TemplateSpecializationType::PrintTemplateArgumentList(
- raw_ostream &OS,
- const TemplateArgument *Args,
- unsigned NumArgs,
- const PrintingPolicy &Policy,
- bool SkipBrackets) {
+void TemplateSpecializationType::PrintTemplateArgumentList(
+ raw_ostream &OS, ArrayRef<TemplateArgument> Args,
+ const PrintingPolicy &Policy, bool SkipBrackets) {
const char *Comma = Policy.MSVCFormatting ? "," : ", ";
if (!SkipBrackets)
OS << '<';
-
+
bool needSpace = false;
- for (unsigned Arg = 0; Arg < NumArgs; ++Arg) {
+ bool FirstArg = true;
+ for (const TemplateArgument &Arg : Args) {
// Print the argument into a string.
SmallString<128> Buf;
llvm::raw_svector_ostream ArgOS(Buf);
- if (Args[Arg].getKind() == TemplateArgument::Pack) {
- if (Args[Arg].pack_size() && Arg > 0)
+ if (Arg.getKind() == TemplateArgument::Pack) {
+ if (Arg.pack_size() && !FirstArg)
OS << Comma;
PrintTemplateArgumentList(ArgOS,
- Args[Arg].pack_begin(),
- Args[Arg].pack_size(),
+ Arg.getPackAsArray(),
Policy, true);
} else {
- if (Arg > 0)
+ if (!FirstArg)
OS << Comma;
- Args[Arg].print(Policy, ArgOS);
+ Arg.print(Policy, ArgOS);
}
StringRef ArgString = ArgOS.str();
// If this is the first argument and its string representation
// begins with the global scope specifier ('::foo'), add a space
// to avoid printing the digraph '<:'.
- if (!Arg && !ArgString.empty() && ArgString[0] == ':')
+ if (FirstArg && !ArgString.empty() && ArgString[0] == ':')
OS << ' ';
OS << ArgString;
needSpace = (!ArgString.empty() && ArgString.back() == '>');
+ FirstArg = false;
}
// If the last character of our string is '>', add another space to
@@ -1459,40 +1490,41 @@ TemplateSpecializationType::PrintTemplateArgumentList(
// Sadly, repeat all that with TemplateArgLoc.
void TemplateSpecializationType::
PrintTemplateArgumentList(raw_ostream &OS,
- const TemplateArgumentLoc *Args, unsigned NumArgs,
+ ArrayRef<TemplateArgumentLoc> Args,
const PrintingPolicy &Policy) {
OS << '<';
const char *Comma = Policy.MSVCFormatting ? "," : ", ";
bool needSpace = false;
- for (unsigned Arg = 0; Arg < NumArgs; ++Arg) {
- if (Arg > 0)
+ bool FirstArg = true;
+ for (const TemplateArgumentLoc &Arg : Args) {
+ if (!FirstArg)
OS << Comma;
-
+
// Print the argument into a string.
SmallString<128> Buf;
llvm::raw_svector_ostream ArgOS(Buf);
- if (Args[Arg].getArgument().getKind() == TemplateArgument::Pack) {
+ if (Arg.getArgument().getKind() == TemplateArgument::Pack) {
PrintTemplateArgumentList(ArgOS,
- Args[Arg].getArgument().pack_begin(),
- Args[Arg].getArgument().pack_size(),
+ Arg.getArgument().getPackAsArray(),
Policy, true);
} else {
- Args[Arg].getArgument().print(Policy, ArgOS);
+ Arg.getArgument().print(Policy, ArgOS);
}
StringRef ArgString = ArgOS.str();
-
+
// If this is the first argument and its string representation
// begins with the global scope specifier ('::foo'), add a space
// to avoid printing the digraph '<:'.
- if (!Arg && !ArgString.empty() && ArgString[0] == ':')
+ if (FirstArg && !ArgString.empty() && ArgString[0] == ':')
OS << ' ';
OS << ArgString;
needSpace = (!ArgString.empty() && ArgString.back() == '>');
+ FirstArg = false;
}
-
+
// If the last character of our string is '>', add another space to
// keep the two '>''s separate tokens. We don't *have* to do this in
// C++0x, but it's still good hygiene.
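The digraph the comment refers to: '<:' is an alternative token for '[', so printing a first template argument that begins with '::' directly after '<' would change how the output lexes. A compilable illustration:

namespace Outer { struct Bar {}; }
template <typename T> struct Foo {};

// Foo<::Outer::Bar> f1;   // pre-C++11, "<:" lexes as the '[' digraph: ill-formed
Foo< ::Outer::Bar> f2;     // the printed space keeps '<' and '::' separate tokens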
@@ -1543,7 +1575,13 @@ void Qualifiers::print(raw_ostream &OS, const PrintingPolicy& Policy,
unsigned quals = getCVRQualifiers();
if (quals) {
- AppendTypeQualList(OS, quals, Policy.LangOpts.C99);
+ AppendTypeQualList(OS, quals, Policy.Restrict);
+ addSpace = true;
+ }
+ if (hasUnaligned()) {
+ if (addSpace)
+ OS << ' ';
+ OS << "__unaligned";
addSpace = true;
}
if (unsigned addrspace = getAddressSpace()) {
@@ -1617,11 +1655,11 @@ std::string QualType::getAsString(const Type *ty, Qualifiers qs) {
void QualType::print(const Type *ty, Qualifiers qs,
raw_ostream &OS, const PrintingPolicy &policy,
- const Twine &PlaceHolder) {
+ const Twine &PlaceHolder, unsigned Indentation) {
SmallString<128> PHBuf;
StringRef PH = PlaceHolder.toStringRef(PHBuf);
- TypePrinter(policy).print(ty, qs, OS, PH);
+ TypePrinter(policy, Indentation).print(ty, qs, OS, PH);
}
void QualType::getAsStringInternal(const Type *ty, Qualifiers qs,
diff --git a/contrib/llvm/tools/clang/lib/AST/VTableBuilder.cpp b/contrib/llvm/tools/clang/lib/AST/VTableBuilder.cpp
index bae018652f91..640fbf47aeab 100644
--- a/contrib/llvm/tools/clang/lib/AST/VTableBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/VTableBuilder.cpp
@@ -2416,7 +2416,7 @@ private:
MethodVFTableLocationsTy MethodVFTableLocations;
/// \brief Does this class have an RTTI component?
- bool HasRTTIComponent;
+ bool HasRTTIComponent = false;
/// MethodInfo - Contains information about a method in a vtable.
/// (Used for computing 'this' pointer adjustment thunks.)
@@ -2545,12 +2545,13 @@ public:
MostDerivedClassLayout(Context.getASTRecordLayout(MostDerivedClass)),
WhichVFPtr(*Which),
Overriders(MostDerivedClass, CharUnits(), MostDerivedClass) {
- // Only include the RTTI component if we know that we will provide a
- // definition of the vftable.
- HasRTTIComponent = Context.getLangOpts().RTTIData &&
- !MostDerivedClass->hasAttr<DLLImportAttr>() &&
- MostDerivedClass->getTemplateSpecializationKind() !=
- TSK_ExplicitInstantiationDeclaration;
+ // Provide the RTTI component if RTTIData is enabled. If the vftable would
+ // be available externally, we should not provide the RTTI component. It
+ // is currently impossible to get available externally vftables with either
+ // dllimport or extern template instantiations, but eventually we may add a
+ // flag to support additional devirtualization that needs this.
+ if (Context.getLangOpts().RTTIData)
+ HasRTTIComponent = true;
LayoutVFTable();
diff --git a/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 847398c0861c..19e5743ea1cb 100644
--- a/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -616,6 +616,10 @@ private:
ActiveASTContext->getTranslationUnitDecl())
return false;
+ // For AST-nodes that don't have an identity, we can't memoize.
+ if (!Builder->isComparable())
+ return matchesAncestorOfRecursively(Node, Matcher, Builder, MatchMode);
+
MatchKey Key;
Key.MatcherID = Matcher.getID();
Key.Node = Node;
@@ -630,22 +634,34 @@ private:
}
MemoizedMatchResult Result;
- Result.ResultOfMatch = false;
Result.Nodes = *Builder;
+ Result.ResultOfMatch =
+ matchesAncestorOfRecursively(Node, Matcher, &Result.Nodes, MatchMode);
+
+ MemoizedMatchResult &CachedResult = ResultCache[Key];
+ CachedResult = std::move(Result);
+ *Builder = CachedResult.Nodes;
+ return CachedResult.ResultOfMatch;
+ }
+
+ bool matchesAncestorOfRecursively(const ast_type_traits::DynTypedNode &Node,
+ const DynTypedMatcher &Matcher,
+ BoundNodesTreeBuilder *Builder,
+ AncestorMatchMode MatchMode) {
const auto &Parents = ActiveASTContext->getParents(Node);
assert(!Parents.empty() && "Found node that is not in the parent map.");
if (Parents.size() == 1) {
// Only one parent - do recursive memoization.
const ast_type_traits::DynTypedNode Parent = Parents[0];
- if (Matcher.matches(Parent, this, &Result.Nodes)) {
- Result.ResultOfMatch = true;
- } else if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
- // Reset the results to not include the bound nodes from the failed
- // match above.
- Result.Nodes = *Builder;
- Result.ResultOfMatch = memoizedMatchesAncestorOfRecursively(
- Parent, Matcher, &Result.Nodes, MatchMode);
+ BoundNodesTreeBuilder BuilderCopy = *Builder;
+ if (Matcher.matches(Parent, this, &BuilderCopy)) {
+ *Builder = std::move(BuilderCopy);
+ return true;
+ }
+ if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
+ return memoizedMatchesAncestorOfRecursively(Parent, Matcher, Builder,
+ MatchMode);
// Once we get back from the recursive call, the result will be the
// same as the parent's result.
}
@@ -655,10 +671,10 @@ private:
std::deque<ast_type_traits::DynTypedNode> Queue(Parents.begin(),
Parents.end());
while (!Queue.empty()) {
- Result.Nodes = *Builder;
- if (Matcher.matches(Queue.front(), this, &Result.Nodes)) {
- Result.ResultOfMatch = true;
- break;
+ BoundNodesTreeBuilder BuilderCopy = *Builder;
+ if (Matcher.matches(Queue.front(), this, &BuilderCopy)) {
+ *Builder = std::move(BuilderCopy);
+ return true;
}
if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
for (const auto &Parent :
@@ -673,12 +689,7 @@ private:
Queue.pop_front();
}
}
-
- MemoizedMatchResult &CachedResult = ResultCache[Key];
- CachedResult = std::move(Result);
-
- *Builder = CachedResult.Nodes;
- return CachedResult.ResultOfMatch;
+ return false;
}
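The restructuring above splits "compute the answer" (matchesAncestorOfRecursively) from "cache the answer", and the new isComparable() check skips the cache entirely when the bound nodes have no identity to key on. A minimal standalone sketch of that memoization shape, with a toy Node in place of the AST types:

#include <map>

struct Node { int Id; bool Comparable; };

bool matchesUncached(const Node &N);            // the real recursive work

std::map<int, bool> ResultCache;

bool matchesMemoized(const Node &N) {
    if (!N.Comparable)
        return matchesUncached(N);              // no identity: do not memoize
    auto It = ResultCache.find(N.Id);
    if (It != ResultCache.end())
        return It->second;                      // cache hit
    bool Result = matchesUncached(N);
    ResultCache[N.Id] = Result;                 // store only after computing
    return Result;
}

bool matchesUncached(const Node &N) { return N.Id % 2 == 0; }  // stand-in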
// Implements a BoundNodesTree::Visitor that calls a MatchCallback with
@@ -701,7 +712,7 @@ private:
// Returns true if 'TypeNode' has an alias that matches the given matcher.
bool typeHasMatchingAlias(const Type *TypeNode,
- const Matcher<NamedDecl> Matcher,
+ const Matcher<NamedDecl> &Matcher,
BoundNodesTreeBuilder *Builder) {
const Type *const CanonicalType =
ActiveASTContext->getCanonicalType(TypeNode);
@@ -744,46 +755,25 @@ private:
MemoizationMap ResultCache;
};
-static CXXRecordDecl *getAsCXXRecordDecl(const Type *TypeNode) {
- // Type::getAs<...>() drills through typedefs.
- if (TypeNode->getAs<DependentNameType>() != nullptr ||
- TypeNode->getAs<DependentTemplateSpecializationType>() != nullptr ||
- TypeNode->getAs<TemplateTypeParmType>() != nullptr)
- // Dependent names and template TypeNode parameters will be matched when
- // the template is instantiated.
- return nullptr;
- TemplateSpecializationType const *TemplateType =
- TypeNode->getAs<TemplateSpecializationType>();
- if (!TemplateType) {
- return TypeNode->getAsCXXRecordDecl();
- }
- if (TemplateType->getTemplateName().isDependent())
- // Dependent template specializations will be matched when the
- // template is instantiated.
- return nullptr;
-
- // For template specialization types which are specializing a template
- // declaration which is an explicit or partial specialization of another
- // template declaration, getAsCXXRecordDecl() returns the corresponding
- // ClassTemplateSpecializationDecl.
- //
- // For template specialization types which are specializing a template
- // declaration which is neither an explicit nor partial specialization of
- // another template declaration, getAsCXXRecordDecl() returns NULL and
- // we get the CXXRecordDecl of the templated declaration.
- CXXRecordDecl *SpecializationDecl = TemplateType->getAsCXXRecordDecl();
- if (SpecializationDecl) {
- return SpecializationDecl;
- }
- NamedDecl *Templated =
- TemplateType->getTemplateName().getAsTemplateDecl()->getTemplatedDecl();
- if (CXXRecordDecl *TemplatedRecord = dyn_cast<CXXRecordDecl>(Templated)) {
- return TemplatedRecord;
- }
- // Now it can still be that we have an alias template.
- TypeAliasDecl *AliasDecl = dyn_cast<TypeAliasDecl>(Templated);
- assert(AliasDecl);
- return getAsCXXRecordDecl(AliasDecl->getUnderlyingType().getTypePtr());
+static CXXRecordDecl *
+getAsCXXRecordDeclOrPrimaryTemplate(const Type *TypeNode) {
+ if (auto *RD = TypeNode->getAsCXXRecordDecl())
+ return RD;
+
+ // Find the innermost TemplateSpecializationType that isn't an alias template.
+ auto *TemplateType = TypeNode->getAs<TemplateSpecializationType>();
+ while (TemplateType && TemplateType->isTypeAlias())
+ TemplateType =
+ TemplateType->getAliasedType()->getAs<TemplateSpecializationType>();
+
+ // If this is the name of a (dependent) template specialization, use the
+ // definition of the template, even though it might be specialized later.
+ if (TemplateType)
+ if (auto *ClassTemplate = dyn_cast_or_null<ClassTemplateDecl>(
+ TemplateType->getTemplateName().getAsTemplateDecl()))
+ return ClassTemplate->getTemplatedDecl();
+
+ return nullptr;
}
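The loop above looks through alias templates before giving up: for a declaration like "template <class T> using Vec = std::vector<T>;", the type Vec<int> is a TemplateSpecializationType whose isTypeAlias() is true, and getAliasedType() steps to std::vector<int>. A standalone analogue of that unwrap-until-not-an-alias walk:

struct TypeNode {
    bool IsAlias = false;
    const TypeNode *Aliased = nullptr;   // valid only when IsAlias is true
};

const TypeNode *lookThroughAliases(const TypeNode *T) {
    while (T && T->IsAlias)
        T = T->Aliased;                  // step to the aliased specialization
    return T;
}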
// Returns true if the given class is directly or indirectly derived
@@ -800,7 +790,10 @@ bool MatchASTVisitor::classIsDerivedFrom(const CXXRecordDecl *Declaration,
if (typeHasMatchingAlias(TypeNode, Base, Builder))
return true;
- CXXRecordDecl *ClassDecl = getAsCXXRecordDecl(TypeNode);
+ // FIXME: Going to the primary template here isn't really correct, but
+ // unfortunately we accept a Decl matcher for the base class, not a Type
+ // matcher, so it's the best thing we can do with our current interface.
+ CXXRecordDecl *ClassDecl = getAsCXXRecordDeclOrPrimaryTemplate(TypeNode);
if (!ClassDecl)
continue;
if (ClassDecl == Declaration) {
diff --git a/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 463cf0ba9df6..107052ef1ded 100644
--- a/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/contrib/llvm/tools/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -14,6 +14,7 @@
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ManagedStatic.h"
namespace clang {
@@ -293,50 +294,212 @@ bool AnyOfVariadicOperator(const ast_type_traits::DynTypedNode &DynNode,
return false;
}
-HasNameMatcher::HasNameMatcher(StringRef NameRef)
- : UseUnqualifiedMatch(NameRef.find("::") == NameRef.npos), Name(NameRef) {
- assert(!Name.empty());
+Matcher<NamedDecl> hasAnyNameFunc(ArrayRef<const StringRef *> NameRefs) {
+ std::vector<std::string> Names;
+ for (auto *Name : NameRefs)
+ Names.emplace_back(*Name);
+ return internal::Matcher<NamedDecl>(
+ new internal::HasNameMatcher(std::move(Names)));
}
-bool HasNameMatcher::matchesNodeUnqualified(const NamedDecl &Node) const {
- assert(UseUnqualifiedMatch);
- if (Node.getIdentifier()) {
- // Simple name.
- return Name == Node.getName();
+HasNameMatcher::HasNameMatcher(std::vector<std::string> N)
+ : UseUnqualifiedMatch(std::all_of(
+ N.begin(), N.end(),
+ [](StringRef Name) { return Name.find("::") == Name.npos; })),
+ Names(std::move(N)) {
+#ifndef NDEBUG
+ for (StringRef Name : Names)
+ assert(!Name.empty());
+#endif
+}
+
+namespace {
+
+bool consumeNameSuffix(StringRef &FullName, StringRef Suffix) {
+ StringRef Name = FullName;
+ if (!Name.endswith(Suffix))
+ return false;
+ Name = Name.drop_back(Suffix.size());
+ if (!Name.empty()) {
+ if (!Name.endswith("::"))
+ return false;
+ Name = Name.drop_back(2);
}
+ FullName = Name;
+ return true;
+}
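consumeNameSuffix peels one declaration-context layer per call: it strips a trailing name, then the "::" separator before it, committing the shortened string only on success. A standalone trace of the same logic against the pattern "b::c":

#include <cassert>
#include <string>

bool consumeSuffix(std::string &FullName, const std::string &Suffix) {
    std::string Name = FullName;                 // work on a copy, as above
    if (Name.size() < Suffix.size() ||
        Name.compare(Name.size() - Suffix.size(), Suffix.size(), Suffix) != 0)
        return false;
    Name.resize(Name.size() - Suffix.size());
    if (!Name.empty()) {
        if (Name.size() < 2 || Name.compare(Name.size() - 2, 2, "::") != 0)
            return false;                        // a "::" separator must precede
        Name.resize(Name.size() - 2);
    }
    FullName = Name;                             // commit only on success
    return true;
}

int main() {
    std::string Pattern = "b::c";
    assert(consumeSuffix(Pattern, "c") && Pattern == "b");   // innermost name first
    assert(consumeSuffix(Pattern, "b") && Pattern.empty());  // fully consumed
    return 0;
}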
+
+StringRef getNodeName(const NamedDecl &Node, llvm::SmallString<128> &Scratch) {
+ // Simple name.
+ if (Node.getIdentifier())
+ return Node.getName();
+
if (Node.getDeclName()) {
// Name needs to be constructed.
- llvm::SmallString<128> NodeName;
- llvm::raw_svector_ostream OS(NodeName);
+ Scratch.clear();
+ llvm::raw_svector_ostream OS(Scratch);
Node.printName(OS);
- return Name == OS.str();
+ return OS.str();
}
- return false;
+
+ return "(anonymous)";
+}
+
+StringRef getNodeName(const RecordDecl &Node, llvm::SmallString<128> &Scratch) {
+ if (Node.getIdentifier()) {
+ return Node.getName();
+ }
+ Scratch.clear();
+ return ("(anonymous " + Node.getKindName() + ")").toStringRef(Scratch);
+}
+
+StringRef getNodeName(const NamespaceDecl &Node,
+ llvm::SmallString<128> &Scratch) {
+ return Node.isAnonymousNamespace() ? "(anonymous namespace)" : Node.getName();
+}
+
+
+class PatternSet {
+public:
+ PatternSet(ArrayRef<std::string> Names) {
+ for (StringRef Name : Names)
+ Patterns.push_back({Name, Name.startswith("::")});
+ }
+
+ /// Consumes the name suffix from each pattern in the set and removes the ones
+ /// that didn't match.
+ /// Returns true if there are still any patterns left.
+ bool consumeNameSuffix(StringRef NodeName, bool CanSkip) {
+ for (size_t I = 0; I < Patterns.size();) {
+ if (internal::consumeNameSuffix(Patterns[I].P, NodeName) ||
+ CanSkip) {
+ ++I;
+ } else {
+ Patterns.erase(Patterns.begin() + I);
+ }
+ }
+ return !Patterns.empty();
+ }
+
+ /// Check if any of the patterns are a match.
+ /// A match will be a pattern that was fully consumed and that also matches the
+ /// 'fully qualified' requirement.
+ bool foundMatch(bool AllowFullyQualified) const {
+ for (auto& P: Patterns)
+ if (P.P.empty() && (AllowFullyQualified || !P.IsFullyQualified))
+ return true;
+ return false;
+ }
+
+private:
+ struct Pattern {
+ StringRef P;
+ bool IsFullyQualified;
+ };
+ llvm::SmallVector<Pattern, 8> Patterns;
+};
+
+} // namespace
+
+bool HasNameMatcher::matchesNodeUnqualified(const NamedDecl &Node) const {
+ assert(UseUnqualifiedMatch);
+ llvm::SmallString<128> Scratch;
+ StringRef NodeName = getNodeName(Node, Scratch);
+ return std::any_of(Names.begin(), Names.end(), [&](StringRef Name) {
+ return consumeNameSuffix(Name, NodeName) && Name.empty();
+ });
+}
+
+bool HasNameMatcher::matchesNodeFullFast(const NamedDecl &Node) const {
+ PatternSet Patterns(Names);
+ llvm::SmallString<128> Scratch;
+
+ // This function is copied and adapted from NamedDecl::printQualifiedName()
+ // By matching each part individually we optimize in a couple of ways:
+ // - We can exit early on the first failure.
+ // - We can skip inline/anonymous namespaces without another pass.
+ // - We print one name at a time, reducing the chance of overflowing the
+ // inlined space of the SmallString.
+
+ // First, match the name.
+ if (!Patterns.consumeNameSuffix(getNodeName(Node, Scratch),
+ /*CanSkip=*/false))
+ return false;
+
+ // Try to match each declaration context.
+ // We are allowed to skip anonymous and inline namespaces if they don't match.
+ const DeclContext *Ctx = Node.getDeclContext();
+
+ if (Ctx->isFunctionOrMethod())
+ return Patterns.foundMatch(/*AllowFullyQualified=*/false);
+
+ for (; Ctx && isa<NamedDecl>(Ctx); Ctx = Ctx->getParent()) {
+ if (Patterns.foundMatch(/*AllowFullyQualified=*/false))
+ return true;
+
+ if (const auto *ND = dyn_cast<NamespaceDecl>(Ctx)) {
+ // If it matches (or we can skip it), continue.
+ if (Patterns.consumeNameSuffix(getNodeName(*ND, Scratch),
+ /*CanSkip=*/ND->isAnonymousNamespace() ||
+ ND->isInline()))
+ continue;
+ return false;
+ }
+ if (const auto *RD = dyn_cast<RecordDecl>(Ctx)) {
+ if (!isa<ClassTemplateSpecializationDecl>(Ctx)) {
+ if (Patterns.consumeNameSuffix(getNodeName(*RD, Scratch),
+ /*CanSkip=*/false))
+ continue;
+
+ return false;
+ }
+ }
+
+ // We don't know how to deal with this DeclContext.
+ // Fall back to the slow version of the code.
+ return matchesNodeFullSlow(Node);
+ }
+
+ return Patterns.foundMatch(/*AllowFullyQualified=*/true);
}
-bool HasNameMatcher::matchesNodeFull(const NamedDecl &Node) const {
- llvm::SmallString<128> NodeName = StringRef("::");
- llvm::raw_svector_ostream OS(NodeName);
- Node.printQualifiedName(OS);
- const StringRef FullName = OS.str();
- const StringRef Pattern = Name;
+bool HasNameMatcher::matchesNodeFullSlow(const NamedDecl &Node) const {
+ const bool SkipUnwrittenCases[] = {false, true};
+ for (bool SkipUnwritten : SkipUnwrittenCases) {
+ llvm::SmallString<128> NodeName = StringRef("::");
+ llvm::raw_svector_ostream OS(NodeName);
+
+ if (SkipUnwritten) {
+ PrintingPolicy Policy = Node.getASTContext().getPrintingPolicy();
+ Policy.SuppressUnwrittenScope = true;
+ Node.printQualifiedName(OS, Policy);
+ } else {
+ Node.printQualifiedName(OS);
+ }
+
+ const StringRef FullName = OS.str();
- if (Pattern.startswith("::"))
- return FullName == Pattern;
+ for (const StringRef Pattern : Names) {
+ if (Pattern.startswith("::")) {
+ if (FullName == Pattern)
+ return true;
+ } else if (FullName.endswith(Pattern) &&
+ FullName.drop_back(Pattern.size()).endswith("::")) {
+ return true;
+ }
+ }
+ }
- return FullName.endswith(Pattern) &&
- FullName.drop_back(Pattern.size()).endswith("::");
+ return false;
}
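The slow path prints the qualified name twice, once with unwritten scopes (inline and anonymous namespaces) suppressed and once without, then tests every pattern against both spellings; that is what keeps hasName("std::string") and hasName("std::__1::string") equivalent under a libc++-style inline namespace. A self-contained sketch of the two-spelling check (names and layout assumed for illustration):

#include <string>

static bool endsAtBoundary(const std::string &Full, const std::string &P) {
    if (Full.size() <= P.size() ||
        Full.compare(Full.size() - P.size(), P.size(), P) != 0)
        return false;
    const std::string Rest = Full.substr(0, Full.size() - P.size());
    return Rest.size() >= 2 && Rest.compare(Rest.size() - 2, 2, "::") == 0;
}

bool matchesEitherSpelling(const std::string &Pattern,
                           const std::string &Written,   // "::std::__1::string"
                           const std::string &Spelled) { // "::std::string"
    if (Pattern.rfind("::", 0) == 0)      // fully qualified: exact match only
        return Written == Pattern || Spelled == Pattern;
    return endsAtBoundary(Written, Pattern) || endsAtBoundary(Spelled, Pattern);
}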
bool HasNameMatcher::matchesNode(const NamedDecl &Node) const {
- // FIXME: There is still room for improvement, but it would require copying a
- // lot of the logic from NamedDecl::printQualifiedName(). The benchmarks do
- // not show like that extra complexity is needed right now.
+ assert(matchesNodeFullFast(Node) == matchesNodeFullSlow(Node));
if (UseUnqualifiedMatch) {
- assert(matchesNodeUnqualified(Node) == matchesNodeFull(Node));
+ assert(matchesNodeUnqualified(Node) == matchesNodeFullFast(Node));
return matchesNodeUnqualified(Node);
}
- return matchesNodeFull(Node);
+ return matchesNodeFullFast(Node);
}
} // end namespace internal
diff --git a/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h
index 64d6b7814aeb..7b1a30702633 100644
--- a/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h
+++ b/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Marshallers.h
@@ -1,4 +1,4 @@
-//===--- Marshallers.h - Generic matcher function marshallers -*- C++ -*-===//
+//===--- Marshallers.h - Generic matcher function marshallers ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,7 +32,6 @@ namespace ast_matchers {
namespace dynamic {
namespace internal {
-
/// \brief Helper template class to just from argument type to the right is/get
/// functions in VariantValue.
/// Used to verify and extract the matcher arguments below.
@@ -97,6 +96,28 @@ public:
}
};
+template <> struct ArgTypeTraits<clang::CastKind> {
+private:
+ static clang::CastKind getCastKind(llvm::StringRef AttrKind) {
+ return llvm::StringSwitch<clang::CastKind>(AttrKind)
+#define CAST_OPERATION(Name) .Case( #Name, CK_##Name)
+#include "clang/AST/OperationKinds.def"
+ .Default(CK_Invalid);
+ }
+
+public:
+ static bool is(const VariantValue &Value) {
+ return Value.isString() &&
+ getCastKind(Value.getString()) != CK_Invalid;
+ }
+ static clang::CastKind get(const VariantValue &Value) {
+ return getCastKind(Value.getString());
+ }
+ static ArgKind getKind() {
+ return ArgKind(ArgKind::AK_String);
+ }
+};
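The new ArgTypeTraits<clang::CastKind> specialization reuses the X-macro list in OperationKinds.def to build a StringSwitch from spelling to kind, with CK_Invalid as the "not a cast kind" sentinel that both is() and get() rely on. The same lookup shape, self-contained (three kinds stand in for the full .def list):

#include <string>

enum CastKind { CK_Invalid, CK_BitCast, CK_LValueToRValue };

CastKind getCastKind(const std::string &Name) {
    if (Name == "BitCast")        return CK_BitCast;        // .Case("BitCast", ...)
    if (Name == "LValueToRValue") return CK_LValueToRValue;
    return CK_Invalid;                                      // .Default(CK_Invalid)
}

bool isCastKindString(const std::string &Name) {
    return getCastKind(Name) != CK_Invalid;   // mirrors ArgTypeTraits::is()
}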
+
/// \brief Matcher descriptor interface.
///
/// Provides a \c create() method that constructs the matcher from the provided
@@ -234,7 +255,7 @@ static VariantMatcher outvalueToVariantMatcher(const DynTypedMatcher &Matcher) {
template <typename T>
static VariantMatcher outvalueToVariantMatcher(const T &PolyMatcher,
typename T::ReturnTypes * =
- NULL) {
+ nullptr) {
std::vector<DynTypedMatcher> Matchers;
mergePolyMatchers(PolyMatcher, Matchers, typename T::ReturnTypes());
VariantMatcher Out = VariantMatcher::PolymorphicMatcher(std::move(Matchers));
@@ -326,8 +347,9 @@ public:
template <typename ResultT, typename ArgT,
ResultT (*F)(ArrayRef<const ArgT *>)>
- VariadicFuncMatcherDescriptor(llvm::VariadicFunction<ResultT, ArgT, F> Func,
- StringRef MatcherName)
+ VariadicFuncMatcherDescriptor(
+ ast_matchers::internal::VariadicFunction<ResultT, ArgT, F> Func,
+ StringRef MatcherName)
: Func(&variadicMatcherDescriptor<ResultT, ArgT, F>),
MatcherName(MatcherName.str()),
ArgsKind(ArgTypeTraits<ArgT>::getKind()) {
@@ -410,7 +432,6 @@ private:
return VariantMatcher(); \
}
-
/// \brief 0-arg marshaller function.
template <typename ReturnType>
static VariantMatcher matcherMarshall0(void (*Func)(), StringRef MatcherName,
@@ -657,9 +678,9 @@ MatcherDescriptor *makeMatcherAutoMarshall(ReturnType (*Func)(ArgType1, ArgType2
/// \brief Variadic overload.
template <typename ResultT, typename ArgT,
ResultT (*Func)(ArrayRef<const ArgT *>)>
-MatcherDescriptor *
-makeMatcherAutoMarshall(llvm::VariadicFunction<ResultT, ArgT, Func> VarFunc,
- StringRef MatcherName) {
+MatcherDescriptor *makeMatcherAutoMarshall(
+ ast_matchers::internal::VariadicFunction<ResultT, ArgT, Func> VarFunc,
+ StringRef MatcherName) {
return new VariadicFuncMatcherDescriptor(VarFunc, MatcherName);
}
@@ -708,9 +729,9 @@ makeMatcherAutoMarshall(ast_matchers::internal::VariadicOperatorMatcherFunc<
MatcherName);
}
-} // namespace internal
-} // namespace dynamic
-} // namespace ast_matchers
-} // namespace clang
+} // namespace internal
+} // namespace dynamic
+} // namespace ast_matchers
+} // namespace clang
-#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_MARSHALLERS_H
+#endif // LLVM_CLANG_AST_MATCHERS_DYNAMIC_MARSHALLERS_H
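The ArgTypeTraits<clang::CastKind> specialization added above lets the dynamic registry parse cast-kind names from strings; the spelling is the CAST_OPERATION name without the CK_ prefix (e.g. "IntegralCast"). A hedged sketch of the equivalent static matcher, which Registry.cpp below exposes as hasCastKind:

    #include "clang/ASTMatchers/ASTMatchers.h"

    using namespace clang;
    using namespace clang::ast_matchers;

    // Matches casts whose kind is CK_IntegralCast; the dynamic registry
    // spells the same thing hasCastKind("IntegralCast").
    auto M = castExpr(hasCastKind(CK_IntegralCast));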
diff --git a/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 5b1c5529aa47..a8d4b88d8580 100644
--- a/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/contrib/llvm/tools/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -95,6 +95,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_OVERLOADED_2(thisPointerType);
REGISTER_MATCHER(accessSpecDecl);
+ REGISTER_MATCHER(addrLabelExpr);
REGISTER_MATCHER(alignOfExpr);
REGISTER_MATCHER(allOf);
REGISTER_MATCHER(anyOf);
@@ -104,9 +105,11 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(arrayType);
REGISTER_MATCHER(asmStmt);
REGISTER_MATCHER(asString);
+ REGISTER_MATCHER(atomicExpr);
REGISTER_MATCHER(atomicType);
REGISTER_MATCHER(autoType);
REGISTER_MATCHER(binaryOperator);
+ REGISTER_MATCHER(binaryConditionalOperator);
REGISTER_MATCHER(blockPointerType);
REGISTER_MATCHER(booleanType);
REGISTER_MATCHER(breakStmt);
@@ -161,11 +164,14 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(declStmt);
REGISTER_MATCHER(defaultStmt);
REGISTER_MATCHER(dependentSizedArrayType);
+ REGISTER_MATCHER(designatedInitExpr);
+ REGISTER_MATCHER(designatorCountIs);
REGISTER_MATCHER(doStmt);
REGISTER_MATCHER(eachOf);
REGISTER_MATCHER(elaboratedType);
REGISTER_MATCHER(enumConstantDecl);
REGISTER_MATCHER(enumDecl);
+ REGISTER_MATCHER(enumType);
REGISTER_MATCHER(equalsBoundNode);
REGISTER_MATCHER(equalsIntegralValue);
REGISTER_MATCHER(explicitCastExpr);
@@ -174,20 +180,25 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(fieldDecl);
REGISTER_MATCHER(floatLiteral);
REGISTER_MATCHER(forEach);
+ REGISTER_MATCHER(forEachArgumentWithParam);
REGISTER_MATCHER(forEachConstructorInitializer);
REGISTER_MATCHER(forEachDescendant);
REGISTER_MATCHER(forEachSwitchCase);
REGISTER_MATCHER(forField);
+ REGISTER_MATCHER(forFunction);
REGISTER_MATCHER(forStmt);
REGISTER_MATCHER(friendDecl);
REGISTER_MATCHER(functionDecl);
+ REGISTER_MATCHER(functionProtoType);
REGISTER_MATCHER(functionTemplateDecl);
REGISTER_MATCHER(functionType);
+ REGISTER_MATCHER(gnuNullExpr);
REGISTER_MATCHER(gotoStmt);
REGISTER_MATCHER(has);
REGISTER_MATCHER(hasAncestor);
REGISTER_MATCHER(hasAnyArgument);
REGISTER_MATCHER(hasAnyConstructorInitializer);
+ REGISTER_MATCHER(hasAnyName);
REGISTER_MATCHER(hasAnyParameter);
REGISTER_MATCHER(hasAnySubstatement);
REGISTER_MATCHER(hasAnyTemplateArgument);
@@ -197,9 +208,11 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasAttr);
REGISTER_MATCHER(hasAutomaticStorageDuration);
REGISTER_MATCHER(hasBase);
+ REGISTER_MATCHER(hasBitWidth);
REGISTER_MATCHER(hasBody);
REGISTER_MATCHER(hasCanonicalType);
REGISTER_MATCHER(hasCaseConstant);
+ REGISTER_MATCHER(hasCastKind);
REGISTER_MATCHER(hasCondition);
REGISTER_MATCHER(hasConditionVariableStatement);
REGISTER_MATCHER(hasDecayedType);
@@ -208,6 +221,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasDeducedType);
REGISTER_MATCHER(hasDescendant);
REGISTER_MATCHER(hasDestinationType);
+ REGISTER_MATCHER(hasDynamicExceptionSpec);
REGISTER_MATCHER(hasEitherOperand);
REGISTER_MATCHER(hasElementType);
REGISTER_MATCHER(hasElse);
@@ -234,6 +248,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasQualifier);
REGISTER_MATCHER(hasRangeInit);
REGISTER_MATCHER(hasReceiverType);
+ REGISTER_MATCHER(hasReturnValue);
REGISTER_MATCHER(hasRHS);
REGISTER_MATCHER(hasSelector);
REGISTER_MATCHER(hasSingleDecl);
@@ -241,6 +256,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasSizeExpr);
REGISTER_MATCHER(hasSourceExpression);
REGISTER_MATCHER(hasStaticStorageDuration);
+ REGISTER_MATCHER(hasSyntacticForm);
REGISTER_MATCHER(hasTargetDecl);
REGISTER_MATCHER(hasTemplateArgument);
REGISTER_MATCHER(hasThen);
@@ -251,24 +267,32 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasUnarySelector);
REGISTER_MATCHER(hasValueType);
REGISTER_MATCHER(ifStmt);
+ REGISTER_MATCHER(ignoringImplicit);
REGISTER_MATCHER(ignoringImpCasts);
REGISTER_MATCHER(ignoringParenCasts);
REGISTER_MATCHER(ignoringParenImpCasts);
+ REGISTER_MATCHER(ignoringParens);
REGISTER_MATCHER(implicitCastExpr);
+ REGISTER_MATCHER(implicitValueInitExpr);
REGISTER_MATCHER(incompleteArrayType);
REGISTER_MATCHER(initListExpr);
REGISTER_MATCHER(injectedClassNameType);
REGISTER_MATCHER(innerType);
REGISTER_MATCHER(integerLiteral);
REGISTER_MATCHER(isAnonymous);
+ REGISTER_MATCHER(isAnyCharacter);
+ REGISTER_MATCHER(isAnyPointer);
REGISTER_MATCHER(isArrow);
REGISTER_MATCHER(isBaseInitializer);
+ REGISTER_MATCHER(isBitField);
REGISTER_MATCHER(isCatchAll);
REGISTER_MATCHER(isClass);
REGISTER_MATCHER(isConst);
REGISTER_MATCHER(isConstQualified);
+ REGISTER_MATCHER(isCopyAssignmentOperator);
REGISTER_MATCHER(isCopyConstructor);
REGISTER_MATCHER(isDefaultConstructor);
+ REGISTER_MATCHER(isDefaulted);
REGISTER_MATCHER(isDefinition);
REGISTER_MATCHER(isDeleted);
REGISTER_MATCHER(isExceptionVariable);
@@ -286,8 +310,10 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(isInteger);
REGISTER_MATCHER(isIntegral);
REGISTER_MATCHER(isInTemplateInstantiation);
+ REGISTER_MATCHER(isLambda);
REGISTER_MATCHER(isListInitialization);
REGISTER_MATCHER(isMemberInitializer);
+ REGISTER_MATCHER(isMoveAssignmentOperator);
REGISTER_MATCHER(isMoveConstructor);
REGISTER_MATCHER(isNoThrow);
REGISTER_MATCHER(isOverride);
@@ -295,13 +321,17 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(isProtected);
REGISTER_MATCHER(isPublic);
REGISTER_MATCHER(isPure);
+ REGISTER_MATCHER(isSignedInteger);
REGISTER_MATCHER(isStruct);
REGISTER_MATCHER(isTemplateInstantiation);
REGISTER_MATCHER(isUnion);
+ REGISTER_MATCHER(isUnsignedInteger);
REGISTER_MATCHER(isVariadic);
REGISTER_MATCHER(isVirtual);
+ REGISTER_MATCHER(isVirtualAsWritten);
REGISTER_MATCHER(isVolatileQualified);
REGISTER_MATCHER(isWritten);
+ REGISTER_MATCHER(labelDecl);
REGISTER_MATCHER(labelStmt);
REGISTER_MATCHER(lambdaExpr);
REGISTER_MATCHER(lValueReferenceType);
@@ -317,6 +347,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(namesType);
REGISTER_MATCHER(nestedNameSpecifier);
REGISTER_MATCHER(nestedNameSpecifierLoc);
+ REGISTER_MATCHER(nullPointerConstant);
REGISTER_MATCHER(nullStmt);
REGISTER_MATCHER(numSelectorArgs);
REGISTER_MATCHER(ofClass);
@@ -325,18 +356,24 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(objcObjectPointerType);
REGISTER_MATCHER(on);
REGISTER_MATCHER(onImplicitObjectArgument);
+ REGISTER_MATCHER(opaqueValueExpr);
REGISTER_MATCHER(parameterCountIs);
+ REGISTER_MATCHER(parenExpr);
+ REGISTER_MATCHER(parenListExpr);
REGISTER_MATCHER(parenType);
REGISTER_MATCHER(parmVarDecl);
REGISTER_MATCHER(pointee);
REGISTER_MATCHER(pointerType);
+ REGISTER_MATCHER(predefinedExpr);
REGISTER_MATCHER(qualType);
+ REGISTER_MATCHER(realFloatingPointType);
REGISTER_MATCHER(recordDecl);
REGISTER_MATCHER(recordType);
REGISTER_MATCHER(referenceType);
REGISTER_MATCHER(refersToDeclaration);
REGISTER_MATCHER(refersToIntegralType);
REGISTER_MATCHER(refersToType);
+ REGISTER_MATCHER(requiresZeroInitialization);
REGISTER_MATCHER(returns);
REGISTER_MATCHER(returnStmt);
REGISTER_MATCHER(rValueReferenceType);
@@ -347,6 +384,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(statementCountIs);
REGISTER_MATCHER(staticAssertDecl);
REGISTER_MATCHER(stmt);
+ REGISTER_MATCHER(stmtExpr);
REGISTER_MATCHER(stringLiteral);
REGISTER_MATCHER(substNonTypeTemplateParmExpr);
REGISTER_MATCHER(substTemplateTypeParmType);
@@ -361,12 +399,15 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(translationUnitDecl);
REGISTER_MATCHER(type);
REGISTER_MATCHER(typedefDecl);
+ REGISTER_MATCHER(typedefNameDecl);
REGISTER_MATCHER(typedefType);
+ REGISTER_MATCHER(typeAliasDecl);
REGISTER_MATCHER(typeLoc);
REGISTER_MATCHER(unaryExprOrTypeTraitExpr);
REGISTER_MATCHER(unaryOperator);
REGISTER_MATCHER(unaryTransformType);
REGISTER_MATCHER(unless);
+ REGISTER_MATCHER(unresolvedLookupExpr);
REGISTER_MATCHER(unresolvedUsingTypenameDecl);
REGISTER_MATCHER(unresolvedUsingValueDecl);
REGISTER_MATCHER(userDefinedLiteral);
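Several of the matchers registered above pair with the HasNameMatcher changes earlier in this patch. A minimal usage sketch (class names illustrative):

    #include "clang/ASTMatchers/ASTMatchers.h"

    using namespace clang::ast_matchers;

    // hasAnyName routes multiple patterns through one HasNameMatcher;
    // absolute ("::std::vector") and relative ("basic_string") patterns
    // can be mixed freely.
    auto M = cxxRecordDecl(hasAnyName("::std::vector", "basic_string"));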
diff --git a/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp b/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp
index 52c7f2613654..6bbe8f86d48e 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp
@@ -94,19 +94,25 @@ Stmt *AnalysisDeclContext::getBody(bool &IsAutosynthesized) const {
IsAutosynthesized = false;
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
Stmt *Body = FD->getBody();
- if (!Body && Manager && Manager->synthesizeBodies()) {
- Body = getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(FD);
- if (Body)
+ if (Manager && Manager->synthesizeBodies()) {
+ Stmt *SynthesizedBody =
+ getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(FD);
+ if (SynthesizedBody) {
+ Body = SynthesizedBody;
IsAutosynthesized = true;
+ }
}
return Body;
}
else if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
Stmt *Body = MD->getBody();
- if (!Body && Manager && Manager->synthesizeBodies()) {
- Body = getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(MD);
- if (Body)
+ if (Manager && Manager->synthesizeBodies()) {
+ Stmt *SynthesizedBody =
+ getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(MD);
+ if (SynthesizedBody) {
+ Body = SynthesizedBody;
IsAutosynthesized = true;
+ }
}
return Body;
} else if (const BlockDecl *BD = dyn_cast<BlockDecl>(D))
@@ -135,6 +141,10 @@ bool AnalysisDeclContext::isBodyAutosynthesizedFromModelFile() const {
return Tmp && Body->getLocStart().isValid();
}
+/// Returns true if \p VD is an Objective-C implicit 'self' parameter.
+static bool isSelfDecl(const VarDecl *VD) {
+ return isa<ImplicitParamDecl>(VD) && VD->getName() == "self";
+}
const ImplicitParamDecl *AnalysisDeclContext::getSelfDecl() const {
if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D))
@@ -143,7 +153,7 @@ const ImplicitParamDecl *AnalysisDeclContext::getSelfDecl() const {
// See if 'self' was captured by the block.
for (const auto &I : BD->captures()) {
const VarDecl *VD = I.getVariable();
- if (VD->getName() == "self")
+ if (isSelfDecl(VD))
return dyn_cast<ImplicitParamDecl>(VD);
}
}
@@ -161,7 +171,7 @@ const ImplicitParamDecl *AnalysisDeclContext::getSelfDecl() const {
continue;
VarDecl *VD = LC.getCapturedVar();
- if (VD->getName() == "self")
+ if (isSelfDecl(VD))
return dyn_cast<ImplicitParamDecl>(VD);
}
@@ -317,6 +327,21 @@ AnalysisDeclContext::getBlockInvocationContext(const LocationContext *parent,
BD, ContextData);
}
+bool AnalysisDeclContext::isInStdNamespace(const Decl *D) {
+ const DeclContext *DC = D->getDeclContext()->getEnclosingNamespaceContext();
+ const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC);
+ if (!ND)
+ return false;
+
+ while (const DeclContext *Parent = ND->getParent()) {
+ if (!isa<NamespaceDecl>(Parent))
+ break;
+ ND = cast<NamespaceDecl>(Parent);
+ }
+
+ return ND->isStdNamespace();
+}
+
LocationContextManager & AnalysisDeclContext::getLocationContextManager() {
assert(Manager &&
"Cannot create LocationContexts without an AnalysisDeclContextManager!");
diff --git a/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp b/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp
index 09904369ba9c..d202a0406461 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.cpp
@@ -239,7 +239,8 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) {
SourceLocation());
// (5) Create the 'if' statement.
- IfStmt *If = new (C) IfStmt(C, SourceLocation(), nullptr, UO, CS);
+ IfStmt *If = new (C) IfStmt(C, SourceLocation(), false, nullptr, nullptr,
+ UO, CS);
return If;
}
@@ -342,9 +343,8 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D)
Stmt *Else = M.makeReturn(RetVal);
/// Construct the If.
- Stmt *If =
- new (C) IfStmt(C, SourceLocation(), nullptr, Comparison, Body,
- SourceLocation(), Else);
+ Stmt *If = new (C) IfStmt(C, SourceLocation(), false, nullptr, nullptr,
+ Comparison, Body, SourceLocation(), Else);
return If;
}
@@ -383,10 +383,49 @@ Stmt *BodyFarm::getBody(const FunctionDecl *D) {
return Val.getValue();
}
+static const ObjCIvarDecl *findBackingIvar(const ObjCPropertyDecl *Prop) {
+ const ObjCIvarDecl *IVar = Prop->getPropertyIvarDecl();
+
+ if (IVar)
+ return IVar;
+
+ // When a readonly property is shadowed in a class extension with a
+ // readwrite property, the instance variable belongs to the shadowing
+ // property rather than the shadowed property. If there is no instance
+ // variable on a readonly property, check whether the property is
+ // shadowed and, if so, try to get the instance variable from the
+ // shadowing property.
+ if (!Prop->isReadOnly())
+ return nullptr;
+
+ auto *Container = cast<ObjCContainerDecl>(Prop->getDeclContext());
+ const ObjCInterfaceDecl *PrimaryInterface = nullptr;
+ if (auto *InterfaceDecl = dyn_cast<ObjCInterfaceDecl>(Container)) {
+ PrimaryInterface = InterfaceDecl;
+ } else if (auto *CategoryDecl = dyn_cast<ObjCCategoryDecl>(Container)) {
+ PrimaryInterface = CategoryDecl->getClassInterface();
+ } else if (auto *ImplDecl = dyn_cast<ObjCImplDecl>(Container)) {
+ PrimaryInterface = ImplDecl->getClassInterface();
+ } else {
+ return nullptr;
+ }
+
+ // FindPropertyVisibleInPrimaryClass() looks first in class extensions, so it
+ // is guaranteed to find the shadowing property, if it exists, rather than
+ // the shadowed property.
+ auto *ShadowingProp = PrimaryInterface->FindPropertyVisibleInPrimaryClass(
+ Prop->getIdentifier(), Prop->getQueryKind());
+ if (ShadowingProp && ShadowingProp != Prop) {
+ IVar = ShadowingProp->getPropertyIvarDecl();
+ }
+
+ return IVar;
+}
+
static Stmt *createObjCPropertyGetter(ASTContext &Ctx,
const ObjCPropertyDecl *Prop) {
// First, find the backing ivar.
- const ObjCIvarDecl *IVar = Prop->getPropertyIvarDecl();
+ const ObjCIvarDecl *IVar = findBackingIvar(Prop);
if (!IVar)
return nullptr;
@@ -459,6 +498,14 @@ Stmt *BodyFarm::getBody(const ObjCMethodDecl *D) {
return nullptr;
// For now, we only synthesize getters.
+ // Synthesizing setters would cause false negatives in the
+ // RetainCountChecker because the method body would bind the parameter
+ // to an instance variable, causing it to escape. This would prevent
+ // a warning in the following common scenario:
+ //
+ // id foo = [[NSObject alloc] init];
+ // self.foo = foo; // We should warn that foo leaks here.
+ //
if (D->param_size() != 0)
return nullptr;
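findBackingIvar covers the case where a readonly property declared on the interface is redeclared readwrite in a class extension, so the synthesized ivar belongs to the extension's property. A hedged Objective-C illustration (names invented):

    @interface Foo : NSObject
    @property (readonly) NSString *name;    // no ivar attached here
    @end

    @interface Foo ()                       // class extension
    @property (readwrite) NSString *name;   // owns the backing ivar _name
    @end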
diff --git a/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.h b/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.h
index 91379437231d..edbe99624651 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.h
+++ b/contrib/llvm/tools/clang/lib/Analysis/BodyFarm.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H
#define LLVM_CLANG_LIB_ANALYSIS_BODYFARM_H
+#include "clang/AST/DeclBase.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
@@ -22,7 +23,6 @@
namespace clang {
class ASTContext;
-class Decl;
class FunctionDecl;
class ObjCMethodDecl;
class ObjCPropertyDecl;
diff --git a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp
index ed2239f88ae5..a67f0910e15a 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp
@@ -1,4 +1,4 @@
- //===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===//
+//===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -825,7 +825,7 @@ private:
// * Variable x is equal to the largest literal.
// * Variable x is greater than largest literal.
bool AlwaysTrue = true, AlwaysFalse = true;
- for (llvm::APSInt Value : Values) {
+ for (const llvm::APSInt &Value : Values) {
TryResult Res1, Res2;
Res1 = analyzeLogicOperatorCondition(BO1, Value, L1);
Res2 = analyzeLogicOperatorCondition(BO2, Value, L2);
@@ -1945,7 +1945,8 @@ CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C) {
addLocalScopeForStmt(C);
}
if (!C->body_empty() && !isa<ReturnStmt>(*C->body_rbegin())) {
- // If the body ends with a ReturnStmt, the dtors will be added in VisitReturnStmt
+ // If the body ends with a ReturnStmt, the dtors will be added in
+ // VisitReturnStmt.
addAutomaticObjDtors(ScopePos, scopeBeginPos, C);
}
@@ -2168,6 +2169,13 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) {
// won't be restored when traversing AST.
SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos);
+ // Create local scope for C++17 if init-stmt if one exists.
+ if (Stmt *Init = I->getInit()) {
+ LocalScope::const_iterator BeginScopePos = ScopePos;
+ addLocalScopeForStmt(Init);
+ addAutomaticObjDtors(ScopePos, BeginScopePos, I);
+ }
+
// Create local scope for possible condition variable.
// Store scope position. Add implicit destructor.
if (VarDecl *VD = I->getConditionVariable()) {
@@ -2268,13 +2276,19 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) {
// blocks will be pointed to be "Block".
CFGBlock *LastBlock = addStmt(I->getCond());
- // Finally, if the IfStmt contains a condition variable, add it and its
+ // If the IfStmt contains a condition variable, add it and its
// initializer to the CFG.
if (const DeclStmt* DS = I->getConditionVariableDeclStmt()) {
autoCreateBlock();
LastBlock = addStmt(const_cast<DeclStmt *>(DS));
}
+ // Finally, if the IfStmt contains a C++17 init-stmt, add it to the CFG.
+ if (Stmt *Init = I->getInit()) {
+ autoCreateBlock();
+ LastBlock = addStmt(Init);
+ }
+
return LastBlock;
}
@@ -3059,6 +3073,13 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) {
// won't be restored when traversing AST.
SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos);
+ // Create local scope for C++17 switch init-stmt if one exists.
+ if (Stmt *Init = Terminator->getInit()) {
+ LocalScope::const_iterator BeginScopePos = ScopePos;
+ addLocalScopeForStmt(Init);
+ addAutomaticObjDtors(ScopePos, BeginScopePos, Terminator);
+ }
+
// Create local scope for possible condition variable.
// Store scope position. Add implicit destructor.
if (VarDecl *VD = Terminator->getConditionVariable()) {
@@ -3138,7 +3159,7 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) {
Block = SwitchTerminatedBlock;
CFGBlock *LastBlock = addStmt(Terminator->getCond());
- // Finally, if the SwitchStmt contains a condition variable, add both the
+ // If the SwitchStmt contains a condition variable, add both the
// SwitchStmt and the condition variable initialization to the CFG.
if (VarDecl *VD = Terminator->getConditionVariable()) {
if (Expr *Init = VD->getInit()) {
@@ -3148,6 +3169,12 @@ CFGBlock *CFGBuilder::VisitSwitchStmt(SwitchStmt *Terminator) {
}
}
+ // Finally, if the SwitchStmt contains a C++17 init-stmt, add it to the CFG.
+ if (Stmt *Init = Terminator->getInit()) {
+ autoCreateBlock();
+ LastBlock = addStmt(Init);
+ }
+
return LastBlock;
}
@@ -3397,8 +3424,10 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
// Create local scopes and destructors for range, begin and end variables.
if (Stmt *Range = S->getRangeStmt())
addLocalScopeForStmt(Range);
- if (Stmt *BeginEnd = S->getBeginEndStmt())
- addLocalScopeForStmt(BeginEnd);
+ if (Stmt *Begin = S->getBeginStmt())
+ addLocalScopeForStmt(Begin);
+ if (Stmt *End = S->getEndStmt())
+ addLocalScopeForStmt(End);
addAutomaticObjDtors(ScopePos, save_scope_pos.get(), S);
LocalScope::const_iterator ContinueScopePos = ScopePos;
@@ -3455,6 +3484,8 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
// continue statements.
Block = nullptr;
Succ = addStmt(S->getInc());
+ if (badCFG)
+ return nullptr;
ContinueJumpTarget = JumpTarget(Succ, ContinueScopePos);
// The starting block for the loop increment is the block that should
@@ -3489,7 +3520,8 @@ CFGBlock *CFGBuilder::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
// Add the initialization statements.
Block = createBlock();
- addStmt(S->getBeginEndStmt());
+ addStmt(S->getBeginStmt());
+ addStmt(S->getEndStmt());
return addStmt(S->getRangeStmt());
}
@@ -3870,7 +3902,17 @@ CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const {
case CFGElement::AutomaticObjectDtor: {
const VarDecl *var = castAs<CFGAutomaticObjDtor>().getVarDecl();
QualType ty = var->getType();
- ty = ty.getNonReferenceType();
+
+ // FIXME: See CFGBuilder::addLocalScopeForVarDecl.
+ //
+ // Lifetime-extending constructs are handled here. This works for a single
+ // temporary in an initializer expression.
+ if (ty->isReferenceType()) {
+ if (const Expr *Init = var->getInit()) {
+ ty = getReferenceInitTemporaryType(astContext, Init);
+ }
+ }
+
while (const ArrayType *arrayType = astContext.getAsArrayType(ty)) {
ty = arrayType->getElementType();
}
@@ -4514,7 +4556,7 @@ void CFGBlock::dump(const CFG* cfg, const LangOptions &LO,
print(llvm::errs(), cfg, LO, ShowColors);
}
-void CFGBlock::dump() const {
+LLVM_DUMP_METHOD void CFGBlock::dump() const {
dump(getParent(), LangOptions(), false);
}
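The new scopes and blocks model C++17 init-statements in if and switch. A minimal compile sketch of the source forms involved (declarations only; helper names assumed):

    #include <mutex>
    #include <queue>

    std::mutex m;
    std::queue<int> q;
    void process(int);
    int next_token();

    void demo() {
      // `lock` lives in the init-stmt scope; its destructor runs when the
      // whole if-statement ends, which the added addLocalScopeForStmt /
      // addAutomaticObjDtors calls arrange for.
      if (std::lock_guard<std::mutex> lock(m); !q.empty())
        process(q.front());

      // The switch init-stmt gets the same treatment.
      switch (int v = next_token(); v) {
      case 0:
        break;
      default:
        process(v);
        break;
      }
    }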
diff --git a/contrib/llvm/tools/clang/lib/Analysis/CallGraph.cpp b/contrib/llvm/tools/clang/lib/Analysis/CallGraph.cpp
index d06603469dd0..9d522fe7c6c5 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/CallGraph.cpp
@@ -188,7 +188,7 @@ void CallGraph::print(raw_ostream &OS) const {
OS.flush();
}
-void CallGraph::dump() const {
+LLVM_DUMP_METHOD void CallGraph::dump() const {
print(llvm::errs());
}
@@ -202,7 +202,7 @@ void CallGraphNode::print(raw_ostream &os) const {
os << "< >";
}
-void CallGraphNode::dump() const {
+LLVM_DUMP_METHOD void CallGraphNode::dump() const {
print(llvm::errs());
}
diff --git a/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp b/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp
index 9df23923b014..47bef1b927c9 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/Consumed.cpp
@@ -466,9 +466,15 @@ class ConsumedStmtVisitor : public ConstStmtVisitor<ConsumedStmtVisitor> {
MapType PropagationMap;
InfoEntry findInfo(const Expr *E) {
+ if (auto Cleanups = dyn_cast<ExprWithCleanups>(E))
+ if (!Cleanups->cleanupsHaveSideEffects())
+ E = Cleanups->getSubExpr();
return PropagationMap.find(E->IgnoreParens());
}
ConstInfoEntry findInfo(const Expr *E) const {
+ if (auto Cleanups = dyn_cast<ExprWithCleanups>(E))
+ if (!Cleanups->cleanupsHaveSideEffects())
+ E = Cleanups->getSubExpr();
return PropagationMap.find(E->IgnoreParens());
}
void insertInfo(const Expr *E, const PropagationInfo &PI) {
@@ -1356,7 +1362,7 @@ void ConsumedAnalyzer::run(AnalysisDeclContext &AC) {
ConsumedStmtVisitor Visitor(AC, *this, CurrStates.get());
// Add all trackable parameters to the state map.
- for (const auto *PI : D->params())
+ for (const auto *PI : D->parameters())
Visitor.VisitParmVarDecl(PI);
// Visit all of the function's basic blocks.
diff --git a/contrib/llvm/tools/clang/lib/Analysis/FormatString.cpp b/contrib/llvm/tools/clang/lib/Analysis/FormatString.cpp
index 0948bc0b08a8..83d08b55427f 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/FormatString.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/FormatString.cpp
@@ -15,6 +15,7 @@
#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
@@ -190,13 +191,21 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
return false;
case 'h':
++I;
- lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
- : LengthModifier::AsShort;
+ if (I != E && *I == 'h') {
+ ++I;
+ lmKind = LengthModifier::AsChar;
+ } else {
+ lmKind = LengthModifier::AsShort;
+ }
break;
case 'l':
++I;
- lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
- : LengthModifier::AsLong;
+ if (I != E && *I == 'l') {
+ ++I;
+ lmKind = LengthModifier::AsLongLong;
+ } else {
+ lmKind = LengthModifier::AsLong;
+ }
break;
case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
@@ -252,6 +261,28 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
return true;
}
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+ const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+ if (SpecifierBegin + 1 >= FmtStrEnd)
+ return false;
+
+ const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+ const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+ const char FirstByte = *SB;
+
+ // If the invalid specifier is a multibyte UTF-8 string, return the
+ // total length accordingly so that the conversion specifier can be
+ // properly updated to reflect a complete UTF-8 specifier.
+ unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+ if (NumBytes == 1)
+ return false;
+ if (SB + NumBytes > SE)
+ return false;
+
+ Len = NumBytes + 1;
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Methods on ArgType.
//===----------------------------------------------------------------------===//
@@ -663,7 +694,7 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target) const {
return true;
case ConversionSpecifier::FreeBSDrArg:
case ConversionSpecifier::FreeBSDyArg:
- return Target.getTriple().isOSFreeBSD();
+ return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS4();
default:
return false;
}
@@ -696,7 +727,7 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target) const {
return true;
case ConversionSpecifier::FreeBSDrArg:
case ConversionSpecifier::FreeBSDyArg:
- return Target.getTriple().isOSFreeBSD();
+ return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS4();
default:
return false;
}
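Effect of the new helper on diagnostics, sketched under the assumption of a UTF-8 source encoding:

    #include <cstdio>

    int main() {
      // 0xC3 0xA9 is the two-byte UTF-8 encoding of 'é'.
      // getNumBytesForUTF8(0xC3) == 2, so Len becomes 1 + 2 == 3 and the
      // -Wformat warning now covers the whole "%é" sequence as a single
      // invalid conversion specifier instead of stopping after one byte.
      std::printf("%\xC3\xA9\n");
      return 0;
    }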
diff --git a/contrib/llvm/tools/clang/lib/Analysis/FormatStringParsing.h b/contrib/llvm/tools/clang/lib/Analysis/FormatStringParsing.h
index e1652964b8c2..8463fcec5bf4 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/FormatStringParsing.h
+++ b/contrib/llvm/tools/clang/lib/Analysis/FormatStringParsing.h
@@ -46,7 +46,13 @@ bool ParseArgPosition(FormatStringHandler &H,
/// FormatSpecifier& argument, and false otherwise.
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
const LangOptions &LO, bool IsScanf = false);
-
+
+/// Returns true if the invalid specifier in \p SpecifierBegin is a
+/// multibyte UTF-8 sequence; checks that it does not run past \p FmtStrEnd
+/// and writes the total length to \p Len.
+bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin,
+ const char *FmtStrEnd, unsigned &Len);
+
template <typename T> class SpecifierResult {
T FS;
const char *Start;
diff --git a/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp b/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp
index f0976bce9720..ac6cef9d0842 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/PrintfFormatString.cpp
@@ -312,8 +312,13 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
argIndex++;
if (k == ConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Start;
+ if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
+ CS.setEndScanList(Start + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
}
return PrintfSpecifierResult(Start, FS);
}
@@ -611,9 +616,13 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
case BuiltinType::UInt128:
case BuiltinType::Int128:
case BuiltinType::Half:
+ case BuiltinType::Float128:
// Various types which are non-trivial to correct.
return false;
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
#define SIGNED_TYPE(Id, SingletonId)
#define UNSIGNED_TYPE(Id, SingletonId)
#define FLOATING_TYPE(Id, SingletonId)
diff --git a/contrib/llvm/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp b/contrib/llvm/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp
index 5b917a7a27f5..614f676fb193 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/PseudoConstantAnalysis.cpp
@@ -22,9 +22,7 @@
using namespace clang;
-// The number of ValueDecls we want to keep track of by default (per-function)
-#define VARDECL_SET_SIZE 256
-typedef llvm::SmallPtrSet<const VarDecl*, VARDECL_SET_SIZE> VarDeclSet;
+typedef llvm::SmallPtrSet<const VarDecl*, 32> VarDeclSet;
PseudoConstantAnalysis::PseudoConstantAnalysis(const Stmt *DeclBody) :
DeclBody(DeclBody), Analyzed(false) {
diff --git a/contrib/llvm/tools/clang/lib/Analysis/ScanfFormatString.cpp b/contrib/llvm/tools/clang/lib/Analysis/ScanfFormatString.cpp
index d484d8e828cb..82b038864c23 100644
--- a/contrib/llvm/tools/clang/lib/Analysis/ScanfFormatString.cpp
+++ b/contrib/llvm/tools/clang/lib/Analysis/ScanfFormatString.cpp
@@ -79,7 +79,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
unsigned &argIndex,
const LangOptions &LO,
const TargetInfo &Target) {
-
+ using namespace clang::analyze_format_string;
using namespace clang::analyze_scanf;
const char *I = Beg;
const char *Start = nullptr;
@@ -210,10 +210,15 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
// FIXME: '%' and '*' don't make sense. Issue a warning.
// FIXME: 'ConsumedSoFar' and '*' don't make sense.
-
+
if (k == ScanfConversionSpecifier::InvalidSpecifier) {
+ unsigned Len = I - Beg;
+ if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
+ CS.setEndScanList(Beg + Len);
+ FS.setConversionSpecifier(CS);
+ }
// Assume the conversion takes one argument.
- return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
}
return ScanfSpecifierResult(Start, FS);
}
diff --git a/contrib/llvm/tools/clang/lib/Basic/Builtins.cpp b/contrib/llvm/tools/clang/lib/Basic/Builtins.cpp
index fb6a6451aa82..28695d649a86 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Builtins.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Builtins.cpp
@@ -69,7 +69,9 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo,
bool MSModeUnsupported =
!LangOpts.MicrosoftExt && (BuiltinInfo.Langs & MS_LANG);
bool ObjCUnsupported = !LangOpts.ObjC1 && BuiltinInfo.Langs == OBJC_LANG;
- return !BuiltinsUnsupported && !MathBuiltinsUnsupported &&
+ bool OclCUnsupported = LangOpts.OpenCLVersion != 200 &&
+ BuiltinInfo.Langs == OCLC20_LANG;
+ return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported &&
!GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported;
}
diff --git a/contrib/llvm/tools/clang/lib/Basic/Cuda.cpp b/contrib/llvm/tools/clang/lib/Basic/Cuda.cpp
new file mode 100644
index 000000000000..3264078b98f5
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Basic/Cuda.cpp
@@ -0,0 +1,171 @@
+#include "clang/Basic/Cuda.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+
+const char *CudaVersionToString(CudaVersion V) {
+ switch (V) {
+ case CudaVersion::UNKNOWN:
+ return "unknown";
+ case CudaVersion::CUDA_70:
+ return "7.0";
+ case CudaVersion::CUDA_75:
+ return "7.5";
+ case CudaVersion::CUDA_80:
+ return "8.0";
+ }
+ llvm_unreachable("invalid enum");
+}
+
+const char *CudaArchToString(CudaArch A) {
+ switch (A) {
+ case CudaArch::UNKNOWN:
+ return "unknown";
+ case CudaArch::SM_20:
+ return "sm_20";
+ case CudaArch::SM_21:
+ return "sm_21";
+ case CudaArch::SM_30:
+ return "sm_30";
+ case CudaArch::SM_32:
+ return "sm_32";
+ case CudaArch::SM_35:
+ return "sm_35";
+ case CudaArch::SM_37:
+ return "sm_37";
+ case CudaArch::SM_50:
+ return "sm_50";
+ case CudaArch::SM_52:
+ return "sm_52";
+ case CudaArch::SM_53:
+ return "sm_53";
+ case CudaArch::SM_60:
+ return "sm_60";
+ case CudaArch::SM_61:
+ return "sm_61";
+ case CudaArch::SM_62:
+ return "sm_62";
+ }
+ llvm_unreachable("invalid enum");
+}
+
+CudaArch StringToCudaArch(llvm::StringRef S) {
+ return llvm::StringSwitch<CudaArch>(S)
+ .Case("sm_20", CudaArch::SM_20)
+ .Case("sm_21", CudaArch::SM_21)
+ .Case("sm_30", CudaArch::SM_30)
+ .Case("sm_32", CudaArch::SM_32)
+ .Case("sm_35", CudaArch::SM_35)
+ .Case("sm_37", CudaArch::SM_37)
+ .Case("sm_50", CudaArch::SM_50)
+ .Case("sm_52", CudaArch::SM_52)
+ .Case("sm_53", CudaArch::SM_53)
+ .Case("sm_60", CudaArch::SM_60)
+ .Case("sm_61", CudaArch::SM_61)
+ .Case("sm_62", CudaArch::SM_62)
+ .Default(CudaArch::UNKNOWN);
+}
+
+const char *CudaVirtualArchToString(CudaVirtualArch A) {
+ switch (A) {
+ case CudaVirtualArch::UNKNOWN:
+ return "unknown";
+ case CudaVirtualArch::COMPUTE_20:
+ return "compute_20";
+ case CudaVirtualArch::COMPUTE_30:
+ return "compute_30";
+ case CudaVirtualArch::COMPUTE_32:
+ return "compute_32";
+ case CudaVirtualArch::COMPUTE_35:
+ return "compute_35";
+ case CudaVirtualArch::COMPUTE_37:
+ return "compute_37";
+ case CudaVirtualArch::COMPUTE_50:
+ return "compute_50";
+ case CudaVirtualArch::COMPUTE_52:
+ return "compute_52";
+ case CudaVirtualArch::COMPUTE_53:
+ return "compute_53";
+ case CudaVirtualArch::COMPUTE_60:
+ return "compute_60";
+ case CudaVirtualArch::COMPUTE_61:
+ return "compute_61";
+ case CudaVirtualArch::COMPUTE_62:
+ return "compute_62";
+ }
+ llvm_unreachable("invalid enum");
+}
+
+CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
+ return llvm::StringSwitch<CudaVirtualArch>(S)
+ .Case("compute_20", CudaVirtualArch::COMPUTE_20)
+ .Case("compute_30", CudaVirtualArch::COMPUTE_30)
+ .Case("compute_32", CudaVirtualArch::COMPUTE_32)
+ .Case("compute_35", CudaVirtualArch::COMPUTE_35)
+ .Case("compute_37", CudaVirtualArch::COMPUTE_37)
+ .Case("compute_50", CudaVirtualArch::COMPUTE_50)
+ .Case("compute_52", CudaVirtualArch::COMPUTE_52)
+ .Case("compute_53", CudaVirtualArch::COMPUTE_53)
+ .Case("compute_60", CudaVirtualArch::COMPUTE_60)
+ .Case("compute_61", CudaVirtualArch::COMPUTE_61)
+ .Case("compute_62", CudaVirtualArch::COMPUTE_62)
+ .Default(CudaVirtualArch::UNKNOWN);
+}
+
+CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
+ switch (A) {
+ case CudaArch::UNKNOWN:
+ return CudaVirtualArch::UNKNOWN;
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ return CudaVirtualArch::COMPUTE_20;
+ case CudaArch::SM_30:
+ return CudaVirtualArch::COMPUTE_30;
+ case CudaArch::SM_32:
+ return CudaVirtualArch::COMPUTE_32;
+ case CudaArch::SM_35:
+ return CudaVirtualArch::COMPUTE_35;
+ case CudaArch::SM_37:
+ return CudaVirtualArch::COMPUTE_37;
+ case CudaArch::SM_50:
+ return CudaVirtualArch::COMPUTE_50;
+ case CudaArch::SM_52:
+ return CudaVirtualArch::COMPUTE_52;
+ case CudaArch::SM_53:
+ return CudaVirtualArch::COMPUTE_53;
+ case CudaArch::SM_60:
+ return CudaVirtualArch::COMPUTE_60;
+ case CudaArch::SM_61:
+ return CudaVirtualArch::COMPUTE_61;
+ case CudaArch::SM_62:
+ return CudaVirtualArch::COMPUTE_62;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+CudaVersion MinVersionForCudaArch(CudaArch A) {
+ switch (A) {
+ case CudaArch::UNKNOWN:
+ return CudaVersion::UNKNOWN;
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ return CudaVersion::CUDA_70;
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ return CudaVersion::CUDA_80;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+} // namespace clang
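A usage sketch of the new helpers; the values follow directly from the tables above:

    #include "clang/Basic/Cuda.h"

    using namespace clang;

    CudaArch A = StringToCudaArch("sm_60");          // CudaArch::SM_60
    CudaVirtualArch V = VirtualArchForCudaArch(A);   // COMPUTE_60
    const char *Name = CudaVirtualArchToString(V);   // "compute_60"
    CudaVersion Min = MinVersionForCudaArch(A);      // CudaVersion::CUDA_80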
diff --git a/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp b/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp
index 7cf7305827fe..f10d156743b2 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Diagnostic.cpp
@@ -68,6 +68,7 @@ DiagnosticsEngine::DiagnosticsEngine(
WarningsAsErrors = false;
EnableAllWarnings = false;
ErrorsAsFatal = false;
+ FatalsAsError = false;
SuppressSystemWarnings = false;
SuppressAllDiagnostics = false;
ElideType = true;
diff --git a/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp b/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp
index a34c7fecb53b..3c370f67fa32 100644
--- a/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/DiagnosticIDs.cpp
@@ -351,7 +351,7 @@ bool DiagnosticIDs::isDefaultMappingAsError(unsigned DiagID) {
if (DiagID >= diag::DIAG_UPPER_LIMIT)
return false;
- return GetDefaultDiagMapping(DiagID).getSeverity() == diag::Severity::Error;
+ return GetDefaultDiagMapping(DiagID).getSeverity() >= diag::Severity::Error;
}
/// getDescription - Given a diagnostic ID, return a description of the
@@ -462,6 +462,12 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc,
Result = diag::Severity::Fatal;
}
+ // If explicitly requested, map fatal errors to errors.
+ if (Result == diag::Severity::Fatal) {
+ if (Diag.FatalsAsError)
+ Result = diag::Severity::Error;
+ }
+
// Custom diagnostics always are emitted in system headers.
bool ShowInSystemHeader =
!GetDiagInfo(DiagID) || GetDiagInfo(DiagID)->WarnShowInSystemHeader;
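With the new flag set, getDiagnosticSeverity demotes Severity::Fatal to Severity::Error so compilation can proceed past otherwise-fatal diagnostics. A hedged sketch of a call site; the setter name is an assumption inferred from the FatalsAsError member initialized in Diagnostic.cpp above:

    // `Diags` is a clang::DiagnosticsEngine; setFatalsAsError is assumed
    // to be the accessor that flips the member added in this patch.
    Diags.setFatalsAsError(true);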
diff --git a/contrib/llvm/tools/clang/lib/Basic/FileManager.cpp b/contrib/llvm/tools/clang/lib/Basic/FileManager.cpp
index cb3f75c25a0b..ce9b7e1bb48c 100644
--- a/contrib/llvm/tools/clang/lib/Basic/FileManager.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/FileManager.cpp
@@ -19,7 +19,6 @@
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
-#include "clang/Frontend/PCHContainerOperations.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/ADT/STLExtras.h"
@@ -124,7 +123,7 @@ static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr,
void FileManager::addAncestorsAsVirtualDirs(StringRef Path) {
StringRef DirName = llvm::sys::path::parent_path(Path);
if (DirName.empty())
- return;
+ DirName = ".";
auto &NamedDirEnt =
*SeenDirEntries.insert(std::make_pair(DirName, nullptr)).first;
@@ -313,6 +312,9 @@ const FileEntry *FileManager::getFile(StringRef Filename, bool openFile,
UFE.InPCH = Data.InPCH;
UFE.File = std::move(F);
UFE.IsValid = true;
+ if (UFE.File)
+ if (auto RealPathName = UFE.File->getName())
+ UFE.RealPathName = *RealPathName;
return &UFE;
}
@@ -564,7 +566,3 @@ void FileManager::PrintStats() const {
//llvm::errs() << PagesMapped << BytesOfPagesMapped << FSLookups;
}
-
-// Virtual destructors for abstract base classes that need live in Basic.
-PCHContainerWriter::~PCHContainerWriter() {}
-PCHContainerReader::~PCHContainerReader() {}
diff --git a/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp b/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp
index 67de1cb6fdaa..d6ad0f5c9158 100644
--- a/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/IdentifierTable.cpp
@@ -42,6 +42,7 @@ IdentifierInfo::IdentifierInfo() {
NeedsHandleIdentifier = false;
IsFromAST = false;
ChangedAfterLoad = false;
+ FEChangedAfterLoad = false;
RevertedTokenID = false;
OutOfDate = false;
IsModulesImport = false;
diff --git a/contrib/llvm/tools/clang/lib/Basic/LangOptions.cpp b/contrib/llvm/tools/clang/lib/Basic/LangOptions.cpp
index 1b08b0686046..8c0ecd46ad55 100644
--- a/contrib/llvm/tools/clang/lib/Basic/LangOptions.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/LangOptions.cpp
@@ -34,7 +34,6 @@ void LangOptions::resetNonModularOptions() {
SanitizerBlacklistFiles.clear();
CurrentModule.clear();
- ImplementationOfModule.clear();
}
bool LangOptions::isNoBuiltinFunc(const char *Name) const {
diff --git a/contrib/llvm/tools/clang/lib/Basic/Module.cpp b/contrib/llvm/tools/clang/lib/Basic/Module.cpp
index 0b7832636943..3d1a40db5ea2 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Module.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Module.cpp
@@ -418,12 +418,8 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
OS.indent(Indent + 2);
OS << "export ";
printModuleId(OS, UnresolvedExports[I].Id);
- if (UnresolvedExports[I].Wildcard) {
- if (UnresolvedExports[I].Id.empty())
- OS << "*";
- else
- OS << ".*";
- }
+ if (UnresolvedExports[I].Wildcard)
+ OS << (UnresolvedExports[I].Id.empty() ? "*" : ".*");
OS << "\n";
}
@@ -486,12 +482,13 @@ void Module::print(raw_ostream &OS, unsigned Indent) const {
OS << "}\n";
}
-void Module::dump() const {
+LLVM_DUMP_METHOD void Module::dump() const {
print(llvm::errs());
}
void VisibleModuleSet::setVisible(Module *M, SourceLocation Loc,
VisibleCallback Vis, ConflictCallback Cb) {
+ assert(Loc.isValid() && "setVisible expects a valid import location");
if (isVisible(M))
return;
diff --git a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp
index 577132dc1442..d1e4779e2c72 100644
--- a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp
@@ -55,6 +55,7 @@ OpenMPClauseKind clang::getOpenMPClauseKind(StringRef Str) {
return llvm::StringSwitch<OpenMPClauseKind>(Str)
#define OPENMP_CLAUSE(Name, Class) .Case(#Name, OMPC_##Name)
#include "clang/Basic/OpenMPKinds.def"
+ .Case("uniform", OMPC_uniform)
.Default(OMPC_unknown);
}
@@ -67,6 +68,8 @@ const char *clang::getOpenMPClauseName(OpenMPClauseKind Kind) {
case OMPC_##Name: \
return #Name;
#include "clang/Basic/OpenMPKinds.def"
+ case OMPC_uniform:
+ return "uniform";
case OMPC_threadprivate:
return "threadprivate or thread local";
}
@@ -109,6 +112,19 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind,
#define OPENMP_MAP_KIND(Name) .Case(#Name, OMPC_MAP_##Name)
#include "clang/Basic/OpenMPKinds.def"
.Default(OMPC_MAP_unknown);
+ case OMPC_dist_schedule:
+ return llvm::StringSwitch<OpenMPDistScheduleClauseKind>(Str)
+#define OPENMP_DIST_SCHEDULE_KIND(Name) .Case(#Name, OMPC_DIST_SCHEDULE_##Name)
+#include "clang/Basic/OpenMPKinds.def"
+ .Default(OMPC_DIST_SCHEDULE_unknown);
+ case OMPC_defaultmap:
+ return llvm::StringSwitch<unsigned>(Str)
+#define OPENMP_DEFAULTMAP_KIND(Name) \
+ .Case(#Name, static_cast<unsigned>(OMPC_DEFAULTMAP_##Name))
+#define OPENMP_DEFAULTMAP_MODIFIER(Name) \
+ .Case(#Name, static_cast<unsigned>(OMPC_DEFAULTMAP_MODIFIER_##Name))
+#include "clang/Basic/OpenMPKinds.def"
+ .Default(OMPC_DEFAULTMAP_unknown);
case OMPC_unknown:
case OMPC_threadprivate:
case OMPC_if:
@@ -145,6 +161,11 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind,
case OMPC_nogroup:
case OMPC_num_tasks:
case OMPC_hint:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
break;
}
llvm_unreachable("Invalid OpenMP simple clause kind");
@@ -219,6 +240,30 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
break;
}
llvm_unreachable("Invalid OpenMP 'map' clause type");
+ case OMPC_dist_schedule:
+ switch (Type) {
+ case OMPC_DIST_SCHEDULE_unknown:
+ return "unknown";
+#define OPENMP_DIST_SCHEDULE_KIND(Name) \
+ case OMPC_DIST_SCHEDULE_##Name: \
+ return #Name;
+#include "clang/Basic/OpenMPKinds.def"
+ }
+ llvm_unreachable("Invalid OpenMP 'dist_schedule' clause type");
+ case OMPC_defaultmap:
+ switch (Type) {
+ case OMPC_DEFAULTMAP_unknown:
+ case OMPC_DEFAULTMAP_MODIFIER_last:
+ return "unknown";
+#define OPENMP_DEFAULTMAP_KIND(Name) \
+ case OMPC_DEFAULTMAP_##Name: \
+ return #Name;
+#define OPENMP_DEFAULTMAP_MODIFIER(Name) \
+ case OMPC_DEFAULTMAP_MODIFIER_##Name: \
+ return #Name;
+#include "clang/Basic/OpenMPKinds.def"
+ }
+ llvm_unreachable("Invalid OpenMP 'schedule' clause type");
case OMPC_unknown:
case OMPC_threadprivate:
case OMPC_if:
@@ -255,6 +300,11 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
case OMPC_nogroup:
case OMPC_num_tasks:
case OMPC_hint:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
break;
}
llvm_unreachable("Invalid OpenMP simple clause kind");
@@ -398,6 +448,56 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
break;
}
break;
+ case OMPD_target_enter_data:
+ switch (CKind) {
+#define OPENMP_TARGET_ENTER_DATA_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_target_exit_data:
+ switch (CKind) {
+#define OPENMP_TARGET_EXIT_DATA_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_target_parallel:
+ switch (CKind) {
+#define OPENMP_TARGET_PARALLEL_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_target_parallel_for:
+ switch (CKind) {
+#define OPENMP_TARGET_PARALLEL_FOR_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_target_update:
+ switch (CKind) {
+#define OPENMP_TARGET_UPDATE_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
case OMPD_teams:
switch (CKind) {
#define OPENMP_TEAMS_CLAUSE(Name) \
@@ -408,6 +508,8 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
break;
}
break;
+ case OMPD_declare_simd:
+ break;
case OMPD_cancel:
switch (CKind) {
#define OPENMP_CANCEL_CLAUSE(Name) \
@@ -468,6 +570,48 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
break;
}
break;
+ case OMPD_distribute_parallel_for:
+ switch (CKind) {
+#define OPENMP_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_distribute_parallel_for_simd:
+ switch (CKind) {
+#define OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_distribute_simd:
+ switch (CKind) {
+#define OPENMP_DISTRIBUTE_SIMD_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_target_parallel_for_simd:
+ switch (CKind) {
+#define OPENMP_TARGET_PARALLEL_FOR_SIMD_CLAUSE(Name) \
+ case OMPC_##Name: \
+ return true;
+#include "clang/Basic/OpenMPKinds.def"
+ default:
+ break;
+ }
+ break;
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
case OMPD_unknown:
case OMPD_threadprivate:
case OMPD_section:
@@ -477,6 +621,7 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
case OMPD_taskwait:
case OMPD_taskgroup:
case OMPD_cancellation_point:
+ case OMPD_declare_reduction:
break;
}
return false;
@@ -485,17 +630,25 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_simd || DKind == OMPD_for || DKind == OMPD_for_simd ||
DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd ||
- DKind == OMPD_taskloop ||
- DKind == OMPD_taskloop_simd ||
- DKind == OMPD_distribute; // TODO add next directives.
+ DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd ||
+ DKind == OMPD_distribute || DKind == OMPD_target_parallel_for ||
+ DKind == OMPD_distribute_parallel_for ||
+ DKind == OMPD_distribute_parallel_for_simd ||
+ DKind == OMPD_distribute_simd ||
+ DKind == OMPD_target_parallel_for_simd;
+ // TODO add next directives.
}
bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_for || DKind == OMPD_for_simd ||
DKind == OMPD_sections || DKind == OMPD_section ||
DKind == OMPD_single || DKind == OMPD_parallel_for ||
- DKind == OMPD_parallel_for_simd ||
- DKind == OMPD_parallel_sections; // TODO add next directives.
+ DKind == OMPD_parallel_for_simd || DKind == OMPD_parallel_sections ||
+ DKind == OMPD_target_parallel_for ||
+ DKind == OMPD_distribute_parallel_for ||
+ DKind == OMPD_distribute_parallel_for_simd ||
+ DKind == OMPD_target_parallel_for_simd;
+ // TODO add next directives.
}
bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
@@ -504,12 +657,24 @@ bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_parallel || DKind == OMPD_parallel_for ||
- DKind == OMPD_parallel_for_simd ||
- DKind == OMPD_parallel_sections; // TODO add next directives.
+ DKind == OMPD_parallel_for_simd || DKind == OMPD_parallel_sections ||
+ DKind == OMPD_target_parallel || DKind == OMPD_target_parallel_for ||
+ DKind == OMPD_distribute_parallel_for ||
+ DKind == OMPD_distribute_parallel_for_simd ||
+ DKind == OMPD_target_parallel_for_simd;
+ // TODO add next directives.
}
-bool clang::isOpenMPTargetDirective(OpenMPDirectiveKind DKind) {
- return DKind == OMPD_target; // TODO add next directives.
+bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
+ // TODO add next directives.
+ return DKind == OMPD_target || DKind == OMPD_target_parallel ||
+ DKind == OMPD_target_parallel_for ||
+ DKind == OMPD_target_parallel_for_simd;
+}
+
+bool clang::isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind) {
+ return DKind == OMPD_target_data || DKind == OMPD_target_enter_data ||
+ DKind == OMPD_target_exit_data || DKind == OMPD_target_update;
}
bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
@@ -518,12 +683,17 @@ bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
return DKind == OMPD_simd || DKind == OMPD_for_simd ||
- DKind == OMPD_parallel_for_simd ||
- DKind == OMPD_taskloop_simd; // TODO add next directives.
+ DKind == OMPD_parallel_for_simd || DKind == OMPD_taskloop_simd ||
+ DKind == OMPD_distribute_parallel_for_simd ||
+ DKind == OMPD_distribute_simd;
+ // TODO add next directives.
}
bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) {
- return Kind == OMPD_distribute; // TODO add next directives.
+ return Kind == OMPD_distribute || Kind == OMPD_distribute_parallel_for ||
+ Kind == OMPD_distribute_parallel_for_simd ||
+ Kind == OMPD_distribute_simd;
+ // TODO add next directives.
}
bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) {
@@ -536,3 +706,12 @@ bool clang::isOpenMPThreadPrivate(OpenMPClauseKind Kind) {
return Kind == OMPC_threadprivate || Kind == OMPC_copyin;
}
+bool clang::isOpenMPTaskingDirective(OpenMPDirectiveKind Kind) {
+ return Kind == OMPD_task || isOpenMPTaskLoopDirective(Kind);
+}
+
+bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
+ return Kind == OMPD_distribute_parallel_for ||
+ Kind == OMPD_distribute_parallel_for_simd ||
+ Kind == OMPD_distribute_simd;
+}
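The extended predicates recognize the OpenMP 4.5 combined directives wired up above. A sketch (array parameters assumed):

    void scale(int n, float *a, const float *b) {
      // isOpenMPLoopDirective, isOpenMPDistributeDirective and
      // isOpenMPSimdDirective all return true for this directive kind.
      #pragma omp target teams
      #pragma omp distribute parallel for simd
      for (int i = 0; i < n; ++i)
        a[i] += b[i];

      // isOpenMPTargetExecutionDirective covers the combined target forms.
      #pragma omp target parallel for
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0f;
    }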
diff --git a/contrib/llvm/tools/clang/lib/Basic/OperatorPrecedence.cpp b/contrib/llvm/tools/clang/lib/Basic/OperatorPrecedence.cpp
index ade8d6d841df..384d23c38af5 100644
--- a/contrib/llvm/tools/clang/lib/Basic/OperatorPrecedence.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/OperatorPrecedence.cpp
@@ -53,6 +53,7 @@ prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator,
case tok::pipeequal: return prec::Assignment;
case tok::question: return prec::Conditional;
case tok::pipepipe: return prec::LogicalOr;
+ case tok::caretcaret:
case tok::ampamp: return prec::LogicalAnd;
case tok::pipe: return prec::InclusiveOr;
case tok::caret: return prec::ExclusiveOr;
diff --git a/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp b/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp
index 4c501616a3e8..1e83b63cf82e 100644
--- a/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp
@@ -1160,7 +1160,8 @@ unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos,
// isInvalid - Return the result of calling loc.isInvalid(), and
// if Invalid is not null, set its value to same.
-static bool isInvalid(SourceLocation Loc, bool *Invalid) {
+template<typename LocType>
+static bool isInvalid(LocType Loc, bool *Invalid) {
bool MyInvalid = Loc.isInvalid();
if (Invalid)
*Invalid = MyInvalid;
@@ -1183,8 +1184,9 @@ unsigned SourceManager::getExpansionColumnNumber(SourceLocation Loc,
unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc,
bool *Invalid) const {
- if (isInvalid(Loc, Invalid)) return 0;
- return getPresumedLoc(Loc).getColumn();
+ PresumedLoc PLoc = getPresumedLoc(Loc);
+ if (isInvalid(PLoc, Invalid)) return 0;
+ return PLoc.getColumn();
}
#ifdef __SSE2__
@@ -1258,15 +1260,19 @@ FoundSpecialChar:
if (Buf[0] == '\n' || Buf[0] == '\r') {
// If this is \n\r or \r\n, skip both characters.
- if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
- ++Offs, ++Buf;
- ++Offs, ++Buf;
+ if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) {
+ ++Offs;
+ ++Buf;
+ }
+ ++Offs;
+ ++Buf;
LineOffsets.push_back(Offs);
} else {
// Otherwise, this is a null. If end of file, exit.
if (Buf == End) break;
// Otherwise, skip the null.
- ++Offs, ++Buf;
+ ++Offs;
+ ++Buf;
}
}
@@ -1388,8 +1394,9 @@ unsigned SourceManager::getExpansionLineNumber(SourceLocation Loc,
}
unsigned SourceManager::getPresumedLineNumber(SourceLocation Loc,
bool *Invalid) const {
- if (isInvalid(Loc, Invalid)) return 0;
- return getPresumedLoc(Loc).getLine();
+ PresumedLoc PLoc = getPresumedLoc(Loc);
+ if (isInvalid(PLoc, Invalid)) return 0;
+ return PLoc.getLine();
}
/// getFileCharacteristic - return the file characteristic of the specified
@@ -2089,10 +2096,10 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
// Clear the lookup cache, it depends on a common location.
IsBeforeInTUCache.clear();
- llvm::MemoryBuffer *LBuf = getBuffer(LOffs.first);
- llvm::MemoryBuffer *RBuf = getBuffer(ROffs.first);
- bool LIsBuiltins = strcmp("<built-in>", LBuf->getBufferIdentifier()) == 0;
- bool RIsBuiltins = strcmp("<built-in>", RBuf->getBufferIdentifier()) == 0;
+ const char *LB = getBuffer(LOffs.first)->getBufferIdentifier();
+ const char *RB = getBuffer(ROffs.first)->getBufferIdentifier();
+ bool LIsBuiltins = strcmp("<built-in>", LB) == 0;
+ bool RIsBuiltins = strcmp("<built-in>", RB) == 0;
// Sort built-in before non-built-in.
if (LIsBuiltins || RIsBuiltins) {
if (LIsBuiltins != RIsBuiltins)
@@ -2101,8 +2108,8 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
// lower IDs come first.
return LOffs.first < ROffs.first;
}
- bool LIsAsm = strcmp("<inline asm>", LBuf->getBufferIdentifier()) == 0;
- bool RIsAsm = strcmp("<inline asm>", RBuf->getBufferIdentifier()) == 0;
+ bool LIsAsm = strcmp("<inline asm>", LB) == 0;
+ bool RIsAsm = strcmp("<inline asm>", RB) == 0;
// Sort assembler after built-ins, but before the rest.
if (LIsAsm || RIsAsm) {
if (LIsAsm != RIsAsm)
@@ -2110,6 +2117,14 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
assert(LOffs.first == ROffs.first);
return false;
}
+ bool LIsScratch = strcmp("<scratch space>", LB) == 0;
+ bool RIsScratch = strcmp("<scratch space>", RB) == 0;
+ // Sort scratch after inline asm, but before the rest.
+ if (LIsScratch || RIsScratch) {
+ if (LIsScratch != RIsScratch)
+ return LIsScratch;
+ return LOffs.second < ROffs.second;
+ }
llvm_unreachable("Unsortable locations found");
}
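The three special buffers now form a total order for locations with no common parent; restated as a hedged rank function (identifiers per the strcmp calls above):

    #include <cstring>

    // <built-in> sorts first, then <inline asm>, then <scratch space>; two
    // locations in the same special buffer compare by offset.
    static int specialBufferRank(const char *Ident) {
      if (std::strcmp(Ident, "<built-in>") == 0)      return 0;
      if (std::strcmp(Ident, "<inline asm>") == 0)    return 1;
      if (std::strcmp(Ident, "<scratch space>") == 0) return 2;
      return 3; // two ordinary buffers here reach the llvm_unreachable above
    }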
diff --git a/contrib/llvm/tools/clang/lib/Basic/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/Basic/TargetInfo.cpp
index 1648a27d8b37..92f658a6a37f 100644
--- a/contrib/llvm/tools/clang/lib/Basic/TargetInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/TargetInfo.cpp
@@ -30,6 +30,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
BigEndian = true;
TLSSupported = true;
NoAsmVariants = false;
+ HasFloat128 = false;
PointerWidth = PointerAlign = 32;
BoolWidth = BoolAlign = 8;
IntWidth = IntAlign = 32;
@@ -46,6 +47,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
DoubleAlign = 64;
LongDoubleWidth = 64;
LongDoubleAlign = 64;
+ Float128Align = 128;
LargeArrayMinWidth = 0;
LargeArrayAlign = 0;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
@@ -66,13 +68,13 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
UseSignedCharForObjCBool = true;
UseBitFieldTypeAlignment = true;
UseZeroLengthBitfieldAlignment = false;
+ UseExplicitBitFieldAlignment = true;
ZeroLengthBitfieldBoundary = 0;
HalfFormat = &llvm::APFloat::IEEEhalf;
FloatFormat = &llvm::APFloat::IEEEsingle;
DoubleFormat = &llvm::APFloat::IEEEdouble;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
- DataLayoutString = nullptr;
- UserLabelPrefix = "_";
+ Float128Format = &llvm::APFloat::IEEEquad;
MCountName = "mcount";
RegParmMax = 0;
SSERegParmMax = 0;
@@ -224,6 +226,8 @@ TargetInfo::RealType TargetInfo::getRealTypeByWidth(unsigned BitWidth) const {
if (&getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble ||
&getLongDoubleFormat() == &llvm::APFloat::IEEEquad)
return LongDouble;
+ if (hasFloat128Type())
+ return Float128;
break;
}
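getRealTypeByWidth() backs GCC-style mode attributes, so the new branch lets a 128-bit request resolve on targets that report hasFloat128Type(); a hedged illustration in user code:

    // Assumes a target reporting hasFloat128Type(); TF is GCC's 128-bit
    // floating-point mode.
    typedef float quad_t __attribute__((mode(TF)));
    static_assert(sizeof(quad_t) == 16, "TF now resolves to a 128-bit type");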
@@ -276,6 +280,10 @@ void TargetInfo::adjust(const LangOptions &Opts) {
UseBitFieldTypeAlignment = false;
if (Opts.ShortWChar)
WCharType = UnsignedShort;
+ if (Opts.AlignDouble) {
+ DoubleAlign = LongLongAlign = 64;
+ LongDoubleAlign = 64;
+ }
if (Opts.OpenCL) {
// OpenCL C requires specific widths for types, irrespective of
diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
index 7fce33af1809..be5d4ad8feda 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp
@@ -12,12 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/APFloat.h"
@@ -65,6 +66,9 @@ static void defineCPUMacros(MacroBuilder &Builder, StringRef CPUName,
Builder.defineMacro("__tune_" + CPUName + "__");
}
+static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
+ const TargetOptions &Opts);
+
//===----------------------------------------------------------------------===//
// Defines specific to certain operating systems.
//===----------------------------------------------------------------------===//
@@ -76,7 +80,8 @@ protected:
virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const=0;
public:
- OSTargetInfo(const llvm::Triple &Triple) : TgtInfo(Triple) {}
+ OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : TgtInfo(Triple, Opts) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
TgtInfo::getTargetDefines(Opts, Builder);
@@ -101,10 +106,8 @@ protected:
}
public:
- CloudABITargetInfo(const llvm::Triple &Triple)
- : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
- }
+ CloudABITargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {}
};
static void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
@@ -139,7 +142,7 @@ static void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
unsigned Maj, Min, Rev;
if (Triple.isMacOSX()) {
Triple.getMacOSXVersion(Maj, Min, Rev);
- PlatformName = "macosx";
+ PlatformName = "macos";
} else {
Triple.getOSVersion(Maj, Min, Rev);
PlatformName = llvm::Triple::getOSTypeName(Triple.getOS());
@@ -155,14 +158,25 @@ static void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
// Set the appropriate OS version define.
if (Triple.isiOS()) {
- assert(Maj < 10 && Min < 100 && Rev < 100 && "Invalid version!");
- char Str[6];
- Str[0] = '0' + Maj;
- Str[1] = '0' + (Min / 10);
- Str[2] = '0' + (Min % 10);
- Str[3] = '0' + (Rev / 10);
- Str[4] = '0' + (Rev % 10);
- Str[5] = '\0';
+ assert(Maj < 100 && Min < 100 && Rev < 100 && "Invalid version!");
+ char Str[7];
+ if (Maj < 10) {
+ Str[0] = '0' + Maj;
+ Str[1] = '0' + (Min / 10);
+ Str[2] = '0' + (Min % 10);
+ Str[3] = '0' + (Rev / 10);
+ Str[4] = '0' + (Rev % 10);
+ Str[5] = '\0';
+ } else {
+ // Handle versions >= 10.
+ Str[0] = '0' + (Maj / 10);
+ Str[1] = '0' + (Maj % 10);
+ Str[2] = '0' + (Min / 10);
+ Str[3] = '0' + (Min % 10);
+ Str[4] = '0' + (Rev / 10);
+ Str[5] = '0' + (Rev % 10);
+ Str[6] = '\0';
+ }
if (Triple.isTvOS())
Builder.defineMacro("__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__", Str);
else
@@ -209,6 +223,10 @@ static void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts,
if (Triple.isOSDarwin())
Builder.defineMacro("__MACH__");
+ // The Watch ABI uses Dwarf EH.
+  if (Triple.isWatchABI())
+ Builder.defineMacro("__ARM_DWARF_EH__");
+
PlatformMinVersion = VersionTuple(Maj, Min, Rev);
}
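The widened buffer accommodates two-digit major versions (iOS/tvOS 10 and later). A sketch of the encoding as a standalone helper (the name is illustrative): minor and revision always take two digits each, and the major grows from one digit to two at 10, so 9.3.2 encodes as "90302" and 10.2.1 as "100201".

    #include <cstdio>
    #include <string>

    static std::string encodeDarwinVersion(unsigned Maj, unsigned Min,
                                           unsigned Rev) {
      char Buf[8]; // at most 6 digits plus the terminator
      if (Maj < 10)
        std::snprintf(Buf, sizeof(Buf), "%u%02u%02u", Maj, Min, Rev);
      else
        std::snprintf(Buf, sizeof(Buf), "%02u%02u%02u", Maj, Min, Rev);
      return Buf;
    }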
@@ -222,7 +240,8 @@ protected:
}
public:
- DarwinTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
+ DarwinTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
// By default, no TLS, and we whitelist permitted architecture/OS
// combinations.
this->TLSSupported = false;
@@ -264,6 +283,13 @@ public:
bool hasProtectedVisibility() const override {
return false;
}
+
+ unsigned getExnObjectAlignment() const override {
+    // The alignment of an exception object is 8 bytes on Darwin, since
+    // libc++abi doesn't declare _Unwind_Exception with __attribute__((aligned))
+    // and therefore doesn't guarantee 16-byte alignment.
+    return 64; // in bits, i.e. 8 bytes
+ }
};
@@ -282,10 +308,8 @@ protected:
DefineStd(Builder, "unix", Opts);
}
public:
- DragonFlyBSDTargetInfo(const llvm::Triple &Triple)
- : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
-
+ DragonFlyBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
@@ -333,9 +357,8 @@ protected:
Builder.defineMacro("__STDC_MB_MIGHT_NEQ_WC__", "1");
}
public:
- FreeBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
-
+ FreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
@@ -374,9 +397,30 @@ protected:
Builder.defineMacro("_GNU_SOURCE");
}
public:
- KFreeBSDTargetInfo(const llvm::Triple &Triple)
- : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ KFreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {}
+};
+
+// Haiku Target
+template<typename Target>
+class HaikuTargetInfo : public OSTargetInfo<Target> {
+protected:
+ void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
+ MacroBuilder &Builder) const override {
+    // Haiku defines; list based on gcc output
+ Builder.defineMacro("__HAIKU__");
+ Builder.defineMacro("__ELF__");
+ DefineStd(Builder, "unix", Opts);
+ }
+public:
+ HaikuTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
+ this->SizeType = TargetInfo::UnsignedLong;
+ this->IntPtrType = TargetInfo::SignedLong;
+ this->PtrDiffType = TargetInfo::SignedLong;
+ this->ProcessIDType = TargetInfo::SignedLong;
+ this->TLSSupported = false;
}
};
@@ -399,9 +443,8 @@ protected:
DefineStd(Builder, "unix", Opts);
}
public:
- MinixTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
- }
+ MinixTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {}
};
// Linux target
@@ -426,10 +469,12 @@ protected:
Builder.defineMacro("_REENTRANT");
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
+ if (this->HasFloat128)
+ Builder.defineMacro("__FLOAT128__");
}
public:
- LinuxTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ LinuxTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->WIntType = TargetInfo::UnsignedInt;
switch (Triple.getArch()) {
@@ -440,6 +485,11 @@ public:
case llvm::Triple::ppc64le:
this->MCountName = "_mcount";
break;
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ case llvm::Triple::systemz:
+ this->HasFloat128 = true;
+ break;
}
}
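A hedged sketch of what the new define unlocks in user code on targets where the switch above sets HasFloat128 (the q literal suffix follows GCC):

    #ifdef __FLOAT128__
    __float128 wide = 1.0q; // IEEE binary128 on x86, x86-64, and SystemZ Linux
    #else
    long double wide = 1.0L; // fallback
    #endif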
@@ -473,8 +523,8 @@ protected:
}
}
public:
- NetBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ NetBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "_mcount";
}
};
@@ -494,8 +544,8 @@ protected:
Builder.defineMacro("_REENTRANT");
}
public:
- OpenBSDTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->TLSSupported = false;
switch (Triple.getArch()) {
@@ -542,8 +592,8 @@ protected:
}
}
public:
- BitrigTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ BitrigTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
}
};
@@ -561,9 +611,7 @@ protected:
Builder.defineMacro("__ELF__");
}
public:
- PSPTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
- }
+  PSPTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : OSTargetInfo<Target>(Triple, Opts) {}
};
// PS3 PPU Target
@@ -582,14 +630,14 @@ protected:
Builder.defineMacro("__powerpc64__");
}
public:
- PS3PPUTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ PS3PPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->LongWidth = this->LongAlign = 32;
this->PointerWidth = this->PointerAlign = 32;
this->IntMaxType = TargetInfo::SignedLongLong;
this->Int64Type = TargetInfo::SignedLongLong;
this->SizeType = TargetInfo::UnsignedInt;
- this->DataLayoutString = "E-m:e-p:32:32-i64:64-n32:64";
+ this->resetDataLayout("E-m:e-p:32:32-i64:64-n32:64");
}
};
@@ -603,15 +651,19 @@ protected:
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__ELF__");
- Builder.defineMacro("__PS4__");
+ Builder.defineMacro("__ORBIS__");
}
public:
- PS4OSTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
+ PS4OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = this->UnsignedShort;
// On PS4, TLS variables cannot be aligned to more than 32 bytes (256 bits).
this->MaxTLSAlign = 256;
- this->UserLabelPrefix = "";
+
+ // On PS4, do not honor explicit bit field alignment,
+ // as in "__attribute__((aligned(2))) int b : 1;".
+ this->UseExplicitBitFieldAlignment = false;
switch (Triple.getArch()) {
default:
@@ -649,8 +701,8 @@ protected:
Builder.defineMacro("_REENTRANT");
}
public:
- SolarisTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ SolarisTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->WCharType = this->SignedInt;
// FIXME: WIntType should be SignedLong
}
@@ -694,6 +746,13 @@ protected:
if (Opts.CPlusPlus11 && Opts.isCompatibleWithMSVC(LangOptions::MSVC2015))
Builder.defineMacro("_HAS_CHAR16_T_LANGUAGE_SUPPORT", Twine(1));
+
+ if (Opts.isCompatibleWithMSVC(LangOptions::MSVC2015)) {
+ if (Opts.CPlusPlus1z)
+ Builder.defineMacro("_MSVC_LANG", "201403L");
+ else if (Opts.CPlusPlus14)
+ Builder.defineMacro("_MSVC_LANG", "201402L");
+ }
}
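A usage sketch for the new macro (values per the hunk above; MSVC itself reports _MSVC_LANG starting with VS 2015 Update 3):

    #if defined(_MSVC_LANG) && _MSVC_LANG > 201402L
      // C++1z (or later) language mode
    #elif defined(_MSVC_LANG)
      // C++14 language mode
    #endif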
if (Opts.MicrosoftExt) {
@@ -710,8 +769,8 @@ protected:
}
public:
- WindowsTargetInfo(const llvm::Triple &Triple)
- : OSTargetInfo<Target>(Triple) {}
+ WindowsTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {}
};
template <typename Target>
@@ -730,8 +789,8 @@ protected:
}
public:
- NaClTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
+ NaClTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->LongAlign = 32;
this->LongWidth = 32;
this->PointerAlign = 32;
@@ -751,14 +810,14 @@ public:
if (Triple.getArch() == llvm::Triple::arm) {
// Handled in ARM's setABI().
} else if (Triple.getArch() == llvm::Triple::x86) {
- this->DataLayoutString = "e-m:e-p:32:32-i64:64-n8:16:32-S128";
+ this->resetDataLayout("e-m:e-p:32:32-i64:64-n8:16:32-S128");
} else if (Triple.getArch() == llvm::Triple::x86_64) {
- this->DataLayoutString = "e-m:e-p:32:32-i64:64-n8:16:32:64-S128";
+ this->resetDataLayout("e-m:e-p:32:32-i64:64-n8:16:32:64-S128");
} else if (Triple.getArch() == llvm::Triple::mipsel) {
- // Handled on mips' setDataLayoutString.
+      // Handled in MIPS's setDataLayout().
} else {
assert(Triple.getArch() == llvm::Triple::le32);
- this->DataLayoutString = "e-p:32:32-i64:64";
+ this->resetDataLayout("e-p:32:32-i64:64");
}
}
};
@@ -782,10 +841,10 @@ class WebAssemblyOSTargetInfo : public OSTargetInfo<Target> {
}
public:
- explicit WebAssemblyOSTargetInfo(const llvm::Triple &Triple)
- : OSTargetInfo<Target>(Triple) {
+ explicit WebAssemblyOSTargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
this->MCountName = "__mcount";
- this->UserLabelPrefix = "";
this->TheCXXABI.set(TargetCXXABI::WebAssembly);
}
};
@@ -815,7 +874,7 @@ protected:
std::string ABI;
public:
- PPCTargetInfo(const llvm::Triple &Triple)
+ PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple), HasVSX(false), HasP8Vector(false),
HasP8Crypto(false), HasDirectMove(false), HasQPX(false), HasHTM(false),
HasBPERMD(false), HasExtDiv(false) {
@@ -841,8 +900,9 @@ public:
ArchDefinePwr6x = 1 << 10,
ArchDefinePwr7 = 1 << 11,
ArchDefinePwr8 = 1 << 12,
- ArchDefineA2 = 1 << 13,
- ArchDefineA2q = 1 << 14
+ ArchDefinePwr9 = 1 << 13,
+ ArchDefineA2 = 1 << 14,
+ ArchDefineA2q = 1 << 15
} ArchDefineTypes;
// Note: GCC recognizes the following additional cpus:
@@ -891,6 +951,8 @@ public:
.Case("pwr7", true)
.Case("power8", true)
.Case("pwr8", true)
+ .Case("power9", true)
+ .Case("pwr9", true)
.Case("powerpc", true)
.Case("ppc", true)
.Case("powerpc64", true)
@@ -1093,6 +1155,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasQPX = true;
} else if (Feature == "+htm") {
HasHTM = true;
+ } else if (Feature == "+float128") {
+ HasFloat128 = true;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
@@ -1183,6 +1247,10 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
.Case("pwr8", ArchDefineName | ArchDefinePwr7 | ArchDefinePwr6x
| ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5
| ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
+ .Case("pwr9", ArchDefineName | ArchDefinePwr8 | ArchDefinePwr7
+ | ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x
+ | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr
+ | ArchDefinePpcsq)
.Case("power3", ArchDefinePpcgr)
.Case("power4", ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
.Case("power5", ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr
@@ -1200,6 +1268,10 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
.Case("power8", ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6x
| ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5
| ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
+ .Case("power9", ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7
+ | ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x
+ | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr
+ | ArchDefinePpcsq)
.Default(ArchDefineNone);
if (defs & ArchDefineName)
@@ -1228,6 +1300,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("_ARCH_PWR7");
if (defs & ArchDefinePwr8)
Builder.defineMacro("_ARCH_PWR8");
+ if (defs & ArchDefinePwr9)
+ Builder.defineMacro("_ARCH_PWR9");
if (defs & ArchDefineA2)
Builder.defineMacro("_ARCH_A2");
if (defs & ArchDefineA2q) {
@@ -1250,6 +1324,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__CRYPTO__");
if (HasHTM)
Builder.defineMacro("__HTM__");
+ if (HasFloat128)
+ Builder.defineMacro("__FLOAT128__");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
@@ -1300,6 +1376,13 @@ static bool ppcUserFeaturesCheck(DiagnosticsEngine &Diags,
<< "-mno-vsx";
return false;
}
+
+ if (std::find(FeaturesVec.begin(), FeaturesVec.end(), "+float128") !=
+ FeaturesVec.end()) {
+ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfloat128"
+ << "-mno-vsx";
+ return false;
+ }
}
return true;
@@ -1318,6 +1401,7 @@ bool PPCTargetInfo::initFeatureMap(
.Case("pwr6", true)
.Case("pwr7", true)
.Case("pwr8", true)
+ .Case("pwr9", true)
.Case("ppc64", true)
.Case("ppc64le", true)
.Default(false);
@@ -1325,28 +1409,34 @@ bool PPCTargetInfo::initFeatureMap(
Features["qpx"] = (CPU == "a2q");
Features["crypto"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
+ .Case("pwr9", true)
.Case("pwr8", true)
.Default(false);
Features["power8-vector"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
+ .Case("pwr9", true)
.Case("pwr8", true)
.Default(false);
Features["bpermd"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
+ .Case("pwr9", true)
.Case("pwr8", true)
.Case("pwr7", true)
.Default(false);
Features["extdiv"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
+ .Case("pwr9", true)
.Case("pwr8", true)
.Case("pwr7", true)
.Default(false);
Features["direct-move"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
+ .Case("pwr9", true)
.Case("pwr8", true)
.Default(false);
Features["vsx"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
+ .Case("pwr9", true)
.Case("pwr8", true)
.Case("pwr7", true)
.Default(false);
@@ -1368,6 +1458,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("htm", HasHTM)
.Case("bpermd", HasBPERMD)
.Case("extdiv", HasExtDiv)
+ .Case("float128", HasFloat128)
.Default(false);
}
@@ -1377,19 +1468,19 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
// as well. Do the inverse if we're disabling vsx. We'll diagnose any user
// incompatible options.
if (Enabled) {
- if (Name == "vsx") {
- Features[Name] = true;
- } else if (Name == "direct-move") {
+ if (Name == "direct-move") {
Features[Name] = Features["vsx"] = true;
} else if (Name == "power8-vector") {
Features[Name] = Features["vsx"] = true;
+ } else if (Name == "float128") {
+ Features[Name] = Features["vsx"] = true;
} else {
Features[Name] = true;
}
} else {
if (Name == "vsx") {
Features[Name] = Features["direct-move"] = Features["power8-vector"] =
- false;
+ Features["float128"] = false;
} else {
Features[Name] = false;
}
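The implication rules this hunk extends, restated as a minimal standalone sketch (std::map stands in for llvm::StringMap):

    #include <map>
    #include <string>

    // Enabling any VSX-dependent feature pulls in vsx; disabling vsx cascades
    // to every dependent, now including float128.
    void setPPCFeature(std::map<std::string, bool> &F, const std::string &Name,
                       bool Enabled) {
      if (Enabled) {
        F[Name] = true;
        if (Name == "direct-move" || Name == "power8-vector" ||
            Name == "float128")
          F["vsx"] = true;
      } else {
        F[Name] = false;
        if (Name == "vsx")
          F["direct-move"] = F["power8-vector"] = F["float128"] = false;
      }
    }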
@@ -1497,8 +1588,9 @@ ArrayRef<TargetInfo::GCCRegAlias> PPCTargetInfo::getGCCRegAliases() const {
class PPC32TargetInfo : public PPCTargetInfo {
public:
- PPC32TargetInfo(const llvm::Triple &Triple) : PPCTargetInfo(Triple) {
- DataLayoutString = "E-m:e-p:32:32-i64:64-n32";
+ PPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : PPCTargetInfo(Triple, Opts) {
+ resetDataLayout("E-m:e-p:32:32-i64:64-n32");
switch (getTriple().getOS()) {
case llvm::Triple::Linux:
@@ -1531,16 +1623,17 @@ public:
// TargetInfo for little endian.
class PPC64TargetInfo : public PPCTargetInfo {
public:
- PPC64TargetInfo(const llvm::Triple &Triple) : PPCTargetInfo(Triple) {
+ PPC64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : PPCTargetInfo(Triple, Opts) {
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
IntMaxType = SignedLong;
Int64Type = SignedLong;
if ((Triple.getArch() == llvm::Triple::ppc64le)) {
- DataLayoutString = "e-m:e-i64:64-n32:64";
+ resetDataLayout("e-m:e-i64:64-n32:64");
ABI = "elfv2";
} else {
- DataLayoutString = "E-m:e-i64:64-n32:64";
+ resetDataLayout("E-m:e-i64:64-n32:64");
ABI = "elfv1";
}
@@ -1573,31 +1666,29 @@ public:
}
};
-class DarwinPPC32TargetInfo :
- public DarwinTargetInfo<PPC32TargetInfo> {
+class DarwinPPC32TargetInfo : public DarwinTargetInfo<PPC32TargetInfo> {
public:
- DarwinPPC32TargetInfo(const llvm::Triple &Triple)
- : DarwinTargetInfo<PPC32TargetInfo>(Triple) {
+ DarwinPPC32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : DarwinTargetInfo<PPC32TargetInfo>(Triple, Opts) {
HasAlignMac68kSupport = true;
BoolWidth = BoolAlign = 32; // XXX: support -mone-byte-bool?
PtrDiffType = SignedInt; // for http://llvm.org/bugs/show_bug.cgi?id=15726
LongLongAlign = 32;
SuitableAlign = 128;
- DataLayoutString = "E-m:o-p:32:32-f64:32:64-n32";
+ resetDataLayout("E-m:o-p:32:32-f64:32:64-n32");
}
BuiltinVaListKind getBuiltinVaListKind() const override {
return TargetInfo::CharPtrBuiltinVaList;
}
};
-class DarwinPPC64TargetInfo :
- public DarwinTargetInfo<PPC64TargetInfo> {
+class DarwinPPC64TargetInfo : public DarwinTargetInfo<PPC64TargetInfo> {
public:
- DarwinPPC64TargetInfo(const llvm::Triple &Triple)
- : DarwinTargetInfo<PPC64TargetInfo>(Triple) {
+ DarwinPPC64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : DarwinTargetInfo<PPC64TargetInfo>(Triple, Opts) {
HasAlignMac68kSupport = true;
SuitableAlign = 128;
- DataLayoutString = "E-m:o-i64:64-n32:64";
+ resetDataLayout("E-m:o-i64:64-n32:64");
}
};
@@ -1615,19 +1706,11 @@ static const unsigned NVPTXAddrSpaceMap[] = {
class NVPTXTargetInfo : public TargetInfo {
static const char *const GCCRegNames[];
static const Builtin::Info BuiltinInfo[];
-
- // The GPU profiles supported by the NVPTX backend
- enum GPUKind {
- GK_NONE,
- GK_SM20,
- GK_SM21,
- GK_SM30,
- GK_SM35,
- GK_SM37,
- } GPU;
+ CudaArch GPU;
public:
- NVPTXTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : TargetInfo(Triple) {
BigEndian = false;
TLSSupported = false;
LongWidth = LongAlign = 64;
@@ -1636,8 +1719,66 @@ public:
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
- // Set the default GPU to sm20
- GPU = GK_SM20;
+ GPU = CudaArch::SM_20;
+
+ // If possible, get a TargetInfo for our host triple, so we can match its
+ // types.
+ llvm::Triple HostTriple(Opts.HostTriple);
+ if (HostTriple.isNVPTX())
+ return;
+ std::unique_ptr<TargetInfo> HostTarget(
+ AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
+ if (!HostTarget) {
+ return;
+ }
+
+ PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0);
+ PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0);
+ BoolWidth = HostTarget->getBoolWidth();
+ BoolAlign = HostTarget->getBoolAlign();
+ IntWidth = HostTarget->getIntWidth();
+ IntAlign = HostTarget->getIntAlign();
+ HalfWidth = HostTarget->getHalfWidth();
+ HalfAlign = HostTarget->getHalfAlign();
+ FloatWidth = HostTarget->getFloatWidth();
+ FloatAlign = HostTarget->getFloatAlign();
+ DoubleWidth = HostTarget->getDoubleWidth();
+ DoubleAlign = HostTarget->getDoubleAlign();
+ LongWidth = HostTarget->getLongWidth();
+ LongAlign = HostTarget->getLongAlign();
+ LongLongWidth = HostTarget->getLongLongWidth();
+ LongLongAlign = HostTarget->getLongLongAlign();
+ MinGlobalAlign = HostTarget->getMinGlobalAlign();
+ DefaultAlignForAttributeAligned =
+ HostTarget->getDefaultAlignForAttributeAligned();
+ SizeType = HostTarget->getSizeType();
+ IntMaxType = HostTarget->getIntMaxType();
+ PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0);
+ IntPtrType = HostTarget->getIntPtrType();
+ WCharType = HostTarget->getWCharType();
+ WIntType = HostTarget->getWIntType();
+ Char16Type = HostTarget->getChar16Type();
+ Char32Type = HostTarget->getChar32Type();
+ Int64Type = HostTarget->getInt64Type();
+ SigAtomicType = HostTarget->getSigAtomicType();
+ ProcessIDType = HostTarget->getProcessIDType();
+
+ UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
+ UseZeroLengthBitfieldAlignment =
+ HostTarget->useZeroLengthBitfieldAlignment();
+ UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
+ ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
+
+ // Properties intentionally not copied from host:
+ // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
+ // host/device boundary.
+ // - SuitableAlign: Not visible across the host/device boundary, and may
+ // correctly be different on host/device, e.g. if host has wider vector
+ // types than device.
+ // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
+ // as its double type, but that's not necessarily true on the host.
+ // TODO: nvcc emits a warning when using long double on device; we should
+ // do the same.
}
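A hedged illustration of the effect (assumes a CUDA device compilation whose host triple is x86_64-unknown-linux-gnu): device-side primitive widths now match the host, so struct layouts shared across the host/device boundary agree.

    // These hold in the device compilation too, not just on the host.
    static_assert(sizeof(long) == 8, "long width copied from the host");
    static_assert(sizeof(void *) == 8, "pointer width copied from the host");
    // LongDoubleWidth is deliberately not copied: on the device, long double
    // keeps the same size as double (see the comment block above).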
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -1645,26 +1786,38 @@ public:
Builder.defineMacro("__NVPTX__");
if (Opts.CUDAIsDevice) {
// Set __CUDA_ARCH__ for the GPU specified.
- std::string CUDAArchCode;
- switch (GPU) {
- case GK_SM20:
- CUDAArchCode = "200";
- break;
- case GK_SM21:
- CUDAArchCode = "210";
- break;
- case GK_SM30:
- CUDAArchCode = "300";
- break;
- case GK_SM35:
- CUDAArchCode = "350";
- break;
- case GK_SM37:
- CUDAArchCode = "370";
- break;
- default:
- llvm_unreachable("Unhandled target CPU");
- }
+ std::string CUDAArchCode = [this] {
+ switch (GPU) {
+ case CudaArch::UNKNOWN:
+ assert(false && "No GPU arch when compiling CUDA device code.");
+ return "";
+ case CudaArch::SM_20:
+ return "200";
+ case CudaArch::SM_21:
+ return "210";
+ case CudaArch::SM_30:
+ return "300";
+ case CudaArch::SM_32:
+ return "320";
+ case CudaArch::SM_35:
+ return "350";
+ case CudaArch::SM_37:
+ return "370";
+ case CudaArch::SM_50:
+ return "500";
+ case CudaArch::SM_52:
+ return "520";
+ case CudaArch::SM_53:
+ return "530";
+ case CudaArch::SM_60:
+ return "600";
+ case CudaArch::SM_61:
+ return "610";
+ case CudaArch::SM_62:
+ return "620";
+ }
+ llvm_unreachable("unhandled CudaArch");
+ }();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
}
}
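A device-code usage sketch for the macro (GPU selected with e.g. --cuda-gpu-arch=sm_60):

    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
      // sm_60 and newer (Pascal), e.g. native double-precision atomicAdd.
    #endif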
@@ -1705,15 +1858,21 @@ public:
return TargetInfo::CharPtrBuiltinVaList;
}
bool setCPU(const std::string &Name) override {
- GPU = llvm::StringSwitch<GPUKind>(Name)
- .Case("sm_20", GK_SM20)
- .Case("sm_21", GK_SM21)
- .Case("sm_30", GK_SM30)
- .Case("sm_35", GK_SM35)
- .Case("sm_37", GK_SM37)
- .Default(GK_NONE);
-
- return GPU != GK_NONE;
+ GPU = StringToCudaArch(Name);
+ return GPU != CudaArch::UNKNOWN;
+ }
+ void setSupportedOpenCLOpts() override {
+ auto &Opts = getSupportedOpenCLOpts();
+ Opts.cl_clang_storage_class_specifiers = 1;
+ Opts.cl_khr_gl_sharing = 1;
+ Opts.cl_khr_icd = 1;
+
+ Opts.cl_khr_fp64 = 1;
+ Opts.cl_khr_byte_addressable_store = 1;
+ Opts.cl_khr_global_int32_base_atomics = 1;
+ Opts.cl_khr_global_int32_extended_atomics = 1;
+ Opts.cl_khr_local_int32_base_atomics = 1;
+ Opts.cl_khr_local_int32_extended_atomics = 1;
}
};
@@ -1733,24 +1892,26 @@ ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
class NVPTX32TargetInfo : public NVPTXTargetInfo {
public:
- NVPTX32TargetInfo(const llvm::Triple &Triple) : NVPTXTargetInfo(Triple) {
+ NVPTX32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : NVPTXTargetInfo(Triple, Opts) {
LongWidth = LongAlign = 32;
PointerWidth = PointerAlign = 32;
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = TargetInfo::SignedInt;
IntPtrType = TargetInfo::SignedInt;
- DataLayoutString = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
+ resetDataLayout("e-p:32:32-i64:64-v16:16-v32:32-n16:32:64");
}
};
class NVPTX64TargetInfo : public NVPTXTargetInfo {
public:
- NVPTX64TargetInfo(const llvm::Triple &Triple) : NVPTXTargetInfo(Triple) {
+ NVPTX64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : NVPTXTargetInfo(Triple, Opts) {
PointerWidth = PointerAlign = 64;
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = TargetInfo::SignedLong;
IntPtrType = TargetInfo::SignedLong;
- DataLayoutString = "e-i64:64-v16:16-v32:32-n16:32:64";
+ resetDataLayout("e-i64:64-v16:16-v32:32-n16:32:64");
}
};
@@ -1771,16 +1932,12 @@ static const char *const DataLayoutStringR600 =
"e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
-static const char *const DataLayoutStringR600DoubleOps =
- "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
-
static const char *const DataLayoutStringSI =
- "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"
+ "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
-class AMDGPUTargetInfo : public TargetInfo {
+class AMDGPUTargetInfo final : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
static const char * const GCCRegNames[];
@@ -1804,23 +1961,26 @@ class AMDGPUTargetInfo : public TargetInfo {
bool hasFMAF:1;
bool hasLDEXPF:1;
-public:
- AMDGPUTargetInfo(const llvm::Triple &Triple)
- : TargetInfo(Triple) {
+ static bool isAMDGCN(const llvm::Triple &TT) {
+ return TT.getArch() == llvm::Triple::amdgcn;
+ }
- if (Triple.getArch() == llvm::Triple::amdgcn) {
- DataLayoutString = DataLayoutStringSI;
- GPU = GK_SOUTHERN_ISLANDS;
+public:
+ AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+    : TargetInfo(Triple),
+ GPU(isAMDGCN(Triple) ? GK_SOUTHERN_ISLANDS : GK_R600),
+ hasFP64(false),
+ hasFMAF(false),
+ hasLDEXPF(false) {
+ if (getTriple().getArch() == llvm::Triple::amdgcn) {
hasFP64 = true;
hasFMAF = true;
hasLDEXPF = true;
- } else {
- DataLayoutString = DataLayoutStringR600;
- GPU = GK_R600;
- hasFP64 = false;
- hasFMAF = false;
- hasLDEXPF = false;
}
+
+ resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
+ DataLayoutStringSI : DataLayoutStringR600);
+
AddrSpaceMap = &AMDGPUAddrSpaceMap;
UseAddrSpaceMapMangling = true;
}
@@ -1861,6 +2021,10 @@ public:
return false;
}
+ bool initFeatureMap(llvm::StringMap<bool> &Features,
+ DiagnosticsEngine &Diags, StringRef CPU,
+ const std::vector<std::string> &FeatureVec) const override;
+
ArrayRef<Builtin::Info> getTargetBuiltins() const override {
return llvm::makeArrayRef(BuiltinInfo,
clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
@@ -1868,30 +2032,25 @@ public:
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
- Builder.defineMacro("__R600__");
+ if (getTriple().getArch() == llvm::Triple::amdgcn)
+ Builder.defineMacro("__AMDGCN__");
+ else
+ Builder.defineMacro("__R600__");
+
if (hasFMAF)
Builder.defineMacro("__HAS_FMAF__");
if (hasLDEXPF)
Builder.defineMacro("__HAS_LDEXPF__");
- if (hasFP64 && Opts.OpenCL)
- Builder.defineMacro("cl_khr_fp64");
- if (Opts.OpenCL) {
- if (GPU >= GK_NORTHERN_ISLANDS) {
- Builder.defineMacro("cl_khr_byte_addressable_store");
- Builder.defineMacro("cl_khr_global_int32_base_atomics");
- Builder.defineMacro("cl_khr_global_int32_extended_atomics");
- Builder.defineMacro("cl_khr_local_int32_base_atomics");
- Builder.defineMacro("cl_khr_local_int32_extended_atomics");
- }
- }
+ if (hasFP64)
+ Builder.defineMacro("__HAS_FP64__");
}
BuiltinVaListKind getBuiltinVaListKind() const override {
return TargetInfo::CharPtrBuiltinVaList;
}
- bool setCPU(const std::string &Name) override {
- GPU = llvm::StringSwitch<GPUKind>(Name)
+ static GPUKind parseR600Name(StringRef Name) {
+ return llvm::StringSwitch<GPUKind>(Name)
.Case("r600" , GK_R600)
.Case("rv610", GK_R600)
.Case("rv620", GK_R600)
@@ -1917,63 +2076,78 @@ public:
.Case("caicos", GK_NORTHERN_ISLANDS)
.Case("cayman", GK_CAYMAN)
.Case("aruba", GK_CAYMAN)
- .Case("tahiti", GK_SOUTHERN_ISLANDS)
- .Case("pitcairn", GK_SOUTHERN_ISLANDS)
- .Case("verde", GK_SOUTHERN_ISLANDS)
- .Case("oland", GK_SOUTHERN_ISLANDS)
- .Case("hainan", GK_SOUTHERN_ISLANDS)
- .Case("bonaire", GK_SEA_ISLANDS)
- .Case("kabini", GK_SEA_ISLANDS)
- .Case("kaveri", GK_SEA_ISLANDS)
- .Case("hawaii", GK_SEA_ISLANDS)
- .Case("mullins", GK_SEA_ISLANDS)
- .Case("tonga", GK_VOLCANIC_ISLANDS)
- .Case("iceland", GK_VOLCANIC_ISLANDS)
- .Case("carrizo", GK_VOLCANIC_ISLANDS)
.Default(GK_NONE);
+ }
- if (GPU == GK_NONE) {
- return false;
- }
+ static GPUKind parseAMDGCNName(StringRef Name) {
+ return llvm::StringSwitch<GPUKind>(Name)
+ .Case("tahiti", GK_SOUTHERN_ISLANDS)
+ .Case("pitcairn", GK_SOUTHERN_ISLANDS)
+ .Case("verde", GK_SOUTHERN_ISLANDS)
+ .Case("oland", GK_SOUTHERN_ISLANDS)
+ .Case("hainan", GK_SOUTHERN_ISLANDS)
+ .Case("bonaire", GK_SEA_ISLANDS)
+ .Case("kabini", GK_SEA_ISLANDS)
+ .Case("kaveri", GK_SEA_ISLANDS)
+ .Case("hawaii", GK_SEA_ISLANDS)
+ .Case("mullins", GK_SEA_ISLANDS)
+ .Case("tonga", GK_VOLCANIC_ISLANDS)
+ .Case("iceland", GK_VOLCANIC_ISLANDS)
+ .Case("carrizo", GK_VOLCANIC_ISLANDS)
+ .Case("fiji", GK_VOLCANIC_ISLANDS)
+ .Case("stoney", GK_VOLCANIC_ISLANDS)
+ .Case("polaris10", GK_VOLCANIC_ISLANDS)
+ .Case("polaris11", GK_VOLCANIC_ISLANDS)
+ .Default(GK_NONE);
+ }
- // Set the correct data layout
- switch (GPU) {
- case GK_NONE:
- case GK_R600:
- case GK_R700:
- case GK_EVERGREEN:
- case GK_NORTHERN_ISLANDS:
- DataLayoutString = DataLayoutStringR600;
- hasFP64 = false;
- hasFMAF = false;
- hasLDEXPF = false;
- break;
- case GK_R600_DOUBLE_OPS:
- case GK_R700_DOUBLE_OPS:
- case GK_EVERGREEN_DOUBLE_OPS:
- case GK_CAYMAN:
- DataLayoutString = DataLayoutStringR600DoubleOps;
- hasFP64 = true;
- hasFMAF = true;
- hasLDEXPF = false;
- break;
- case GK_SOUTHERN_ISLANDS:
- case GK_SEA_ISLANDS:
- case GK_VOLCANIC_ISLANDS:
- DataLayoutString = DataLayoutStringSI;
- hasFP64 = true;
- hasFMAF = true;
- hasLDEXPF = true;
- break;
+ bool setCPU(const std::string &Name) override {
+ if (getTriple().getArch() == llvm::Triple::amdgcn)
+ GPU = parseAMDGCNName(Name);
+ else
+ GPU = parseR600Name(Name);
+
+ return GPU != GK_NONE;
+ }
+
+ void setSupportedOpenCLOpts() override {
+ auto &Opts = getSupportedOpenCLOpts();
+ Opts.cl_clang_storage_class_specifiers = 1;
+ Opts.cl_khr_icd = 1;
+
+ if (hasFP64)
+ Opts.cl_khr_fp64 = 1;
+ if (GPU >= GK_EVERGREEN) {
+ Opts.cl_khr_byte_addressable_store = 1;
+ Opts.cl_khr_global_int32_base_atomics = 1;
+ Opts.cl_khr_global_int32_extended_atomics = 1;
+ Opts.cl_khr_local_int32_base_atomics = 1;
+ Opts.cl_khr_local_int32_extended_atomics = 1;
}
+ if (GPU >= GK_SOUTHERN_ISLANDS) {
+ Opts.cl_khr_fp16 = 1;
+ Opts.cl_khr_int64_base_atomics = 1;
+ Opts.cl_khr_int64_extended_atomics = 1;
+ Opts.cl_khr_3d_image_writes = 1;
+ }
+ }
- return true;
+ CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
+ switch (CC) {
+ default:
+ return CCCR_Warning;
+ case CC_C:
+ case CC_OpenCLKernel:
+ return CCCR_OK;
+ }
}
};
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
{ #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
+ { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE },
#include "clang/Basic/BuiltinsAMDGPU.def"
};
const char * const AMDGPUTargetInfo::GCCRegNames[] = {
@@ -2024,15 +2198,66 @@ const char * const AMDGPUTargetInfo::GCCRegNames[] = {
"s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
"s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111",
"s112", "s113", "s114", "s115", "s116", "s117", "s118", "s119",
- "s120", "s121", "s122", "s123", "s124", "s125", "s126", "s127"
- "exec", "vcc", "scc", "m0", "flat_scr", "exec_lo", "exec_hi",
- "vcc_lo", "vcc_hi", "flat_scr_lo", "flat_scr_hi"
+ "s120", "s121", "s122", "s123", "s124", "s125", "s126", "s127",
+ "exec", "vcc", "scc", "m0", "flat_scratch", "exec_lo", "exec_hi",
+ "vcc_lo", "vcc_hi", "flat_scratch_lo", "flat_scratch_hi"
};
ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
return llvm::makeArrayRef(GCCRegNames);
}
+bool AMDGPUTargetInfo::initFeatureMap(
+ llvm::StringMap<bool> &Features,
+ DiagnosticsEngine &Diags, StringRef CPU,
+ const std::vector<std::string> &FeatureVec) const {
+
+  // XXX - What does the member GPU mean if a device name string is passed here?
+ if (getTriple().getArch() == llvm::Triple::amdgcn) {
+ if (CPU.empty())
+ CPU = "tahiti";
+
+ switch (parseAMDGCNName(CPU)) {
+ case GK_SOUTHERN_ISLANDS:
+ case GK_SEA_ISLANDS:
+ break;
+
+ case GK_VOLCANIC_ISLANDS:
+ Features["s-memrealtime"] = true;
+ Features["16-bit-insts"] = true;
+ break;
+
+ case GK_NONE:
+ return false;
+ default:
+ llvm_unreachable("unhandled subtarget");
+ }
+ } else {
+ if (CPU.empty())
+ CPU = "r600";
+
+ switch (parseR600Name(CPU)) {
+ case GK_R600:
+ case GK_R700:
+ case GK_EVERGREEN:
+ case GK_NORTHERN_ISLANDS:
+ break;
+ case GK_R600_DOUBLE_OPS:
+ case GK_R700_DOUBLE_OPS:
+ case GK_EVERGREEN_DOUBLE_OPS:
+ case GK_CAYMAN:
+ Features["fp64"] = true;
+ break;
+ case GK_NONE:
+ return false;
+ default:
+ llvm_unreachable("unhandled subtarget");
+ }
+ }
+
+ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
+}
+
// Namespace for x86 abstract base class
const Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
@@ -2054,6 +2279,14 @@ static const char* const GCCRegNames[] = {
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
"ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
"ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15",
+ "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
+ "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
+ "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23",
+ "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31",
+ "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7",
+ "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15",
+ "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
+ "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31",
};
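Besides restoring the comma after "s127" (the old initializer silently pasted "s127" and "exec" into one string literal), the added names let AVX-512 state appear in GCC-style inline asm; a hedged sketch, assuming an AVX-512F target:

    static inline void zero_zmm0() {
      // "zmm0" is only accepted as a clobber once the register name is known.
      __asm__ volatile("vpxord %%zmm0, %%zmm0, %%zmm0" ::: "zmm0");
    }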
const TargetInfo::AddlRegName AddlRegNames[] = {
@@ -2112,14 +2345,25 @@ class X86TargetInfo : public TargetInfo {
bool HasAVX512DQ = false;
bool HasAVX512BW = false;
bool HasAVX512VL = false;
+ bool HasAVX512VBMI = false;
+ bool HasAVX512IFMA = false;
bool HasSHA = false;
+ bool HasMPX = false;
+ bool HasSGX = false;
bool HasCX16 = false;
bool HasFXSR = false;
bool HasXSAVE = false;
bool HasXSAVEOPT = false;
bool HasXSAVEC = false;
bool HasXSAVES = false;
+ bool HasMWAITX = false;
bool HasPKU = false;
+ bool HasCLFLUSHOPT = false;
+ bool HasPCOMMIT = false;
+ bool HasCLWB = false;
+ bool HasUMIP = false;
+ bool HasMOVBE = false;
+ bool HasPREFETCHWT1 = false;
/// \brief Enumeration of all of the X86 CPUs supported by Clang.
///
@@ -2220,14 +2464,26 @@ class X86TargetInfo : public TargetInfo {
/// Broadwell microarchitecture based processors.
CK_Broadwell,
- /// \name Skylake
- /// Skylake microarchitecture based processors.
- CK_Skylake,
+ /// \name Skylake Client
+ /// Skylake client microarchitecture based processors.
+ CK_SkylakeClient,
+
+ /// \name Skylake Server
+ /// Skylake server microarchitecture based processors.
+ CK_SkylakeServer,
+
+ /// \name Cannonlake Client
+ /// Cannonlake client microarchitecture based processors.
+ CK_Cannonlake,
/// \name Knights Landing
/// Knights Landing processor.
CK_KNL,
+ /// \name Lakemont
+ /// Lakemont microarchitecture based processors.
+ CK_Lakemont,
+
/// \name K6
/// K6 architecture processors.
//@{
@@ -2327,9 +2583,12 @@ class X86TargetInfo : public TargetInfo {
.Case("haswell", CK_Haswell)
.Case("core-avx2", CK_Haswell) // Legacy name.
.Case("broadwell", CK_Broadwell)
- .Case("skylake", CK_Skylake)
- .Case("skx", CK_Skylake) // Legacy name.
+ .Case("skylake", CK_SkylakeClient)
+ .Case("skylake-avx512", CK_SkylakeServer)
+ .Case("skx", CK_SkylakeServer) // Legacy name.
+ .Case("cannonlake", CK_Cannonlake)
.Case("knl", CK_KNL)
+ .Case("lakemont", CK_Lakemont)
.Case("k6", CK_K6)
.Case("k6-2", CK_K6_2)
.Case("k6-3", CK_K6_3)
@@ -2365,7 +2624,8 @@ class X86TargetInfo : public TargetInfo {
} FPMath = FP_Default;
public:
- X86TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ X86TargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
BigEndian = false;
LongDoubleFormat = &llvm::APFloat::x87DoubleExtended;
}
@@ -2477,6 +2737,7 @@ public:
case CK_C3_2:
case CK_Pentium4:
case CK_Pentium4M:
+ case CK_Lakemont:
case CK_Prescott:
case CK_K6:
case CK_K6_2:
@@ -2503,7 +2764,9 @@ public:
case CK_IvyBridge:
case CK_Haswell:
case CK_Broadwell:
- case CK_Skylake:
+ case CK_SkylakeClient:
+ case CK_SkylakeServer:
+ case CK_Cannonlake:
case CK_KNL:
case CK_Athlon64:
case CK_Athlon64SSE3:
@@ -2528,14 +2791,20 @@ public:
bool setFPMath(StringRef Name) override;
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
- // We accept all non-ARM calling conventions
- return (CC == CC_X86ThisCall ||
- CC == CC_X86FastCall ||
- CC == CC_X86StdCall ||
- CC == CC_X86VectorCall ||
- CC == CC_C ||
- CC == CC_X86Pascal ||
- CC == CC_IntelOclBicc) ? CCCR_OK : CCCR_Warning;
+ // Most of the non-ARM calling conventions are i386 conventions.
+ switch (CC) {
+ case CC_X86ThisCall:
+ case CC_X86FastCall:
+ case CC_X86StdCall:
+ case CC_X86VectorCall:
+ case CC_C:
+ case CC_Swift:
+ case CC_X86Pascal:
+ case CC_IntelOclBicc:
+ return CCCR_OK;
+ default:
+ return CCCR_Warning;
+ }
}
CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override {
@@ -2545,6 +2814,10 @@ public:
bool hasSjLjLowering() const override {
return true;
}
+
+ void setSupportedOpenCLOpts() override {
+ getSupportedOpenCLOpts().setAll();
+ }
};
bool X86TargetInfo::setFPMath(StringRef Name) {
@@ -2567,7 +2840,13 @@ bool X86TargetInfo::initFeatureMap(
if (getTriple().getArch() == llvm::Triple::x86_64)
setFeatureEnabledImpl(Features, "sse2", true);
- switch (getCPUKind(CPU)) {
+ const CPUKind Kind = getCPUKind(CPU);
+
+ // Enable X87 for all X86 processors but Lakemont.
+ if (Kind != CK_Lakemont)
+ setFeatureEnabledImpl(Features, "x87", true);
+
+ switch (Kind) {
case CK_Generic:
case CK_i386:
case CK_i486:
@@ -2575,6 +2854,7 @@ bool X86TargetInfo::initFeatureMap(
case CK_Pentium:
case CK_i686:
case CK_PentiumPro:
+ case CK_Lakemont:
break;
case CK_PentiumMMX:
case CK_Pentium2:
@@ -2613,15 +2893,28 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "fxsr", true);
setFeatureEnabledImpl(Features, "cx16", true);
break;
- case CK_Skylake:
+ case CK_Cannonlake:
+ setFeatureEnabledImpl(Features, "avx512ifma", true);
+ setFeatureEnabledImpl(Features, "avx512vbmi", true);
+ setFeatureEnabledImpl(Features, "sha", true);
+ setFeatureEnabledImpl(Features, "umip", true);
+ // FALLTHROUGH
+ case CK_SkylakeServer:
setFeatureEnabledImpl(Features, "avx512f", true);
setFeatureEnabledImpl(Features, "avx512cd", true);
setFeatureEnabledImpl(Features, "avx512dq", true);
setFeatureEnabledImpl(Features, "avx512bw", true);
setFeatureEnabledImpl(Features, "avx512vl", true);
+ setFeatureEnabledImpl(Features, "pku", true);
+ setFeatureEnabledImpl(Features, "pcommit", true);
+ setFeatureEnabledImpl(Features, "clwb", true);
+ // FALLTHROUGH
+ case CK_SkylakeClient:
setFeatureEnabledImpl(Features, "xsavec", true);
setFeatureEnabledImpl(Features, "xsaves", true);
- setFeatureEnabledImpl(Features, "pku", true);
+ setFeatureEnabledImpl(Features, "mpx", true);
+ setFeatureEnabledImpl(Features, "sgx", true);
+ setFeatureEnabledImpl(Features, "clflushopt", true);
// FALLTHROUGH
case CK_Broadwell:
setFeatureEnabledImpl(Features, "rdseed", true);
@@ -2634,6 +2927,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "bmi2", true);
setFeatureEnabledImpl(Features, "rtm", true);
setFeatureEnabledImpl(Features, "fma", true);
+ setFeatureEnabledImpl(Features, "movbe", true);
// FALLTHROUGH
case CK_IvyBridge:
setFeatureEnabledImpl(Features, "rdrnd", true);
@@ -2660,6 +2954,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "avx512cd", true);
setFeatureEnabledImpl(Features, "avx512er", true);
setFeatureEnabledImpl(Features, "avx512pf", true);
+ setFeatureEnabledImpl(Features, "prefetchwt1", true);
setFeatureEnabledImpl(Features, "fxsr", true);
setFeatureEnabledImpl(Features, "rdseed", true);
setFeatureEnabledImpl(Features, "adx", true);
@@ -2676,6 +2971,7 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "cx16", true);
setFeatureEnabledImpl(Features, "xsaveopt", true);
setFeatureEnabledImpl(Features, "xsave", true);
+ setFeatureEnabledImpl(Features, "movbe", true);
break;
case CK_K6_2:
case CK_K6_3:
@@ -2735,6 +3031,7 @@ bool X86TargetInfo::initFeatureMap(
case CK_BDVER4:
setFeatureEnabledImpl(Features, "avx2", true);
setFeatureEnabledImpl(Features, "bmi2", true);
+ setFeatureEnabledImpl(Features, "mwaitx", true);
// FALLTHROUGH
case CK_BDVER3:
setFeatureEnabledImpl(Features, "fsgsbase", true);
@@ -2843,7 +3140,8 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
case AVX512F:
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
- Features["avx512vl"] = false;
+ Features["avx512vl"] = Features["avx512vbmi"] =
+ Features["avx512ifma"] = false;
}
}
@@ -2941,8 +3239,9 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
setSSELevel(Features, AVX2, Enabled);
} else if (Name == "avx512f") {
setSSELevel(Features, AVX512F, Enabled);
- } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf"
- || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl") {
+ } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
+ Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
+ Name == "avx512vbmi" || Name == "avx512ifma") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
} else if (Name == "fma") {
@@ -2970,15 +3269,11 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
else
setSSELevel(Features, SSE41, Enabled);
} else if (Name == "xsave") {
- if (Enabled)
- setSSELevel(Features, AVX, Enabled);
- else
+ if (!Enabled)
Features["xsaveopt"] = false;
} else if (Name == "xsaveopt" || Name == "xsavec" || Name == "xsaves") {
- if (Enabled) {
+ if (Enabled)
Features["xsave"] = true;
- setSSELevel(Features, AVX, Enabled);
- }
}
}
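The new dependency rules, summarized as a sketch (flag spellings are the usual -m feature flags):

    // enable xsaveopt, xsavec, or xsaves  =>  xsave is enabled as well
    // disable xsave                       =>  xsaveopt is disabled
    // xsave and its variants no longer imply AVX in either direction.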
@@ -3032,8 +3327,18 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512BW = true;
} else if (Feature == "+avx512vl") {
HasAVX512VL = true;
+ } else if (Feature == "+avx512vbmi") {
+ HasAVX512VBMI = true;
+ } else if (Feature == "+avx512ifma") {
+ HasAVX512IFMA = true;
} else if (Feature == "+sha") {
HasSHA = true;
+ } else if (Feature == "+mpx") {
+ HasMPX = true;
+ } else if (Feature == "+movbe") {
+ HasMOVBE = true;
+ } else if (Feature == "+sgx") {
+ HasSGX = true;
} else if (Feature == "+cx16") {
HasCX16 = true;
} else if (Feature == "+fxsr") {
@@ -3046,8 +3351,20 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasXSAVEC = true;
} else if (Feature == "+xsaves") {
HasXSAVES = true;
+ } else if (Feature == "+mwaitx") {
+ HasMWAITX = true;
} else if (Feature == "+pku") {
HasPKU = true;
+ } else if (Feature == "+clflushopt") {
+ HasCLFLUSHOPT = true;
+ } else if (Feature == "+pcommit") {
+ HasPCOMMIT = true;
+ } else if (Feature == "+clwb") {
+ HasCLWB = true;
+ } else if (Feature == "+umip") {
+ HasUMIP = true;
+ } else if (Feature == "+prefetchwt1") {
+ HasPREFETCHWT1 = true;
}
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -3181,21 +3498,23 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_IvyBridge:
case CK_Haswell:
case CK_Broadwell:
+ case CK_SkylakeClient:
// FIXME: Historically, we defined this legacy name, it would be nice to
// remove it at some point. We've never exposed fine-grained names for
// recent primary x86 CPUs, and we should keep it that way.
defineCPUMacros(Builder, "corei7");
break;
- case CK_Skylake:
- // FIXME: Historically, we defined this legacy name, it would be nice to
- // remove it at some point. This is the only fine-grained CPU macro in the
- // main intel CPU line, and it would be better to not have these and force
- // people to use ISA macros.
+ case CK_SkylakeServer:
defineCPUMacros(Builder, "skx");
break;
+ case CK_Cannonlake:
+ break;
case CK_KNL:
defineCPUMacros(Builder, "knl");
break;
+ case CK_Lakemont:
+ Builder.defineMacro("__tune_lakemont__");
+ break;
case CK_K6_2:
Builder.defineMacro("__k6_2__");
Builder.defineMacro("__tune_k6_2__");
@@ -3306,6 +3625,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasTBM)
Builder.defineMacro("__TBM__");
+ if (HasMWAITX)
+ Builder.defineMacro("__MWAITX__");
+
switch (XOPLevel) {
case XOP:
Builder.defineMacro("__XOP__");
@@ -3335,6 +3657,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512BW__");
if (HasAVX512VL)
Builder.defineMacro("__AVX512VL__");
+ if (HasAVX512VBMI)
+ Builder.defineMacro("__AVX512VBMI__");
+ if (HasAVX512IFMA)
+ Builder.defineMacro("__AVX512IFMA__");
if (HasSHA)
Builder.defineMacro("__SHA__");
@@ -3433,8 +3759,12 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bw", HasAVX512BW)
.Case("avx512vl", HasAVX512VL)
+ .Case("avx512vbmi", HasAVX512VBMI)
+ .Case("avx512ifma", HasAVX512IFMA)
.Case("bmi", HasBMI)
.Case("bmi2", HasBMI2)
+ .Case("clflushopt", HasCLFLUSHOPT)
+ .Case("clwb", HasCLWB)
.Case("cx16", HasCX16)
.Case("f16c", HasF16C)
.Case("fma", HasFMA)
@@ -3445,12 +3775,18 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("mm3dnow", MMX3DNowLevel >= AMD3DNow)
.Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon)
.Case("mmx", MMX3DNowLevel >= MMX)
+ .Case("movbe", HasMOVBE)
+ .Case("mpx", HasMPX)
.Case("pclmul", HasPCLMUL)
+ .Case("pcommit", HasPCOMMIT)
+ .Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
+ .Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("rdrnd", HasRDRND)
.Case("rdseed", HasRDSEED)
.Case("rtm", HasRTM)
+ .Case("sgx", HasSGX)
.Case("sha", HasSHA)
.Case("sse", SSELevel >= SSE1)
.Case("sse2", SSELevel >= SSE2)
@@ -3460,6 +3796,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("sse4.2", SSELevel >= SSE42)
.Case("sse4a", XOPLevel >= SSE4A)
.Case("tbm", HasTBM)
+ .Case("umip", HasUMIP)
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
@@ -3468,7 +3805,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("xsavec", HasXSAVEC)
.Case("xsaves", HasXSAVES)
.Case("xsaveopt", HasXSAVEOPT)
- .Case("pku", HasPKU)
.Default(false);
}
@@ -3485,6 +3821,7 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
.Case("sse", true)
.Case("sse2", true)
.Case("sse3", true)
+ .Case("ssse3", true)
.Case("sse4.1", true)
.Case("sse4.2", true)
.Case("avx", true)
@@ -3496,6 +3833,16 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
.Case("avx512f", true)
.Case("bmi", true)
.Case("bmi2", true)
+ .Case("aes", true)
+ .Case("pclmul", true)
+ .Case("avx512vl", true)
+ .Case("avx512bw", true)
+ .Case("avx512dq", true)
+ .Case("avx512cd", true)
+ .Case("avx512er", true)
+ .Case("avx512pf", true)
+ .Case("avx512vbmi", true)
+ .Case("avx512ifma", true)
.Default(false);
}
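These strings are the accepted arguments of __builtin_cpu_supports(); a usage sketch with one of the newly allowed names:

    void dispatch(void) {
      if (__builtin_cpu_supports("avx512vl")) {
        // AVX-512VL path
      } else {
        // generic fallback
      }
    }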
@@ -3657,12 +4004,13 @@ X86TargetInfo::convertConstraint(const char *&Constraint) const {
// X86-32 generic target
class X86_32TargetInfo : public X86TargetInfo {
public:
- X86_32TargetInfo(const llvm::Triple &Triple) : X86TargetInfo(Triple) {
+ X86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : X86TargetInfo(Triple, Opts) {
DoubleAlign = LongLongAlign = 32;
LongDoubleWidth = 96;
LongDoubleAlign = 32;
SuitableAlign = 128;
- DataLayoutString = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128";
+ resetDataLayout("e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128");
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
@@ -3711,8 +4059,8 @@ public:
class NetBSDI386TargetInfo : public NetBSDTargetInfo<X86_32TargetInfo> {
public:
- NetBSDI386TargetInfo(const llvm::Triple &Triple)
- : NetBSDTargetInfo<X86_32TargetInfo>(Triple) {}
+ NetBSDI386TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : NetBSDTargetInfo<X86_32TargetInfo>(Triple, Opts) {}
unsigned getFloatEvalMethod() const override {
unsigned Major, Minor, Micro;
@@ -3727,8 +4075,8 @@ public:
class OpenBSDI386TargetInfo : public OpenBSDTargetInfo<X86_32TargetInfo> {
public:
- OpenBSDI386TargetInfo(const llvm::Triple &Triple)
- : OpenBSDTargetInfo<X86_32TargetInfo>(Triple) {
+ OpenBSDI386TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OpenBSDTargetInfo<X86_32TargetInfo>(Triple, Opts) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
@@ -3737,8 +4085,8 @@ public:
class BitrigI386TargetInfo : public BitrigTargetInfo<X86_32TargetInfo> {
public:
- BitrigI386TargetInfo(const llvm::Triple &Triple)
- : BitrigTargetInfo<X86_32TargetInfo>(Triple) {
+ BitrigI386TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : BitrigTargetInfo<X86_32TargetInfo>(Triple, Opts) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
@@ -3747,8 +4095,8 @@ public:
class DarwinI386TargetInfo : public DarwinTargetInfo<X86_32TargetInfo> {
public:
- DarwinI386TargetInfo(const llvm::Triple &Triple)
- : DarwinTargetInfo<X86_32TargetInfo>(Triple) {
+ DarwinI386TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : DarwinTargetInfo<X86_32TargetInfo>(Triple, Opts) {
LongDoubleWidth = 128;
LongDoubleAlign = 128;
SuitableAlign = 128;
@@ -3759,7 +4107,7 @@ public:
UseSignedCharForObjCBool = false;
SizeType = UnsignedLong;
IntPtrType = SignedLong;
- DataLayoutString = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128";
+ resetDataLayout("e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128");
HasAlignMac68kSupport = true;
}
@@ -3778,15 +4126,15 @@ public:
// x86-32 Windows target
class WindowsX86_32TargetInfo : public WindowsTargetInfo<X86_32TargetInfo> {
public:
- WindowsX86_32TargetInfo(const llvm::Triple &Triple)
- : WindowsTargetInfo<X86_32TargetInfo>(Triple) {
+ WindowsX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : WindowsTargetInfo<X86_32TargetInfo>(Triple, Opts) {
WCharType = UnsignedShort;
DoubleAlign = LongLongAlign = 64;
bool IsWinCOFF =
getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF();
- DataLayoutString = IsWinCOFF
- ? "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
- : "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
+ resetDataLayout(IsWinCOFF
+ ? "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+ : "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -3797,8 +4145,9 @@ public:
// x86-32 Windows Visual Studio target
class MicrosoftX86_32TargetInfo : public WindowsX86_32TargetInfo {
public:
- MicrosoftX86_32TargetInfo(const llvm::Triple &Triple)
- : WindowsX86_32TargetInfo(Triple) {
+ MicrosoftX86_32TargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : WindowsX86_32TargetInfo(Triple, Opts) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
}
@@ -3846,8 +4195,8 @@ static void addMinGWDefines(const LangOptions &Opts, MacroBuilder &Builder) {
// x86-32 MinGW target
class MinGWX86_32TargetInfo : public WindowsX86_32TargetInfo {
public:
- MinGWX86_32TargetInfo(const llvm::Triple &Triple)
- : WindowsX86_32TargetInfo(Triple) {}
+ MinGWX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : WindowsX86_32TargetInfo(Triple, Opts) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
WindowsX86_32TargetInfo::getTargetDefines(Opts, Builder);
@@ -3861,11 +4210,11 @@ public:
// x86-32 Cygwin target
class CygwinX86_32TargetInfo : public X86_32TargetInfo {
public:
- CygwinX86_32TargetInfo(const llvm::Triple &Triple)
- : X86_32TargetInfo(Triple) {
+ CygwinX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : X86_32TargetInfo(Triple, Opts) {
WCharType = UnsignedShort;
DoubleAlign = LongLongAlign = 64;
- DataLayoutString = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
+ resetDataLayout("e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -3881,30 +4230,27 @@ public:
};
// x86-32 Haiku target
-class HaikuX86_32TargetInfo : public X86_32TargetInfo {
+class HaikuX86_32TargetInfo : public HaikuTargetInfo<X86_32TargetInfo> {
public:
- HaikuX86_32TargetInfo(const llvm::Triple &Triple) : X86_32TargetInfo(Triple) {
- SizeType = UnsignedLong;
- IntPtrType = SignedLong;
- PtrDiffType = SignedLong;
- ProcessIDType = SignedLong;
- this->UserLabelPrefix = "";
- this->TLSSupported = false;
+ HaikuX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : HaikuTargetInfo<X86_32TargetInfo>(Triple, Opts) {
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
- X86_32TargetInfo::getTargetDefines(Opts, Builder);
+ HaikuTargetInfo<X86_32TargetInfo>::getTargetDefines(Opts, Builder);
Builder.defineMacro("__INTEL__");
- Builder.defineMacro("__HAIKU__");
}
};
// X86-32 MCU target
class MCUX86_32TargetInfo : public X86_32TargetInfo {
public:
- MCUX86_32TargetInfo(const llvm::Triple &Triple) : X86_32TargetInfo(Triple) {
+ MCUX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : X86_32TargetInfo(Triple, Opts) {
LongDoubleWidth = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+ resetDataLayout("e-m:e-p:32:32-i64:32-f64:32-f128:32-n8:16:32-a:0:32-S32");
+ WIntType = UnsignedInt;
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
@@ -3918,6 +4264,10 @@ public:
Builder.defineMacro("__iamcu");
Builder.defineMacro("__iamcu__");
}
+
+ bool allowsLargerPreferedTypeAlignment() const override {
+ return false;
+ }
};
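The new allowsLargerPreferedTypeAlignment() hook stops Clang from raising a variable's preferred alignment above its ABI alignment, so with the f64:32 entry in the IAMCU layout string above, doubles keep 4-byte alignment instead of being bumped to 8 for performance. A minimal illustration, assuming an IAMCU target:

    double g;  // stays align 4 in the emitted IR; no over-alignment to 8
    static_assert(alignof(double) == 4, "IAMCU: double has 4-byte ABI alignment");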
// RTEMS Target
@@ -3933,9 +4283,8 @@ protected:
}
public:
- RTEMSTargetInfo(const llvm::Triple &Triple) : OSTargetInfo<Target>(Triple) {
- this->UserLabelPrefix = "";
-
+ RTEMSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OSTargetInfo<Target>(Triple, Opts) {
switch (Triple.getArch()) {
default:
case llvm::Triple::x86:
@@ -3958,11 +4307,11 @@ public:
// x86-32 RTEMS target
class RTEMSX86_32TargetInfo : public X86_32TargetInfo {
public:
- RTEMSX86_32TargetInfo(const llvm::Triple &Triple) : X86_32TargetInfo(Triple) {
+ RTEMSX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : X86_32TargetInfo(Triple, Opts) {
SizeType = UnsignedLong;
IntPtrType = SignedLong;
PtrDiffType = SignedLong;
- this->UserLabelPrefix = "";
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -3975,7 +4324,8 @@ public:
// x86-64 generic target
class X86_64TargetInfo : public X86TargetInfo {
public:
- X86_64TargetInfo(const llvm::Triple &Triple) : X86TargetInfo(Triple) {
+ X86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : X86TargetInfo(Triple, Opts) {
const bool IsX32 = getTriple().getEnvironment() == llvm::Triple::GNUX32;
bool IsWinCOFF =
getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF();
@@ -3993,10 +4343,10 @@ public:
RegParmMax = 6;
// Pointers are 32-bit in x32.
- DataLayoutString = IsX32 ? "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"
- : IsWinCOFF
- ? "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
- : "e-m:e-i64:64-f80:128-n8:16:32:64-S128";
+ resetDataLayout(IsX32
+ ? "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"
+ : IsWinCOFF ? "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+ : "e-m:e-i64:64-f80:128-n8:16:32:64-S128");
// Use fpret only for long double.
RealTypeUsesObjCFPRet = (1 << TargetInfo::LongDouble);
@@ -4022,10 +4372,18 @@ public:
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
- return (CC == CC_C ||
- CC == CC_X86VectorCall ||
- CC == CC_IntelOclBicc ||
- CC == CC_X86_64Win64) ? CCCR_OK : CCCR_Warning;
+ switch (CC) {
+ case CC_C:
+ case CC_Swift:
+ case CC_X86VectorCall:
+ case CC_IntelOclBicc:
+ case CC_X86_64Win64:
+ case CC_PreserveMost:
+ case CC_PreserveAll:
+ return CCCR_OK;
+ default:
+ return CCCR_Warning;
+ }
}
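With CC_Swift, CC_PreserveMost and CC_PreserveAll now returning CCCR_OK, the corresponding attributes stop drawing the calling-convention-ignored warning on x86-64. A sketch using Clang's attribute spellings (assumed to map onto these enum values):

    void observer() __attribute__((preserve_most));  // accepted on x86-64
    void snapshot() __attribute__((preserve_all));   // accepted on x86-64
    void swift_shim() __attribute__((swiftcall));    // accepted on x86-64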
CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override {
@@ -4034,6 +4392,8 @@ public:
// for x32 we need it here explicitly
bool hasInt128Type() const override { return true; }
+ unsigned getUnwindWordWidth() const override { return 64; }
+ unsigned getRegisterWidth() const override { return 64; }
bool validateGlobalRegisterVariable(StringRef RegName,
unsigned RegSize,
@@ -4055,8 +4415,8 @@ public:
// x86-64 Windows target
class WindowsX86_64TargetInfo : public WindowsTargetInfo<X86_64TargetInfo> {
public:
- WindowsX86_64TargetInfo(const llvm::Triple &Triple)
- : WindowsTargetInfo<X86_64TargetInfo>(Triple) {
+ WindowsX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : WindowsTargetInfo<X86_64TargetInfo>(Triple, Opts) {
WCharType = UnsignedShort;
LongWidth = LongAlign = 32;
DoubleAlign = LongLongAlign = 64;
@@ -4065,7 +4425,6 @@ public:
SizeType = UnsignedLongLong;
PtrDiffType = SignedLongLong;
IntPtrType = SignedLongLong;
- this->UserLabelPrefix = "";
}
void getTargetDefines(const LangOptions &Opts,
@@ -4098,8 +4457,9 @@ public:
// x86-64 Windows Visual Studio target
class MicrosoftX86_64TargetInfo : public WindowsX86_64TargetInfo {
public:
- MicrosoftX86_64TargetInfo(const llvm::Triple &Triple)
- : WindowsX86_64TargetInfo(Triple) {
+ MicrosoftX86_64TargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : WindowsX86_64TargetInfo(Triple, Opts) {
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
}
@@ -4115,8 +4475,8 @@ public:
// x86-64 MinGW target
class MinGWX86_64TargetInfo : public WindowsX86_64TargetInfo {
public:
- MinGWX86_64TargetInfo(const llvm::Triple &Triple)
- : WindowsX86_64TargetInfo(Triple) {
+ MinGWX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : WindowsX86_64TargetInfo(Triple, Opts) {
// Mingw64 rounds long double size and alignment up to 16 bytes, but sticks
// with x86 FP ops. Weird.
LongDoubleWidth = LongDoubleAlign = 128;
@@ -4139,8 +4499,8 @@ public:
// x86-64 Cygwin target
class CygwinX86_64TargetInfo : public X86_64TargetInfo {
public:
- CygwinX86_64TargetInfo(const llvm::Triple &Triple)
- : X86_64TargetInfo(Triple) {
+ CygwinX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : X86_64TargetInfo(Triple, Opts) {
TLSSupported = false;
WCharType = UnsignedShort;
}
@@ -4163,14 +4523,14 @@ public:
class DarwinX86_64TargetInfo : public DarwinTargetInfo<X86_64TargetInfo> {
public:
- DarwinX86_64TargetInfo(const llvm::Triple &Triple)
- : DarwinTargetInfo<X86_64TargetInfo>(Triple) {
+ DarwinX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : DarwinTargetInfo<X86_64TargetInfo>(Triple, Opts) {
Int64Type = SignedLongLong;
// The 64-bit iOS simulator uses the builtin bool type for Objective-C.
llvm::Triple T = llvm::Triple(Triple);
if (T.isiOS())
UseSignedCharForObjCBool = false;
- DataLayoutString = "e-m:o-i64:64-f80:128-n8:16:32:64-S128";
+ resetDataLayout("e-m:o-i64:64-f80:128-n8:16:32:64-S128");
}
bool handleTargetFeatures(std::vector<std::string> &Features,
@@ -4187,8 +4547,8 @@ public:
class OpenBSDX86_64TargetInfo : public OpenBSDTargetInfo<X86_64TargetInfo> {
public:
- OpenBSDX86_64TargetInfo(const llvm::Triple &Triple)
- : OpenBSDTargetInfo<X86_64TargetInfo>(Triple) {
+ OpenBSDX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : OpenBSDTargetInfo<X86_64TargetInfo>(Triple, Opts) {
IntMaxType = SignedLongLong;
Int64Type = SignedLongLong;
}
@@ -4196,8 +4556,8 @@ public:
class BitrigX86_64TargetInfo : public BitrigTargetInfo<X86_64TargetInfo> {
public:
- BitrigX86_64TargetInfo(const llvm::Triple &Triple)
- : BitrigTargetInfo<X86_64TargetInfo>(Triple) {
+ BitrigX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : BitrigTargetInfo<X86_64TargetInfo>(Triple, Opts) {
IntMaxType = SignedLongLong;
Int64Type = SignedLongLong;
}
@@ -4309,26 +4669,26 @@ class ARMTargetInfo : public TargetInfo {
// Thumb1 add sp, #imm requires the immediate value to be a multiple of 4,
// so set the preferred alignment for small types to 32.
if (T.isOSBinFormatMachO()) {
- DataLayoutString =
- BigEndian ? "E-m:o-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
- : "e-m:o-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64";
+ resetDataLayout(BigEndian
+ ? "E-m:o-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ : "e-m:o-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64");
} else if (T.isOSWindows()) {
assert(!BigEndian && "Windows on ARM does not support big endian");
- DataLayoutString = "e"
- "-m:w"
- "-p:32:32"
- "-i64:64"
- "-v128:64:128"
- "-a:0:32"
- "-n32"
- "-S64";
+ resetDataLayout("e"
+ "-m:w"
+ "-p:32:32"
+ "-i64:64"
+ "-v128:64:128"
+ "-a:0:32"
+ "-n32"
+ "-S64");
} else if (T.isOSNaCl()) {
assert(!BigEndian && "NaCl on ARM does not support big endian");
- DataLayoutString = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S128";
+ resetDataLayout("e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S128");
} else {
- DataLayoutString =
- BigEndian ? "E-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
- : "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64";
+ resetDataLayout(BigEndian
+ ? "E-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ : "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64");
}
// FIXME: Enumerated types are variable width in straight AAPCS.
@@ -4364,17 +4724,17 @@ class ARMTargetInfo : public TargetInfo {
if (T.isOSBinFormatMachO() && IsAAPCS16) {
assert(!BigEndian && "AAPCS16 does not support big-endian");
- DataLayoutString = "e-m:o-p:32:32-i64:64-a:0:32-n32-S128";
+ resetDataLayout("e-m:o-p:32:32-i64:64-a:0:32-n32-S128");
} else if (T.isOSBinFormatMachO())
- DataLayoutString =
+ resetDataLayout(
BigEndian
? "E-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
- : "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32";
+ : "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32");
else
- DataLayoutString =
+ resetDataLayout(
BigEndian
? "E-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
- : "e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32";
+ : "e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32");
// FIXME: Override "preferred align" for double and long long.
}
@@ -4432,7 +4792,8 @@ class ARMTargetInfo : public TargetInfo {
}
bool supportsThumb2() const {
- return CPUAttr.equals("6T2") || ArchVersion >= 7;
+ return CPUAttr.equals("6T2") ||
+ (ArchVersion >= 7 && !CPUAttr.equals("8M_BASE"));
}
StringRef getCPUAttr() const {
@@ -4457,6 +4818,12 @@ class ARMTargetInfo : public TargetInfo {
return "8A";
case llvm::ARM::AK_ARMV8_1A:
return "8_1A";
+ case llvm::ARM::AK_ARMV8_2A:
+ return "8_2A";
+ case llvm::ARM::AK_ARMV8MBaseline:
+ return "8M_BASE";
+ case llvm::ARM::AK_ARMV8MMainline:
+ return "8M_MAIN";
}
}
@@ -4474,9 +4841,10 @@ class ARMTargetInfo : public TargetInfo {
}
public:
- ARMTargetInfo(const llvm::Triple &Triple, bool IsBigEndian)
- : TargetInfo(Triple), FPMath(FP_Default),
- IsAAPCS(true), LDREX(0), HW_FP(0) {
+ ARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts,
+ bool IsBigEndian)
+ : TargetInfo(Triple), FPMath(FP_Default), IsAAPCS(true), LDREX(0),
+ HW_FP(0) {
BigEndian = IsBigEndian;
switch (getTriple().getOS()) {
@@ -4505,7 +4873,7 @@ public:
Triple.getOS() == llvm::Triple::UnknownOS ||
StringRef(CPU).startswith("cortex-m")) {
setABI("aapcs");
- } else if (Triple.isWatchOS()) {
+ } else if (Triple.isWatchABI()) {
setABI("aapcs16");
} else {
setABI("apcs-gnu");
@@ -4519,6 +4887,8 @@ public:
case llvm::Triple::Android:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::MuslEABI:
+ case llvm::Triple::MuslEABIHF:
setABI("aapcs-linux");
break;
case llvm::Triple::EABIHF:
@@ -4548,6 +4918,11 @@ public:
// that follows it, `bar', `bar' will be aligned as the type of the
// zero length bitfield.
UseZeroLengthBitfieldAlignment = true;
+
+ if (Triple.getOS() == llvm::Triple::Linux ||
+ Triple.getOS() == llvm::Triple::UnknownOS)
+ this->MCountName =
+ Opts.EABIVersion == "gnu" ? "\01__gnu_mcount_nc" : "\01mcount";
}
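The \01 prefix on both spellings marks the name as a literal assembler symbol, so LLVM emits it verbatim rather than applying the target's user-label prefix; for illustration:

    // MCountName == "\01__gnu_mcount_nc" -> emitted exactly as __gnu_mcount_nc
    // MCountName == "\01mcount"          -> emitted exactly as mcount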
StringRef getABI() const override { return ABI; }
@@ -4638,7 +5013,7 @@ public:
} else if (Feature == "+dsp") {
DSP = 1;
} else if (Feature == "+fp-only-sp") {
- HW_FP_remove |= HW_FP_DP;
+ HW_FP_remove |= HW_FP_DP;
} else if (Feature == "+strict-align") {
Unaligned = 0;
} else if (Feature == "+fp16") {
@@ -4715,13 +5090,17 @@ public:
// Target identification.
Builder.defineMacro("__arm");
Builder.defineMacro("__arm__");
+ // For bare-metal none-eabi.
+ if (getTriple().getOS() == llvm::Triple::UnknownOS &&
+ getTriple().getEnvironment() == llvm::Triple::EABI)
+ Builder.defineMacro("__ELF__");
// Target properties.
Builder.defineMacro("__REGISTER_PREFIX__", "");
// Unfortunately, __ARM_ARCH_7K__ is now more of an ABI descriptor. The CPU
// happens to be Cortex-A7 though, so it should still get __ARM_ARCH_7A__.
- if (getTriple().isWatchOS())
+ if (getTriple().isWatchABI())
Builder.defineMacro("__ARM_ARCH_7K__", "2");
if (!CPUAttr.empty())
@@ -4746,13 +5125,14 @@ public:
// __ARM_ARCH_ISA_ARM is defined to 1 if the core supports the ARM ISA. It
// is not defined for the M-profile.
- // NOTE that the deffault profile is assumed to be 'A'
- if (CPUProfile.empty() || CPUProfile != "M")
+ // NOTE that the default profile is assumed to be 'A'
+ if (CPUProfile.empty() || ArchProfile != llvm::ARM::PK_M)
Builder.defineMacro("__ARM_ARCH_ISA_ARM", "1");
- // __ARM_ARCH_ISA_THUMB is defined to 1 if the core supporst the original
- // Thumb ISA (including v6-M). It is set to 2 if the core supports the
- // Thumb-2 ISA as found in the v6T2 architecture and all v7 architecture.
+ // __ARM_ARCH_ISA_THUMB is defined to 1 if the core supports the original
+ // Thumb ISA (including v6-M and v8-M Baseline). It is set to 2 if the
+ // core supports the Thumb-2 ISA as found in the v6T2 architecture and all
+ // v7 and v8 architectures excluding v8-M Baseline.
if (supportsThumb2())
Builder.defineMacro("__ARM_ARCH_ISA_THUMB", "2");
else if (supportsThumb())
@@ -4794,7 +5174,7 @@ public:
Builder.defineMacro("__ARM_FP16_ARGS", "1");
// ACLE 6.5.3 Fused multiply-accumulate (FMA)
- if (ArchVersion >= 7 && (CPUProfile != "M" || CPUAttr == "7EM"))
+ if (ArchVersion >= 7 && (FPU & VFP4FPU))
Builder.defineMacro("__ARM_FEATURE_FMA", "1");
// Subtarget options.
@@ -4808,14 +5188,15 @@ public:
if (ABI == "aapcs" || ABI == "aapcs-linux" || ABI == "aapcs-vfp") {
// Embedded targets on Darwin follow AAPCS, but not EABI.
// Windows on ARM follows AAPCS VFP, but does not conform to EABI.
- if (!getTriple().isOSDarwin() && !getTriple().isOSWindows())
+ if (!getTriple().isOSBinFormatMachO() && !getTriple().isOSWindows())
Builder.defineMacro("__ARM_EABI__");
Builder.defineMacro("__ARM_PCS", "1");
-
- if ((!SoftFloat && !SoftFloatABI) || ABI == "aapcs-vfp")
- Builder.defineMacro("__ARM_PCS_VFP", "1");
}
+ if ((!SoftFloat && !SoftFloatABI) || ABI == "aapcs-vfp" ||
+ ABI == "aapcs16")
+ Builder.defineMacro("__ARM_PCS_VFP", "1");
+
if (SoftFloat)
Builder.defineMacro("__SOFTFP__");
@@ -4872,7 +5253,7 @@ public:
Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM",
Opts.ShortEnums ? "1" : "4");
- if (ArchVersion >= 6 && CPUAttr != "6M") {
+ if (ArchVersion >= 6 && CPUAttr != "6M" && CPUAttr != "8M_BASE") {
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
@@ -4910,8 +5291,8 @@ public:
BuiltinVaListKind getBuiltinVaListKind() const override {
return IsAAPCS
? AAPCSABIBuiltinVaList
- : (getTriple().isWatchOS() ? TargetInfo::CharPtrBuiltinVaList
- : TargetInfo::VoidPtrBuiltinVaList);
+ : (getTriple().isWatchABI() ? TargetInfo::CharPtrBuiltinVaList
+ : TargetInfo::VoidPtrBuiltinVaList);
}
ArrayRef<const char *> getGCCRegNames() const override;
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
@@ -5001,7 +5382,14 @@ public:
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
- return (CC == CC_AAPCS || CC == CC_AAPCS_VFP) ? CCCR_OK : CCCR_Warning;
+ switch (CC) {
+ case CC_AAPCS:
+ case CC_AAPCS_VFP:
+ case CC_Swift:
+ return CCCR_OK;
+ default:
+ return CCCR_Warning;
+ }
}
int getEHDataRegisterNumber(unsigned RegNo) const override {
@@ -5096,8 +5484,8 @@ const Builtin::Info ARMTargetInfo::BuiltinInfo[] = {
class ARMleTargetInfo : public ARMTargetInfo {
public:
- ARMleTargetInfo(const llvm::Triple &Triple)
- : ARMTargetInfo(Triple, false) { }
+ ARMleTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : ARMTargetInfo(Triple, Opts, /*BigEndian=*/false) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
Builder.defineMacro("__ARMEL__");
@@ -5107,8 +5495,8 @@ public:
class ARMbeTargetInfo : public ARMTargetInfo {
public:
- ARMbeTargetInfo(const llvm::Triple &Triple)
- : ARMTargetInfo(Triple, true) { }
+ ARMbeTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : ARMTargetInfo(Triple, Opts, /*BigEndian=*/true) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
Builder.defineMacro("__ARMEB__");
@@ -5120,12 +5508,10 @@ public:
class WindowsARMTargetInfo : public WindowsTargetInfo<ARMleTargetInfo> {
const llvm::Triple Triple;
public:
- WindowsARMTargetInfo(const llvm::Triple &Triple)
- : WindowsTargetInfo<ARMleTargetInfo>(Triple), Triple(Triple) {
- TLSSupported = false;
+ WindowsARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : WindowsTargetInfo<ARMleTargetInfo>(Triple, Opts), Triple(Triple) {
WCharType = UnsignedShort;
SizeType = UnsignedInt;
- UserLabelPrefix = "";
}
void getVisualStudioDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
@@ -5167,8 +5553,9 @@ public:
// Windows ARM + Itanium C++ ABI Target
class ItaniumWindowsARMleTargetInfo : public WindowsARMTargetInfo {
public:
- ItaniumWindowsARMleTargetInfo(const llvm::Triple &Triple)
- : WindowsARMTargetInfo(Triple) {
+ ItaniumWindowsARMleTargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : WindowsARMTargetInfo(Triple, Opts) {
TheCXXABI.set(TargetCXXABI::GenericARM);
}
@@ -5184,8 +5571,9 @@ public:
// Windows ARM, MS (C++) ABI
class MicrosoftARMleTargetInfo : public WindowsARMTargetInfo {
public:
- MicrosoftARMleTargetInfo(const llvm::Triple &Triple)
- : WindowsARMTargetInfo(Triple) {
+ MicrosoftARMleTargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : WindowsARMTargetInfo(Triple, Opts) {
TheCXXABI.set(TargetCXXABI::Microsoft);
}
@@ -5199,8 +5587,8 @@ public:
// ARM MinGW target
class MinGWARMTargetInfo : public WindowsARMTargetInfo {
public:
- MinGWARMTargetInfo(const llvm::Triple &Triple)
- : WindowsARMTargetInfo(Triple) {
+ MinGWARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : WindowsARMTargetInfo(Triple, Opts) {
TheCXXABI.set(TargetCXXABI::GenericARM);
}
@@ -5217,11 +5605,12 @@ public:
// ARM Cygwin target
class CygwinARMTargetInfo : public ARMleTargetInfo {
public:
- CygwinARMTargetInfo(const llvm::Triple &Triple) : ARMleTargetInfo(Triple) {
+ CygwinARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : ARMleTargetInfo(Triple, Opts) {
TLSSupported = false;
WCharType = UnsignedShort;
DoubleAlign = LongLongAlign = 64;
- DataLayoutString = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64";
+ resetDataLayout("e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -5235,8 +5624,7 @@ public:
}
};
-class DarwinARMTargetInfo :
- public DarwinTargetInfo<ARMleTargetInfo> {
+class DarwinARMTargetInfo : public DarwinTargetInfo<ARMleTargetInfo> {
protected:
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const override {
@@ -5244,15 +5632,15 @@ protected:
}
public:
- DarwinARMTargetInfo(const llvm::Triple &Triple)
- : DarwinTargetInfo<ARMleTargetInfo>(Triple) {
+ DarwinARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : DarwinTargetInfo<ARMleTargetInfo>(Triple, Opts) {
HasAlignMac68kSupport = true;
// iOS always has 64-bit atomic instructions.
// FIXME: This should be based off of the target features in
// ARMleTargetInfo.
MaxAtomicInlineWidth = 64;
- if (Triple.isWatchOS()) {
+ if (Triple.isWatchABI()) {
// Darwin on iOS uses a variant of the ARM C++ ABI.
TheCXXABI.set(TargetCXXABI::WatchOS);
@@ -5268,7 +5656,7 @@ public:
};
class AArch64TargetInfo : public TargetInfo {
- virtual void setDataLayoutString() = 0;
+ virtual void setDataLayout() = 0;
static const TargetInfo::GCCRegAlias GCCRegAliases[];
static const char *const GCCRegNames[];
@@ -5288,9 +5676,8 @@ class AArch64TargetInfo : public TargetInfo {
std::string ABI;
public:
- AArch64TargetInfo(const llvm::Triple &Triple)
+ AArch64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: TargetInfo(Triple), ABI("aapcs") {
-
if (getTriple().getOS() == llvm::Triple::NetBSD) {
WCharType = SignedInt;
@@ -5325,6 +5712,10 @@ public:
// AArch64 targets default to using the ARM C++ ABI.
TheCXXABI.set(TargetCXXABI::GenericAArch64);
+
+ if (Triple.getOS() == llvm::Triple::Linux ||
+ Triple.getOS() == llvm::Triple::UnknownOS)
+ this->MCountName = Opts.EABIVersion == "gnu" ? "\01_mcount" : "mcount";
}
StringRef getABI() const override { return ABI; }
@@ -5341,7 +5732,10 @@ public:
.Case("generic", true)
.Cases("cortex-a53", "cortex-a57", "cortex-a72",
"cortex-a35", "exynos-m1", true)
+ .Case("cortex-a73", true)
.Case("cyclone", true)
+ .Case("kryo", true)
+ .Case("vulcan", true)
.Default(false);
return CPUKnown;
}
@@ -5448,11 +5842,23 @@ public:
V8_1A = 1;
}
- setDataLayoutString();
+ setDataLayout();
return true;
}
+ CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
+ switch (CC) {
+ case CC_C:
+ case CC_Swift:
+ case CC_PreserveMost:
+ case CC_PreserveAll:
+ return CCCR_OK;
+ default:
+ return CCCR_Warning;
+ }
+ }
+
bool isCLZForZeroUndef() const override { return false; }
BuiltinVaListKind getBuiltinVaListKind() const override {
@@ -5598,18 +6004,18 @@ const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = {
};
class AArch64leTargetInfo : public AArch64TargetInfo {
- void setDataLayoutString() override {
+ void setDataLayout() override {
if (getTriple().isOSBinFormatMachO())
- DataLayoutString = "e-m:o-i64:64-i128:128-n32:64-S128";
+ resetDataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
else
- DataLayoutString = "e-m:e-i64:64-i128:128-n32:64-S128";
+ resetDataLayout("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
}
public:
- AArch64leTargetInfo(const llvm::Triple &Triple)
- : AArch64TargetInfo(Triple) {
+ AArch64leTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : AArch64TargetInfo(Triple, Opts) {
BigEndian = false;
- }
+ }
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
Builder.defineMacro("__AARCH64EL__");
@@ -5618,14 +6024,14 @@ public:
};
class AArch64beTargetInfo : public AArch64TargetInfo {
- void setDataLayoutString() override {
+ void setDataLayout() override {
assert(!getTriple().isOSBinFormatMachO());
- DataLayoutString = "E-m:e-i64:64-i128:128-n32:64-S128";
+ resetDataLayout("E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
}
public:
- AArch64beTargetInfo(const llvm::Triple &Triple)
- : AArch64TargetInfo(Triple) { }
+ AArch64beTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : AArch64TargetInfo(Triple, Opts) {}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
Builder.defineMacro("__AARCH64EB__");
@@ -5651,8 +6057,8 @@ protected:
}
public:
- DarwinAArch64TargetInfo(const llvm::Triple &Triple)
- : DarwinTargetInfo<AArch64leTargetInfo>(Triple) {
+ DarwinAArch64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : DarwinTargetInfo<AArch64leTargetInfo>(Triple, Opts) {
Int64Type = SignedLongLong;
WCharType = SignedInt;
UseSignedCharForObjCBool = false;
@@ -5677,11 +6083,15 @@ class HexagonTargetInfo : public TargetInfo {
bool HasHVX, HasHVXDouble;
public:
- HexagonTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ HexagonTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
BigEndian = false;
- DataLayoutString = "e-m:e-p:32:32:32-"
- "i64:64:64-i32:32:32-i16:16:16-i1:8:8-"
- "f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32";
+ // Specify the vector alignment explicitly. For v512x1, the calculated
+ // alignment would be 512*alignment(i1), which is 512 bytes, instead of
+ // the required minimum of 64 bytes.
+ resetDataLayout("e-m:e-p:32:32:32-a:0-n16:32-"
+ "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-"
+ "v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048");
SizeType = UnsignedInt;
PtrDiffType = SignedInt;
IntPtrType = SignedInt;
@@ -5704,7 +6114,19 @@ public:
bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const override {
- return true;
+ switch (*Name) {
+ case 'v':
+ case 'q':
+ if (HasHVX) {
+ Info.setAllowsRegister();
+ return true;
+ }
+ break;
+ case 's':
+ // Relocatable constant.
+ return true;
+ }
+ return false;
}
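Under the tightened validator, "v" and "q" are HVX-only register constraints and "s" remains a plain relocatable-constant match; everything else is now rejected. A small sketch of a "v" binding, with a placeholder register-copy mnemonic (illustrative, not a verified Hexagon encoding):

    typedef int HVXVec __attribute__((__vector_size__(64)));  // 512-bit HVX mode
    HVXVec copy(HVXVec a) {
      HVXVec r;
      __asm__("%0 = %1" : "=v"(r) : "v"(a));  // "v" binds an HVX vector register
      return r;
    }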
void getTargetDefines(const LangOptions &Opts,
@@ -5776,12 +6198,23 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__QDSP6_V5__");
Builder.defineMacro("__QDSP6_ARCH__", "5");
}
+ } else if (CPU == "hexagonv55") {
+ Builder.defineMacro("__HEXAGON_V55__");
+ Builder.defineMacro("__HEXAGON_ARCH__", "55");
+ Builder.defineMacro("__QDSP6_V55__");
+ Builder.defineMacro("__QDSP6_ARCH__", "55");
} else if (CPU == "hexagonv60") {
Builder.defineMacro("__HEXAGON_V60__");
Builder.defineMacro("__HEXAGON_ARCH__", "60");
Builder.defineMacro("__QDSP6_V60__");
Builder.defineMacro("__QDSP6_ARCH__", "60");
}
+
+ if (hasFeature("hvx")) {
+ Builder.defineMacro("__HVX__");
+ if (hasFeature("hvx-double"))
+ Builder.defineMacro("__HVXDBL__");
+ }
}
bool HexagonTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
@@ -5842,23 +6275,133 @@ const Builtin::Info HexagonTargetInfo::BuiltinInfo[] = {
#include "clang/Basic/BuiltinsHexagon.def"
};
+class LanaiTargetInfo : public TargetInfo {
+ // Class for Lanai (32-bit).
+ // The CPU profiles supported by the Lanai backend
+ enum CPUKind {
+ CK_NONE,
+ CK_V11,
+ } CPU;
+
+ static const TargetInfo::GCCRegAlias GCCRegAliases[];
+ static const char *const GCCRegNames[];
+
+public:
+ LanaiTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
+ // Description string has to be kept in sync with backend.
+ resetDataLayout("E" // Big endian
+                    "-m:e" // ELF name mangling
+ "-p:32:32" // 32 bit pointers, 32 bit aligned
+ "-i64:64" // 64 bit integers, 64 bit aligned
+ "-a:0:32" // 32 bit alignment of objects of aggregate type
+ "-n32" // 32 bit native integer width
+ "-S64" // 64 bit natural stack alignment
+ );
+
+  // Set RegParmMax to match what -mregparm was set to in the old toolchain.
+ RegParmMax = 4;
+
+ // Set the default CPU to V11
+ CPU = CK_V11;
+
+  // Temporary approach to make everything at least word-aligned and to allow
+  // safe casting between pointers with different alignment requirements.
+ // TODO: Remove this when there are no more cast align warnings on the
+ // firmware.
+ MinGlobalAlign = 32;
+ }
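MinGlobalAlign is expressed in bits, so the 32 here floors every global at 4-byte alignment even when its natural alignment is smaller; for instance:

    char tag[3];  // emitted with align 4 under MinGlobalAlign = 32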
+
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override {
+ // Define __lanai__ when building for target lanai.
+ Builder.defineMacro("__lanai__");
+
+ // Set define for the CPU specified.
+ switch (CPU) {
+ case CK_V11:
+ Builder.defineMacro("__LANAI_V11__");
+ break;
+ case CK_NONE:
+ llvm_unreachable("Unhandled target CPU");
+ }
+ }
+
+ bool setCPU(const std::string &Name) override {
+ CPU = llvm::StringSwitch<CPUKind>(Name)
+ .Case("v11", CK_V11)
+ .Default(CK_NONE);
+
+ return CPU != CK_NONE;
+ }
+
+ bool hasFeature(StringRef Feature) const override {
+ return llvm::StringSwitch<bool>(Feature).Case("lanai", true).Default(false);
+ }
+
+ ArrayRef<const char *> getGCCRegNames() const override;
+
+ ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
+
+ BuiltinVaListKind getBuiltinVaListKind() const override {
+ return TargetInfo::VoidPtrBuiltinVaList;
+ }
+
+ ArrayRef<Builtin::Info> getTargetBuiltins() const override { return None; }
+
+ bool validateAsmConstraint(const char *&Name,
+ TargetInfo::ConstraintInfo &info) const override {
+ return false;
+ }
+
+ const char *getClobbers() const override { return ""; }
+};
+
+const char *const LanaiTargetInfo::GCCRegNames[] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21",
+ "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"};
+
+ArrayRef<const char *> LanaiTargetInfo::getGCCRegNames() const {
+ return llvm::makeArrayRef(GCCRegNames);
+}
+
+const TargetInfo::GCCRegAlias LanaiTargetInfo::GCCRegAliases[] = {
+ {{"pc"}, "r2"},
+ {{"sp"}, "r4"},
+ {{"fp"}, "r5"},
+ {{"rv"}, "r8"},
+ {{"rr1"}, "r10"},
+ {{"rr2"}, "r11"},
+ {{"rca"}, "r15"},
+};
+
+ArrayRef<TargetInfo::GCCRegAlias> LanaiTargetInfo::getGCCRegAliases() const {
+ return llvm::makeArrayRef(GCCRegAliases);
+}
+
// Shared base class for SPARC v8 (32-bit) and SPARC v9 (64-bit).
class SparcTargetInfo : public TargetInfo {
static const TargetInfo::GCCRegAlias GCCRegAliases[];
static const char * const GCCRegNames[];
bool SoftFloat;
public:
- SparcTargetInfo(const llvm::Triple &Triple)
+ SparcTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple), SoftFloat(false) {}
+ int getEHDataRegisterNumber(unsigned RegNo) const override {
+ if (RegNo == 0) return 24;
+ if (RegNo == 1) return 25;
+ return -1;
+ }
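In the GCC register numbering used by getGCCRegNames() below (0-7 %g, 8-15 %o, 16-23 %l, 24-31 %i), 24 and 25 are %i0 and %i1; the mapping presumably mirrors where the EH runtime expects the landing-pad values:

    // RegNo 0 -> 24 (%i0)  exception object pointer on landing pads
    // RegNo 1 -> 25 (%i1)  type-selector value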
+
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override {
- // The backend doesn't actually handle soft float yet, but in case someone
- // is using the support for the front end continue to support it.
+ // Check if software floating point is enabled
auto Feature = std::find(Features.begin(), Features.end(), "+soft-float");
if (Feature != Features.end()) {
SoftFloat = true;
- Features.erase(Feature);
}
return true;
}
@@ -5878,6 +6421,10 @@ public:
.Default(false);
}
+ bool hasSjLjLowering() const override {
+ return true;
+ }
+
ArrayRef<Builtin::Info> getTargetBuiltins() const override {
// FIXME: Implement!
return None;
@@ -5924,7 +6471,17 @@ public:
CK_NIAGARA,
CK_NIAGARA2,
CK_NIAGARA3,
- CK_NIAGARA4
+ CK_NIAGARA4,
+ CK_MYRIAD2_1,
+ CK_MYRIAD2_2,
+ CK_LEON2,
+ CK_LEON2_AT697E,
+ CK_LEON2_AT697F,
+ CK_LEON3,
+ CK_LEON3_UT699,
+ CK_LEON3_GR712RC,
+ CK_LEON4,
+ CK_LEON4_GR740
} CPU = CK_GENERIC;
enum CPUGeneration {
@@ -5943,6 +6500,16 @@ public:
case CK_SPARCLITE86X:
case CK_SPARCLET:
case CK_TSC701:
+ case CK_MYRIAD2_1:
+ case CK_MYRIAD2_2:
+ case CK_LEON2:
+ case CK_LEON2_AT697E:
+ case CK_LEON2_AT697F:
+ case CK_LEON3:
+ case CK_LEON3_UT699:
+ case CK_LEON3_GR712RC:
+ case CK_LEON4:
+ case CK_LEON4_GR740:
return CG_V8;
case CK_V9:
case CK_ULTRASPARC:
@@ -5973,6 +6540,17 @@ public:
.Case("niagara2", CK_NIAGARA2)
.Case("niagara3", CK_NIAGARA3)
.Case("niagara4", CK_NIAGARA4)
+ .Case("myriad2", CK_MYRIAD2_1)
+ .Case("myriad2.1", CK_MYRIAD2_1)
+ .Case("myriad2.2", CK_MYRIAD2_2)
+ .Case("leon2", CK_LEON2)
+ .Case("at697e", CK_LEON2_AT697E)
+ .Case("at697f", CK_LEON2_AT697F)
+ .Case("leon3", CK_LEON3)
+ .Case("ut699", CK_LEON3_UT699)
+ .Case("gr712rc", CK_LEON3_GR712RC)
+ .Case("leon4", CK_LEON4)
+ .Case("gr740", CK_LEON4_GR740)
.Default(CK_GENERIC);
}
@@ -6035,8 +6613,9 @@ ArrayRef<TargetInfo::GCCRegAlias> SparcTargetInfo::getGCCRegAliases() const {
// SPARC v8 is the 32-bit mode selected by Triple::sparc.
class SparcV8TargetInfo : public SparcTargetInfo {
public:
- SparcV8TargetInfo(const llvm::Triple &Triple) : SparcTargetInfo(Triple) {
- DataLayoutString = "E-m:e-p:32:32-i64:64-f128:64-n32-S64";
+ SparcV8TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SparcTargetInfo(Triple, Opts) {
+ resetDataLayout("E-m:e-p:32:32-i64:64-f128:64-n32-S64");
// NetBSD / OpenBSD use long (same as llvm default); everyone else uses int.
switch (getTriple().getOS()) {
default:
@@ -6051,6 +6630,7 @@ public:
PtrDiffType = SignedLong;
break;
}
+ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
void getTargetDefines(const LangOptions &Opts,
@@ -6070,24 +6650,44 @@ public:
}
break;
}
+ if (getTriple().getVendor() == llvm::Triple::Myriad) {
+ switch (CPU) {
+ case CK_MYRIAD2_1:
+ Builder.defineMacro("__myriad2", "1");
+ Builder.defineMacro("__myriad2__", "1");
+ break;
+ case CK_MYRIAD2_2:
+ Builder.defineMacro("__myriad2", "2");
+ Builder.defineMacro("__myriad2__", "2");
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ bool hasSjLjLowering() const override {
+ return true;
}
};
// SPARCV8el is the 32-bit little-endian mode selected by Triple::sparcel.
class SparcV8elTargetInfo : public SparcV8TargetInfo {
public:
- SparcV8elTargetInfo(const llvm::Triple &Triple) : SparcV8TargetInfo(Triple) {
- DataLayoutString = "e-m:e-p:32:32-i64:64-f128:64-n32-S64";
- BigEndian = false;
+ SparcV8elTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SparcV8TargetInfo(Triple, Opts) {
+ resetDataLayout("e-m:e-p:32:32-i64:64-f128:64-n32-S64");
+ BigEndian = false;
}
};
// SPARC v9 is the 64-bit mode selected by Triple::sparcv9.
class SparcV9TargetInfo : public SparcTargetInfo {
public:
- SparcV9TargetInfo(const llvm::Triple &Triple) : SparcTargetInfo(Triple) {
+ SparcV9TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SparcTargetInfo(Triple, Opts) {
// FIXME: Support Sparc quad-precision long double?
- DataLayoutString = "E-m:e-i64:64-n32:64-S128";
+ resetDataLayout("E-m:e-i64:64-n32:64-S128");
// This is an LP64 platform.
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
@@ -6134,7 +6734,7 @@ class SystemZTargetInfo : public TargetInfo {
bool HasVector;
public:
- SystemZTargetInfo(const llvm::Triple &Triple)
+ SystemZTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple), CPU("z10"), HasTransactionalExecution(false),
HasVector(false) {
IntMaxType = SignedLong;
@@ -6148,7 +6748,7 @@ public:
LongDoubleFormat = &llvm::APFloat::IEEEquad;
DefaultAlignForAttributeAligned = 64;
MinGlobalAlign = 16;
- DataLayoutString = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64";
+ resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
}
void getTargetDefines(const LangOptions &Opts,
@@ -6223,8 +6823,8 @@ public:
// If we use the vector ABI, vector types are 64-bit aligned.
if (HasVector) {
MaxVectorAlign = 64;
- DataLayoutString = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
- "-v128:64-a:8:16-n32:64";
+ resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
+ "-v128:64-a:8:16-n32:64");
}
return true;
}
@@ -6237,6 +6837,16 @@ public:
.Default(false);
}
+ CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
+ switch (CC) {
+ case CC_C:
+ case CC_Swift:
+ return CCCR_OK;
+ default:
+ return CCCR_Warning;
+ }
+ }
+
StringRef getABI() const override {
if (HasVector)
return "vector";
@@ -6251,6 +6861,8 @@ public:
const Builtin::Info SystemZTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
{ #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
+ { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE },
#include "clang/Basic/BuiltinsSystemZ.def"
};
@@ -6298,7 +6910,8 @@ class MSP430TargetInfo : public TargetInfo {
static const char *const GCCRegNames[];
public:
- MSP430TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ MSP430TargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
BigEndian = false;
TLSSupported = false;
IntWidth = 16;
@@ -6314,7 +6927,7 @@ public:
IntPtrType = SignedInt;
PtrDiffType = SignedInt;
SigAtomicType = SignedLong;
- DataLayoutString = "e-m:e-p:16:16-i32:16:32-a:16-n8:16";
+ resetDataLayout("e-m:e-p:16:16-i32:16:32-a:16-n8:16");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -6385,7 +6998,8 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
class TCETargetInfo : public TargetInfo {
public:
- TCETargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ TCETargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
TLSSupported = false;
IntWidth = 32;
LongWidth = LongLongWidth = 32;
@@ -6407,8 +7021,8 @@ public:
FloatFormat = &llvm::APFloat::IEEEsingle;
DoubleFormat = &llvm::APFloat::IEEEsingle;
LongDoubleFormat = &llvm::APFloat::IEEEsingle;
- DataLayoutString = "E-p:32:32-i8:8:32-i16:16:32-i64:32"
- "-f64:32-v64:32-v128:32-a:0:32-n32";
+ resetDataLayout("E-p:32:32-i8:8:32-i16:16:32-i64:32"
+ "-f64:32-v64:32-v128:32-a:0:32-n32");
AddrSpaceMap = &TCEOpenCLAddrSpaceMap;
UseAddrSpaceMapMangling = true;
}
@@ -6438,7 +7052,8 @@ public:
class BPFTargetInfo : public TargetInfo {
public:
- BPFTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ BPFTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
SizeType = UnsignedLong;
PtrDiffType = SignedLong;
@@ -6448,10 +7063,10 @@ public:
RegParmMax = 5;
if (Triple.getArch() == llvm::Triple::bpfeb) {
BigEndian = true;
- DataLayoutString = "E-m:e-p:64:64-i64:64-n32:64-S128";
+ resetDataLayout("E-m:e-p:64:64-i64:64-n32:64-S128");
} else {
BigEndian = false;
- DataLayoutString = "e-m:e-p:64:64-i64:64-n32:64-S128";
+ resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128");
}
MaxAtomicPromoteWidth = 64;
MaxAtomicInlineWidth = 64;
@@ -6485,8 +7100,25 @@ public:
}
};
-class MipsTargetInfoBase : public TargetInfo {
- virtual void setDataLayoutString() = 0;
+class MipsTargetInfo : public TargetInfo {
+ void setDataLayout() {
+ StringRef Layout;
+
+ if (ABI == "o32")
+ Layout = "m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64";
+ else if (ABI == "n32")
+ Layout = "m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128";
+ else if (ABI == "n64")
+ Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128";
+ else
+ llvm_unreachable("Invalid ABI");
+
+ if (BigEndian)
+ resetDataLayout(("E-" + Layout).str());
+ else
+ resetDataLayout(("e-" + Layout).str());
+ }
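Composing the endianness prefix with the ABI cores gives, for example (derived directly from the branches above):

    // mips (big-endian) + o32 -> "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
    // mipsel + n64            -> "e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"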
+
static const Builtin::Info BuiltinInfo[];
std::string CPU;
@@ -6507,12 +7139,20 @@ protected:
std::string ABI;
public:
- MipsTargetInfoBase(const llvm::Triple &Triple, const std::string &ABIStr,
- const std::string &CPUStr)
- : TargetInfo(Triple), CPU(CPUStr), IsMips16(false), IsMicromips(false),
+ MipsTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple), IsMips16(false), IsMicromips(false),
IsNan2008(false), IsSingleFloat(false), FloatABI(HardFloat),
- DspRev(NoDSP), HasMSA(false), HasFP64(false), ABI(ABIStr) {
+ DspRev(NoDSP), HasMSA(false), HasFP64(false) {
TheCXXABI.set(TargetCXXABI::GenericMIPS);
+ BigEndian = getTriple().getArch() == llvm::Triple::mips ||
+ getTriple().getArch() == llvm::Triple::mips64;
+
+ setABI((getTriple().getArch() == llvm::Triple::mips ||
+ getTriple().getArch() == llvm::Triple::mipsel)
+ ? "o32"
+ : "n64");
+
+ CPU = ABI == "o32" ? "mips32r2" : "mips64r2";
}
bool isNaN2008Default() const {
@@ -6527,22 +7167,99 @@ public:
return IsNan2008;
}
+ bool processorSupportsGPR64() const {
+ return llvm::StringSwitch<bool>(CPU)
+ .Case("mips3", true)
+ .Case("mips4", true)
+ .Case("mips5", true)
+ .Case("mips64", true)
+ .Case("mips64r2", true)
+ .Case("mips64r3", true)
+ .Case("mips64r5", true)
+ .Case("mips64r6", true)
+ .Case("octeon", true)
+ .Default(false);
+ }
+
StringRef getABI() const override { return ABI; }
+ bool setABI(const std::string &Name) override {
+ if (Name == "o32") {
+ setO32ABITypes();
+ ABI = Name;
+ return true;
+ }
+
+ if (Name == "n32") {
+ setN32ABITypes();
+ ABI = Name;
+ return true;
+ }
+ if (Name == "n64") {
+ setN64ABITypes();
+ ABI = Name;
+ return true;
+ }
+ return false;
+ }
+
+ void setO32ABITypes() {
+ Int64Type = SignedLongLong;
+ IntMaxType = Int64Type;
+ LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+ LongDoubleWidth = LongDoubleAlign = 64;
+ LongWidth = LongAlign = 32;
+ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
+ PointerWidth = PointerAlign = 32;
+ PtrDiffType = SignedInt;
+ SizeType = UnsignedInt;
+ SuitableAlign = 64;
+ }
+
+ void setN32N64ABITypes() {
+ LongDoubleWidth = LongDoubleAlign = 128;
+ LongDoubleFormat = &llvm::APFloat::IEEEquad;
+ if (getTriple().getOS() == llvm::Triple::FreeBSD) {
+ LongDoubleWidth = LongDoubleAlign = 64;
+ LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+ }
+ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+ SuitableAlign = 128;
+ }
+
+ void setN64ABITypes() {
+ setN32N64ABITypes();
+ Int64Type = SignedLong;
+ IntMaxType = Int64Type;
+ LongWidth = LongAlign = 64;
+ PointerWidth = PointerAlign = 64;
+ PtrDiffType = SignedLong;
+ SizeType = UnsignedLong;
+ }
+
+ void setN32ABITypes() {
+ setN32N64ABITypes();
+ Int64Type = SignedLongLong;
+ IntMaxType = Int64Type;
+ LongWidth = LongAlign = 32;
+ PointerWidth = PointerAlign = 32;
+ PtrDiffType = SignedInt;
+ SizeType = UnsignedInt;
+ }
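Taken together, the helpers above configure the classic MIPS type models; in summary (derived from the assignments above; * = 64-bit IEEE double on FreeBSD, per setN32N64ABITypes):

    //        long/ptr   size_t          int64_t      long double
    // o32    32-bit     unsigned int    long long    64-bit IEEE double
    // n32    32-bit     unsigned int    long long    128-bit IEEE quad*
    // n64    64-bit     unsigned long   long         128-bit IEEE quad*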
+
bool setCPU(const std::string &Name) override {
- bool IsMips32 = getTriple().getArch() == llvm::Triple::mips ||
- getTriple().getArch() == llvm::Triple::mipsel;
CPU = Name;
return llvm::StringSwitch<bool>(Name)
- .Case("mips1", IsMips32)
- .Case("mips2", IsMips32)
+ .Case("mips1", true)
+ .Case("mips2", true)
.Case("mips3", true)
.Case("mips4", true)
.Case("mips5", true)
- .Case("mips32", IsMips32)
- .Case("mips32r2", IsMips32)
- .Case("mips32r3", IsMips32)
- .Case("mips32r5", IsMips32)
- .Case("mips32r6", IsMips32)
+ .Case("mips32", true)
+ .Case("mips32r2", true)
+ .Case("mips32r3", true)
+ .Case("mips32r5", true)
+ .Case("mips32r6", true)
.Case("mips64", true)
.Case("mips64r2", true)
.Case("mips64r3", true)
@@ -6557,6 +7274,8 @@ public:
initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
StringRef CPU,
const std::vector<std::string> &FeaturesVec) const override {
+ if (CPU.empty())
+ CPU = getCPU();
if (CPU == "octeon")
Features["mips64r2"] = Features["cnmips"] = true;
else
@@ -6566,11 +7285,54 @@ public:
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
+ if (BigEndian) {
+ DefineStd(Builder, "MIPSEB", Opts);
+ Builder.defineMacro("_MIPSEB");
+ } else {
+ DefineStd(Builder, "MIPSEL", Opts);
+ Builder.defineMacro("_MIPSEL");
+ }
+
Builder.defineMacro("__mips__");
Builder.defineMacro("_mips");
if (Opts.GNUMode)
Builder.defineMacro("mips");
+ if (ABI == "o32") {
+ Builder.defineMacro("__mips", "32");
+ Builder.defineMacro("_MIPS_ISA", "_MIPS_ISA_MIPS32");
+ } else {
+ Builder.defineMacro("__mips", "64");
+ Builder.defineMacro("__mips64");
+ Builder.defineMacro("__mips64__");
+ Builder.defineMacro("_MIPS_ISA", "_MIPS_ISA_MIPS64");
+ }
+
+ const std::string ISARev = llvm::StringSwitch<std::string>(getCPU())
+ .Cases("mips32", "mips64", "1")
+ .Cases("mips32r2", "mips64r2", "2")
+ .Cases("mips32r3", "mips64r3", "3")
+ .Cases("mips32r5", "mips64r5", "5")
+ .Cases("mips32r6", "mips64r6", "6")
+ .Default("");
+ if (!ISARev.empty())
+ Builder.defineMacro("__mips_isa_rev", ISARev);
+
+ if (ABI == "o32") {
+ Builder.defineMacro("__mips_o32");
+ Builder.defineMacro("_ABIO32", "1");
+ Builder.defineMacro("_MIPS_SIM", "_ABIO32");
+ } else if (ABI == "n32") {
+ Builder.defineMacro("__mips_n32");
+ Builder.defineMacro("_ABIN32", "2");
+ Builder.defineMacro("_MIPS_SIM", "_ABIN32");
+ } else if (ABI == "n64") {
+ Builder.defineMacro("__mips_n64");
+ Builder.defineMacro("_ABI64", "3");
+ Builder.defineMacro("_MIPS_SIM", "_ABI64");
+ } else
+ llvm_unreachable("Invalid ABI.");
+
Builder.defineMacro("__REGISTER_PREFIX__", "");
switch (FloatABI) {
@@ -6627,6 +7389,13 @@ public:
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+
+ // 32-bit MIPS processors don't have the necessary lld/scd instructions
+ // found in 64-bit processors. In the case of O32 on a 64-bit processor,
+ // the instructions exist but using them violates the ABI since they
+ // require 64-bit GPRs and O32 only supports 32-bit GPRs.
+ if (ABI == "n32" || ABI == "n64")
+ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
}
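Code that gates 64-bit atomics on this macro now sees it only under the 64-bit-GPR ABIs; a minimal consumer sketch:

    static long long counter;
    long long bump(void) {
    #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
      return __sync_add_and_fetch(&counter, 1LL);  // lld/scd-backed on n32/n64
    #else
      return ++counter;  // o32: no inline 8-byte atomic; not thread-safe here
    #endif
    }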
ArrayRef<Builtin::Info> getTargetBuiltins() const override {
@@ -6657,7 +7426,8 @@ public:
"$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
// Hi/lo and condition register names
"hi", "lo", "", "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4",
- "$fcc5","$fcc6","$fcc7",
+ "$fcc5","$fcc6","$fcc7","$ac1hi","$ac1lo","$ac2hi","$ac2lo",
+ "$ac3hi","$ac3lo",
// MSA register names
"$w0", "$w1", "$w2", "$w3", "$w4", "$w5", "$w6", "$w7",
"$w8", "$w9", "$w10", "$w11", "$w12", "$w13", "$w14", "$w15",
@@ -6669,7 +7439,6 @@ public:
};
return llvm::makeArrayRef(GCCRegNames);
}
- ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override = 0;
bool validateAsmConstraint(const char *&Name,
TargetInfo::ConstraintInfo &Info) const override {
switch (*Name) {
@@ -6780,7 +7549,7 @@ public:
IsNan2008 = false;
}
- setDataLayoutString();
+ setDataLayout();
return true;
}
@@ -6792,304 +7561,94 @@ public:
}
bool isCLZForZeroUndef() const override { return false; }
-};
-
-const Builtin::Info MipsTargetInfoBase::BuiltinInfo[] = {
-#define BUILTIN(ID, TYPE, ATTRS) \
- { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
- { #ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr },
-#include "clang/Basic/BuiltinsMips.def"
-};
-
-class Mips32TargetInfoBase : public MipsTargetInfoBase {
-public:
- Mips32TargetInfoBase(const llvm::Triple &Triple)
- : MipsTargetInfoBase(Triple, "o32", "mips32r2") {
- SizeType = UnsignedInt;
- PtrDiffType = SignedInt;
- Int64Type = SignedLongLong;
- IntMaxType = Int64Type;
- MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
- }
- bool setABI(const std::string &Name) override {
- if (Name == "o32" || Name == "eabi") {
- ABI = Name;
- return true;
- }
- return false;
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- MipsTargetInfoBase::getTargetDefines(Opts, Builder);
-
- Builder.defineMacro("__mips", "32");
- Builder.defineMacro("_MIPS_ISA", "_MIPS_ISA_MIPS32");
-
- const std::string& CPUStr = getCPU();
- if (CPUStr == "mips32")
- Builder.defineMacro("__mips_isa_rev", "1");
- else if (CPUStr == "mips32r2")
- Builder.defineMacro("__mips_isa_rev", "2");
- else if (CPUStr == "mips32r3")
- Builder.defineMacro("__mips_isa_rev", "3");
- else if (CPUStr == "mips32r5")
- Builder.defineMacro("__mips_isa_rev", "5");
- else if (CPUStr == "mips32r6")
- Builder.defineMacro("__mips_isa_rev", "6");
- if (ABI == "o32") {
- Builder.defineMacro("__mips_o32");
- Builder.defineMacro("_ABIO32", "1");
- Builder.defineMacro("_MIPS_SIM", "_ABIO32");
- }
- else if (ABI == "eabi")
- Builder.defineMacro("__mips_eabi");
- else
- llvm_unreachable("Invalid ABI for Mips32.");
- }
ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
- static const TargetInfo::GCCRegAlias GCCRegAliases[] = {
- { { "at" }, "$1" },
- { { "v0" }, "$2" },
- { { "v1" }, "$3" },
- { { "a0" }, "$4" },
- { { "a1" }, "$5" },
- { { "a2" }, "$6" },
- { { "a3" }, "$7" },
- { { "t0" }, "$8" },
- { { "t1" }, "$9" },
- { { "t2" }, "$10" },
- { { "t3" }, "$11" },
- { { "t4" }, "$12" },
- { { "t5" }, "$13" },
- { { "t6" }, "$14" },
- { { "t7" }, "$15" },
- { { "s0" }, "$16" },
- { { "s1" }, "$17" },
- { { "s2" }, "$18" },
- { { "s3" }, "$19" },
- { { "s4" }, "$20" },
- { { "s5" }, "$21" },
- { { "s6" }, "$22" },
- { { "s7" }, "$23" },
- { { "t8" }, "$24" },
- { { "t9" }, "$25" },
- { { "k0" }, "$26" },
- { { "k1" }, "$27" },
- { { "gp" }, "$28" },
- { { "sp","$sp" }, "$29" },
- { { "fp","$fp" }, "$30" },
- { { "ra" }, "$31" }
- };
- return llvm::makeArrayRef(GCCRegAliases);
- }
-};
-
-class Mips32EBTargetInfo : public Mips32TargetInfoBase {
- void setDataLayoutString() override {
- DataLayoutString = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64";
- }
-
-public:
- Mips32EBTargetInfo(const llvm::Triple &Triple)
- : Mips32TargetInfoBase(Triple) {
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- DefineStd(Builder, "MIPSEB", Opts);
- Builder.defineMacro("_MIPSEB");
- Mips32TargetInfoBase::getTargetDefines(Opts, Builder);
- }
-};
-
-class Mips32ELTargetInfo : public Mips32TargetInfoBase {
- void setDataLayoutString() override {
- DataLayoutString = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64";
- }
-
-public:
- Mips32ELTargetInfo(const llvm::Triple &Triple)
- : Mips32TargetInfoBase(Triple) {
- BigEndian = false;
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- DefineStd(Builder, "MIPSEL", Opts);
- Builder.defineMacro("_MIPSEL");
- Mips32TargetInfoBase::getTargetDefines(Opts, Builder);
- }
-};
-
-class Mips64TargetInfoBase : public MipsTargetInfoBase {
-public:
- Mips64TargetInfoBase(const llvm::Triple &Triple)
- : MipsTargetInfoBase(Triple, "n64", "mips64r2") {
- LongDoubleWidth = LongDoubleAlign = 128;
- LongDoubleFormat = &llvm::APFloat::IEEEquad;
- if (getTriple().getOS() == llvm::Triple::FreeBSD) {
- LongDoubleWidth = LongDoubleAlign = 64;
- LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+ static const TargetInfo::GCCRegAlias O32RegAliases[] = {
+ {{"at"}, "$1"}, {{"v0"}, "$2"}, {{"v1"}, "$3"},
+ {{"a0"}, "$4"}, {{"a1"}, "$5"}, {{"a2"}, "$6"},
+ {{"a3"}, "$7"}, {{"t0"}, "$8"}, {{"t1"}, "$9"},
+ {{"t2"}, "$10"}, {{"t3"}, "$11"}, {{"t4"}, "$12"},
+ {{"t5"}, "$13"}, {{"t6"}, "$14"}, {{"t7"}, "$15"},
+ {{"s0"}, "$16"}, {{"s1"}, "$17"}, {{"s2"}, "$18"},
+ {{"s3"}, "$19"}, {{"s4"}, "$20"}, {{"s5"}, "$21"},
+ {{"s6"}, "$22"}, {{"s7"}, "$23"}, {{"t8"}, "$24"},
+ {{"t9"}, "$25"}, {{"k0"}, "$26"}, {{"k1"}, "$27"},
+ {{"gp"}, "$28"}, {{"sp", "$sp"}, "$29"}, {{"fp", "$fp"}, "$30"},
+ {{"ra"}, "$31"}};
+ static const TargetInfo::GCCRegAlias NewABIRegAliases[] = {
+ {{"at"}, "$1"}, {{"v0"}, "$2"}, {{"v1"}, "$3"},
+ {{"a0"}, "$4"}, {{"a1"}, "$5"}, {{"a2"}, "$6"},
+ {{"a3"}, "$7"}, {{"a4"}, "$8"}, {{"a5"}, "$9"},
+ {{"a6"}, "$10"}, {{"a7"}, "$11"}, {{"t0"}, "$12"},
+ {{"t1"}, "$13"}, {{"t2"}, "$14"}, {{"t3"}, "$15"},
+ {{"s0"}, "$16"}, {{"s1"}, "$17"}, {{"s2"}, "$18"},
+ {{"s3"}, "$19"}, {{"s4"}, "$20"}, {{"s5"}, "$21"},
+ {{"s6"}, "$22"}, {{"s7"}, "$23"}, {{"t8"}, "$24"},
+ {{"t9"}, "$25"}, {{"k0"}, "$26"}, {{"k1"}, "$27"},
+ {{"gp"}, "$28"}, {{"sp", "$sp"}, "$29"}, {{"fp", "$fp"}, "$30"},
+ {{"ra"}, "$31"}};
+ if (ABI == "o32")
+ return llvm::makeArrayRef(O32RegAliases);
+ return llvm::makeArrayRef(NewABIRegAliases);
+ }
+
+ bool hasInt128Type() const override {
+ return ABI == "n32" || ABI == "n64";
+ }
+
+ bool validateTarget(DiagnosticsEngine &Diags) const override {
+ // FIXME: It's valid to use O32 on a 64-bit CPU but the backend can't handle
+ // this yet. It's better to fail here than on the backend assertion.
+ if (processorSupportsGPR64() && ABI == "o32") {
+ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU;
+ return false;
}
- setN64ABITypes();
- SuitableAlign = 128;
- MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
- }
- void setN64ABITypes() {
- LongWidth = LongAlign = 64;
- PointerWidth = PointerAlign = 64;
- SizeType = UnsignedLong;
- PtrDiffType = SignedLong;
- Int64Type = SignedLong;
- IntMaxType = Int64Type;
- }
-
- void setN32ABITypes() {
- LongWidth = LongAlign = 32;
- PointerWidth = PointerAlign = 32;
- SizeType = UnsignedInt;
- PtrDiffType = SignedInt;
- Int64Type = SignedLongLong;
- IntMaxType = Int64Type;
- }
-
- bool setABI(const std::string &Name) override {
- if (Name == "n32") {
- setN32ABITypes();
- ABI = Name;
- return true;
- }
- if (Name == "n64") {
- setN64ABITypes();
- ABI = Name;
- return true;
+ // 64-bit ABI's require 64-bit CPU's.
+ if (!processorSupportsGPR64() && (ABI == "n32" || ABI == "n64")) {
+ Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU;
+ return false;
}
- return false;
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- MipsTargetInfoBase::getTargetDefines(Opts, Builder);
-
- Builder.defineMacro("__mips", "64");
- Builder.defineMacro("__mips64");
- Builder.defineMacro("__mips64__");
- Builder.defineMacro("_MIPS_ISA", "_MIPS_ISA_MIPS64");
-
- const std::string& CPUStr = getCPU();
- if (CPUStr == "mips64")
- Builder.defineMacro("__mips_isa_rev", "1");
- else if (CPUStr == "mips64r2")
- Builder.defineMacro("__mips_isa_rev", "2");
- else if (CPUStr == "mips64r3")
- Builder.defineMacro("__mips_isa_rev", "3");
- else if (CPUStr == "mips64r5")
- Builder.defineMacro("__mips_isa_rev", "5");
- else if (CPUStr == "mips64r6")
- Builder.defineMacro("__mips_isa_rev", "6");
-
- if (ABI == "n32") {
- Builder.defineMacro("__mips_n32");
- Builder.defineMacro("_ABIN32", "2");
- Builder.defineMacro("_MIPS_SIM", "_ABIN32");
- }
- else if (ABI == "n64") {
- Builder.defineMacro("__mips_n64");
- Builder.defineMacro("_ABI64", "3");
- Builder.defineMacro("_MIPS_SIM", "_ABI64");
+ // FIXME: It's valid to use O32 on a mips64/mips64el triple but the backend
+ // can't handle this yet. It's better to fail here than on the
+ // backend assertion.
+ if ((getTriple().getArch() == llvm::Triple::mips64 ||
+ getTriple().getArch() == llvm::Triple::mips64el) &&
+ ABI == "o32") {
+ Diags.Report(diag::err_target_unsupported_abi_for_triple)
+ << ABI << getTriple().str();
+ return false;
}
- else
- llvm_unreachable("Invalid ABI for Mips64.");
- Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
- }
- ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
- static const TargetInfo::GCCRegAlias GCCRegAliases[] = {
- { { "at" }, "$1" },
- { { "v0" }, "$2" },
- { { "v1" }, "$3" },
- { { "a0" }, "$4" },
- { { "a1" }, "$5" },
- { { "a2" }, "$6" },
- { { "a3" }, "$7" },
- { { "a4" }, "$8" },
- { { "a5" }, "$9" },
- { { "a6" }, "$10" },
- { { "a7" }, "$11" },
- { { "t0" }, "$12" },
- { { "t1" }, "$13" },
- { { "t2" }, "$14" },
- { { "t3" }, "$15" },
- { { "s0" }, "$16" },
- { { "s1" }, "$17" },
- { { "s2" }, "$18" },
- { { "s3" }, "$19" },
- { { "s4" }, "$20" },
- { { "s5" }, "$21" },
- { { "s6" }, "$22" },
- { { "s7" }, "$23" },
- { { "t8" }, "$24" },
- { { "t9" }, "$25" },
- { { "k0" }, "$26" },
- { { "k1" }, "$27" },
- { { "gp" }, "$28" },
- { { "sp","$sp" }, "$29" },
- { { "fp","$fp" }, "$30" },
- { { "ra" }, "$31" }
- };
- return llvm::makeArrayRef(GCCRegAliases);
- }
-
- bool hasInt128Type() const override { return true; }
-};
-
-class Mips64EBTargetInfo : public Mips64TargetInfoBase {
- void setDataLayoutString() override {
- if (ABI == "n32")
- DataLayoutString = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128";
- else
- DataLayoutString = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128";
-
- }
+ // FIXME: It's valid to use N32/N64 on a mips/mipsel triple but the backend
+ // can't handle this yet. It's better to fail here than on the
+ // backend assertion.
+ if ((getTriple().getArch() == llvm::Triple::mips ||
+ getTriple().getArch() == llvm::Triple::mipsel) &&
+ (ABI == "n32" || ABI == "n64")) {
+ Diags.Report(diag::err_target_unsupported_abi_for_triple)
+ << ABI << getTriple().str();
+ return false;
+ }
-public:
- Mips64EBTargetInfo(const llvm::Triple &Triple)
- : Mips64TargetInfoBase(Triple) {}
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- DefineStd(Builder, "MIPSEB", Opts);
- Builder.defineMacro("_MIPSEB");
- Mips64TargetInfoBase::getTargetDefines(Opts, Builder);
+ return true;
}
};
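validateTarget() turns what used to be backend assertions into driver-time diagnostics; for instance, an O32 request on a 64-bit triple now fails up front (invocation for illustration only; the wording comes from err_target_unsupported_abi_for_triple):

    // clang --target=mips64el-linux-gnu -mabi=o32 -c test.c
    //   -> rejected in validateTarget() instead of asserting in the backend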
-class Mips64ELTargetInfo : public Mips64TargetInfoBase {
- void setDataLayoutString() override {
- if (ABI == "n32")
- DataLayoutString = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128";
- else
- DataLayoutString = "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128";
- }
-public:
- Mips64ELTargetInfo(const llvm::Triple &Triple)
- : Mips64TargetInfoBase(Triple) {
- // Default ABI is n64.
- BigEndian = false;
- }
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override {
- DefineStd(Builder, "MIPSEL", Opts);
- Builder.defineMacro("_MIPSEL");
- Mips64TargetInfoBase::getTargetDefines(Opts, Builder);
- }
+const Builtin::Info MipsTargetInfo::BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS) \
+ { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
+#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
+ { #ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr },
+#include "clang/Basic/BuiltinsMips.def"
};
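The two FIXME-guarded checks above are the core of validateTarget for MIPS: O32 is rejected on 64-bit triples, N32/N64 on 32-bit ones, and everything else passes. A minimal standalone sketch of that predicate, assuming only llvm::Triple and llvm::StringRef (the name isABICompatibleWithArch is hypothetical, not part of this change):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"

// Mirrors the validateTarget logic above: returns false exactly where the
// code reports err_target_unsupported_abi_for_triple.
static bool isABICompatibleWithArch(llvm::Triple::ArchType Arch,
                                    llvm::StringRef ABI) {
  bool Is64 = Arch == llvm::Triple::mips64 || Arch == llvm::Triple::mips64el;
  if (Is64 && ABI == "o32")
    return false; // O32 needs a 32-bit triple until the backend learns it.
  if (!Is64 && (ABI == "n32" || ABI == "n64"))
    return false; // N32/N64 need a 64-bit triple for the same reason.
  return true;
}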
class PNaClTargetInfo : public TargetInfo {
public:
- PNaClTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ PNaClTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : TargetInfo(Triple) {
BigEndian = false;
- this->UserLabelPrefix = "";
this->LongAlign = 32;
this->LongWidth = 32;
this->PointerAlign = 32;
@@ -7141,11 +7700,10 @@ ArrayRef<TargetInfo::GCCRegAlias> PNaClTargetInfo::getGCCRegAliases() const {
}
// We attempt to use the PNaCl (le32) frontend and the Mips32EL backend.
-class NaClMips32ELTargetInfo : public Mips32ELTargetInfo {
+class NaClMips32TargetInfo : public MipsTargetInfo {
public:
- NaClMips32ELTargetInfo(const llvm::Triple &Triple) :
- Mips32ELTargetInfo(Triple) {
- }
+ NaClMips32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : MipsTargetInfo(Triple, Opts) {}
BuiltinVaListKind getBuiltinVaListKind() const override {
return TargetInfo::PNaClABIBuiltinVaList;
@@ -7156,12 +7714,13 @@ class Le64TargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
public:
- Le64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ Le64TargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
BigEndian = false;
NoAsmVariants = true;
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
- DataLayoutString = "e-m:e-v128:32-v16:16-v32:32-v96:32-n8:16:32:64-S128";
+ resetDataLayout("e-m:e-v128:32-v16:16-v32:32-v96:32-n8:16:32:64-S128");
}
void getTargetDefines(const LangOptions &Opts,
@@ -7201,7 +7760,7 @@ class WebAssemblyTargetInfo : public TargetInfo {
} SIMDLevel;
public:
- explicit WebAssemblyTargetInfo(const llvm::Triple &T)
+ explicit WebAssemblyTargetInfo(const llvm::Triple &T, const TargetOptions &)
: TargetInfo(T), SIMDLevel(NoSIMD) {
BigEndian = false;
NoAsmVariants = true;
@@ -7307,10 +7866,11 @@ const Builtin::Info WebAssemblyTargetInfo::BuiltinInfo[] = {
class WebAssembly32TargetInfo : public WebAssemblyTargetInfo {
public:
- explicit WebAssembly32TargetInfo(const llvm::Triple &T)
- : WebAssemblyTargetInfo(T) {
+ explicit WebAssembly32TargetInfo(const llvm::Triple &T,
+ const TargetOptions &Opts)
+ : WebAssemblyTargetInfo(T, Opts) {
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
- DataLayoutString = "e-m:e-p:32:32-i64:64-n32:64-S128";
+ resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128");
}
protected:
@@ -7323,12 +7883,13 @@ protected:
class WebAssembly64TargetInfo : public WebAssemblyTargetInfo {
public:
- explicit WebAssembly64TargetInfo(const llvm::Triple &T)
- : WebAssemblyTargetInfo(T) {
+ explicit WebAssembly64TargetInfo(const llvm::Triple &T,
+ const TargetOptions &Opts)
+ : WebAssemblyTargetInfo(T, Opts) {
LongAlign = LongWidth = 64;
PointerAlign = PointerWidth = 64;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
- DataLayoutString = "e-m:e-p:64:64-i64:64-n32:64-S128";
+ resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128");
}
protected:
@@ -7356,7 +7917,8 @@ static const unsigned SPIRAddrSpaceMap[] = {
};
class SPIRTargetInfo : public TargetInfo {
public:
- SPIRTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
"SPIR target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
@@ -7393,23 +7955,30 @@ public:
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
- return (CC == CC_SpirFunction || CC == CC_SpirKernel) ? CCCR_OK
- : CCCR_Warning;
+ return (CC == CC_SpirFunction || CC == CC_OpenCLKernel) ? CCCR_OK
+ : CCCR_Warning;
}
CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override {
return CC_SpirFunction;
}
+
+ void setSupportedOpenCLOpts() override {
+ // Assume all OpenCL extensions and optional core features are supported
+ // for SPIR since it is a generic target.
+ getSupportedOpenCLOpts().setAll();
+ }
};
class SPIR32TargetInfo : public SPIRTargetInfo {
public:
- SPIR32TargetInfo(const llvm::Triple &Triple) : SPIRTargetInfo(Triple) {
+ SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SPIRTargetInfo(Triple, Opts) {
PointerWidth = PointerAlign = 32;
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
- DataLayoutString = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -7419,12 +7988,13 @@ public:
class SPIR64TargetInfo : public SPIRTargetInfo {
public:
- SPIR64TargetInfo(const llvm::Triple &Triple) : SPIRTargetInfo(Triple) {
+ SPIR64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SPIRTargetInfo(Triple, Opts) {
PointerWidth = PointerAlign = 64;
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
- DataLayoutString = "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024";
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -7435,7 +8005,8 @@ public:
class XCoreTargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
public:
- XCoreTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+ XCoreTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+ : TargetInfo(Triple) {
BigEndian = false;
NoAsmVariants = true;
LongLongAlign = 32;
@@ -7447,8 +8018,8 @@ public:
WCharType = UnsignedChar;
WIntType = UnsignedInt;
UseZeroLengthBitfieldAlignment = true;
- DataLayoutString = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32"
- "-f64:32-a:0:32-n32";
+ resetDataLayout("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32"
+ "-f64:32-a:0:32-n32");
}
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override {
@@ -7482,6 +8053,9 @@ public:
// R0=ExceptionPointerRegister R1=ExceptionSelectorRegister
return (RegNo < 2)? RegNo : -1;
}
+ bool allowsLargerPreferedTypeAlignment() const override {
+ return false;
+ }
};
const Builtin::Info XCoreTargetInfo::BuiltinInfo[] = {
@@ -7495,8 +8069,8 @@ const Builtin::Info XCoreTargetInfo::BuiltinInfo[] = {
// x86_32 Android target
class AndroidX86_32TargetInfo : public LinuxTargetInfo<X86_32TargetInfo> {
public:
- AndroidX86_32TargetInfo(const llvm::Triple &Triple)
- : LinuxTargetInfo<X86_32TargetInfo>(Triple) {
+ AndroidX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : LinuxTargetInfo<X86_32TargetInfo>(Triple, Opts) {
SuitableAlign = 32;
LongDoubleWidth = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble;
@@ -7506,8 +8080,8 @@ public:
// x86_64 Android target
class AndroidX86_64TargetInfo : public LinuxTargetInfo<X86_64TargetInfo> {
public:
- AndroidX86_64TargetInfo(const llvm::Triple &Triple)
- : LinuxTargetInfo<X86_64TargetInfo>(Triple) {
+ AndroidX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : LinuxTargetInfo<X86_64TargetInfo>(Triple, Opts) {
LongDoubleFormat = &llvm::APFloat::IEEEquad;
}
@@ -7515,13 +8089,50 @@ public:
return true;
}
};
+
+// 32-bit RenderScript is armv7 with the width and alignment of 'long' set to 8 bytes
+class RenderScript32TargetInfo : public ARMleTargetInfo {
+public:
+ RenderScript32TargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : ARMleTargetInfo(llvm::Triple("armv7", Triple.getVendorName(),
+ Triple.getOSName(),
+ Triple.getEnvironmentName()),
+ Opts) {
+ LongWidth = LongAlign = 64;
+ }
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override {
+ Builder.defineMacro("__RENDERSCRIPT__");
+ ARMleTargetInfo::getTargetDefines(Opts, Builder);
+ }
+};
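The constructor above is the whole point of the renderscript32 target: it is ordinary little-endian armv7 except that 'long' is widened to 64 bits. A hedged illustration of what code compiled for that target would observe (the -target spelling is an assumption; the asserts are illustrative, not part of this change):

// Hypothetically compiled with '-target renderscript32-none-linux-gnueabi':
static_assert(sizeof(long) == 8,
              "RenderScript32 widens 'long' to 64 bits on 32-bit ARM");
static_assert(sizeof(void *) == 4,
              "pointers stay 32-bit, as on ordinary armv7");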
+
+// 64-bit RenderScript is aarch64
+class RenderScript64TargetInfo : public AArch64leTargetInfo {
+public:
+ RenderScript64TargetInfo(const llvm::Triple &Triple,
+ const TargetOptions &Opts)
+ : AArch64leTargetInfo(llvm::Triple("aarch64", Triple.getVendorName(),
+ Triple.getOSName(),
+ Triple.getEnvironmentName()),
+ Opts) {}
+
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override {
+ Builder.defineMacro("__RENDERSCRIPT__");
+ AArch64leTargetInfo::getTargetDefines(Opts, Builder);
+ }
+};
+
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//
-static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
+static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
+ const TargetOptions &Opts) {
llvm::Triple::OSType os = Triple.getOS();
switch (Triple.getArch()) {
@@ -7529,414 +8140,426 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) {
return nullptr;
case llvm::Triple::xcore:
- return new XCoreTargetInfo(Triple);
+ return new XCoreTargetInfo(Triple, Opts);
case llvm::Triple::hexagon:
- return new HexagonTargetInfo(Triple);
+ return new HexagonTargetInfo(Triple, Opts);
+
+ case llvm::Triple::lanai:
+ return new LanaiTargetInfo(Triple, Opts);
case llvm::Triple::aarch64:
if (Triple.isOSDarwin())
- return new DarwinAArch64TargetInfo(Triple);
+ return new DarwinAArch64TargetInfo(Triple, Opts);
switch (os) {
case llvm::Triple::CloudABI:
- return new CloudABITargetInfo<AArch64leTargetInfo>(Triple);
+ return new CloudABITargetInfo<AArch64leTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<AArch64leTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<AArch64leTargetInfo>(Triple, Opts);
case llvm::Triple::Linux:
- return new LinuxTargetInfo<AArch64leTargetInfo>(Triple);
+ return new LinuxTargetInfo<AArch64leTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<AArch64leTargetInfo>(Triple);
+ return new NetBSDTargetInfo<AArch64leTargetInfo>(Triple, Opts);
default:
- return new AArch64leTargetInfo(Triple);
+ return new AArch64leTargetInfo(Triple, Opts);
}
case llvm::Triple::aarch64_be:
switch (os) {
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<AArch64beTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<AArch64beTargetInfo>(Triple, Opts);
case llvm::Triple::Linux:
- return new LinuxTargetInfo<AArch64beTargetInfo>(Triple);
+ return new LinuxTargetInfo<AArch64beTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<AArch64beTargetInfo>(Triple);
+ return new NetBSDTargetInfo<AArch64beTargetInfo>(Triple, Opts);
default:
- return new AArch64beTargetInfo(Triple);
+ return new AArch64beTargetInfo(Triple, Opts);
}
case llvm::Triple::arm:
case llvm::Triple::thumb:
if (Triple.isOSBinFormatMachO())
- return new DarwinARMTargetInfo(Triple);
+ return new DarwinARMTargetInfo(Triple, Opts);
switch (os) {
+ case llvm::Triple::CloudABI:
+ return new CloudABITargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::Linux:
- return new LinuxTargetInfo<ARMleTargetInfo>(Triple);
+ return new LinuxTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<ARMleTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<ARMleTargetInfo>(Triple);
+ return new NetBSDTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<ARMleTargetInfo>(Triple);
+ return new OpenBSDTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::Bitrig:
- return new BitrigTargetInfo<ARMleTargetInfo>(Triple);
+ return new BitrigTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<ARMleTargetInfo>(Triple);
+ return new RTEMSTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::NaCl:
- return new NaClTargetInfo<ARMleTargetInfo>(Triple);
+ return new NaClTargetInfo<ARMleTargetInfo>(Triple, Opts);
case llvm::Triple::Win32:
switch (Triple.getEnvironment()) {
case llvm::Triple::Cygnus:
- return new CygwinARMTargetInfo(Triple);
+ return new CygwinARMTargetInfo(Triple, Opts);
case llvm::Triple::GNU:
- return new MinGWARMTargetInfo(Triple);
+ return new MinGWARMTargetInfo(Triple, Opts);
case llvm::Triple::Itanium:
- return new ItaniumWindowsARMleTargetInfo(Triple);
+ return new ItaniumWindowsARMleTargetInfo(Triple, Opts);
case llvm::Triple::MSVC:
default: // Assume MSVC for unknown environments
- return new MicrosoftARMleTargetInfo(Triple);
+ return new MicrosoftARMleTargetInfo(Triple, Opts);
}
default:
- return new ARMleTargetInfo(Triple);
+ return new ARMleTargetInfo(Triple, Opts);
}
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
if (Triple.isOSDarwin())
- return new DarwinARMTargetInfo(Triple);
+ return new DarwinARMTargetInfo(Triple, Opts);
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<ARMbeTargetInfo>(Triple);
+ return new LinuxTargetInfo<ARMbeTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<ARMbeTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<ARMbeTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<ARMbeTargetInfo>(Triple);
+ return new NetBSDTargetInfo<ARMbeTargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<ARMbeTargetInfo>(Triple);
+ return new OpenBSDTargetInfo<ARMbeTargetInfo>(Triple, Opts);
case llvm::Triple::Bitrig:
- return new BitrigTargetInfo<ARMbeTargetInfo>(Triple);
+ return new BitrigTargetInfo<ARMbeTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<ARMbeTargetInfo>(Triple);
+ return new RTEMSTargetInfo<ARMbeTargetInfo>(Triple, Opts);
case llvm::Triple::NaCl:
- return new NaClTargetInfo<ARMbeTargetInfo>(Triple);
+ return new NaClTargetInfo<ARMbeTargetInfo>(Triple, Opts);
default:
- return new ARMbeTargetInfo(Triple);
+ return new ARMbeTargetInfo(Triple, Opts);
}
case llvm::Triple::bpfeb:
case llvm::Triple::bpfel:
- return new BPFTargetInfo(Triple);
+ return new BPFTargetInfo(Triple, Opts);
case llvm::Triple::msp430:
- return new MSP430TargetInfo(Triple);
+ return new MSP430TargetInfo(Triple, Opts);
case llvm::Triple::mips:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<Mips32EBTargetInfo>(Triple);
+ return new LinuxTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<Mips32EBTargetInfo>(Triple);
+ return new RTEMSTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<Mips32EBTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<Mips32EBTargetInfo>(Triple);
+ return new NetBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
default:
- return new Mips32EBTargetInfo(Triple);
+ return new MipsTargetInfo(Triple, Opts);
}
case llvm::Triple::mipsel:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<Mips32ELTargetInfo>(Triple);
+ return new LinuxTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<Mips32ELTargetInfo>(Triple);
+ return new RTEMSTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<Mips32ELTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<Mips32ELTargetInfo>(Triple);
+ return new NetBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::NaCl:
- return new NaClTargetInfo<NaClMips32ELTargetInfo>(Triple);
+ return new NaClTargetInfo<NaClMips32TargetInfo>(Triple, Opts);
default:
- return new Mips32ELTargetInfo(Triple);
+ return new MipsTargetInfo(Triple, Opts);
}
case llvm::Triple::mips64:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<Mips64EBTargetInfo>(Triple);
+ return new LinuxTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<Mips64EBTargetInfo>(Triple);
+ return new RTEMSTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<Mips64EBTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<Mips64EBTargetInfo>(Triple);
+ return new NetBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<Mips64EBTargetInfo>(Triple);
+ return new OpenBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
default:
- return new Mips64EBTargetInfo(Triple);
+ return new MipsTargetInfo(Triple, Opts);
}
case llvm::Triple::mips64el:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<Mips64ELTargetInfo>(Triple);
+ return new LinuxTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<Mips64ELTargetInfo>(Triple);
+ return new RTEMSTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<Mips64ELTargetInfo>(Triple);
+ return new FreeBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<Mips64ELTargetInfo>(Triple);
+ return new NetBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<Mips64ELTargetInfo>(Triple);
+ return new OpenBSDTargetInfo<MipsTargetInfo>(Triple, Opts);
default:
- return new Mips64ELTargetInfo(Triple);
+ return new MipsTargetInfo(Triple, Opts);
}
case llvm::Triple::le32:
switch (os) {
case llvm::Triple::NaCl:
- return new NaClTargetInfo<PNaClTargetInfo>(Triple);
+ return new NaClTargetInfo<PNaClTargetInfo>(Triple, Opts);
default:
return nullptr;
}
case llvm::Triple::le64:
- return new Le64TargetInfo(Triple);
+ return new Le64TargetInfo(Triple, Opts);
case llvm::Triple::ppc:
if (Triple.isOSDarwin())
- return new DarwinPPC32TargetInfo(Triple);
+ return new DarwinPPC32TargetInfo(Triple, Opts);
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<PPC32TargetInfo>(Triple);
+ return new LinuxTargetInfo<PPC32TargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<PPC32TargetInfo>(Triple);
+ return new FreeBSDTargetInfo<PPC32TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<PPC32TargetInfo>(Triple);
+ return new NetBSDTargetInfo<PPC32TargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<PPC32TargetInfo>(Triple);
+ return new OpenBSDTargetInfo<PPC32TargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<PPC32TargetInfo>(Triple);
+ return new RTEMSTargetInfo<PPC32TargetInfo>(Triple, Opts);
default:
- return new PPC32TargetInfo(Triple);
+ return new PPC32TargetInfo(Triple, Opts);
}
case llvm::Triple::ppc64:
if (Triple.isOSDarwin())
- return new DarwinPPC64TargetInfo(Triple);
+ return new DarwinPPC64TargetInfo(Triple, Opts);
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<PPC64TargetInfo>(Triple);
+ return new LinuxTargetInfo<PPC64TargetInfo>(Triple, Opts);
case llvm::Triple::Lv2:
- return new PS3PPUTargetInfo<PPC64TargetInfo>(Triple);
+ return new PS3PPUTargetInfo<PPC64TargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<PPC64TargetInfo>(Triple);
+ return new FreeBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<PPC64TargetInfo>(Triple);
+ return new NetBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
default:
- return new PPC64TargetInfo(Triple);
+ return new PPC64TargetInfo(Triple, Opts);
}
case llvm::Triple::ppc64le:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<PPC64TargetInfo>(Triple);
+ return new LinuxTargetInfo<PPC64TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<PPC64TargetInfo>(Triple);
+ return new NetBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
default:
- return new PPC64TargetInfo(Triple);
+ return new PPC64TargetInfo(Triple, Opts);
}
case llvm::Triple::nvptx:
- return new NVPTX32TargetInfo(Triple);
+ return new NVPTX32TargetInfo(Triple, Opts);
case llvm::Triple::nvptx64:
- return new NVPTX64TargetInfo(Triple);
+ return new NVPTX64TargetInfo(Triple, Opts);
case llvm::Triple::amdgcn:
case llvm::Triple::r600:
- return new AMDGPUTargetInfo(Triple);
+ return new AMDGPUTargetInfo(Triple, Opts);
case llvm::Triple::sparc:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<SparcV8TargetInfo>(Triple);
+ return new LinuxTargetInfo<SparcV8TargetInfo>(Triple, Opts);
case llvm::Triple::Solaris:
- return new SolarisTargetInfo<SparcV8TargetInfo>(Triple);
+ return new SolarisTargetInfo<SparcV8TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<SparcV8TargetInfo>(Triple);
+ return new NetBSDTargetInfo<SparcV8TargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<SparcV8TargetInfo>(Triple);
+ return new OpenBSDTargetInfo<SparcV8TargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<SparcV8TargetInfo>(Triple);
+ return new RTEMSTargetInfo<SparcV8TargetInfo>(Triple, Opts);
default:
- return new SparcV8TargetInfo(Triple);
+ return new SparcV8TargetInfo(Triple, Opts);
}
// The 'sparcel' architecture copies all the above cases except for Solaris.
case llvm::Triple::sparcel:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<SparcV8elTargetInfo>(Triple);
+ return new LinuxTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<SparcV8elTargetInfo>(Triple);
+ return new NetBSDTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<SparcV8elTargetInfo>(Triple);
+ return new OpenBSDTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSTargetInfo<SparcV8elTargetInfo>(Triple);
+ return new RTEMSTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
default:
- return new SparcV8elTargetInfo(Triple);
+ return new SparcV8elTargetInfo(Triple, Opts);
}
case llvm::Triple::sparcv9:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<SparcV9TargetInfo>(Triple);
+ return new LinuxTargetInfo<SparcV9TargetInfo>(Triple, Opts);
case llvm::Triple::Solaris:
- return new SolarisTargetInfo<SparcV9TargetInfo>(Triple);
+ return new SolarisTargetInfo<SparcV9TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<SparcV9TargetInfo>(Triple);
+ return new NetBSDTargetInfo<SparcV9TargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDTargetInfo<SparcV9TargetInfo>(Triple);
+ return new OpenBSDTargetInfo<SparcV9TargetInfo>(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<SparcV9TargetInfo>(Triple);
+ return new FreeBSDTargetInfo<SparcV9TargetInfo>(Triple, Opts);
default:
- return new SparcV9TargetInfo(Triple);
+ return new SparcV9TargetInfo(Triple, Opts);
}
case llvm::Triple::systemz:
switch (os) {
case llvm::Triple::Linux:
- return new LinuxTargetInfo<SystemZTargetInfo>(Triple);
+ return new LinuxTargetInfo<SystemZTargetInfo>(Triple, Opts);
default:
- return new SystemZTargetInfo(Triple);
+ return new SystemZTargetInfo(Triple, Opts);
}
case llvm::Triple::tce:
- return new TCETargetInfo(Triple);
+ return new TCETargetInfo(Triple, Opts);
case llvm::Triple::x86:
if (Triple.isOSDarwin())
- return new DarwinI386TargetInfo(Triple);
+ return new DarwinI386TargetInfo(Triple, Opts);
switch (os) {
case llvm::Triple::CloudABI:
- return new CloudABITargetInfo<X86_32TargetInfo>(Triple);
+ return new CloudABITargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::Linux: {
switch (Triple.getEnvironment()) {
default:
- return new LinuxTargetInfo<X86_32TargetInfo>(Triple);
+ return new LinuxTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::Android:
- return new AndroidX86_32TargetInfo(Triple);
+ return new AndroidX86_32TargetInfo(Triple, Opts);
}
}
case llvm::Triple::DragonFly:
- return new DragonFlyBSDTargetInfo<X86_32TargetInfo>(Triple);
+ return new DragonFlyBSDTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDI386TargetInfo(Triple);
+ return new NetBSDI386TargetInfo(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDI386TargetInfo(Triple);
+ return new OpenBSDI386TargetInfo(Triple, Opts);
case llvm::Triple::Bitrig:
- return new BitrigI386TargetInfo(Triple);
+ return new BitrigI386TargetInfo(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<X86_32TargetInfo>(Triple);
+ return new FreeBSDTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::KFreeBSD:
- return new KFreeBSDTargetInfo<X86_32TargetInfo>(Triple);
+ return new KFreeBSDTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::Minix:
- return new MinixTargetInfo<X86_32TargetInfo>(Triple);
+ return new MinixTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::Solaris:
- return new SolarisTargetInfo<X86_32TargetInfo>(Triple);
+ return new SolarisTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::Win32: {
switch (Triple.getEnvironment()) {
case llvm::Triple::Cygnus:
- return new CygwinX86_32TargetInfo(Triple);
+ return new CygwinX86_32TargetInfo(Triple, Opts);
case llvm::Triple::GNU:
- return new MinGWX86_32TargetInfo(Triple);
+ return new MinGWX86_32TargetInfo(Triple, Opts);
case llvm::Triple::Itanium:
case llvm::Triple::MSVC:
default: // Assume MSVC for unknown environments
- return new MicrosoftX86_32TargetInfo(Triple);
+ return new MicrosoftX86_32TargetInfo(Triple, Opts);
}
}
case llvm::Triple::Haiku:
- return new HaikuX86_32TargetInfo(Triple);
+ return new HaikuX86_32TargetInfo(Triple, Opts);
case llvm::Triple::RTEMS:
- return new RTEMSX86_32TargetInfo(Triple);
+ return new RTEMSX86_32TargetInfo(Triple, Opts);
case llvm::Triple::NaCl:
- return new NaClTargetInfo<X86_32TargetInfo>(Triple);
+ return new NaClTargetInfo<X86_32TargetInfo>(Triple, Opts);
case llvm::Triple::ELFIAMCU:
- return new MCUX86_32TargetInfo(Triple);
+ return new MCUX86_32TargetInfo(Triple, Opts);
default:
- return new X86_32TargetInfo(Triple);
+ return new X86_32TargetInfo(Triple, Opts);
}
case llvm::Triple::x86_64:
if (Triple.isOSDarwin() || Triple.isOSBinFormatMachO())
- return new DarwinX86_64TargetInfo(Triple);
+ return new DarwinX86_64TargetInfo(Triple, Opts);
switch (os) {
case llvm::Triple::CloudABI:
- return new CloudABITargetInfo<X86_64TargetInfo>(Triple);
+ return new CloudABITargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::Linux: {
switch (Triple.getEnvironment()) {
default:
- return new LinuxTargetInfo<X86_64TargetInfo>(Triple);
+ return new LinuxTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::Android:
- return new AndroidX86_64TargetInfo(Triple);
+ return new AndroidX86_64TargetInfo(Triple, Opts);
}
}
case llvm::Triple::DragonFly:
- return new DragonFlyBSDTargetInfo<X86_64TargetInfo>(Triple);
+ return new DragonFlyBSDTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::NetBSD:
- return new NetBSDTargetInfo<X86_64TargetInfo>(Triple);
+ return new NetBSDTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::OpenBSD:
- return new OpenBSDX86_64TargetInfo(Triple);
+ return new OpenBSDX86_64TargetInfo(Triple, Opts);
case llvm::Triple::Bitrig:
- return new BitrigX86_64TargetInfo(Triple);
+ return new BitrigX86_64TargetInfo(Triple, Opts);
case llvm::Triple::FreeBSD:
- return new FreeBSDTargetInfo<X86_64TargetInfo>(Triple);
+ return new FreeBSDTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::KFreeBSD:
- return new KFreeBSDTargetInfo<X86_64TargetInfo>(Triple);
+ return new KFreeBSDTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::Solaris:
- return new SolarisTargetInfo<X86_64TargetInfo>(Triple);
+ return new SolarisTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::Win32: {
switch (Triple.getEnvironment()) {
case llvm::Triple::Cygnus:
- return new CygwinX86_64TargetInfo(Triple);
+ return new CygwinX86_64TargetInfo(Triple, Opts);
case llvm::Triple::GNU:
- return new MinGWX86_64TargetInfo(Triple);
+ return new MinGWX86_64TargetInfo(Triple, Opts);
case llvm::Triple::MSVC:
default: // Assume MSVC for unknown environments
- return new MicrosoftX86_64TargetInfo(Triple);
+ return new MicrosoftX86_64TargetInfo(Triple, Opts);
}
}
+ case llvm::Triple::Haiku:
+ return new HaikuTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::NaCl:
- return new NaClTargetInfo<X86_64TargetInfo>(Triple);
+ return new NaClTargetInfo<X86_64TargetInfo>(Triple, Opts);
case llvm::Triple::PS4:
- return new PS4OSTargetInfo<X86_64TargetInfo>(Triple);
+ return new PS4OSTargetInfo<X86_64TargetInfo>(Triple, Opts);
default:
- return new X86_64TargetInfo(Triple);
+ return new X86_64TargetInfo(Triple, Opts);
}
case llvm::Triple::spir: {
if (Triple.getOS() != llvm::Triple::UnknownOS ||
Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
return nullptr;
- return new SPIR32TargetInfo(Triple);
+ return new SPIR32TargetInfo(Triple, Opts);
}
case llvm::Triple::spir64: {
if (Triple.getOS() != llvm::Triple::UnknownOS ||
Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
return nullptr;
- return new SPIR64TargetInfo(Triple);
+ return new SPIR64TargetInfo(Triple, Opts);
}
case llvm::Triple::wasm32:
if (!(Triple == llvm::Triple("wasm32-unknown-unknown")))
return nullptr;
- return new WebAssemblyOSTargetInfo<WebAssembly32TargetInfo>(Triple);
+ return new WebAssemblyOSTargetInfo<WebAssembly32TargetInfo>(Triple, Opts);
case llvm::Triple::wasm64:
if (!(Triple == llvm::Triple("wasm64-unknown-unknown")))
return nullptr;
- return new WebAssemblyOSTargetInfo<WebAssembly64TargetInfo>(Triple);
+ return new WebAssemblyOSTargetInfo<WebAssembly64TargetInfo>(Triple, Opts);
+
+ case llvm::Triple::renderscript32:
+ return new LinuxTargetInfo<RenderScript32TargetInfo>(Triple, Opts);
+ case llvm::Triple::renderscript64:
+ return new LinuxTargetInfo<RenderScript64TargetInfo>(Triple, Opts);
}
}
@@ -7948,7 +8571,7 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
llvm::Triple Triple(Opts->Triple);
// Construct the target
- std::unique_ptr<TargetInfo> Target(AllocateTarget(Triple));
+ std::unique_ptr<TargetInfo> Target(AllocateTarget(Triple, *Opts));
if (!Target) {
Diags.Report(diag::err_target_unknown_triple) << Triple.str();
return nullptr;
@@ -7988,5 +8611,10 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
if (!Target->handleTargetFeatures(Opts->Features, Diags))
return nullptr;
+ Target->setSupportedOpenCLOpts();
+
+ if (!Target->validateTarget(Diags))
+ return nullptr;
+
return Target.release();
}
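For context, a minimal caller sketch for this function; it assumes the clang 3.9-era signature TargetInfo::CreateTargetInfo(DiagnosticsEngine &, const std::shared_ptr<TargetOptions> &) and elides diagnostics setup:

#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include <memory>

clang::TargetInfo *createX86_64Target(clang::DiagnosticsEngine &Diags) {
  auto Opts = std::make_shared<clang::TargetOptions>();
  Opts->Triple = "x86_64-unknown-linux-gnu";
  // Internally runs AllocateTarget, handleTargetFeatures, and the new
  // setSupportedOpenCLOpts and validateTarget hooks before releasing.
  return clang::TargetInfo::CreateTargetInfo(Diags, Opts);
}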
diff --git a/contrib/llvm/tools/clang/lib/Basic/Version.cpp b/contrib/llvm/tools/clang/lib/Basic/Version.cpp
index 2b5014e2936e..4fa52b4acce0 100644
--- a/contrib/llvm/tools/clang/lib/Basic/Version.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Version.cpp
@@ -36,7 +36,7 @@ std::string getClangRepositoryPath() {
// If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us
// pick up a tag in an SVN export, for example.
- StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_380/final/lib/Basic/Version.cpp $");
+ StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_391/final/lib/Basic/Version.cpp $");
if (URL.empty()) {
URL = SVNRepository.slice(SVNRepository.find(':'),
SVNRepository.find("/lib/Basic"));
diff --git a/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp b/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp
index 6977f400287f..8ace2b3dc838 100644
--- a/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/VirtualFileSystem.cpp
@@ -16,13 +16,16 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/YAMLParser.h"
-#include "llvm/Config/llvm-config.h"
#include <atomic>
#include <memory>
+#include <utility>
// For chdir.
#ifdef LLVM_ON_WIN32
@@ -99,6 +102,9 @@ FileSystem::getBufferForFile(const llvm::Twine &Name, int64_t FileSize,
}
std::error_code FileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
+ if (llvm::sys::path::is_absolute(Path))
+ return std::error_code();
+
auto WorkingDir = getCurrentWorkingDirectory();
if (!WorkingDir)
return WorkingDir.getError();
@@ -111,6 +117,20 @@ bool FileSystem::exists(const Twine &Path) {
return Status && Status->exists();
}
+#ifndef NDEBUG
+static bool isTraversalComponent(StringRef Component) {
+ return Component.equals("..") || Component.equals(".");
+}
+
+static bool pathHasTraversal(StringRef Path) {
+ using namespace llvm::sys;
+ for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
+ if (isTraversalComponent(Comp))
+ return true;
+ return false;
+}
+#endif
+
//===-----------------------------------------------------------------------===/
// RealFileSystem implementation
//===-----------------------------------------------------------------------===/
@@ -120,16 +140,19 @@ namespace {
class RealFile : public File {
int FD;
Status S;
+ std::string RealName;
friend class RealFileSystem;
- RealFile(int FD, StringRef NewName)
+ RealFile(int FD, StringRef NewName, StringRef NewRealPathName)
: FD(FD), S(NewName, {}, {}, {}, {}, {},
- llvm::sys::fs::file_type::status_error, {}) {
+ llvm::sys::fs::file_type::status_error, {}),
+ RealName(NewRealPathName.str()) {
assert(FD >= 0 && "Invalid or inactive file descriptor");
}
public:
~RealFile() override;
ErrorOr<Status> status() override;
+ ErrorOr<std::string> getName() override;
ErrorOr<std::unique_ptr<MemoryBuffer>> getBuffer(const Twine &Name,
int64_t FileSize,
bool RequiresNullTerminator,
@@ -150,6 +173,10 @@ ErrorOr<Status> RealFile::status() {
return S;
}
+ErrorOr<std::string> RealFile::getName() {
+ return RealName.empty() ? S.getName().str() : RealName;
+}
+
ErrorOr<std::unique_ptr<MemoryBuffer>>
RealFile::getBuffer(const Twine &Name, int64_t FileSize,
bool RequiresNullTerminator, bool IsVolatile) {
@@ -158,21 +185,10 @@ RealFile::getBuffer(const Twine &Name, int64_t FileSize,
IsVolatile);
}
-// FIXME: This is terrible, we need this for ::close.
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#include <sys/uio.h>
-#else
-#include <io.h>
-#ifndef S_ISFIFO
-#define S_ISFIFO(x) (0)
-#endif
-#endif
std::error_code RealFile::close() {
- if (::close(FD))
- return std::error_code(errno, std::generic_category());
+ std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD);
FD = -1;
- return std::error_code();
+ return EC;
}
namespace {
@@ -198,9 +214,10 @@ ErrorOr<Status> RealFileSystem::status(const Twine &Path) {
ErrorOr<std::unique_ptr<File>>
RealFileSystem::openFileForRead(const Twine &Name) {
int FD;
- if (std::error_code EC = sys::fs::openFileForRead(Name, FD))
+ SmallString<256> RealName;
+ if (std::error_code EC = sys::fs::openFileForRead(Name, FD, &RealName))
return EC;
- return std::unique_ptr<File>(new RealFile(FD, Name.str()));
+ return std::unique_ptr<File>(new RealFile(FD, Name.str(), RealName.str()));
}
llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const {
@@ -271,7 +288,7 @@ directory_iterator RealFileSystem::dir_begin(const Twine &Dir,
// OverlayFileSystem implementation
//===-----------------------------------------------------------------------===/
OverlayFileSystem::OverlayFileSystem(IntrusiveRefCntPtr<FileSystem> BaseFS) {
- FSList.push_back(BaseFS);
+ FSList.push_back(std::move(BaseFS));
}
void OverlayFileSystem::pushOverlay(IntrusiveRefCntPtr<FileSystem> FS) {
@@ -711,7 +728,13 @@ public:
Status S)
: Entry(EK_Directory, Name), Contents(std::move(Contents)),
S(std::move(S)) {}
+ RedirectingDirectoryEntry(StringRef Name, Status S)
+ : Entry(EK_Directory, Name), S(std::move(S)) {}
Status getStatus() { return S; }
+ void addContent(std::unique_ptr<Entry> Content) {
+ Contents.push_back(std::move(Content));
+ }
+ Entry *getLastContent() const { return Contents.back().get(); }
typedef decltype(Contents)::iterator iterator;
iterator contents_begin() { return Contents.begin(); }
iterator contents_end() { return Contents.end(); }
@@ -739,6 +762,7 @@ public:
return UseName == NK_NotSet ? GlobalUseExternalName
: (UseName == NK_External);
}
+ NameKind getUseName() const { return UseName; }
static bool classof(const Entry *E) { return E->getKind() == EK_File; }
};
@@ -776,6 +800,7 @@ public:
/// All configuration options are optional.
/// 'case-sensitive': <boolean, default=true>
/// 'use-external-names': <boolean, default=true>
+/// 'overlay-relative': <boolean, default=false>
///
/// Virtual directories are represented as
/// \verbatim
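The \verbatim block continues outside this hunk, but the keys documented above are enough for a hedged end-to-end example of an overlay using the new 'overlay-relative' option, embedded here as a C++ raw string (all paths are made up for illustration):

static const char *ExampleOverlay = R"json(
{ 'version': 0,
  'case-sensitive': 'false',
  'overlay-relative': 'true',
  'roots': [
    { 'name': '/virtual/include', 'type': 'directory',
      'contents': [
        { 'name': 'stdio.h', 'type': 'file',
          'external-contents': 'real/stdio.h' }]}]}
)json";

With 'overlay-relative' set, the relative 'external-contents' path is resolved against the directory containing the YAML file itself, as implemented further down.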
@@ -815,6 +840,10 @@ class RedirectingFileSystem : public vfs::FileSystem {
std::vector<std::unique_ptr<Entry>> Roots;
/// \brief The file system to use for external references.
IntrusiveRefCntPtr<FileSystem> ExternalFS;
+ /// If IsRelativeOverlay is set, this represents the directory
+ /// path that should be prefixed to each 'external-contents' entry
+ /// when reading from YAML files.
+ std::string ExternalContentsPrefixDir;
/// @name Configuration
/// @{
@@ -822,18 +851,32 @@ class RedirectingFileSystem : public vfs::FileSystem {
/// \brief Whether to perform case-sensitive comparisons.
///
/// Currently, case-insensitive matching only works correctly with ASCII.
- bool CaseSensitive;
+ bool CaseSensitive = true;
+
+ /// IsRelativeOverlay marks whether an ExternalContentsPrefixDir path must
+ /// be prefixed to every 'external-contents' entry when reading from YAML files.
+ bool IsRelativeOverlay = false;
/// \brief Whether to use the value of 'external-contents' for the
/// names of files. This global value is overridable on a per-file basis.
- bool UseExternalNames;
+ bool UseExternalNames = true;
/// @}
+ /// Whether to canonicalize virtual file paths and external files, i.e.
+ /// strip "..", "." and "./" components from their paths. FIXME: some unit
+ /// tests currently fail on win32 when using remove_dots and
+ /// remove_leading_dotslash on paths.
+ bool UseCanonicalizedPaths =
+#ifdef LLVM_ON_WIN32
+ false;
+#else
+ true;
+#endif
+
friend class RedirectingFileSystemParser;
private:
RedirectingFileSystem(IntrusiveRefCntPtr<FileSystem> ExternalFS)
- : ExternalFS(ExternalFS), CaseSensitive(true), UseExternalNames(true) {}
+ : ExternalFS(std::move(ExternalFS)) {}
/// \brief Looks up \p Path in \c Roots.
ErrorOr<Entry *> lookupPath(const Twine &Path);
@@ -851,8 +894,8 @@ public:
/// returns a virtual file system representing its contents.
static RedirectingFileSystem *
create(std::unique_ptr<MemoryBuffer> Buffer,
- SourceMgr::DiagHandlerTy DiagHandler, void *DiagContext,
- IntrusiveRefCntPtr<FileSystem> ExternalFS);
+ SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath,
+ void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS);
ErrorOr<Status> status(const Twine &Path) override;
ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
@@ -885,6 +928,38 @@ public:
return directory_iterator(std::make_shared<VFSFromYamlDirIterImpl>(Dir,
*this, D->contents_begin(), D->contents_end(), EC));
}
+
+ void setExternalContentsPrefixDir(StringRef PrefixDir) {
+ ExternalContentsPrefixDir = PrefixDir.str();
+ }
+
+ StringRef getExternalContentsPrefixDir() const {
+ return ExternalContentsPrefixDir;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD void dump() const {
+ for (const std::unique_ptr<Entry> &Root : Roots)
+ dumpEntry(Root.get());
+ }
+
+  LLVM_DUMP_METHOD void dumpEntry(Entry *E, int NumSpaces = 0) const {
+ StringRef Name = E->getName();
+ for (int i = 0, e = NumSpaces; i < e; ++i)
+ dbgs() << " ";
+ dbgs() << "'" << Name.str().c_str() << "'" << "\n";
+
+ if (E->getKind() == EK_Directory) {
+ auto *DE = dyn_cast<RedirectingDirectoryEntry>(E);
+ assert(DE && "Should be a directory");
+
+ for (std::unique_ptr<Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end()))
+ dumpEntry(SubEntry.get(), NumSpaces+2);
+ }
+ }
+#endif
+
};
/// \brief A helper class to hold the common YAML parsing state.
@@ -964,7 +1039,71 @@ class RedirectingFileSystemParser {
return true;
}
- std::unique_ptr<Entry> parseEntry(yaml::Node *N) {
+ Entry *lookupOrCreateEntry(RedirectingFileSystem *FS, StringRef Name,
+ Entry *ParentEntry = nullptr) {
+ if (!ParentEntry) { // Look for an existing root
+ for (const std::unique_ptr<Entry> &Root : FS->Roots) {
+ if (Name.equals(Root->getName())) {
+ ParentEntry = Root.get();
+ return ParentEntry;
+ }
+ }
+ } else { // Advance to the next component
+ auto *DE = dyn_cast<RedirectingDirectoryEntry>(ParentEntry);
+ for (std::unique_ptr<Entry> &Content :
+ llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ auto *DirContent = dyn_cast<RedirectingDirectoryEntry>(Content.get());
+ if (DirContent && Name.equals(Content->getName()))
+ return DirContent;
+ }
+ }
+
+ // ... or create a new one
+ std::unique_ptr<Entry> E = llvm::make_unique<RedirectingDirectoryEntry>(
+ Name, Status("", getNextVirtualUniqueID(), sys::TimeValue::now(), 0, 0,
+ 0, file_type::directory_file, sys::fs::all_all));
+
+ if (!ParentEntry) { // Add a new root to the overlay
+ FS->Roots.push_back(std::move(E));
+ ParentEntry = FS->Roots.back().get();
+ return ParentEntry;
+ }
+
+ auto *DE = dyn_cast<RedirectingDirectoryEntry>(ParentEntry);
+ DE->addContent(std::move(E));
+ return DE->getLastContent();
+ }
+
+ void uniqueOverlayTree(RedirectingFileSystem *FS, Entry *SrcE,
+ Entry *NewParentE = nullptr) {
+ StringRef Name = SrcE->getName();
+ switch (SrcE->getKind()) {
+ case EK_Directory: {
+ auto *DE = dyn_cast<RedirectingDirectoryEntry>(SrcE);
+ assert(DE && "Must be a directory");
+ // Empty directories can be present in the YAML as a way to describe a
+ // file in the current directory after some of its subdirectories have
+ // already been parsed. This only leads to redundant walks; ignore it.
+ if (!Name.empty())
+ NewParentE = lookupOrCreateEntry(FS, Name, NewParentE);
+ for (std::unique_ptr<Entry> &SubEntry :
+ llvm::make_range(DE->contents_begin(), DE->contents_end()))
+ uniqueOverlayTree(FS, SubEntry.get(), NewParentE);
+ break;
+ }
+ case EK_File: {
+ auto *FE = dyn_cast<RedirectingFileEntry>(SrcE);
+ assert(FE && "Must be a file");
+ assert(NewParentE && "Parent entry must exist");
+ auto *DE = dyn_cast<RedirectingDirectoryEntry>(NewParentE);
+ DE->addContent(llvm::make_unique<RedirectingFileEntry>(
+ Name, FE->getExternalContentsPath(), FE->getUseName()));
+ break;
+ }
+ }
+ }
+
+ std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem *FS) {
yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N);
if (!M) {
error(N, "expected mapping node for file or directory entry");
@@ -1004,7 +1143,17 @@ class RedirectingFileSystemParser {
if (Key == "name") {
if (!parseScalarString(I->getValue(), Value, Buffer))
return nullptr;
- Name = Value;
+
+ if (FS->UseCanonicalizedPaths) {
+ SmallString<256> Path(Value);
+ // Guarantee that old YAML files containing paths with ".." and "."
+ // are properly canonicalized before read into the VFS.
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ Name = Path.str();
+ } else {
+ Name = Value;
+ }
} else if (Key == "type") {
if (!parseScalarString(I->getValue(), Value, Buffer))
return nullptr;
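Both canonicalization sites in this change lean on the same two llvm::sys::path helpers; a small self-contained sketch of their combined effect (the input path is made up):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

void canonicalizeExample() {
  llvm::SmallString<256> Path("./usr/../usr/include/stdio.h");
  // Drop the leading "./" first, then fold "." and ".." components,
  // exactly as done for 'name' and 'external-contents' above.
  Path = llvm::sys::path::remove_leading_dotslash(Path);
  llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
  // Path now holds "usr/include/stdio.h".
}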
@@ -1034,7 +1183,7 @@ class RedirectingFileSystemParser {
for (yaml::SequenceNode::iterator I = Contents->begin(),
E = Contents->end();
I != E; ++I) {
- if (std::unique_ptr<Entry> E = parseEntry(&*I))
+ if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
EntryArrayContents.push_back(std::move(E));
else
return nullptr;
@@ -1048,7 +1197,24 @@ class RedirectingFileSystemParser {
HasContents = true;
if (!parseScalarString(I->getValue(), Value, Buffer))
return nullptr;
- ExternalContentsPath = Value;
+
+ SmallString<256> FullPath;
+ if (FS->IsRelativeOverlay) {
+ FullPath = FS->getExternalContentsPrefixDir();
+ assert(!FullPath.empty() &&
+ "External contents prefix directory must exist");
+ llvm::sys::path::append(FullPath, Value);
+ } else {
+ FullPath = Value;
+ }
+
+ if (FS->UseCanonicalizedPaths) {
+ // Guarantee that old YAML files containing paths with ".." and "."
+ // are properly canonicalized before read into the VFS.
+ FullPath = sys::path::remove_leading_dotslash(FullPath);
+ sys::path::remove_dots(FullPath, /*remove_dot_dot=*/true);
+ }
+ ExternalContentsPath = FullPath.str();
} else if (Key == "use-external-name") {
bool Val;
if (!parseScalarBool(I->getValue(), Val))
@@ -1134,10 +1300,12 @@ public:
KeyStatusPair("version", true),
KeyStatusPair("case-sensitive", false),
KeyStatusPair("use-external-names", false),
+ KeyStatusPair("overlay-relative", false),
KeyStatusPair("roots", true),
};
DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields));
+ std::vector<std::unique_ptr<Entry>> RootEntries;
// Parse configuration and 'roots'
for (yaml::MappingNode::iterator I = Top->begin(), E = Top->end(); I != E;
@@ -1159,8 +1327,8 @@ public:
for (yaml::SequenceNode::iterator I = Roots->begin(), E = Roots->end();
I != E; ++I) {
- if (std::unique_ptr<Entry> E = parseEntry(&*I))
- FS->Roots.push_back(std::move(E));
+ if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
+ RootEntries.push_back(std::move(E));
else
return false;
}
@@ -1185,6 +1353,9 @@ public:
} else if (Key == "case-sensitive") {
if (!parseScalarBool(I->getValue(), FS->CaseSensitive))
return false;
+ } else if (Key == "overlay-relative") {
+ if (!parseScalarBool(I->getValue(), FS->IsRelativeOverlay))
+ return false;
} else if (Key == "use-external-names") {
if (!parseScalarBool(I->getValue(), FS->UseExternalNames))
return false;
@@ -1198,6 +1369,13 @@ public:
if (!checkMissingKeys(Top, Keys))
return false;
+
+ // Now that we successfully parsed the YAML file, canonicalize the internal
+ // representation into a proper directory tree so that we can search faster
+ // inside the VFS.
+ for (std::unique_ptr<Entry> &E : RootEntries)
+ uniqueOverlayTree(FS, E.get());
+
return true;
}
};
@@ -1205,9 +1383,11 @@ public:
Entry::~Entry() = default;
-RedirectingFileSystem *RedirectingFileSystem::create(
- std::unique_ptr<MemoryBuffer> Buffer, SourceMgr::DiagHandlerTy DiagHandler,
- void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS) {
+RedirectingFileSystem *
+RedirectingFileSystem::create(std::unique_ptr<MemoryBuffer> Buffer,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath, void *DiagContext,
+ IntrusiveRefCntPtr<FileSystem> ExternalFS) {
SourceMgr SM;
yaml::Stream Stream(Buffer->getMemBufferRef(), SM);
@@ -1223,7 +1403,24 @@ RedirectingFileSystem *RedirectingFileSystem::create(
RedirectingFileSystemParser P(Stream);
std::unique_ptr<RedirectingFileSystem> FS(
- new RedirectingFileSystem(ExternalFS));
+ new RedirectingFileSystem(std::move(ExternalFS)));
+
+ if (!YAMLFilePath.empty()) {
+ // Use the YAML path from -ivfsoverlay to compute the dir to be prefixed
+ // to each 'external-contents' path.
+ //
+ // Example:
+ // -ivfsoverlay dummy.cache/vfs/vfs.yaml
+ // yields:
+ // FS->ExternalContentsPrefixDir => /<absolute_path_to>/dummy.cache/vfs
+ //
+ SmallString<256> OverlayAbsDir = sys::path::parent_path(YAMLFilePath);
+ std::error_code EC = llvm::sys::fs::make_absolute(OverlayAbsDir);
+ assert(!EC && "Overlay dir final path must be absolute");
+ (void)EC;
+ FS->setExternalContentsPrefixDir(OverlayAbsDir);
+ }
+
if (!P.parse(Root, FS.get()))
return nullptr;
@@ -1238,6 +1435,14 @@ ErrorOr<Entry *> RedirectingFileSystem::lookupPath(const Twine &Path_) {
if (std::error_code EC = makeAbsolute(Path))
return EC;
+ // Canonicalize the path by removing ".", "..", "./", etc. components. This
+ // is a VFS request; do not bother about symlinks in the path components,
+ // but canonicalize in order to perform the correct entry search.
+ if (UseCanonicalizedPaths) {
+ Path = sys::path::remove_leading_dotslash(Path);
+ sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+ }
+
if (Path.empty())
return make_error_code(llvm::errc::invalid_argument);
@@ -1254,20 +1459,32 @@ ErrorOr<Entry *> RedirectingFileSystem::lookupPath(const Twine &Path_) {
ErrorOr<Entry *>
RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
sys::path::const_iterator End, Entry *From) {
+#ifndef LLVM_ON_WIN32
+ assert(!isTraversalComponent(*Start) &&
+ !isTraversalComponent(From->getName()) &&
+ "Paths should not contain traversal components");
+#else
+ // FIXME: this is here to support windows, remove it once canonicalized
+ // paths become globally default.
if (Start->equals("."))
++Start;
+#endif
- // FIXME: handle ..
- if (CaseSensitive ? !Start->equals(From->getName())
- : !Start->equals_lower(From->getName()))
- // failure to match
- return make_error_code(llvm::errc::no_such_file_or_directory);
+ StringRef FromName = From->getName();
- ++Start;
+ // Forward the search to the next component in case this is an empty one.
+ if (!FromName.empty()) {
+ if (CaseSensitive ? !Start->equals(FromName)
+ : !Start->equals_lower(FromName))
+ // failure to match
+ return make_error_code(llvm::errc::no_such_file_or_directory);
- if (Start == End) {
- // Match!
- return From;
+ ++Start;
+
+ if (Start == End) {
+ // Match!
+ return From;
+ }
}
auto *DE = dyn_cast<RedirectingDirectoryEntry>(From);
@@ -1322,7 +1539,7 @@ class FileWithFixedStatus : public File {
public:
FileWithFixedStatus(std::unique_ptr<File> InnerFile, Status S)
- : InnerFile(std::move(InnerFile)), S(S) {}
+ : InnerFile(std::move(InnerFile)), S(std::move(S)) {}
ErrorOr<Status> status() override { return S; }
ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
@@ -1362,10 +1579,13 @@ RedirectingFileSystem::openFileForRead(const Twine &Path) {
IntrusiveRefCntPtr<FileSystem>
vfs::getVFSFromYAML(std::unique_ptr<MemoryBuffer> Buffer,
- SourceMgr::DiagHandlerTy DiagHandler, void *DiagContext,
+ SourceMgr::DiagHandlerTy DiagHandler,
+ StringRef YAMLFilePath,
+ void *DiagContext,
IntrusiveRefCntPtr<FileSystem> ExternalFS) {
return RedirectingFileSystem::create(std::move(Buffer), DiagHandler,
- DiagContext, ExternalFS);
+ YAMLFilePath, DiagContext,
+ std::move(ExternalFS));
}
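A hedged caller sketch for the widened getVFSFromYAML signature; MemoryBuffer::getFile and vfs::getRealFileSystem are existing APIs, while the overlay file name is made up:

#include "clang/Basic/VirtualFileSystem.h"
#include "llvm/Support/MemoryBuffer.h"

llvm::IntrusiveRefCntPtr<clang::vfs::FileSystem> loadOverlay() {
  auto Buf = llvm::MemoryBuffer::getFile("cache/vfs/vfs.yaml");
  if (!Buf)
    return nullptr;
  // The new YAMLFilePath argument lets 'overlay-relative' overlays resolve
  // their 'external-contents' entries against cache/vfs/.
  return clang::vfs::getVFSFromYAML(std::move(*Buf), /*DiagHandler=*/nullptr,
                                    /*YAMLFilePath=*/"cache/vfs/vfs.yaml",
                                    /*DiagContext=*/nullptr,
                                    clang::vfs::getRealFileSystem());
}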
UniqueID vfs::getNextVirtualUniqueID() {
@@ -1376,16 +1596,6 @@ UniqueID vfs::getNextVirtualUniqueID() {
return UniqueID(std::numeric_limits<uint64_t>::max(), ID);
}
-#ifndef NDEBUG
-static bool pathHasTraversal(StringRef Path) {
- using namespace llvm::sys;
- for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
- if (Comp == "." || Comp == "..")
- return true;
- return false;
-}
-#endif
-
void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) {
assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute");
assert(sys::path::is_absolute(RealPath) && "real path not absolute");
@@ -1407,7 +1617,9 @@ class JSONWriter {
public:
JSONWriter(llvm::raw_ostream &OS) : OS(OS) {}
- void write(ArrayRef<YAMLVFSEntry> Entries, Optional<bool> IsCaseSensitive);
+ void write(ArrayRef<YAMLVFSEntry> Entries, Optional<bool> UseExternalNames,
+ Optional<bool> IsCaseSensitive, Optional<bool> IsOverlayRelative,
+ StringRef OverlayDir);
};
}
@@ -1460,7 +1672,10 @@ void JSONWriter::writeEntry(StringRef VPath, StringRef RPath) {
}
void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
- Optional<bool> IsCaseSensitive) {
+ Optional<bool> UseExternalNames,
+ Optional<bool> IsCaseSensitive,
+ Optional<bool> IsOverlayRelative,
+ StringRef OverlayDir) {
using namespace llvm::sys;
OS << "{\n"
@@ -1468,12 +1683,30 @@ void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
if (IsCaseSensitive.hasValue())
OS << " 'case-sensitive': '"
<< (IsCaseSensitive.getValue() ? "true" : "false") << "',\n";
+ if (UseExternalNames.hasValue())
+ OS << " 'use-external-names': '"
+ << (UseExternalNames.getValue() ? "true" : "false") << "',\n";
+ bool UseOverlayRelative = false;
+ if (IsOverlayRelative.hasValue()) {
+ UseOverlayRelative = IsOverlayRelative.getValue();
+ OS << " 'overlay-relative': '"
+ << (UseOverlayRelative ? "true" : "false") << "',\n";
+ }
OS << " 'roots': [\n";
if (!Entries.empty()) {
const YAMLVFSEntry &Entry = Entries.front();
startDirectory(path::parent_path(Entry.VPath));
- writeEntry(path::filename(Entry.VPath), Entry.RPath);
+
+ StringRef RPath = Entry.RPath;
+ if (UseOverlayRelative) {
+ unsigned OverlayDirLen = OverlayDir.size();
+ assert(RPath.substr(0, OverlayDirLen) == OverlayDir &&
+ "Overlay dir must be contained in RPath");
+ RPath = RPath.slice(OverlayDirLen, RPath.size());
+ }
+
+ writeEntry(path::filename(Entry.VPath), RPath);
for (const auto &Entry : Entries.slice(1)) {
StringRef Dir = path::parent_path(Entry.VPath);
@@ -1487,7 +1720,14 @@ void JSONWriter::write(ArrayRef<YAMLVFSEntry> Entries,
OS << ",\n";
startDirectory(Dir);
}
- writeEntry(path::filename(Entry.VPath), Entry.RPath);
+ StringRef RPath = Entry.RPath;
+ if (UseOverlayRelative) {
+ unsigned OverlayDirLen = OverlayDir.size();
+ assert(RPath.substr(0, OverlayDirLen) == OverlayDir &&
+ "Overlay dir must be contained in RPath");
+ RPath = RPath.slice(OverlayDirLen, RPath.size());
+ }
+ writeEntry(path::filename(Entry.VPath), RPath);
}
while (!DirStack.empty()) {
@@ -1507,7 +1747,8 @@ void YAMLVFSWriter::write(llvm::raw_ostream &OS) {
return LHS.VPath < RHS.VPath;
});
- JSONWriter(OS).write(Mappings, IsCaseSensitive);
+ JSONWriter(OS).write(Mappings, UseExternalNames, IsCaseSensitive,
+ IsOverlayRelative, OverlayDir);
}
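On the writer side, a minimal sketch of producing such an overlay; addFileMapping and write are shown in this file, while the setter names for the new knobs (setUseExternalNames, setOverlayDir) are assumptions based on the fields threaded through here:

#include "clang/Basic/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"

void emitOverlay(llvm::raw_ostream &OS) {
  clang::vfs::YAMLVFSWriter W;
  W.addFileMapping("/virtual/include/stdio.h",
                   "/build/cache/vfs/real/stdio.h"); // both must be absolute
  W.setUseExternalNames(false);        // assumed setter for the new knob
  W.setOverlayDir("/build/cache/vfs"); // assumed; makes RPaths emit relative
  // JSONWriter::write above strips the overlay dir prefix from each RPath
  // and emits 'overlay-relative': 'true'.
  W.write(OS);
}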
VFSFromYamlDirIterImpl::VFSFromYamlDirIterImpl(
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h
index a65f27085616..530a7ef560c5 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h
@@ -18,20 +18,25 @@ namespace llvm {
class Value;
class LLVMContext;
class DataLayout;
+ class Type;
}
namespace clang {
class ASTContext;
class TargetInfo;
- namespace CodeGen {
- class ABIArgInfo;
- class Address;
- class CGCXXABI;
- class CGFunctionInfo;
- class CodeGenFunction;
- class CodeGenTypes;
- }
+namespace CodeGen {
+ class ABIArgInfo;
+ class Address;
+ class CGCXXABI;
+ class CGFunctionInfo;
+ class CodeGenFunction;
+ class CodeGenTypes;
+ class SwiftABIInfo;
+
+namespace swiftcall {
+ class SwiftAggLowering;
+}
// FIXME: All of this stuff should be part of the target interface
// somehow. It is currently here because it is not clear how to factor
@@ -55,6 +60,8 @@ namespace clang {
virtual ~ABIInfo();
+ virtual bool supportsSwift() const { return false; }
+
CodeGen::CGCXXABI &getCXXABI() const;
ASTContext &getContext() const;
llvm::LLVMContext &getVMContext() const;
@@ -85,6 +92,8 @@ namespace clang {
CodeGen::Address VAListAddr,
QualType Ty) const = 0;
+ bool isAndroid() const;
+
/// Emit the target dependent code to load a value of
/// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr.
virtual CodeGen::Address EmitMSVAArg(CodeGen::CodeGenFunction &CGF,
@@ -110,7 +119,35 @@ namespace clang {
CodeGen::ABIArgInfo
getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const;
+
+
+ };
+
+ /// A refining implementation of ABIInfo for targets that support swiftcall.
+ ///
+ /// If we find ourselves wanting multiple such refinements, they'll probably
+ /// be independent refinements, and we should probably find another way
+ /// to do it than simple inheritance.
+ class SwiftABIInfo : public ABIInfo {
+ public:
+ SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {}
+
+ bool supportsSwift() const final override { return true; }
+
+ virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize,
+ ArrayRef<llvm::Type*> types,
+ bool asReturnValue) const = 0;
+
+ virtual bool isLegalVectorTypeForSwift(CharUnits totalSize,
+ llvm::Type *eltTy,
+ unsigned elts) const;
+
+ static bool classof(const ABIInfo *info) {
+ return info->supportsSwift();
+ }
};
+
+} // end namespace CodeGen
} // end namespace clang
#endif
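The classof hook added above keys LLVM-style RTTI off supportsSwift(), so callers can discover swiftcall support with a single dyn_cast instead of querying each virtual hook. A usage sketch under those assumptions (lowerForSwift is a hypothetical caller, not clang API):

    #include "llvm/Support/Casting.h"

    void lowerForSwift(const clang::CodeGen::ABIInfo &Info) {
      // dyn_cast consults SwiftABIInfo::classof, i.e. Info.supportsSwift().
      if (auto *SwiftInfo =
              llvm::dyn_cast<clang::CodeGen::SwiftABIInfo>(&Info)) {
        (void)SwiftInfo; // the swiftcall hooks are now safe to call
      }
    }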
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
index 6d746c25eed1..165b6dd55c9b 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp
@@ -16,19 +16,21 @@
#include "clang/Frontend/Utils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/FunctionInfo.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/TargetRegistry.h"
@@ -42,6 +44,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
using namespace clang;
@@ -58,9 +61,7 @@ class EmitAssemblyHelper {
Timer CodeGenerationTime;
- mutable legacy::PassManager *CodeGenPasses;
- mutable legacy::PassManager *PerModulePasses;
- mutable legacy::FunctionPassManager *PerFunctionPasses;
+ std::unique_ptr<raw_pwrite_stream> OS;
private:
TargetIRAnalysis getTargetIRAnalysis() const {
@@ -70,70 +71,44 @@ private:
return TargetIRAnalysis();
}
- legacy::PassManager *getCodeGenPasses() const {
- if (!CodeGenPasses) {
- CodeGenPasses = new legacy::PassManager();
- CodeGenPasses->add(
- createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- }
- return CodeGenPasses;
- }
-
- legacy::PassManager *getPerModulePasses() const {
- if (!PerModulePasses) {
- PerModulePasses = new legacy::PassManager();
- PerModulePasses->add(
- createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- }
- return PerModulePasses;
- }
-
- legacy::FunctionPassManager *getPerFunctionPasses() const {
- if (!PerFunctionPasses) {
- PerFunctionPasses = new legacy::FunctionPassManager(TheModule);
- PerFunctionPasses->add(
- createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
- }
- return PerFunctionPasses;
- }
+ /// Set LLVM command line options passed through -backend-option.
+ void setCommandLineOpts();
- void CreatePasses(FunctionInfoIndex *FunctionIndex);
+ void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM,
+ ModuleSummaryIndex *ModuleSummary);
/// Generates the TargetMachine.
- /// Returns Null if it is unable to create the target machine.
+ /// Leaves TM unchanged if it is unable to create the target machine.
/// Some of our clang tests specify triples which are not built
/// into clang. This is okay because these tests check the generated
/// IR, and they require DataLayout which depends on the triple.
/// In this case, we allow this method to fail and not report an error.
/// When MustCreateTM is used, we print an error if we are unable to load
/// the requested target.
- TargetMachine *CreateTargetMachine(bool MustCreateTM);
+ void CreateTargetMachine(bool MustCreateTM);
/// Add passes necessary to emit assembly or LLVM IR.
///
/// \return True on success.
- bool AddEmitPasses(BackendAction Action, raw_pwrite_stream &OS);
+ bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action,
+ raw_pwrite_stream &OS);
public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts,
const clang::TargetOptions &TOpts,
const LangOptions &LOpts, Module *M)
: Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts),
- TheModule(M), CodeGenerationTime("Code Generation Time"),
- CodeGenPasses(nullptr), PerModulePasses(nullptr),
- PerFunctionPasses(nullptr) {}
+ TheModule(M), CodeGenerationTime("Code Generation Time") {}
~EmitAssemblyHelper() {
- delete CodeGenPasses;
- delete PerModulePasses;
- delete PerFunctionPasses;
if (CodeGenOpts.DisableFree)
BuryPointer(std::move(TM));
}
std::unique_ptr<TargetMachine> TM;
- void EmitAssembly(BackendAction Action, raw_pwrite_stream *OS);
+ void EmitAssembly(BackendAction Action,
+ std::unique_ptr<raw_pwrite_stream> OS);
};
// We need this wrapper to access LangOpts and CGOpts from extension functions
@@ -172,8 +147,19 @@ static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder,
PM.add(createAddDiscriminatorsPass());
}
+static void addCleanupPassesForSampleProfiler(
+ const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) {
+ // instcombine is needed before sample profile annotation because it converts
+ // certain function calls to be inlinable. simplifycfg and sroa are needed
+ // before instcombine for necessary preparation. E.g., loads and stores are
+ // eliminated properly so that instcombine will not introduce unnecessary
+ // live ranges.
+ PM.add(createCFGSimplificationPass());
+ PM.add(createSROAPass());
+ PM.add(createInstructionCombiningPass());
+}
+
static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
+ legacy::PassManagerBase &PM) {
PM.add(createBoundsCheckingPass());
}
@@ -189,6 +175,7 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
Opts.TraceBB = CGOpts.SanitizeCoverageTraceBB;
Opts.TraceCmp = CGOpts.SanitizeCoverageTraceCmp;
Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters;
+ Opts.TracePC = CGOpts.SanitizeCoverageTracePC;
PM.add(createSanitizerCoverageModulePass(Opts));
}
@@ -198,14 +185,17 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
static_cast<const PassManagerBuilderWrapper&>(Builder);
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address);
- PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/false, Recover));
+ bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope;
+ PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover,
+ UseAfterScope));
PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover));
}
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/true,
- /*Recover*/true));
+ PM.add(createAddressSanitizerFunctionPass(
+ /*CompileKernel*/ true,
+ /*Recover*/ true, /*UseAfterScope*/ false));
PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true,
/*Recover*/true));
}
@@ -243,6 +233,19 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder,
PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles));
}
+static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ const PassManagerBuilderWrapper &BuilderWrapper =
+ static_cast<const PassManagerBuilderWrapper&>(Builder);
+ const LangOptions &LangOpts = BuilderWrapper.getLangOpts();
+ EfficiencySanitizerOptions Opts;
+ if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag))
+ Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
+ else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet))
+ Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
+ PM.add(createEfficiencySanitizerPass(Opts));
+}
+
static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
const CodeGenOptions &CodeGenOpts) {
TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple);
@@ -277,7 +280,9 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts,
MPM->add(createRewriteSymbolsPass(DL));
}
-void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) {
+void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
+ legacy::FunctionPassManager &FPM,
+ ModuleSummaryIndex *ModuleSummary) {
if (CodeGenOpts.DisableLLVMPasses)
return;
@@ -300,7 +305,8 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) {
switch (Inlining) {
case CodeGenOptions::NoInlining:
break;
- case CodeGenOptions::NormalInlining: {
+ case CodeGenOptions::NormalInlining:
+ case CodeGenOptions::OnlyHintInlining: {
PMBuilder.Inliner =
createFunctionInliningPass(OptLevel, CodeGenOpts.OptimizeSize);
break;
@@ -321,22 +327,28 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) {
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
- PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
+ PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex;
PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO;
PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
- legacy::PassManager *MPM = getPerModulePasses();
-
// If we are performing a ThinLTO importing compile, invoke the LTO
- // pipeline and pass down the in-memory function index.
- if (FunctionIndex) {
- PMBuilder.FunctionIndex = FunctionIndex;
- PMBuilder.populateLTOPassManager(*MPM);
+ // pipeline and pass down the in-memory module summary index.
+ if (ModuleSummary) {
+ PMBuilder.ModuleSummary = ModuleSummary;
+ PMBuilder.populateThinLTOPassManager(MPM);
return;
}
+ // Add target-specific passes that need to run as early as possible.
+ if (TM)
+ PMBuilder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ TM->addEarlyAsPossiblePasses(PM);
+ });
+
PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
addAddDiscriminatorsPass);
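addExtension is the general mechanism used in both registrations above: each callback runs when the builder populates a pass manager at the named extension point. A sketch of registering an arbitrary pass the same way, assuming a PMBuilder in scope (the verifier stands in for any custom pass):

    #include "llvm/IR/Verifier.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    PMBuilder.addExtension(
        llvm::PassManagerBuilder::EP_EarlyAsPossible,
        [](const llvm::PassManagerBuilder &,
           llvm::legacy::PassManagerBase &PM) {
          PM.add(llvm::createVerifierPass()); // any pass can be added here
        });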
@@ -401,15 +413,20 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) {
addDataFlowSanitizerPass);
}
+ if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) {
+ PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addEfficiencySanitizerPass);
+ PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addEfficiencySanitizerPass);
+ }
+
// Set up the per-function pass manager.
- legacy::FunctionPassManager *FPM = getPerFunctionPasses();
if (CodeGenOpts.VerifyModule)
- FPM->add(createVerifierPass());
- PMBuilder.populateFunctionPassManager(*FPM);
+ FPM.add(createVerifierPass());
// Set up the per-module pass manager.
if (!CodeGenOpts.RewriteMapFiles.empty())
- addSymbolRewriterPass(CodeGenOpts, MPM);
+ addSymbolRewriterPass(CodeGenOpts, &MPM);
if (!CodeGenOpts.DisableGCov &&
(CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
@@ -424,25 +441,56 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) {
Options.FunctionNamesInData =
!CodeGenOpts.CoverageNoFunctionNamesInData;
Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
- MPM->add(createGCOVProfilerPass(Options));
- if (CodeGenOpts.getDebugInfo() == CodeGenOptions::NoDebugInfo)
- MPM->add(createStripSymbolsPass(true));
+ MPM.add(createGCOVProfilerPass(Options));
+ if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo)
+ MPM.add(createStripSymbolsPass(true));
}
- if (CodeGenOpts.ProfileInstrGenerate) {
+ if (CodeGenOpts.hasProfileClangInstr()) {
InstrProfOptions Options;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput;
- MPM->add(createInstrProfilingPass(Options));
+ MPM.add(createInstrProfilingLegacyPass(Options));
+ }
+ if (CodeGenOpts.hasProfileIRInstr()) {
+ if (!CodeGenOpts.InstrProfileOutput.empty())
+ PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput;
+ else
+ PMBuilder.PGOInstrGen = "default.profraw";
+ }
+ if (CodeGenOpts.hasProfileIRUse())
+ PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath;
+
+ if (!CodeGenOpts.SampleProfileFile.empty()) {
+ MPM.add(createPruneEHPass());
+ MPM.add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile));
+ PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
+ addCleanupPassesForSampleProfiler);
}
- if (!CodeGenOpts.SampleProfileFile.empty())
- MPM->add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile));
+ PMBuilder.populateFunctionPassManager(FPM);
+ PMBuilder.populateModulePassManager(MPM);
+}
- PMBuilder.populateModulePassManager(*MPM);
+void EmitAssemblyHelper::setCommandLineOpts() {
+ SmallVector<const char *, 16> BackendArgs;
+ BackendArgs.push_back("clang"); // Fake program name.
+ if (!CodeGenOpts.DebugPass.empty()) {
+ BackendArgs.push_back("-debug-pass");
+ BackendArgs.push_back(CodeGenOpts.DebugPass.c_str());
+ }
+ if (!CodeGenOpts.LimitFloatPrecision.empty()) {
+ BackendArgs.push_back("-limit-float-precision");
+ BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
+ }
+ for (const std::string &BackendOption : CodeGenOpts.BackendOptions)
+ BackendArgs.push_back(BackendOption.c_str());
+ BackendArgs.push_back(nullptr);
+ llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
+ BackendArgs.data());
}
-TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
+void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
// Create the TargetMachine for generating code.
std::string Error;
std::string Triple = TheModule->getTargetTriple();
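The setCommandLineOpts body above forwards -backend-option flags by synthesizing an argv for LLVM's cl:: machinery. The essential shape, as a hedged standalone sketch (the -debug-pass value is only an example flag):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/CommandLine.h"

    llvm::SmallVector<const char *, 4> Args;
    Args.push_back("clang");                  // fake program name
    Args.push_back("-debug-pass=Structure");  // any LLVM backend flag
    Args.push_back(nullptr);                  // argv is null-terminated
    llvm::cl::ParseCommandLineOptions(Args.size() - 1, Args.data());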
@@ -450,7 +498,7 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
if (!TheTarget) {
if (MustCreateTM)
Diags.Report(diag::err_fe_unable_to_create_target) << Error;
- return nullptr;
+ return;
}
unsigned CodeModel =
@@ -464,27 +512,11 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
assert(CodeModel != ~0u && "invalid code model!");
llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel);
- SmallVector<const char *, 16> BackendArgs;
- BackendArgs.push_back("clang"); // Fake program name.
- if (!CodeGenOpts.DebugPass.empty()) {
- BackendArgs.push_back("-debug-pass");
- BackendArgs.push_back(CodeGenOpts.DebugPass.c_str());
- }
- if (!CodeGenOpts.LimitFloatPrecision.empty()) {
- BackendArgs.push_back("-limit-float-precision");
- BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
- }
- for (const std::string &BackendOption : CodeGenOpts.BackendOptions)
- BackendArgs.push_back(BackendOption.c_str());
- BackendArgs.push_back(nullptr);
- llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
- BackendArgs.data());
-
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
// Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp.
- llvm::Reloc::Model RM = llvm::Reloc::Default;
+ llvm::Optional<llvm::Reloc::Model> RM;
if (CodeGenOpts.RelocationModel == "static") {
RM = llvm::Reloc::Static;
} else if (CodeGenOpts.RelocationModel == "pic") {
@@ -539,38 +571,29 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
Options.UseInitArray = CodeGenOpts.UseInitArray;
Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS;
Options.CompressDebugSections = CodeGenOpts.CompressDebugSections;
+ Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations;
// Set EABI version.
- Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(CodeGenOpts.EABIVersion)
+ Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion)
.Case("4", llvm::EABI::EABI4)
.Case("5", llvm::EABI::EABI5)
.Case("gnu", llvm::EABI::GNU)
.Default(llvm::EABI::Default);
+ if (LangOpts.SjLjExceptions)
+ Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
+
Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD;
Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
- Options.PositionIndependentExecutable = LangOpts.PIELevel != 0;
Options.FunctionSections = CodeGenOpts.FunctionSections;
Options.DataSections = CodeGenOpts.DataSections;
Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
- switch (CodeGenOpts.getDebuggerTuning()) {
- case CodeGenOptions::DebuggerKindGDB:
- Options.DebuggerTuning = llvm::DebuggerKind::GDB;
- break;
- case CodeGenOptions::DebuggerKindLLDB:
- Options.DebuggerTuning = llvm::DebuggerKind::LLDB;
- break;
- case CodeGenOptions::DebuggerKindSCE:
- Options.DebuggerTuning = llvm::DebuggerKind::SCE;
- break;
- default:
- break;
- }
+ Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
@@ -582,24 +605,18 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
Options.MCOptions.ABIName = TargetOpts.ABI;
- TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU,
- FeaturesStr, Options,
- RM, CM, OptLevel);
-
- return TM;
+ TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
+ Options, RM, CM, OptLevel));
}
-bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action,
+bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
+ BackendAction Action,
raw_pwrite_stream &OS) {
-
- // Create the code generator passes.
- legacy::PassManager *PM = getCodeGenPasses();
-
// Add LibraryInfo.
llvm::Triple TargetTriple(TheModule->getTargetTriple());
std::unique_ptr<TargetLibraryInfoImpl> TLII(
createTLII(TargetTriple, CodeGenOpts));
- PM->add(new TargetLibraryInfoWrapperPass(*TLII));
+ CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII));
// Normal mode, emit a .s or .o file by running the code generator. Note,
// this also adds codegenerator level optimization passes.
@@ -615,9 +632,9 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action,
// "codegen" passes so that it isn't run multiple times when there is
// inlining happening.
if (CodeGenOpts.OptimizationLevel > 0)
- PM->add(createObjCARCContractPass());
+ CodeGenPasses.add(createObjCARCContractPass());
- if (TM->addPassesToEmitFile(*PM, OS, CGFT,
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT,
/*DisableVerify=*/!CodeGenOpts.VerifyModule)) {
Diags.Report(diag::err_fe_unable_to_interface_with_target);
return false;
@@ -627,14 +644,15 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action,
}
void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
- raw_pwrite_stream *OS) {
+ std::unique_ptr<raw_pwrite_stream> OS) {
TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
+ setCommandLineOpts();
+
bool UsesCodeGen = (Action != Backend_EmitNothing &&
Action != Backend_EmitBC &&
Action != Backend_EmitLL);
- if (!TM)
- TM.reset(CreateTargetMachine(UsesCodeGen));
+ CreateTargetMachine(UsesCodeGen);
if (UsesCodeGen && !TM)
return;
@@ -644,41 +662,54 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
// If we are performing a ThinLTO importing compile, load the function
// index into memory and pass it into CreatePasses, which will add it
// to the PassManagerBuilder and invoke LTO passes.
- std::unique_ptr<FunctionInfoIndex> FunctionIndex;
+ std::unique_ptr<ModuleSummaryIndex> ModuleSummary;
if (!CodeGenOpts.ThinLTOIndexFile.empty()) {
- ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
- llvm::getFunctionIndexForFile(CodeGenOpts.ThinLTOIndexFile,
- [&](const DiagnosticInfo &DI) {
- TheModule->getContext().diagnose(DI);
- });
+ ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
+ llvm::getModuleSummaryIndexForFile(
+ CodeGenOpts.ThinLTOIndexFile, [&](const DiagnosticInfo &DI) {
+ TheModule->getContext().diagnose(DI);
+ });
if (std::error_code EC = IndexOrErr.getError()) {
std::string Error = EC.message();
errs() << "Error loading index file '" << CodeGenOpts.ThinLTOIndexFile
<< "': " << Error << "\n";
return;
}
- FunctionIndex = std::move(IndexOrErr.get());
- assert(FunctionIndex && "Expected non-empty function index");
+ ModuleSummary = std::move(IndexOrErr.get());
+ assert(ModuleSummary && "Expected non-empty module summary index");
}
- CreatePasses(FunctionIndex.get());
+ legacy::PassManager PerModulePasses;
+ PerModulePasses.add(
+ createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
+
+ legacy::FunctionPassManager PerFunctionPasses(TheModule);
+ PerFunctionPasses.add(
+ createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
+
+ CreatePasses(PerModulePasses, PerFunctionPasses, ModuleSummary.get());
+
+ legacy::PassManager CodeGenPasses;
+ CodeGenPasses.add(
+ createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
switch (Action) {
case Backend_EmitNothing:
break;
case Backend_EmitBC:
- getPerModulePasses()->add(createBitcodeWriterPass(
- *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitFunctionSummary));
+ PerModulePasses.add(createBitcodeWriterPass(
+ *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex,
+ CodeGenOpts.EmitSummaryIndex));
break;
case Backend_EmitLL:
- getPerModulePasses()->add(
+ PerModulePasses.add(
createPrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists));
break;
default:
- if (!AddEmitPasses(Action, *OS))
+ if (!AddEmitPasses(CodeGenPasses, Action, *OS))
return;
}
@@ -688,46 +719,165 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
// Run passes. For now we do all passes at once, but eventually we
// would like to have the option of streaming code generation.
- if (PerFunctionPasses) {
+ {
PrettyStackTraceString CrashInfo("Per-function optimization");
- PerFunctionPasses->doInitialization();
+ PerFunctionPasses.doInitialization();
for (Function &F : *TheModule)
if (!F.isDeclaration())
- PerFunctionPasses->run(F);
- PerFunctionPasses->doFinalization();
+ PerFunctionPasses.run(F);
+ PerFunctionPasses.doFinalization();
}
- if (PerModulePasses) {
+ {
PrettyStackTraceString CrashInfo("Per-module optimization passes");
- PerModulePasses->run(*TheModule);
+ PerModulePasses.run(*TheModule);
}
- if (CodeGenPasses) {
+ {
PrettyStackTraceString CrashInfo("Code generation");
- CodeGenPasses->run(*TheModule);
+ CodeGenPasses.run(*TheModule);
}
}
void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
const CodeGenOptions &CGOpts,
const clang::TargetOptions &TOpts,
- const LangOptions &LOpts, StringRef TDesc,
+ const LangOptions &LOpts, const llvm::DataLayout &TDesc,
Module *M, BackendAction Action,
- raw_pwrite_stream *OS) {
+ std::unique_ptr<raw_pwrite_stream> OS) {
EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M);
- AsmHelper.EmitAssembly(Action, OS);
+ AsmHelper.EmitAssembly(Action, std::move(OS));
- // If an optional clang TargetInfo description string was passed in, use it to
- // verify the LLVM TargetMachine's DataLayout.
- if (AsmHelper.TM && !TDesc.empty()) {
+ // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's
+ // DataLayout.
+ if (AsmHelper.TM) {
std::string DLDesc = M->getDataLayout().getStringRepresentation();
- if (DLDesc != TDesc) {
+ if (DLDesc != TDesc.getStringRepresentation()) {
unsigned DiagID = Diags.getCustomDiagID(
DiagnosticsEngine::Error, "backend data layout '%0' does not match "
"expected target description '%1'");
- Diags.Report(DiagID) << DLDesc << TDesc;
+ Diags.Report(DiagID) << DLDesc << TDesc.getStringRepresentation();
+ }
+ }
+}
+
+static const char* getSectionNameForBitcode(const Triple &T) {
+ switch (T.getObjectFormat()) {
+ case Triple::MachO:
+ return "__LLVM,__bitcode";
+ case Triple::COFF:
+ case Triple::ELF:
+ case Triple::UnknownObjectFormat:
+ return ".llvmbc";
+ }
+ llvm_unreachable("Unimplemented ObjectFormatType");
+}
+
+static const char* getSectionNameForCommandline(const Triple &T) {
+ switch (T.getObjectFormat()) {
+ case Triple::MachO:
+ return "__LLVM,__cmdline";
+ case Triple::COFF:
+ case Triple::ELF:
+ case Triple::UnknownObjectFormat:
+ return ".llvmcmd";
+ }
+ llvm_unreachable("Unimplemented ObjectFormatType");
+}
+
+// With -fembed-bitcode, save a copy of the llvm IR as data in the
+// __LLVM,__bitcode section.
+void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+ llvm::MemoryBufferRef Buf) {
+ if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off)
+ return;
+
+ // Save llvm.compiler.used and remove it.
+ SmallVector<Constant*, 2> UsedArray;
+ SmallSet<GlobalValue*, 4> UsedGlobals;
+ Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0);
+ GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true);
+ for (auto *GV : UsedGlobals) {
+ if (GV->getName() != "llvm.embedded.module" &&
+ GV->getName() != "llvm.cmdline")
+ UsedArray.push_back(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ }
+ if (Used)
+ Used->eraseFromParent();
+
+ // Embed the bitcode for the llvm module.
+ std::string Data;
+ ArrayRef<uint8_t> ModuleData;
+ Triple T(M->getTargetTriple());
+ // Create a constant that contains the bitcode.
+ // In case of embedding a marker, ignore the input Buf and use the empty
+ // ArrayRef. It is also legal to create a bitcode marker even if Buf is empty.
+ if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) {
+ if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
+ (const unsigned char *)Buf.getBufferEnd())) {
+ // If the input is LLVM Assembly, bitcode is produced by serializing
+ // the module. Use-list order needs to be preserved in this case.
+ llvm::raw_string_ostream OS(Data);
+ llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+ ModuleData =
+ ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
+ } else
+ // If the input is LLVM bitcode, write the input byte stream directly.
+ ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(),
+ Buf.getBufferSize());
+ }
+ llvm::Constant *ModuleConstant =
+ llvm::ConstantDataArray::get(M->getContext(), ModuleData);
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
+ ModuleConstant);
+ GV->setSection(getSectionNameForBitcode(T));
+ UsedArray.push_back(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ if (llvm::GlobalVariable *Old =
+ M->getGlobalVariable("llvm.embedded.module", true)) {
+ assert(Old->hasOneUse() &&
+ "llvm.embedded.module can only be used once in llvm.compiler.used");
+ GV->takeName(Old);
+ Old->eraseFromParent();
+ } else {
+ GV->setName("llvm.embedded.module");
+ }
+
+ // Skip if only bitcode needs to be embedded.
+ if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) {
+ // Embed command-line options.
+ ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()),
+ CGOpts.CmdArgs.size());
+ llvm::Constant *CmdConstant =
+ llvm::ConstantDataArray::get(M->getContext(), CmdData);
+ GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true,
+ llvm::GlobalValue::PrivateLinkage,
+ CmdConstant);
+ GV->setSection(getSectionNameForCommandline(T));
+ UsedArray.push_back(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
+ if (llvm::GlobalVariable *Old =
+ M->getGlobalVariable("llvm.cmdline", true)) {
+ assert(Old->hasOneUse() &&
+ "llvm.cmdline can only be used once in llvm.compiler.used");
+ GV->takeName(Old);
+ Old->eraseFromParent();
+ } else {
+ GV->setName("llvm.cmdline");
}
}
+
+ if (UsedArray.empty())
+ return;
+
+ // Recreate llvm.compiler.used.
+ ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size());
+ auto *NewUsed = new GlobalVariable(
+ *M, ATy, false, llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used");
+ NewUsed->setSection("llvm.metadata");
}
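EmbedBitcode keeps its section globals alive by rebuilding llvm.compiler.used with appending linkage in the llvm.metadata section. The minimal pattern for a single global, assuming a Module *M and GlobalVariable *GV already exist (a sketch of the idiom, not the function's exact code):

    llvm::Type *Int8PtrTy =
        llvm::Type::getInt8Ty(M->getContext())->getPointerTo(0);
    llvm::Constant *Entry =
        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
    llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, 1);
    auto *Used = new llvm::GlobalVariable(
        *M, ATy, /*isConstant=*/false, llvm::GlobalValue::AppendingLinkage,
        llvm::ConstantArray::get(ATy, {Entry}), "llvm.compiler.used");
    Used->setSection("llvm.metadata");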
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp
index 24de30b0b862..7b747c138303 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp
@@ -79,7 +79,7 @@ namespace {
auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
AtomicSizeInBits = C.toBits(
C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
- .RoundUpToAlignment(lvalue.getAlignment()));
+ .alignTo(lvalue.getAlignment()));
auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldPointer());
auto OffsetInChars =
(C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
@@ -221,11 +221,13 @@ namespace {
/// \param IsWeak true if atomic operation is weak, false otherwise.
/// \returns Pair of values: previous value from storage (value type) and
/// boolean flag (i1 type) with true if success and false otherwise.
- std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange(
- RValue Expected, RValue Desired,
- llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
- llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
- bool IsWeak = false);
+ std::pair<RValue, llvm::Value *>
+ EmitAtomicCompareExchange(RValue Expected, RValue Desired,
+ llvm::AtomicOrdering Success =
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure =
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ bool IsWeak = false);
/// \brief Emits atomic update.
/// \param AO Atomic ordering.
@@ -241,11 +243,6 @@ namespace {
/// Materialize an atomic r-value in atomic-layout memory.
Address materializeRValue(RValue rvalue) const;
- /// \brief Translates LLVM atomic ordering to GNU atomic ordering for
- /// libcalls.
- static AtomicExpr::AtomicOrderingKind
- translateAtomicOrdering(const llvm::AtomicOrdering AO);
-
/// \brief Creates temp alloca for intermediate operations on atomic value.
Address CreateTempAlloca() const;
private:
@@ -260,13 +257,17 @@ namespace {
/// \brief Emits atomic compare-and-exchange op as a libcall.
llvm::Value *EmitAtomicCompareExchangeLibcall(
llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
- llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
- llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent);
+ llvm::AtomicOrdering Success =
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure =
+ llvm::AtomicOrdering::SequentiallyConsistent);
/// \brief Emits atomic compare-and-exchange op as LLVM instruction.
std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
- llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
- llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+ llvm::AtomicOrdering Success =
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure =
+ llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false);
/// \brief Emit atomic update as libcalls.
void
@@ -286,25 +287,6 @@ namespace {
};
}
-AtomicExpr::AtomicOrderingKind
-AtomicInfo::translateAtomicOrdering(const llvm::AtomicOrdering AO) {
- switch (AO) {
- case llvm::Unordered:
- case llvm::NotAtomic:
- case llvm::Monotonic:
- return AtomicExpr::AO_ABI_memory_order_relaxed;
- case llvm::Acquire:
- return AtomicExpr::AO_ABI_memory_order_acquire;
- case llvm::Release:
- return AtomicExpr::AO_ABI_memory_order_release;
- case llvm::AcquireRelease:
- return AtomicExpr::AO_ABI_memory_order_acq_rel;
- case llvm::SequentiallyConsistent:
- return AtomicExpr::AO_ABI_memory_order_seq_cst;
- }
- llvm_unreachable("Unhandled AtomicOrdering");
-}
-
Address AtomicInfo::CreateTempAlloca() const {
Address TempAlloca = CGF.CreateMemTemp(
(LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy
@@ -323,8 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF,
QualType resultType,
CallArgList &args) {
const CGFunctionInfo &fnInfo =
- CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args,
- FunctionType::ExtInfo(), RequiredArgs::All);
+ CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args);
llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName);
return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args);
@@ -422,33 +403,39 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
/// instructions to cope with the provided (but possibly only dynamically known)
/// FailureOrder.
static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
- bool IsWeak, Address Dest,
- Address Ptr, Address Val1,
- Address Val2,
+ bool IsWeak, Address Dest, Address Ptr,
+ Address Val1, Address Val2,
llvm::Value *FailureOrderVal,
uint64_t Size,
llvm::AtomicOrdering SuccessOrder) {
llvm::AtomicOrdering FailureOrder;
if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
- switch (FO->getSExtValue()) {
- default:
- FailureOrder = llvm::Monotonic;
- break;
- case AtomicExpr::AO_ABI_memory_order_consume:
- case AtomicExpr::AO_ABI_memory_order_acquire:
- FailureOrder = llvm::Acquire;
- break;
- case AtomicExpr::AO_ABI_memory_order_seq_cst:
- FailureOrder = llvm::SequentiallyConsistent;
- break;
- }
- if (FailureOrder >= SuccessOrder) {
- // Don't assert on undefined behaviour.
+ auto FOS = FO->getSExtValue();
+ if (!llvm::isValidAtomicOrderingCABI(FOS))
+ FailureOrder = llvm::AtomicOrdering::Monotonic;
+ else
+ switch ((llvm::AtomicOrderingCABI)FOS) {
+ case llvm::AtomicOrderingCABI::relaxed:
+ case llvm::AtomicOrderingCABI::release:
+ case llvm::AtomicOrderingCABI::acq_rel:
+ FailureOrder = llvm::AtomicOrdering::Monotonic;
+ break;
+ case llvm::AtomicOrderingCABI::consume:
+ case llvm::AtomicOrderingCABI::acquire:
+ FailureOrder = llvm::AtomicOrdering::Acquire;
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ }
+ if (isStrongerThan(FailureOrder, SuccessOrder)) {
+ // Don't assert on undefined behavior "failure argument shall be no
+ // stronger than the success argument".
FailureOrder =
- llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
+ llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
}
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size,
- SuccessOrder, FailureOrder);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ FailureOrder);
return;
}
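The switch above clamps any C ABI failure ordering to something LLVM's cmpxchg accepts: invalid values and release-flavored orders become monotonic, consume is strengthened to acquire. The same validation and translation as a standalone sketch (toFailureOrdering is a hypothetical helper, not clang API):

    #include "llvm/Support/AtomicOrdering.h"
    #include <cstdint>

    llvm::AtomicOrdering toFailureOrdering(int64_t V) {
      if (!llvm::isValidAtomicOrderingCABI(V))
        return llvm::AtomicOrdering::Monotonic;
      switch ((llvm::AtomicOrderingCABI)V) {
      case llvm::AtomicOrderingCABI::consume:
      case llvm::AtomicOrderingCABI::acquire:
        return llvm::AtomicOrdering::Acquire;
      case llvm::AtomicOrderingCABI::seq_cst:
        return llvm::AtomicOrdering::SequentiallyConsistent;
      default: // relaxed, release, acq_rel may not fail any stronger
        return llvm::AtomicOrdering::Monotonic;
      }
    }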
@@ -456,9 +443,10 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr,
*SeqCstBB = nullptr;
MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
- if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release)
+ if (SuccessOrder != llvm::AtomicOrdering::Monotonic &&
+ SuccessOrder != llvm::AtomicOrdering::Release)
AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
- if (SuccessOrder == llvm::SequentiallyConsistent)
+ if (SuccessOrder == llvm::AtomicOrdering::SequentiallyConsistent)
SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);
@@ -472,25 +460,25 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
// doesn't fold to a constant for the ordering.
CGF.Builder.SetInsertPoint(MonotonicBB);
emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::Monotonic);
+ Size, SuccessOrder, llvm::AtomicOrdering::Monotonic);
CGF.Builder.CreateBr(ContBB);
if (AcquireBB) {
CGF.Builder.SetInsertPoint(AcquireBB);
emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::Acquire);
+ Size, SuccessOrder, llvm::AtomicOrdering::Acquire);
CGF.Builder.CreateBr(ContBB);
- SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume),
+ SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
AcquireBB);
- SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire),
+ SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
AcquireBB);
}
if (SeqCstBB) {
CGF.Builder.SetInsertPoint(SeqCstBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
- Size, SuccessOrder, llvm::SequentiallyConsistent);
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ llvm::AtomicOrdering::SequentiallyConsistent);
CGF.Builder.CreateBr(ContBB);
- SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst),
+ SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
SeqCstBB);
}
@@ -1037,40 +1025,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
E->getOp() == AtomicExpr::AO__atomic_load_n;
if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
- switch (ord) {
- case AtomicExpr::AO_ABI_memory_order_relaxed:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::Monotonic);
- break;
- case AtomicExpr::AO_ABI_memory_order_consume:
- case AtomicExpr::AO_ABI_memory_order_acquire:
- if (IsStore)
- break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::Acquire);
- break;
- case AtomicExpr::AO_ABI_memory_order_release:
- if (IsLoad)
- break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::Release);
- break;
- case AtomicExpr::AO_ABI_memory_order_acq_rel:
- if (IsLoad || IsStore)
- break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AcquireRelease);
- break;
- case AtomicExpr::AO_ABI_memory_order_seq_cst:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::SequentiallyConsistent);
- break;
- default: // invalid order
- // We should not ever get here normally, but it's hard to
- // enforce that in general.
- break;
- }
+ auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+ // We should not ever get to a case where the ordering isn't a valid C ABI
+ // value, but it's hard to enforce that in general.
+ if (llvm::isValidAtomicOrderingCABI(ord))
+ switch ((llvm::AtomicOrderingCABI)ord) {
+ case llvm::AtomicOrderingCABI::relaxed:
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::Monotonic);
+ break;
+ case llvm::AtomicOrderingCABI::consume:
+ case llvm::AtomicOrderingCABI::acquire:
+ if (IsStore)
+ break; // Avoid crashing on code with undefined behavior
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrderingCABI::release:
+ if (IsLoad)
+ break; // Avoid crashing on code with undefined behavior
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrderingCABI::acq_rel:
+ if (IsLoad || IsStore)
+ break; // Avoid crashing on code with undefined behavior
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::AcquireRelease);
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ break;
+ }
if (RValTy->isVoidType())
return RValue::get(nullptr);
@@ -1105,39 +1092,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// Emit all the different atomics
Builder.SetInsertPoint(MonotonicBB);
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::Monotonic);
+ Size, llvm::AtomicOrdering::Monotonic);
Builder.CreateBr(ContBB);
if (!IsStore) {
Builder.SetInsertPoint(AcquireBB);
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::Acquire);
+ Size, llvm::AtomicOrdering::Acquire);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume),
+ SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
AcquireBB);
- SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire),
+ SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
AcquireBB);
}
if (!IsLoad) {
Builder.SetInsertPoint(ReleaseBB);
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::Release);
+ Size, llvm::AtomicOrdering::Release);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_release),
+ SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
ReleaseBB);
}
if (!IsLoad && !IsStore) {
Builder.SetInsertPoint(AcqRelBB);
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::AcquireRelease);
+ Size, llvm::AtomicOrdering::AcquireRelease);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acq_rel),
+ SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
AcqRelBB);
}
Builder.SetInsertPoint(SeqCstBB);
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
- Size, llvm::SequentiallyConsistent);
+ Size, llvm::AtomicOrdering::SequentiallyConsistent);
Builder.CreateBr(ContBB);
- SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst),
+ SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
SeqCstBB);
// Cleanup and return
@@ -1257,9 +1244,9 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
CGF.getContext().VoidPtrTy);
Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)),
CGF.getContext().VoidPtrTy);
- Args.add(RValue::get(
- llvm::ConstantInt::get(CGF.IntTy, translateAtomicOrdering(AO))),
- CGF.getContext().IntTy);
+ Args.add(
+ RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))),
+ CGF.getContext().IntTy);
emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args);
}
@@ -1287,28 +1274,21 @@ bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) {
bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType());
// An atomic is inline if we don't need to use a libcall.
bool AtomicIsInline = !AI.shouldUseLibcall();
+ // MSVC doesn't seem to do this for types wider than a pointer.
+ if (getContext().getTypeSize(LV.getType()) >
+ getContext().getTypeSize(getContext().getIntPtrType()))
+ return false;
return IsVolatile && AtomicIsInline;
}
-/// An type is a candidate for having its loads and stores be made atomic if
-/// we are operating under /volatile:ms *and* we know the access is volatile and
-/// performing such an operation can be performed without a libcall.
-bool CodeGenFunction::typeIsSuitableForInlineAtomic(QualType Ty,
- bool IsVolatile) const {
- // An atomic is inline if we don't need to use a libcall (e.g. it is builtin).
- bool AtomicIsInline = getContext().getTargetInfo().hasBuiltinAtomic(
- getContext().getTypeSize(Ty), getContext().getTypeAlign(Ty));
- return CGM.getCodeGenOpts().MSVolatile && IsVolatile && AtomicIsInline;
-}
-
RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL,
AggValueSlot Slot) {
llvm::AtomicOrdering AO;
bool IsVolatile = LV.isVolatileQualified();
if (LV.getType()->isAtomicType()) {
- AO = llvm::SequentiallyConsistent;
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
} else {
- AO = llvm::Acquire;
+ AO = llvm::AtomicOrdering::Acquire;
IsVolatile = true;
}
return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot);
@@ -1462,11 +1442,11 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
CGF.getContext().VoidPtrTy);
Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)),
CGF.getContext().VoidPtrTy);
- Args.add(RValue::get(llvm::ConstantInt::get(
- CGF.IntTy, translateAtomicOrdering(Success))),
+ Args.add(RValue::get(
+ llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))),
CGF.getContext().IntTy);
- Args.add(RValue::get(llvm::ConstantInt::get(
- CGF.IntTy, translateAtomicOrdering(Failure))),
+ Args.add(RValue::get(
+ llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Failure))),
CGF.getContext().IntTy);
auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
CGF.getContext().BoolTy, Args);
@@ -1477,8 +1457,9 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
llvm::AtomicOrdering Failure, bool IsWeak) {
- if (Failure >= Success)
- // Don't assert on undefined behavior.
+ if (isStrongerThan(Failure, Success))
+ // Don't assert on undefined behavior "failure argument shall be no stronger
+ // than the success argument".
Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
// Check whether we should use a library call.
@@ -1727,9 +1708,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
bool IsVolatile = lvalue.isVolatileQualified();
llvm::AtomicOrdering AO;
if (lvalue.getType()->isAtomicType()) {
- AO = llvm::SequentiallyConsistent;
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
} else {
- AO = llvm::Release;
+ AO = llvm::AtomicOrdering::Release;
IsVolatile = true;
}
return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit);
@@ -1772,9 +1753,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
getContext().VoidPtrTy);
args.add(RValue::get(EmitCastToVoidPtr(srcAddr.getPointer())),
getContext().VoidPtrTy);
- args.add(RValue::get(llvm::ConstantInt::get(
- IntTy, AtomicInfo::translateAtomicOrdering(AO))),
- getContext().IntTy);
+ args.add(
+ RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))),
+ getContext().IntTy);
emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
return;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
index ba2941e9df4a..e3658ab9b762 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp
@@ -125,10 +125,15 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
llvm::Constant *init = llvm::ConstantStruct::getAnon(elements);
+ unsigned AddrSpace = 0;
+ if (C.getLangOpts().OpenCL)
+ AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant);
llvm::GlobalVariable *global =
new llvm::GlobalVariable(CGM.getModule(), init->getType(), true,
llvm::GlobalValue::InternalLinkage,
- init, "__block_descriptor_tmp");
+ init, "__block_descriptor_tmp", nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ AddrSpace);
return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType());
}
@@ -262,6 +267,11 @@ static bool isSafeForCXXConstantCapture(QualType type) {
static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM,
CodeGenFunction *CGF,
const VarDecl *var) {
+ // Return null if this is a function parameter. We shouldn't try to
+ // rematerialize default arguments of function parameters.
+ if (isa<ParmVarDecl>(var))
+ return nullptr;
+
QualType type = var->getType();
// We can only do this if the variable is const.
@@ -508,7 +518,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
// At this point, we just have to add padding if the end align still
// isn't aligned right.
if (endAlign < maxFieldAlign) {
- CharUnits newBlockSize = blockSize.RoundUpToAlignment(maxFieldAlign);
+ CharUnits newBlockSize = blockSize.alignTo(maxFieldAlign);
CharUnits padding = newBlockSize - blockSize;
// If we haven't yet added any fields, remember that there was an
@@ -775,35 +785,34 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// Compute the address of the thing we're going to move into the
// block literal.
Address src = Address::invalid();
- if (BlockInfo && CI.isNested()) {
- // We need to use the capture from the enclosing block.
- const CGBlockInfo::Capture &enclosingCapture =
- BlockInfo->getCapture(variable);
-
- // This is a [[type]]*, except that a byref entry wil just be an i8**.
- src = Builder.CreateStructGEP(LoadBlockStruct(),
- enclosingCapture.getIndex(),
- enclosingCapture.getOffset(),
- "block.capture.addr");
- } else if (blockDecl->isConversionFromLambda()) {
+
+ if (blockDecl->isConversionFromLambda()) {
// The lambda capture in a lambda's conversion-to-block-pointer is
// special; we'll simply emit it directly.
src = Address::invalid();
- } else {
- // Just look it up in the locals map, which will give us back a
- // [[type]]*. If that doesn't work, do the more elaborate DRE
- // emission.
- auto it = LocalDeclMap.find(variable);
- if (it != LocalDeclMap.end()) {
- src = it->second;
+ } else if (CI.isByRef()) {
+ if (BlockInfo && CI.isNested()) {
+ // We need to use the capture from the enclosing block.
+ const CGBlockInfo::Capture &enclosingCapture =
+ BlockInfo->getCapture(variable);
+
+ // This is a [[type]]*, except that a byref entry will just be an i8**.
+ src = Builder.CreateStructGEP(LoadBlockStruct(),
+ enclosingCapture.getIndex(),
+ enclosingCapture.getOffset(),
+ "block.capture.addr");
} else {
- DeclRefExpr declRef(
- const_cast<VarDecl *>(variable),
- /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type,
- VK_LValue, SourceLocation());
- src = EmitDeclRefLValue(&declRef).getAddress();
+ auto I = LocalDeclMap.find(variable);
+ assert(I != LocalDeclMap.end());
+ src = I->second;
}
- }
+ } else {
+ DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+ /*RefersToEnclosingVariableOrCapture*/ CI.isNested(),
+ type.getNonReferenceType(), VK_LValue,
+ SourceLocation());
+ src = EmitDeclRefLValue(&declRef).getAddress();
+ }
// For byrefs, we just write the pointer to the byref struct into
// the block field. There's no need to chase the forwarding
@@ -837,8 +846,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// If it's a reference variable, copy the reference into the block field.
} else if (type->isReferenceType()) {
- llvm::Value *ref = Builder.CreateLoad(src, "ref.val");
- Builder.CreateStore(ref, blockField);
+ Builder.CreateStore(src.getPointer(), blockField);
// If this is an ARC __strong block-pointer variable, don't do a
// block copy.
@@ -924,7 +932,10 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
UnsignedLongTy, UnsignedLongTy, nullptr);
// Now form a pointer to that.
- BlockDescriptorType = llvm::PointerType::getUnqual(BlockDescriptorType);
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant);
+ BlockDescriptorType = llvm::PointerType::get(BlockDescriptorType, AddrSpace);
return BlockDescriptorType;
}
@@ -1109,8 +1120,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
}
if (CGDebugInfo *DI = getDebugInfo()) {
- if (CGM.getCodeGenOpts().getDebugInfo()
- >= CodeGenOptions::LimitedDebugInfo) {
+ if (CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::LimitedDebugInfo) {
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum,
localAddr, Builder);
@@ -1174,9 +1185,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
// Create the function declaration.
const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType();
- const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
- fnType->getReturnType(), args, fnType->getExtInfo(),
- fnType->isVariadic());
+ const CGFunctionInfo &fnInfo =
+ CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args);
if (CGM.ReturnSlotInterferesWithArgs(fnInfo))
blockInfo.UsesStret = true;
@@ -1260,8 +1270,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
const VarDecl *variable = CI.getVariable();
DI->EmitLocation(Builder, variable->getLocation());
- if (CGM.getCodeGenOpts().getDebugInfo()
- >= CodeGenOptions::LimitedDebugInfo) {
+ if (CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::LimitedDebugInfo) {
const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable);
if (capture.isConstant()) {
auto addr = LocalDeclMap.find(variable)->second;
@@ -1329,8 +1339,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
C.VoidPtrTy);
args.push_back(&srcDecl);
- const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false);
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
// FIXME: it would be nice if these were mergeable with things with
// identical semantics.
@@ -1505,8 +1515,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
C.VoidPtrTy);
args.push_back(&srcDecl);
- const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false);
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
// FIXME: We'd like to put these into a mergable by content, with
// internal linkage.
@@ -1791,8 +1801,8 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
Context.VoidPtrTy);
args.push_back(&src);
- const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration(
- R, args, FunctionType::ExtInfo(), /*variadic=*/false);
+ const CGFunctionInfo &FI =
+ CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI);
@@ -1864,8 +1874,8 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
Context.VoidPtrTy);
args.push_back(&src);
- const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration(
- R, args, FunctionType::ExtInfo(), /*variadic=*/false);
+ const CGFunctionInfo &FI =
+ CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI);
@@ -2108,7 +2118,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
bool packed = false;
CharUnits varAlign = getContext().getDeclAlign(D);
- CharUnits varOffset = size.RoundUpToAlignment(varAlign);
+ CharUnits varOffset = size.alignTo(varAlign);
// We may have to insert padding.
if (varOffset != size) {
@@ -2285,9 +2295,36 @@ void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) {
/// Adjust the declaration of something from the blocks API.
static void configureBlocksRuntimeObject(CodeGenModule &CGM,
llvm::Constant *C) {
- if (!CGM.getLangOpts().BlocksRuntimeOptional) return;
-
auto *GV = cast<llvm::GlobalValue>(C->stripPointerCasts());
+
+ if (CGM.getTarget().getTriple().isOSBinFormatCOFF()) {
+ IdentifierInfo &II = CGM.getContext().Idents.get(C->getName());
+ TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
+ DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+
+ assert((isa<llvm::Function>(C->stripPointerCasts()) ||
+ isa<llvm::GlobalVariable>(C->stripPointerCasts())) &&
+ "expected Function or GlobalVariable");
+
+ const NamedDecl *ND = nullptr;
+ for (const auto &Result : DC->lookup(&II))
+ if ((ND = dyn_cast<FunctionDecl>(Result)) ||
+ (ND = dyn_cast<VarDecl>(Result)))
+ break;
+
+ // TODO: support static blocks runtime
+ if (GV->isDeclaration() && (!ND || !ND->hasAttr<DLLExportAttr>())) {
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ } else {
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ }
+ }
+
+ if (!CGM.getLangOpts().BlocksRuntimeOptional)
+ return;
+
if (GV->isDeclaration() && GV->hasExternalLinkage())
GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
}
@@ -2335,5 +2372,5 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() {
Int8PtrTy->getPointerTo(),
nullptr);
configureBlocksRuntimeObject(*this, NSConcreteStackBlock);
- return NSConcreteStackBlock;
+ return NSConcreteStackBlock;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h b/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h
index 489f3413d4b8..027435d7c599 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H
#define LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "Address.h"
#include "CodeGenTypeCache.h"
@@ -22,9 +23,7 @@ class CodeGenFunction;
/// \brief This is an IRBuilder insertion helper that forwards to
/// CodeGenFunction::InsertHelper, which adds necessary metadata to
/// instructions.
-template <bool PreserveNames>
-class CGBuilderInserter
- : protected llvm::IRBuilderDefaultInserter<PreserveNames> {
+class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter {
public:
CGBuilderInserter() = default;
explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {}
@@ -38,17 +37,10 @@ private:
CodeGenFunction *CGF = nullptr;
};
-// Don't preserve names on values in an optimized build.
-#ifdef NDEBUG
-#define PreserveNames false
-#else
-#define PreserveNames true
-#endif
-
-typedef CGBuilderInserter<PreserveNames> CGBuilderInserterTy;
+typedef CGBuilderInserter CGBuilderInserterTy;
-typedef llvm::IRBuilder<PreserveNames, llvm::ConstantFolder,
- CGBuilderInserterTy> CGBuilderBaseTy;
+typedef llvm::IRBuilder<llvm::ConstantFolder, CGBuilderInserterTy>
+ CGBuilderBaseTy;
class CGBuilderTy : public CGBuilderBaseTy {
/// Storing a reference to the type cache here makes it a lot easier
@@ -194,6 +186,12 @@ public:
Addr.getPointer(), Index, Name),
Addr.getAlignment().alignmentAtOffset(Offset));
}
+ Address CreateStructGEP(Address Addr, unsigned Index,
+ const llvm::StructLayout *Layout,
+ const llvm::Twine &Name = "") {
+ auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
+ return CreateStructGEP(Addr, Index, Offset, Name);
+ }
/// Given
/// %addr = [n x T]* ...
@@ -298,8 +296,6 @@ public:
}
};
-#undef PreserveNames
-
} // end namespace CodeGen
} // end namespace clang
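
The new CreateStructGEP overload above lets callers derive the element's byte offset from an llvm::StructLayout instead of passing it in. A standalone analogue of that offset arithmetic, using offsetof in place of StructLayout::getElementOffset (the struct and names are illustrative):

#include <cstddef>
#include <cstdio>

struct Header { char tag; int id; double payload; };

int main() {
  // offsetof plays the role of StructLayout::getElementOffset here.
  std::size_t off = offsetof(Header, payload);
  Header h{'x', 7, 3.5};
  auto *p = reinterpret_cast<double *>(reinterpret_cast<char *>(&h) + off);
  std::printf("%f\n", *p); // prints 3.500000
}
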
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
index 787ac5361bbb..a5fc53113bdc 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include <sstream>
using namespace clang;
@@ -105,9 +106,8 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
llvm::Type *ValueType = Args[1]->getType();
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- llvm::Value *Result =
- CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
- llvm::SequentiallyConsistent);
+ llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
+ Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -167,9 +167,8 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
- llvm::Value *Result =
- CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
- llvm::SequentiallyConsistent);
+ llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
+ Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
if (Invert)
Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
@@ -206,9 +205,9 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
- Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
- llvm::SequentiallyConsistent,
- llvm::SequentiallyConsistent);
+ Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
+ Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering::SequentiallyConsistent);
if (ReturnBool)
// Extract boolean success flag and zext it to int.
return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
@@ -219,6 +218,51 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
ValueType);
}
+// Emit a simple mangled intrinsic that has 1 argument and a return type
+// matching the argument type.
+static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, Src0);
+}
+
+// Emit an intrinsic that has 2 operands of the same type as its result.
+static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, { Src0, Src1 });
+}
+
+// Emit an intrinsic that has 3 operands of the same type as its result.
+static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
+}
+
+// Emit an intrinsic that has 1 float or double operand, and 1 integer.
+static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ unsigned IntrinsicID) {
+ llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
+ return CGF.Builder.CreateCall(F, {Src0, Src1});
+}
+
/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
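
The emitUnaryBuiltin/emitBinaryBuiltin/emitTernaryBuiltin/emitFPIntBuiltin helpers added above deduplicate the builtin switch: each case reduces to "evaluate N operands, select the intrinsic overload from the first operand's type, emit one call". A rough standalone model of that table-driven shape, with made-up names (the real dispatch is the switch in EmitBuiltinExpr):

#include <cmath>
#include <cstdio>
#include <functional>
#include <map>
#include <string>

using Unary = std::function<double(double)>;

int main() {
  // One table entry per builtin replaces one hand-written case each.
  std::map<std::string, Unary> unary = {
      {"fabs",  [](double x) { return std::fabs(x); }},
      {"ceil",  [](double x) { return std::ceil(x); }},
      {"floor", [](double x) { return std::floor(x); }},
  };
  std::printf("%f\n", unary["ceil"](1.2)); // prints 2.000000
}
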
@@ -248,8 +292,8 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
if (CGF.getTarget().isBigEndian()) {
Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
V = CGF.Builder.CreateLShr(V, ShiftCst);
- }
- // We are truncating value in order to extract the higher-order
+ }
+ // We are truncating value in order to extract the higher-order
// double, which we will be using to extract the sign from.
IntTy = llvm::IntegerType::get(C, Width);
V = CGF.Builder.CreateTrunc(V, IntTy);
@@ -288,6 +332,17 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
return CGF.Builder.CreateExtractValue(Tmp, 0);
}
+static Value *emitRangedBuiltin(CodeGenFunction &CGF,
+ unsigned IntrinsicID,
+ int low, int high) {
+ llvm::MDBuilder MDHelper(CGF.getLLVMContext());
+ llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
+ Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
+ llvm::Instruction *Call = CGF.Builder.CreateCall(F);
+ Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
+ return Call;
+}
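
emitRangedBuiltin attaches !range metadata to the call, promising the optimizer that the result lies in the half-open interval [low, high), the same convention MDBuilder::createRange uses. A standalone check with those semantics (names illustrative):

#include <cassert>

bool inRange(int value, int low, int high) {
  return low <= value && value < high; // half-open: high itself is excluded
}

int main() {
  assert(inRange(0, 0, 1024));
  assert(!inRange(1024, 0, 1024)); // the upper bound is exclusive
}
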
+
namespace {
struct WidthAndSignedness {
unsigned Width;
@@ -465,9 +520,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_fabs:
case Builtin::BI__builtin_fabsf:
case Builtin::BI__builtin_fabsl: {
- Value *Arg1 = EmitScalarExpr(E->getArg(0));
- Value *Result = EmitFAbs(*this, Arg1);
- return RValue::get(Result);
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
}
case Builtin::BI__builtin_fmod:
case Builtin::BI__builtin_fmodf:
@@ -477,7 +530,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
return RValue::get(Result);
}
-
+ case Builtin::BI__builtin_copysign:
+ case Builtin::BI__builtin_copysignf:
+ case Builtin::BI__builtin_copysignl: {
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
+ }
+ case Builtin::BI__builtin_ceil:
+ case Builtin::BI__builtin_ceilf:
+ case Builtin::BI__builtin_ceill: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
+ }
+ case Builtin::BI__builtin_floor:
+ case Builtin::BI__builtin_floorf:
+ case Builtin::BI__builtin_floorl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
+ }
+ case Builtin::BI__builtin_trunc:
+ case Builtin::BI__builtin_truncf:
+ case Builtin::BI__builtin_truncl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
+ }
+ case Builtin::BI__builtin_rint:
+ case Builtin::BI__builtin_rintf:
+ case Builtin::BI__builtin_rintl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
+ }
+ case Builtin::BI__builtin_nearbyint:
+ case Builtin::BI__builtin_nearbyintf:
+ case Builtin::BI__builtin_nearbyintl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
+ }
+ case Builtin::BI__builtin_round:
+ case Builtin::BI__builtin_roundf:
+ case Builtin::BI__builtin_roundl: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
+ }
+ case Builtin::BI__builtin_fmin:
+ case Builtin::BI__builtin_fminf:
+ case Builtin::BI__builtin_fminl: {
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
+ }
+ case Builtin::BI__builtin_fmax:
+ case Builtin::BI__builtin_fmaxf:
+ case Builtin::BI__builtin_fmaxl: {
+ return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
+ }
case Builtin::BI__builtin_conj:
case Builtin::BI__builtin_conjf:
case Builtin::BI__builtin_conjl: {
@@ -645,10 +742,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_bswap16:
case Builtin::BI__builtin_bswap32:
case Builtin::BI__builtin_bswap64: {
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- llvm::Type *ArgType = ArgValue->getType();
- Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
- return RValue::get(Builder.CreateCall(F, ArgValue));
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
+ }
+ case Builtin::BI__builtin_bitreverse8:
+ case Builtin::BI__builtin_bitreverse16:
+ case Builtin::BI__builtin_bitreverse32:
+ case Builtin::BI__builtin_bitreverse64: {
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
}
case Builtin::BI__builtin_object_size: {
unsigned Type =
@@ -751,13 +851,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
- case Builtin::BI__builtin_isinf: {
- // isinf(x) --> fabs(x) == infinity
+ case Builtin::BI__builtin_isinf:
+ case Builtin::BI__builtin_isfinite: {
+ // isinf(x) --> fabs(x) == infinity
+ // isfinite(x) --> fabs(x) != infinity
+ // A NaN input makes the ordered compare false in either case, as required.
Value *V = EmitScalarExpr(E->getArg(0));
- V = EmitFAbs(*this, V);
-
- V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
+ Value *Fabs = EmitFAbs(*this, V);
+ Constant *Infinity = ConstantFP::getInfinity(V->getType());
+ CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
+ ? CmpInst::FCMP_OEQ
+ : CmpInst::FCMP_ONE;
+ Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
+ return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
}
case Builtin::BI__builtin_isinf_sign: {
@@ -795,19 +901,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
}
- case Builtin::BI__builtin_isfinite: {
- // isfinite(x) --> x == x && fabs(x) != infinity;
- Value *V = EmitScalarExpr(E->getArg(0));
- Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
-
- Value *Abs = EmitFAbs(*this, V);
- Value *IsNotInf =
- Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
-
- V = Builder.CreateAnd(Eq, IsNotInf, "and");
- return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
- }
-
case Builtin::BI__builtin_fpclassify: {
Value *V = EmitScalarExpr(E->getArg(5));
llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
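
The merged isinf/isfinite case earlier in this change replaces the separate isfinite lowering removed above: both predicates become fabs plus a single ordered compare against +infinity (FCMP_OEQ for isinf, FCMP_ONE for isfinite), and an ordered compare is false whenever the input is NaN, which is the right answer for both. A standalone model; since C++ has no direct operator for LLVM's ordered-not-equal, it is spelled with an explicit NaN test:

#include <cassert>
#include <cmath>
#include <limits>

const double Inf = std::numeric_limits<double>::infinity();

bool isinfLowered(double x) {
  return std::fabs(x) == Inf;                   // OEQ: false on NaN
}

bool isfiniteLowered(double x) {
  return !std::isnan(x) && std::fabs(x) != Inf; // ONE: false on NaN
}

int main() {
  assert(isinfLowered(-Inf) && !isinfLowered(std::nan("")));
  assert(isfiniteLowered(42.0) && !isfiniteLowered(std::nan("")));
}
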
@@ -1258,7 +1351,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::StoreInst *Store =
Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
StoreSize);
- Store->setAtomic(llvm::Release);
+ Store->setAtomic(llvm::AtomicOrdering::Release);
return RValue::get(nullptr);
}
@@ -1270,7 +1363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// any way to safely use it... but in practice, it mostly works
// to use it with non-atomic loads and stores to get acquire/release
// semantics.
- Builder.CreateFence(llvm::SequentiallyConsistent);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
return RValue::get(nullptr);
}
@@ -1294,9 +1387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
getContext().VoidPtrTy);
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
- FunctionType::ExtInfo(),
- RequiredArgs::All);
+ CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
@@ -1320,30 +1411,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
switch (ord) {
case 0: // memory_order_relaxed
default: // invalid order
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::Monotonic);
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::Monotonic);
break;
- case 1: // memory_order_consume
- case 2: // memory_order_acquire
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::Acquire);
+ case 1: // memory_order_consume
+ case 2: // memory_order_acquire
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::Acquire);
break;
- case 3: // memory_order_release
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::Release);
+ case 3: // memory_order_release
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::Release);
break;
- case 4: // memory_order_acq_rel
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::AcquireRelease);
+ case 4: // memory_order_acq_rel
+ Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::AcquireRelease);
break;
- case 5: // memory_order_seq_cst
- Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- Ptr, NewVal,
- llvm::SequentiallyConsistent);
+ case 5: // memory_order_seq_cst
+ Result = Builder.CreateAtomicRMW(
+ llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
+ llvm::AtomicOrdering::SequentiallyConsistent);
break;
}
Result->setVolatile(Volatile);
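
This switch maps the constant-folded C11/C++11 memory_order argument onto LLVM's AtomicOrdering, with memory_order_consume strengthened to acquire and any invalid constant degraded to relaxed. The same mapping as a standalone function (the enum mirrors LLVM's names; this table is a sketch of the switch above, not Clang code):

#include <cstdio>

enum class AtomicOrdering {
  Monotonic, Acquire, Release, AcquireRelease, SequentiallyConsistent
};

AtomicOrdering mapMemoryOrder(int ord) {
  switch (ord) {
  case 1: // memory_order_consume, strengthened to acquire
  case 2: // memory_order_acquire
    return AtomicOrdering::Acquire;
  case 3: // memory_order_release
    return AtomicOrdering::Release;
  case 4: // memory_order_acq_rel
    return AtomicOrdering::AcquireRelease;
  case 5: // memory_order_seq_cst
    return AtomicOrdering::SequentiallyConsistent;
  default: // memory_order_relaxed, or an invalid constant
    return AtomicOrdering::Monotonic;
  }
}

int main() {
  std::printf("%d\n", mapMemoryOrder(1) == AtomicOrdering::Acquire); // 1
}
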
@@ -1360,9 +1448,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
createBasicBlock("seqcst", CurFn)
};
llvm::AtomicOrdering Orders[5] = {
- llvm::Monotonic, llvm::Acquire, llvm::Release,
- llvm::AcquireRelease, llvm::SequentiallyConsistent
- };
+ llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
+ llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
+ llvm::AtomicOrdering::SequentiallyConsistent};
Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
@@ -1406,13 +1494,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
switch (ord) {
case 0: // memory_order_relaxed
default: // invalid order
- Store->setOrdering(llvm::Monotonic);
+ Store->setOrdering(llvm::AtomicOrdering::Monotonic);
break;
case 3: // memory_order_release
- Store->setOrdering(llvm::Release);
+ Store->setOrdering(llvm::AtomicOrdering::Release);
break;
case 5: // memory_order_seq_cst
- Store->setOrdering(llvm::SequentiallyConsistent);
+ Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
break;
}
return RValue::get(nullptr);
@@ -1426,8 +1514,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
createBasicBlock("seqcst", CurFn)
};
llvm::AtomicOrdering Orders[3] = {
- llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
- };
+ llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
+ llvm::AtomicOrdering::SequentiallyConsistent};
Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
@@ -1466,16 +1554,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(llvm::Acquire, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
break;
case 3: // memory_order_release
- Builder.CreateFence(llvm::Release, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AcquireRelease, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+ Scope);
break;
}
return RValue::get(nullptr);
@@ -1492,23 +1581,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
Builder.SetInsertPoint(AcquireBB);
- Builder.CreateFence(llvm::Acquire, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(1), AcquireBB);
SI->addCase(Builder.getInt32(2), AcquireBB);
Builder.SetInsertPoint(ReleaseBB);
- Builder.CreateFence(llvm::Release, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(3), ReleaseBB);
Builder.SetInsertPoint(AcqRelBB);
- Builder.CreateFence(llvm::AcquireRelease, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(4), AcqRelBB);
Builder.SetInsertPoint(SeqCstBB);
- Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
+ Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(5), SeqCstBB);
@@ -1794,7 +1883,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
break;
}
-
+
llvm::Value *Carry;
llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
Builder.CreateStore(Sum, SumOutPtr);
@@ -1839,9 +1928,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Comparand =
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
- auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- SequentiallyConsistent,
- SequentiallyConsistent);
+ auto Result =
+ Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ AtomicOrdering::SequentiallyConsistent,
+ AtomicOrdering::SequentiallyConsistent);
Result->setVolatile(true);
return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
@@ -1853,44 +1943,47 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(2)),
EmitScalarExpr(E->getArg(1)),
- SequentiallyConsistent,
- SequentiallyConsistent);
+ AtomicOrdering::SequentiallyConsistent,
+ AtomicOrdering::SequentiallyConsistent);
CXI->setVolatile(true);
return RValue::get(Builder.CreateExtractValue(CXI, 0));
}
case Builtin::BI_InterlockedIncrement: {
+ llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
AtomicRMWInst::Add,
EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(Int32Ty, 1),
- llvm::SequentiallyConsistent);
+ ConstantInt::get(IntTy, 1),
+ llvm::AtomicOrdering::SequentiallyConsistent);
RMWI->setVolatile(true);
- return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
+ return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
}
case Builtin::BI_InterlockedDecrement: {
+ llvm::Type *IntTy = ConvertType(E->getType());
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
AtomicRMWInst::Sub,
EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(Int32Ty, 1),
- llvm::SequentiallyConsistent);
+ ConstantInt::get(IntTy, 1),
+ llvm::AtomicOrdering::SequentiallyConsistent);
RMWI->setVolatile(true);
- return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
+ return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
}
case Builtin::BI_InterlockedExchangeAdd: {
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
AtomicRMWInst::Add,
EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1)),
- llvm::SequentiallyConsistent);
+ llvm::AtomicOrdering::SequentiallyConsistent);
RMWI->setVolatile(true);
return RValue::get(RMWI);
}
case Builtin::BI__readfsdword: {
+ llvm::Type *IntTy = ConvertType(E->getType());
Value *IntToPtr =
Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(CGM.Int32Ty, 257));
+ llvm::PointerType::get(IntTy, 257));
LoadInst *Load =
- Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
+ Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
return RValue::get(Load);
}
@@ -1963,6 +2056,323 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
break;
}
+
+ // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
+ case Builtin::BIread_pipe:
+ case Builtin::BIwrite_pipe: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0)),
+ *Arg1 = EmitScalarExpr(E->getArg(1));
+
+ // Type of the generic packet parameter.
+ unsigned GenericAS =
+ getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ llvm::Type *I8PTy = llvm::PointerType::get(
+ llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
+
+ // Testing which overloaded version we should generate the call for.
+ if (2U == E->getNumArgs()) {
+ const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
+ : "__write_pipe_2";
+ // Creating a generic function type to be able to call with any builtin or
+ // user defined type.
+ llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
+ } else {
+ assert(4 == E->getNumArgs() &&
+ "Illegal number of parameters to pipe function");
+ const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
+ : "__write_pipe_4";
+
+ llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
+ Value *Arg2 = EmitScalarExpr(E->getArg(2)),
+ *Arg3 = EmitScalarExpr(E->getArg(3));
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
+ // We know the third argument is an integer type, but we may need to cast
+ // it to i32.
+ if (Arg2->getType() != Int32Ty)
+ Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
+ }
+ }
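
Judging from the function types assembled in this case, the OpenCL device runtime is expected to provide pipe entry points shaped roughly like the declarations below (the pipe and reserve-id handles are shown as opaque pointers and the packet as a generic pointer; this is a reconstruction from the lowering, not a documented ABI):

extern "C" {
int __read_pipe_2(void *pipe, void *packet);
int __write_pipe_2(void *pipe, void *packet);
int __read_pipe_4(void *pipe, void *reserve_id, int index, void *packet);
int __write_pipe_4(void *pipe, void *reserve_id, int index, void *packet);
}
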
+ // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
+ // functions
+ case Builtin::BIreserve_read_pipe:
+ case Builtin::BIreserve_write_pipe:
+ case Builtin::BIwork_group_reserve_read_pipe:
+ case Builtin::BIwork_group_reserve_write_pipe:
+ case Builtin::BIsub_group_reserve_read_pipe:
+ case Builtin::BIsub_group_reserve_write_pipe: {
+ // Composing the mangled name for the function.
+ const char *Name;
+ if (BuiltinID == Builtin::BIreserve_read_pipe)
+ Name = "__reserve_read_pipe";
+ else if (BuiltinID == Builtin::BIreserve_write_pipe)
+ Name = "__reserve_write_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
+ Name = "__work_group_reserve_read_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
+ Name = "__work_group_reserve_write_pipe";
+ else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
+ Name = "__sub_group_reserve_read_pipe";
+ else
+ Name = "__sub_group_reserve_write_pipe";
+
+ Value *Arg0 = EmitScalarExpr(E->getArg(0)),
+ *Arg1 = EmitScalarExpr(E->getArg(1));
+ llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
+
+ // Building the generic function prototype.
+ llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ // We know the second argument is an integer type, but we may need to cast
+ // it to i32.
+ if (Arg1->getType() != Int32Ty)
+ Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
+ }
+ // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
+ // functions
+ case Builtin::BIcommit_read_pipe:
+ case Builtin::BIcommit_write_pipe:
+ case Builtin::BIwork_group_commit_read_pipe:
+ case Builtin::BIwork_group_commit_write_pipe:
+ case Builtin::BIsub_group_commit_read_pipe:
+ case Builtin::BIsub_group_commit_write_pipe: {
+ const char *Name;
+ if (BuiltinID == Builtin::BIcommit_read_pipe)
+ Name = "__commit_read_pipe";
+ else if (BuiltinID == Builtin::BIcommit_write_pipe)
+ Name = "__commit_write_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
+ Name = "__work_group_commit_read_pipe";
+ else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
+ Name = "__work_group_commit_write_pipe";
+ else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
+ Name = "__sub_group_commit_read_pipe";
+ else
+ Name = "__sub_group_commit_write_pipe";
+
+ Value *Arg0 = EmitScalarExpr(E->getArg(0)),
+ *Arg1 = EmitScalarExpr(E->getArg(1));
+
+ // Building the generic function prototype.
+ llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
+ llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
+ }
+ // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
+ case Builtin::BIget_pipe_num_packets:
+ case Builtin::BIget_pipe_max_packets: {
+ const char *Name;
+ if (BuiltinID == Builtin::BIget_pipe_num_packets)
+ Name = "__get_pipe_num_packets";
+ else
+ Name = "__get_pipe_max_packets";
+
+ // Building the generic function prototype.
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ArgTys[] = {Arg0->getType()};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
+ }
+
+ // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
+ case Builtin::BIto_global:
+ case Builtin::BIto_local:
+ case Builtin::BIto_private: {
+ auto Arg0 = EmitScalarExpr(E->getArg(0));
+ auto NewArgT = llvm::PointerType::get(Int8Ty,
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ auto NewRetT = llvm::PointerType::get(Int8Ty,
+ CGM.getContext().getTargetAddressSpace(
+ E->getType()->getPointeeType().getAddressSpace()));
+ auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
+ llvm::Value *NewArg;
+ if (Arg0->getType()->getPointerAddressSpace() !=
+ NewArgT->getPointerAddressSpace())
+ NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
+ else
+ NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
+ auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
+ auto NewCall =
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
+ return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
+ ConvertType(E->getType())));
+ }
+
+ // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
+ // It contains four different overload formats specified in Table 6.13.17.1.
+ case Builtin::BIenqueue_kernel: {
+ StringRef Name; // Generated function call name
+ unsigned NumArgs = E->getNumArgs();
+
+ llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
+ llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
+
+ llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
+ llvm::Value *Range = EmitScalarExpr(E->getArg(2));
+
+ if (NumArgs == 4) {
+ // The most basic form of the call with parameters:
+ // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
+ Name = "__enqueue_kernel_basic";
+ llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
+
+ llvm::Value *Block =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
+ }
+ assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
+
+ // Could have events and/or vaargs.
+ if (E->getArg(3)->getType()->isBlockPointerType()) {
+ // No events passed, but has variadic arguments.
+ Name = "__enqueue_kernel_vaargs";
+ llvm::Value *Block =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
+ // Create a vector of the arguments, as well as a constant value to
+ // express to the runtime the number of variadic arguments.
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
+ ConstantInt::get(IntTy, NumArgs - 4)};
+ std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
+ IntTy};
+
+ // Add the variadics.
+ for (unsigned I = 4; I < NumArgs; ++I) {
+ llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
+ unsigned TypeSizeInBytes =
+ getContext()
+ .getTypeSizeInChars(E->getArg(I)->getType())
+ .getQuantity();
+ Args.push_back(TypeSizeInBytes < 4
+ ? Builder.CreateZExt(ArgSize, Int32Ty)
+ : ArgSize);
+ }
+
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ }
+ // Any calls now have event arguments passed.
+ if (NumArgs >= 7) {
+ llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
+ unsigned AS4 =
+ E->getArg(4)->getType()->isArrayType()
+ ? E->getArg(4)->getType().getAddressSpace()
+ : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
+ llvm::Type *EventPtrAS4Ty =
+ EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
+ unsigned AS5 =
+ E->getArg(5)->getType()->getPointeeType().getAddressSpace();
+ llvm::Type *EventPtrAS5Ty =
+ EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
+
+ llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
+ llvm::Value *EventList =
+ E->getArg(4)->getType()->isArrayType()
+ ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
+ : EmitScalarExpr(E->getArg(4));
+ llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
+ llvm::Value *Block =
+ Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
+
+ std::vector<llvm::Type *> ArgTys = {
+ QueueTy, Int32Ty, RangeTy, Int32Ty,
+ EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
+ EventList, ClkEvent, Block};
+
+ if (NumArgs == 7) {
+ // Has events but no variadics.
+ Name = "__enqueue_kernel_basic_events";
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ }
+ // Has event info and variadics
+ // Pass the number of variadics to the runtime function too.
+ Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
+ ArgTys.push_back(Int32Ty);
+ Name = "__enqueue_kernel_events_vaargs";
+
+ // Add the variadics.
+ for (unsigned I = 7; I < NumArgs; ++I) {
+ llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
+ unsigned TypeSizeInBytes =
+ getContext()
+ .getTypeSizeInChars(E->getArg(I)->getType())
+ .getQuantity();
+ Args.push_back(TypeSizeInBytes < 4
+ ? Builder.CreateZExt(ArgSize, Int32Ty)
+ : ArgSize);
+ }
+ llvm::FunctionType *FTy = llvm::FunctionType::get(
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ }
+ }
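
All four enqueue_kernel overloads funnel into a small family of runtime entry points. From the function types built above they look roughly as declared below, with queue_t/ndrange_t/clk_event_t shown as opaque pointers and the invoke block as a generic pointer; again a reconstruction from the lowering, not a documented interface:

extern "C" {
int __enqueue_kernel_basic(void *queue, int flags, void *ndrange, void *block);
int __enqueue_kernel_vaargs(void *queue, int flags, void *ndrange, void *block,
                            int nargs, ...);
int __enqueue_kernel_basic_events(void *queue, int flags, void *ndrange,
                                  int nevents, void **wait_list,
                                  void **ret_event, void *block);
int __enqueue_kernel_events_vaargs(void *queue, int flags, void *ndrange,
                                   int nevents, void **wait_list,
                                   void **ret_event, void *block,
                                   int nargs, ...);
}
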
+ // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
+ // parameter.
+ case Builtin::BIget_kernel_work_group_size: {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+ "__get_kernel_work_group_size_impl"),
+ Arg));
+ }
+ case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
+ return RValue::get(Builder.CreateCall(
+ CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, Int8PtrTy, false),
+ "__get_kernel_preferred_work_group_multiple_impl"),
+ Arg));
+ }
+ case Builtin::BIprintf:
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
+ return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
+ break;
+ case Builtin::BI__builtin_canonicalize:
+ case Builtin::BI__builtin_canonicalizef:
+ case Builtin::BI__builtin_canonicalizel:
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
+
+ case Builtin::BI__builtin_thread_pointer: {
+ if (!getContext().getTargetInfo().isTLSSupported())
+ CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
+ // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
+ break;
+ }
}
// If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -2155,7 +2565,7 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
}
Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
- unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
+ unsigned nElts = V->getType()->getVectorNumElements();
Value* SV = llvm::ConstantVector::getSplat(nElts, C);
return Builder.CreateShuffleVector(V, V, SV, "lane");
}
@@ -3073,14 +3483,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vext_v:
case NEON::BI__builtin_neon_vextq_v: {
int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
+ Indices.push_back(i+CV);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value *SV = llvm::ConstantVector::get(Indices);
- return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
+ return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
}
case NEON::BI__builtin_neon_vfma_v:
case NEON::BI__builtin_neon_vfmaq_v: {
@@ -3278,14 +3687,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(Builder.getInt32(i+vi));
- Indices.push_back(Builder.getInt32(i+e+vi));
+ Indices.push_back(i+vi);
+ Indices.push_back(i+e+vi);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -3307,13 +3715,12 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+ Indices.push_back(2*i+vi);
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -3326,14 +3733,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
- Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+ Indices.push_back((i + vi*e) >> 1);
+ Indices.push_back(((i + vi*e) >> 1)+e);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -3381,19 +3787,19 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
if (ExtOp)
TblOps.push_back(ExtOp);
- // Build a vector containing sequential number like (0, 1, 2, ..., 15)
- SmallVector<Constant*, 16> Indices;
+ // Build a vector containing sequential numbers like (0, 1, 2, ..., 15)
+ SmallVector<uint32_t, 16> Indices;
llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
- Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
- Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
+ Indices.push_back(2*i);
+ Indices.push_back(2*i+1);
}
- Value *SV = llvm::ConstantVector::get(Indices);
int PairPos = 0, End = Ops.size() - 1;
while (PairPos < End) {
TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
- Ops[PairPos+1], SV, Name));
+ Ops[PairPos+1], Indices,
+ Name));
PairPos += 2;
}
@@ -3402,13 +3808,13 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
if (PairPos == End) {
Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
- ZeroTbl, SV, Name));
+ ZeroTbl, Indices, Name));
}
Function *TblF;
TblOps.push_back(IndexOp);
TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
-
+
return CGF.EmitNeonCall(TblF, TblOps, Name);
}
@@ -3452,7 +3858,9 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
llvm::Type *RegisterType,
- llvm::Type *ValueType, bool IsRead) {
+ llvm::Type *ValueType,
+ bool IsRead,
+ StringRef SysReg = "") {
// The read/write register intrinsics only support 32- and 64-bit operations.
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
&& "Unsupported size for register.");
@@ -3461,8 +3869,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
CodeGen::CodeGenModule &CGM = CGF.CGM;
LLVMContext &Context = CGM.getLLVMContext();
- const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
- StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+ if (SysReg.empty()) {
+ const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
+ SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+ }
llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
@@ -3602,6 +4012,74 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
}
+ if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
+ BuiltinID == ARM::BI__builtin_arm_mcrr2) {
+ Function *F;
+
+ switch (BuiltinID) {
+ default: llvm_unreachable("unexpected builtin");
+ case ARM::BI__builtin_arm_mcrr:
+ F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
+ break;
+ case ARM::BI__builtin_arm_mcrr2:
+ F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
+ break;
+ }
+
+ // The MCRR{2} instruction has 5 operands, but the intrinsic has only 4:
+ // Rt and Rt2 are represented as a single unsigned 64-bit integer in the
+ // intrinsic definition, while internally they are two 32-bit integers.
+
+ Value *Coproc = EmitScalarExpr(E->getArg(0));
+ Value *Opc1 = EmitScalarExpr(E->getArg(1));
+ Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
+ Value *CRm = EmitScalarExpr(E->getArg(3));
+
+ Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
+ Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
+ Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
+ Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
+
+ return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
+ }
+
+ if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
+ BuiltinID == ARM::BI__builtin_arm_mrrc2) {
+ Function *F;
+
+ switch (BuiltinID) {
+ default: llvm_unreachable("unexpected builtin");
+ case ARM::BI__builtin_arm_mrrc:
+ F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
+ break;
+ case ARM::BI__builtin_arm_mrrc2:
+ F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
+ break;
+ }
+
+ Value *Coproc = EmitScalarExpr(E->getArg(0));
+ Value *Opc1 = EmitScalarExpr(E->getArg(1));
+ Value *CRm = EmitScalarExpr(E->getArg(2));
+ Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
+
+ // Returns an unsigned 64-bit integer, represented as two 32-bit integers.
+
+ Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
+ Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
+ Rt = Builder.CreateZExt(Rt, Int64Ty);
+ Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
+
+ Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
+ RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
+ RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
+
+ return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
+ }
+
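
MCRR consumes Rt/Rt2 as the two 32-bit halves of one u64 operand, and the MRRC result is reassembled the same way: zero-extend both halves, shift the high one up, OR in the low one. The arithmetic, standalone:

#include <cassert>
#include <cstdint>

void split(uint64_t v, uint32_t &lo, uint32_t &hi) {
  lo = static_cast<uint32_t>(v);       // truncate, like CreateTruncOrBitCast
  hi = static_cast<uint32_t>(v >> 32); // lshr by 32, then truncate
}

uint64_t join(uint32_t lo, uint32_t hi) {
  return (static_cast<uint64_t>(hi) << 32) | lo; // shl + or, as above
}

int main() {
  uint32_t lo, hi;
  split(0x0123456789abcdefULL, lo, hi);
  assert(hi == 0x01234567u && lo == 0x89abcdefu);
  assert(join(lo, hi) == 0x0123456789abcdefULL);
}
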
if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
((BuiltinID == ARM::BI__builtin_arm_ldrex ||
BuiltinID == ARM::BI__builtin_arm_ldaex) &&
@@ -3914,7 +4392,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
// the first argument, but the LLVM intrinsic expects it as the third one.
case ARM::BI_MoveToCoprocessor:
case ARM::BI_MoveToCoprocessor2: {
- Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
+ Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
Ops[3], Ops[4], Ops[5]});
@@ -4478,11 +4956,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F);
}
- if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
- Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
- return Builder.CreateCall(F);
- }
-
// CRC32
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
@@ -5150,22 +5623,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
}
- case NEON::BI__builtin_neon_vfms_v:
- case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types
- // FIXME: probably remove when we no longer support aarch64_simd.h
- // (arm_neon.h delegates to vfma).
-
- // The ARM builtins (and instructions) have the addend as the first
- // operand, but the 'fma' intrinsics have it last. Swap it around here.
- Value *Subtrahend = Ops[0];
- Value *Multiplicand = Ops[2];
- Ops[0] = Multiplicand;
- Ops[2] = Subtrahend;
- Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
- Ops[1] = Builder.CreateFNeg(Ops[1]);
- Int = Intrinsic::fma;
- return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
- }
case NEON::BI__builtin_neon_vmull_v:
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
@@ -5988,14 +6445,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
- Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
+ Indices.push_back(i+vi);
+ Indices.push_back(i+e+vi);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -6008,13 +6464,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
- Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+ Indices.push_back(2*i+vi);
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -6027,14 +6482,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *SV = nullptr;
for (unsigned vi = 0; vi != 2; ++vi) {
- SmallVector<Constant*, 16> Indices;
+ SmallVector<uint32_t, 16> Indices;
for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
- Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
- Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+ Indices.push_back((i + vi*e) >> 1);
+ Indices.push_back(((i + vi*e) >> 1)+e);
}
Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
- SV = llvm::ConstantVector::get(Indices);
- SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
SV = Builder.CreateDefaultAlignedStore(SV, Addr);
}
return SV;
@@ -6110,6 +6564,118 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
return Result;
}
+// Convert the mask from an integer type to a vector of i1.
+static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
+ unsigned NumElts) {
+
+ llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
+ Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
+
+ // If we have less than 8 elements, then the starting mask was an i8 and
+ // we need to extract down to the right number of elements.
+ if (NumElts < 8) {
+ uint32_t Indices[4];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
+ makeArrayRef(Indices, NumElts),
+ "extract");
+ }
+ return MaskVec;
+}
+
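
getMaskVecValue reinterprets the iN mask as an <N x i1> vector and, for vectors narrower than 8 lanes, keeps only the low lanes via a shuffle; bit i of the integer governs lane i. The same selection in scalar C++ (helper name hypothetical):

#include <cassert>
#include <cstdint>
#include <vector>

std::vector<bool> maskToLanes(uint8_t mask, unsigned numElts) {
  std::vector<bool> lanes(numElts);
  for (unsigned i = 0; i != numElts; ++i)
    lanes[i] = (mask >> i) & 1; // lane i <- bit i; bits >= numElts are dropped
  return lanes;
}

int main() {
  std::vector<bool> lanes = maskToLanes(0xA6, 4); // i8 mask, 4-lane vector
  assert(!lanes[0] && lanes[1] && lanes[2] && !lanes[3]); // low nibble 0110
}
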
+static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
+ SmallVectorImpl<Value *> &Ops,
+ unsigned Align) {
+ // Cast the pointer to right type.
+ Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+
+ // If the mask is all ones just emit a regular store.
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ Ops[1]->getType()->getVectorNumElements());
+
+ return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
+}
+
+static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
+ SmallVectorImpl<Value *> &Ops, unsigned Align) {
+ // Cast the pointer to right type.
+ Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Ops[1]->getType()));
+
+ // If the mask is all ones just emit a regular load.
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
+
+ Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+ Ops[1]->getType()->getVectorNumElements());
+
+ return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
+}
+
+static Value *EmitX86Select(CodeGenFunction &CGF,
+ Value *Mask, Value *Op0, Value *Op1) {
+
+ // If the mask is all ones just return first argument.
+ if (const auto *C = dyn_cast<Constant>(Mask))
+ if (C->isAllOnesValue())
+ return Op0;
+
+ Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
+
+ return CGF.Builder.CreateSelect(Mask, Op0, Op1);
+}
+
+static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
+ bool Signed, SmallVectorImpl<Value *> &Ops) {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *Cmp;
+
+ if (CC == 3) {
+ Cmp = Constant::getNullValue(
+ llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
+ } else if (CC == 7) {
+ Cmp = Constant::getAllOnesValue(
+ llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
+ } else {
+ ICmpInst::Predicate Pred;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case 0: Pred = ICmpInst::ICMP_EQ; break;
+ case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 4: Pred = ICmpInst::ICMP_NE; break;
+ case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ }
+ Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
+ }
+
+ const auto *C = dyn_cast<Constant>(Ops.back());
+ if (!C || !C->isAllOnesValue())
+ Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
+
+ if (NumElts < 8) {
+ uint32_t Indices[8];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ for (unsigned i = NumElts; i != 8; ++i)
+ Indices[i] = i % NumElts + NumElts;
+ Cmp = CGF.Builder.CreateShuffleVector(
+ Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+ }
+ return CGF.Builder.CreateBitCast(Cmp,
+ IntegerType::get(CGF.getLLVMContext(),
+ std::max(NumElts, 8U)));
+}
+
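
EmitX86MaskedCompare decodes the AVX-512 integer-compare immediate: codes 3 and 7 short-circuit to all-false/all-true, and the rest select a predicate whose signedness depends on the builtin variant. Per lane, the decoding behaves like this standalone function (names hypothetical):

#include <cassert>
#include <cstdint>

bool cmpLane(unsigned cc, bool isSigned, int64_t a, int64_t b) {
  uint64_t ua = static_cast<uint64_t>(a), ub = static_cast<uint64_t>(b);
  switch (cc) {
  case 0: return a == b;                       // EQ
  case 1: return isSigned ? a < b : ua < ub;   // SLT / ULT
  case 2: return isSigned ? a <= b : ua <= ub; // SLE / ULE
  case 3: return false;                        // always-false lane
  case 4: return a != b;                       // NE
  case 5: return isSigned ? a >= b : ua >= ub; // SGE / UGE
  case 6: return isSigned ? a > b : ua > ub;   // SGT / UGT
  case 7: return true;                         // always-true lane
  }
  return false; // unreachable for a valid 3-bit immediate
}

int main() {
  assert(cmpLane(1, /*isSigned=*/true, -1, 0));   // -1 <s 0
  assert(!cmpLane(1, /*isSigned=*/false, -1, 0)); // 0xFFF... is not <u 0
}
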
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
if (BuiltinID == X86::BI__builtin_ms_va_start ||
@@ -6160,6 +6726,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
}
+ // These exist so that the builtin that takes an immediate can be bounds
+ // checked by clang to avoid passing bad immediates to the backend. Since
+ // AVX has a larger immediate than SSE we would need separate builtins to
+ // do the different bounds checking. Rather than create a clang specific
+ // SSE only builtin, this implements eight separate builtins to match the
+ // gcc implementation.
+ auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
+ Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+ llvm::Function *F = CGM.getIntrinsic(ID);
+ return Builder.CreateCall(F, Ops);
+ };
+
+ // For the vector forms of FP comparisons, translate the builtins directly to
+ // IR.
+ // TODO: The builtins could be removed if the SSE header files used vector
+ // extension comparisons directly (vector ordered/unordered may need
+ // additional support via __builtin_isnan()).
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
+ llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
+ Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
+ return Builder.CreateBitCast(Sext, FPVecTy);
+ };
+
switch (BuiltinID) {
default: return nullptr;
case X86::BI__builtin_cpu_supports: {
@@ -6188,6 +6779,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
AVX512F,
BMI,
BMI2,
+ AES,
+ PCLMUL,
+ AVX512VL,
+ AVX512BW,
+ AVX512DQ,
+ AVX512CD,
+ AVX512ER,
+ AVX512PF,
+ AVX512VBMI,
+ AVX512IFMA,
MAX
};
@@ -6198,6 +6799,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
.Case("sse", X86Features::SSE)
.Case("sse2", X86Features::SSE2)
.Case("sse3", X86Features::SSE3)
+ .Case("ssse3", X86Features::SSSE3)
.Case("sse4.1", X86Features::SSE4_1)
.Case("sse4.2", X86Features::SSE4_2)
.Case("avx", X86Features::AVX)
@@ -6209,6 +6811,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
.Case("avx512f", X86Features::AVX512F)
.Case("bmi", X86Features::BMI)
.Case("bmi2", X86Features::BMI2)
+ .Case("aes", X86Features::AES)
+ .Case("pclmul", X86Features::PCLMUL)
+ .Case("avx512vl", X86Features::AVX512VL)
+ .Case("avx512bw", X86Features::AVX512BW)
+ .Case("avx512dq", X86Features::AVX512DQ)
+ .Case("avx512cd", X86Features::AVX512CD)
+ .Case("avx512er", X86Features::AVX512ER)
+ .Case("avx512pf", X86Features::AVX512PF)
+ .Case("avx512vbmi", X86Features::AVX512VBMI)
+ .Case("avx512ifma", X86Features::AVX512IFMA)
.Default(X86Features::MAX);
assert(Feature != X86Features::MAX && "Invalid feature!");
@@ -6237,7 +6849,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Check the value of the bit corresponding to the feature requested.
Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
+ Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
}
case X86::BI_mm_prefetch: {
@@ -6312,6 +6924,78 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops.push_back(Mlo);
return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
+ case X86::BI__builtin_ia32_storedqudi128_mask:
+ case X86::BI__builtin_ia32_storedqusi128_mask:
+ case X86::BI__builtin_ia32_storedquhi128_mask:
+ case X86::BI__builtin_ia32_storedquqi128_mask:
+ case X86::BI__builtin_ia32_storeupd128_mask:
+ case X86::BI__builtin_ia32_storeups128_mask:
+ case X86::BI__builtin_ia32_storedqudi256_mask:
+ case X86::BI__builtin_ia32_storedqusi256_mask:
+ case X86::BI__builtin_ia32_storedquhi256_mask:
+ case X86::BI__builtin_ia32_storedquqi256_mask:
+ case X86::BI__builtin_ia32_storeupd256_mask:
+ case X86::BI__builtin_ia32_storeups256_mask:
+ case X86::BI__builtin_ia32_storedqudi512_mask:
+ case X86::BI__builtin_ia32_storedqusi512_mask:
+ case X86::BI__builtin_ia32_storedquhi512_mask:
+ case X86::BI__builtin_ia32_storedquqi512_mask:
+ case X86::BI__builtin_ia32_storeupd512_mask:
+ case X86::BI__builtin_ia32_storeups512_mask:
+ return EmitX86MaskedStore(*this, Ops, 1);
+
+ case X86::BI__builtin_ia32_movdqa32store128_mask:
+ case X86::BI__builtin_ia32_movdqa64store128_mask:
+ case X86::BI__builtin_ia32_storeaps128_mask:
+ case X86::BI__builtin_ia32_storeapd128_mask:
+ case X86::BI__builtin_ia32_movdqa32store256_mask:
+ case X86::BI__builtin_ia32_movdqa64store256_mask:
+ case X86::BI__builtin_ia32_storeaps256_mask:
+ case X86::BI__builtin_ia32_storeapd256_mask:
+ case X86::BI__builtin_ia32_movdqa32store512_mask:
+ case X86::BI__builtin_ia32_movdqa64store512_mask:
+ case X86::BI__builtin_ia32_storeaps512_mask:
+ case X86::BI__builtin_ia32_storeapd512_mask: {
+ unsigned Align =
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
+ return EmitX86MaskedStore(*this, Ops, Align);
+ }
+ case X86::BI__builtin_ia32_loadups128_mask:
+ case X86::BI__builtin_ia32_loadups256_mask:
+ case X86::BI__builtin_ia32_loadups512_mask:
+ case X86::BI__builtin_ia32_loadupd128_mask:
+ case X86::BI__builtin_ia32_loadupd256_mask:
+ case X86::BI__builtin_ia32_loadupd512_mask:
+ case X86::BI__builtin_ia32_loaddquqi128_mask:
+ case X86::BI__builtin_ia32_loaddquqi256_mask:
+ case X86::BI__builtin_ia32_loaddquqi512_mask:
+ case X86::BI__builtin_ia32_loaddquhi128_mask:
+ case X86::BI__builtin_ia32_loaddquhi256_mask:
+ case X86::BI__builtin_ia32_loaddquhi512_mask:
+ case X86::BI__builtin_ia32_loaddqusi128_mask:
+ case X86::BI__builtin_ia32_loaddqusi256_mask:
+ case X86::BI__builtin_ia32_loaddqusi512_mask:
+ case X86::BI__builtin_ia32_loaddqudi128_mask:
+ case X86::BI__builtin_ia32_loaddqudi256_mask:
+ case X86::BI__builtin_ia32_loaddqudi512_mask:
+ return EmitX86MaskedLoad(*this, Ops, 1);
+
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadaps256_mask:
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
+ case X86::BI__builtin_ia32_loadapd256_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa32load256_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load256_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask: {
+ unsigned Align =
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
+ return EmitX86MaskedLoad(*this, Ops, Align);
+ }
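
The masked load and store cases above all reduce to the same per-lane rule: touch memory only where the mask bit is set, and on a masked-off load lane return the passthru operand instead. A scalar model of the load side (fixed at four i32 lanes for brevity; names hypothetical):

#include <cassert>
#include <cstdint>

void maskedLoad4(const int32_t *mem, uint8_t mask, const int32_t passthru[4],
                 int32_t out[4]) {
  for (unsigned i = 0; i != 4; ++i)
    out[i] = ((mask >> i) & 1) ? mem[i]       // lane enabled: read memory
                               : passthru[i]; // lane disabled: keep passthru
}

int main() {
  int32_t mem[4] = {10, 20, 30, 40}, pass[4] = {-1, -2, -3, -4}, out[4];
  maskedLoad4(mem, /*mask=*/0x5, pass, out); // lanes 0 and 2 enabled
  assert(out[0] == 10 && out[1] == -2 && out[2] == 30 && out[3] == -4);
}
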
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
@@ -6330,103 +7014,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case X86::BI__builtin_ia32_palignr128:
- case X86::BI__builtin_ia32_palignr256: {
+ case X86::BI__builtin_ia32_palignr256:
+ case X86::BI__builtin_ia32_palignr128_mask:
+ case X86::BI__builtin_ia32_palignr256_mask:
+ case X86::BI__builtin_ia32_palignr512_mask: {
unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
- unsigned NumElts =
- cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
assert(NumElts % 16 == 0);
- unsigned NumLanes = NumElts / 16;
- unsigned NumLaneElts = NumElts / NumLanes;
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
- if (ShiftVal >= (2 * NumLaneElts))
+ if (ShiftVal >= 32)
return llvm::Constant::getNullValue(ConvertType(E->getType()));
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
- if (ShiftVal > NumLaneElts) {
- ShiftVal -= NumLaneElts;
+ if (ShiftVal > 16) {
+ ShiftVal -= 16;
Ops[1] = Ops[0];
Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
}
- uint32_t Indices[32];
+ uint32_t Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0; i != NumLaneElts; ++i) {
+ for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
- if (Idx >= NumLaneElts)
- Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+ if (Idx >= 16)
+ Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
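
// For instance, the 128-bit form with ShiftVal == 4 and NumElts == 16
// produces Indices = {4, 5, ..., 15, 16, 17, 18, 19}; in the shuffle created
// next, indices 0-15 select bytes of Ops[1] and 16-31 select bytes of
// Ops[0], giving bytes 4..15 of the low operand followed by bytes 0..3 of
// the high operand, i.e. palignr's concatenate-and-shift-right behavior.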
- Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
- makeArrayRef(Indices, NumElts));
- return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
- }
- case X86::BI__builtin_ia32_pslldqi256: {
- // Shift value is in bits so divide by 8.
- unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+ Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "palignr");
- // If pslldq is shifting the vector more than 15 bytes, emit zero.
- if (shiftVal >= 16)
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
-
- uint32_t Indices[32];
- // 256-bit pslldq operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != 32; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = 32 + i - shiftVal;
- if (Idx < 32) Idx -= 16; // end of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
-
- llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
- Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
- Value *Zero = llvm::Constant::getNullValue(VecTy);
+ // If this isn't a masked builtin, just return the align operation.
+ if (Ops.size() == 3)
+ return Align;
- Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
- SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
- llvm::Type *ResultType = ConvertType(E->getType());
- return Builder.CreateBitCast(SV, ResultType, "cast");
+ return EmitX86Select(*this, Ops[4], Align, Ops[3]);
}
- case X86::BI__builtin_ia32_psrldqi256: {
- // Shift value is in bits so divide by 8.
- unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
-
- // If psrldq is shifting the vector more than 15 bytes, emit zero.
- if (shiftVal >= 16)
- return llvm::Constant::getNullValue(ConvertType(E->getType()));
- uint32_t Indices[32];
- // 256-bit psrldq operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != 32; l += 16) {
- for (unsigned i = 0; i != 16; ++i) {
- unsigned Idx = i + shiftVal;
- if (Idx >= 16) Idx += 16; // end of lane, switch operand.
- Indices[l + i] = Idx + l;
- }
- }
-
- llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
- Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
- Value *Zero = llvm::Constant::getNullValue(VecTy);
-
- Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
- SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
- llvm::Type *ResultType = ConvertType(E->getType());
- return Builder.CreateBitCast(SV, ResultType, "cast");
- }
- case X86::BI__builtin_ia32_movntps:
- case X86::BI__builtin_ia32_movntps256:
- case X86::BI__builtin_ia32_movntpd:
- case X86::BI__builtin_ia32_movntpd256:
- case X86::BI__builtin_ia32_movntdq:
- case X86::BI__builtin_ia32_movntdq256:
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64: {
llvm::MDNode *Node = llvm::MDNode::get(
@@ -6439,17 +7070,156 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
- // If the operand is an integer, we can't assume alignment. Otherwise,
- // assume natural alignment.
- QualType ArgTy = E->getArg(1)->getType();
- unsigned Align;
- if (ArgTy->isIntegerType())
- Align = 1;
- else
- Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
- SI->setAlignment(Align);
+ // No alignment for scalar intrinsic store.
+ SI->setAlignment(1);
+ return SI;
+ }
+ case X86::BI__builtin_ia32_movntsd:
+ case X86::BI__builtin_ia32_movntss: {
+ llvm::MDNode *Node = llvm::MDNode::get(
+ getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+
+ // Extract the 0th element of the source vector.
+ Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Ops[0],
+ llvm::PointerType::getUnqual(Scl->getType()),
+ "cast");
+
+ // Unaligned nontemporal store of the scalar value.
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
+ SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ SI->setAlignment(1);
return SI;
}
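
// A sketch of the IR this path produces (value names illustrative only):
//
//   %scl = extractelement <2 x double> %v, i64 0
//   store double %scl, double* %p, align 1, !nontemporal !0
//
// i.e. a plain unaligned scalar store tagged with nontemporal metadata
// rather than a target-specific intrinsic.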
+
+ case X86::BI__builtin_ia32_selectb_128:
+ case X86::BI__builtin_ia32_selectb_256:
+ case X86::BI__builtin_ia32_selectb_512:
+ case X86::BI__builtin_ia32_selectw_128:
+ case X86::BI__builtin_ia32_selectw_256:
+ case X86::BI__builtin_ia32_selectw_512:
+ case X86::BI__builtin_ia32_selectd_128:
+ case X86::BI__builtin_ia32_selectd_256:
+ case X86::BI__builtin_ia32_selectd_512:
+ case X86::BI__builtin_ia32_selectq_128:
+ case X86::BI__builtin_ia32_selectq_256:
+ case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectps_128:
+ case X86::BI__builtin_ia32_selectps_256:
+ case X86::BI__builtin_ia32_selectps_512:
+ case X86::BI__builtin_ia32_selectpd_128:
+ case X86::BI__builtin_ia32_selectpd_256:
+ case X86::BI__builtin_ia32_selectpd_512:
+ return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
+ case X86::BI__builtin_ia32_pcmpeqb128_mask:
+ case X86::BI__builtin_ia32_pcmpeqb256_mask:
+ case X86::BI__builtin_ia32_pcmpeqb512_mask:
+ case X86::BI__builtin_ia32_pcmpeqw128_mask:
+ case X86::BI__builtin_ia32_pcmpeqw256_mask:
+ case X86::BI__builtin_ia32_pcmpeqw512_mask:
+ case X86::BI__builtin_ia32_pcmpeqd128_mask:
+ case X86::BI__builtin_ia32_pcmpeqd256_mask:
+ case X86::BI__builtin_ia32_pcmpeqd512_mask:
+ case X86::BI__builtin_ia32_pcmpeqq128_mask:
+ case X86::BI__builtin_ia32_pcmpeqq256_mask:
+ case X86::BI__builtin_ia32_pcmpeqq512_mask:
+ return EmitX86MaskedCompare(*this, 0, false, Ops);
+ case X86::BI__builtin_ia32_pcmpgtb128_mask:
+ case X86::BI__builtin_ia32_pcmpgtb256_mask:
+ case X86::BI__builtin_ia32_pcmpgtb512_mask:
+ case X86::BI__builtin_ia32_pcmpgtw128_mask:
+ case X86::BI__builtin_ia32_pcmpgtw256_mask:
+ case X86::BI__builtin_ia32_pcmpgtw512_mask:
+ case X86::BI__builtin_ia32_pcmpgtd128_mask:
+ case X86::BI__builtin_ia32_pcmpgtd256_mask:
+ case X86::BI__builtin_ia32_pcmpgtd512_mask:
+ case X86::BI__builtin_ia32_pcmpgtq128_mask:
+ case X86::BI__builtin_ia32_pcmpgtq256_mask:
+ case X86::BI__builtin_ia32_pcmpgtq512_mask:
+ return EmitX86MaskedCompare(*this, 6, true, Ops);
+ case X86::BI__builtin_ia32_cmpb128_mask:
+ case X86::BI__builtin_ia32_cmpb256_mask:
+ case X86::BI__builtin_ia32_cmpb512_mask:
+ case X86::BI__builtin_ia32_cmpw128_mask:
+ case X86::BI__builtin_ia32_cmpw256_mask:
+ case X86::BI__builtin_ia32_cmpw512_mask:
+ case X86::BI__builtin_ia32_cmpd128_mask:
+ case X86::BI__builtin_ia32_cmpd256_mask:
+ case X86::BI__builtin_ia32_cmpd512_mask:
+ case X86::BI__builtin_ia32_cmpq128_mask:
+ case X86::BI__builtin_ia32_cmpq256_mask:
+ case X86::BI__builtin_ia32_cmpq512_mask: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
+ return EmitX86MaskedCompare(*this, CC, true, Ops);
+ }
+ case X86::BI__builtin_ia32_ucmpb128_mask:
+ case X86::BI__builtin_ia32_ucmpb256_mask:
+ case X86::BI__builtin_ia32_ucmpb512_mask:
+ case X86::BI__builtin_ia32_ucmpw128_mask:
+ case X86::BI__builtin_ia32_ucmpw256_mask:
+ case X86::BI__builtin_ia32_ucmpw512_mask:
+ case X86::BI__builtin_ia32_ucmpd128_mask:
+ case X86::BI__builtin_ia32_ucmpd256_mask:
+ case X86::BI__builtin_ia32_ucmpd512_mask:
+ case X86::BI__builtin_ia32_ucmpq128_mask:
+ case X86::BI__builtin_ia32_ucmpq256_mask:
+ case X86::BI__builtin_ia32_ucmpq512_mask: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
+ return EmitX86MaskedCompare(*this, CC, false, Ops);
+ }
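+
+  // As a sketch (assuming the usual AVX-512 predicate encoding: 0=eq, 1=lt,
+  // 2=le, 4=ne, 5=nlt, 6=nle), a call such as _mm_cmp_epu32_mask(a, b, 1)
+  // arrives here with CC == 1, and EmitX86MaskedCompare emits an "icmp ult"
+  // whose <N x i1> result presumably gets packed into the builtin's integer
+  // mask type.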
+
+ case X86::BI__builtin_ia32_vplzcntd_128_mask:
+ case X86::BI__builtin_ia32_vplzcntd_256_mask:
+ case X86::BI__builtin_ia32_vplzcntd_512_mask:
+ case X86::BI__builtin_ia32_vplzcntq_128_mask:
+ case X86::BI__builtin_ia32_vplzcntq_256_mask:
+ case X86::BI__builtin_ia32_vplzcntq_512_mask: {
+ Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
+ return EmitX86Select(*this, Ops[2],
+ Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
+ Ops[1]);
+ }
+
+ // TODO: Handle 64/512-bit vector widths of min/max.
+ case X86::BI__builtin_ia32_pmaxsb128:
+ case X86::BI__builtin_ia32_pmaxsw128:
+ case X86::BI__builtin_ia32_pmaxsd128:
+ case X86::BI__builtin_ia32_pmaxsb256:
+ case X86::BI__builtin_ia32_pmaxsw256:
+ case X86::BI__builtin_ia32_pmaxsd256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pmaxub128:
+ case X86::BI__builtin_ia32_pmaxuw128:
+ case X86::BI__builtin_ia32_pmaxud128:
+ case X86::BI__builtin_ia32_pmaxub256:
+ case X86::BI__builtin_ia32_pmaxuw256:
+ case X86::BI__builtin_ia32_pmaxud256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pminsb128:
+ case X86::BI__builtin_ia32_pminsw128:
+ case X86::BI__builtin_ia32_pminsd128:
+ case X86::BI__builtin_ia32_pminsb256:
+ case X86::BI__builtin_ia32_pminsw256:
+ case X86::BI__builtin_ia32_pminsd256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pminub128:
+ case X86::BI__builtin_ia32_pminuw128:
+ case X86::BI__builtin_ia32_pminud128:
+ case X86::BI__builtin_ia32_pminub256:
+ case X86::BI__builtin_ia32_pminuw256:
+ case X86::BI__builtin_ia32_pminud256: {
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
+ return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
+ }
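+
+  // Each of the min/max cases above lowers to the generic compare+select
+  // idiom; for example, pmaxsd128 on <4 x i32> %a, %b becomes roughly
+  //
+  //   %cmp = icmp sgt <4 x i32> %a, %b
+  //   %res = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+  //
+  // which the backend can pattern-match back into a single PMAXSD.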
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -6492,154 +7262,107 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[0]);
return Builder.CreateExtractValue(Call, 1);
}
- // SSE comparison intrisics
+
+ // SSE packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
+ case X86::BI__builtin_ia32_cmpeqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OEQ);
case X86::BI__builtin_ia32_cmpltps:
+ case X86::BI__builtin_ia32_cmpltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLT);
case X86::BI__builtin_ia32_cmpleps:
+ case X86::BI__builtin_ia32_cmplepd:
+ return getVectorFCmpIR(CmpInst::FCMP_OLE);
case X86::BI__builtin_ia32_cmpunordps:
+ case X86::BI__builtin_ia32_cmpunordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNO);
case X86::BI__builtin_ia32_cmpneqps:
+ case X86::BI__builtin_ia32_cmpneqpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UNE);
case X86::BI__builtin_ia32_cmpnltps:
+ case X86::BI__builtin_ia32_cmpnltpd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGE);
case X86::BI__builtin_ia32_cmpnleps:
+ case X86::BI__builtin_ia32_cmpnlepd:
+ return getVectorFCmpIR(CmpInst::FCMP_UGT);
case X86::BI__builtin_ia32_cmpordps:
+ case X86::BI__builtin_ia32_cmpordpd:
+ return getVectorFCmpIR(CmpInst::FCMP_ORD);
+ case X86::BI__builtin_ia32_cmpps:
+ case X86::BI__builtin_ia32_cmpps256:
+ case X86::BI__builtin_ia32_cmppd:
+ case X86::BI__builtin_ia32_cmppd256: {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ // If this is one of the SSE immediates, we can use native IR.
+ if (CC < 8) {
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0: Pred = FCmpInst::FCMP_OEQ; break;
+ case 1: Pred = FCmpInst::FCMP_OLT; break;
+ case 2: Pred = FCmpInst::FCMP_OLE; break;
+ case 3: Pred = FCmpInst::FCMP_UNO; break;
+ case 4: Pred = FCmpInst::FCMP_UNE; break;
+ case 5: Pred = FCmpInst::FCMP_UGE; break;
+ case 6: Pred = FCmpInst::FCMP_UGT; break;
+ case 7: Pred = FCmpInst::FCMP_ORD; break;
+ }
+ return getVectorFCmpIR(Pred);
+ }
+
+ // We can't handle 8-31 immediates with native IR, so use the intrinsic.
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_cmpps:
+ ID = Intrinsic::x86_sse_cmp_ps;
+ break;
+ case X86::BI__builtin_ia32_cmpps256:
+ ID = Intrinsic::x86_avx_cmp_ps_256;
+ break;
+ case X86::BI__builtin_ia32_cmppd:
+ ID = Intrinsic::x86_sse2_cmp_pd;
+ break;
+ case X86::BI__builtin_ia32_cmppd256:
+ ID = Intrinsic::x86_avx_cmp_pd_256;
+ break;
+ }
+
+ return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ }
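+
+  // Concretely, a call such as _mm_cmp_ps(a, b, _CMP_LT_OS) arrives here
+  // with CC == 1 and is emitted as "fcmp olt <4 x float>", while AVX-only
+  // immediates in the 8-31 range fall through to the target intrinsic
+  // unchanged.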
+
+ // SSE scalar comparison intrinsics
case X86::BI__builtin_ia32_cmpeqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
case X86::BI__builtin_ia32_cmpltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
case X86::BI__builtin_ia32_cmpless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
case X86::BI__builtin_ia32_cmpunordss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
case X86::BI__builtin_ia32_cmpneqss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
case X86::BI__builtin_ia32_cmpnltss:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
case X86::BI__builtin_ia32_cmpnless:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpordpd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
case X86::BI__builtin_ia32_cmpeqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
case X86::BI__builtin_ia32_cmpltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
case X86::BI__builtin_ia32_cmplesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
case X86::BI__builtin_ia32_cmpunordsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
case X86::BI__builtin_ia32_cmpneqsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
case X86::BI__builtin_ia32_cmpnltsd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
case X86::BI__builtin_ia32_cmpnlesd:
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
case X86::BI__builtin_ia32_cmpordsd:
- // These exist so that the builtin that takes an immediate can be bounds
- // checked by clang to avoid passing bad immediates to the backend. Since
- // AVX has a larger immediate than SSE we would need separate builtins to
- // do the different bounds checking. Rather than create a clang specific
- // SSE only builtin, this implements eight separate builtins to match gcc
- // implementation.
-
- // Choose the immediate.
- unsigned Imm;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpeqsd:
- Imm = 0;
- break;
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmpltsd:
- Imm = 1;
- break;
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmplesd:
- Imm = 2;
- break;
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpunordsd:
- Imm = 3;
- break;
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpneqsd:
- Imm = 4;
- break;
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnltsd:
- Imm = 5;
- break;
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpnlesd:
- Imm = 6;
- break;
- case X86::BI__builtin_ia32_cmpordps:
- case X86::BI__builtin_ia32_cmpordss:
- case X86::BI__builtin_ia32_cmpordpd:
- case X86::BI__builtin_ia32_cmpordsd:
- Imm = 7;
- break;
- }
-
- // Choose the intrinsic ID.
- const char *name;
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpeqps:
- case X86::BI__builtin_ia32_cmpltps:
- case X86::BI__builtin_ia32_cmpleps:
- case X86::BI__builtin_ia32_cmpunordps:
- case X86::BI__builtin_ia32_cmpneqps:
- case X86::BI__builtin_ia32_cmpnltps:
- case X86::BI__builtin_ia32_cmpnleps:
- case X86::BI__builtin_ia32_cmpordps:
- name = "cmpps";
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpeqss:
- case X86::BI__builtin_ia32_cmpltss:
- case X86::BI__builtin_ia32_cmpless:
- case X86::BI__builtin_ia32_cmpunordss:
- case X86::BI__builtin_ia32_cmpneqss:
- case X86::BI__builtin_ia32_cmpnltss:
- case X86::BI__builtin_ia32_cmpnless:
- case X86::BI__builtin_ia32_cmpordss:
- name = "cmpss";
- ID = Intrinsic::x86_sse_cmp_ss;
- break;
- case X86::BI__builtin_ia32_cmpeqpd:
- case X86::BI__builtin_ia32_cmpltpd:
- case X86::BI__builtin_ia32_cmplepd:
- case X86::BI__builtin_ia32_cmpunordpd:
- case X86::BI__builtin_ia32_cmpneqpd:
- case X86::BI__builtin_ia32_cmpnltpd:
- case X86::BI__builtin_ia32_cmpnlepd:
- case X86::BI__builtin_ia32_cmpordpd:
- name = "cmppd";
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmpeqsd:
- case X86::BI__builtin_ia32_cmpltsd:
- case X86::BI__builtin_ia32_cmplesd:
- case X86::BI__builtin_ia32_cmpunordsd:
- case X86::BI__builtin_ia32_cmpneqsd:
- case X86::BI__builtin_ia32_cmpnltsd:
- case X86::BI__builtin_ia32_cmpnlesd:
- case X86::BI__builtin_ia32_cmpordsd:
- name = "cmpsd";
- ID = Intrinsic::x86_sse2_cmp_sd;
- break;
- }
-
- Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops, name);
+ return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
}
}
@@ -6812,6 +7535,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
return Builder.CreateCall(F, X);
}
+
+ // Absolute value
+ case PPC::BI__builtin_vsx_xvabsdp:
+ case PPC::BI__builtin_vsx_xvabssp: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *X = EmitScalarExpr(E->getArg(0));
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
+ return Builder.CreateCall(F, X);
+ }
+
// FMA variations
case PPC::BI__builtin_vsx_xvmaddadp:
case PPC::BI__builtin_vsx_xvmaddasp:
@@ -6851,44 +7584,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
}
-// Emit an intrinsic that has 1 float or double.
-static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
-
- Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, Src0);
-}
-
-// Emit an intrinsic that has 3 float or double operands.
-static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
-
- Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
-}
-
-// Emit an intrinsic that has 1 float or double operand, and 1 integer.
-static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
- const CallExpr *E,
- unsigned IntrinsicID) {
- llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
-
- Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
- return CGF.Builder.CreateCall(F, {Src0, Src1});
-}
-
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgpu_div_scale:
- case AMDGPU::BI__builtin_amdgpu_div_scalef: {
+ case AMDGPU::BI__builtin_amdgcn_div_scale:
+ case AMDGPU::BI__builtin_amdgcn_div_scalef: {
// Translate from the intrinsic's struct return to the builtin's out
// argument.
@@ -6898,7 +7598,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Value *Y = EmitScalarExpr(E->getArg(1));
llvm::Value *Z = EmitScalarExpr(E->getArg(2));
- llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
+ llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
X->getType());
llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
@@ -6913,40 +7613,85 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Builder.CreateStore(FlagExt, FlagOutPtr);
return Result;
}
- case AMDGPU::BI__builtin_amdgpu_div_fmas:
- case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
+ case AMDGPU::BI__builtin_amdgcn_div_fmas:
+ case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
- llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
+ llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
Src0->getType());
llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
}
- case AMDGPU::BI__builtin_amdgpu_div_fixup:
- case AMDGPU::BI__builtin_amdgpu_div_fixupf:
- return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
- case AMDGPU::BI__builtin_amdgpu_trig_preop:
- case AMDGPU::BI__builtin_amdgpu_trig_preopf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
- case AMDGPU::BI__builtin_amdgpu_rcp:
- case AMDGPU::BI__builtin_amdgpu_rcpf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
- case AMDGPU::BI__builtin_amdgpu_rsq:
- case AMDGPU::BI__builtin_amdgpu_rsqf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
- case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
- case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
- case AMDGPU::BI__builtin_amdgpu_ldexp:
- case AMDGPU::BI__builtin_amdgpu_ldexpf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
- case AMDGPU::BI__builtin_amdgpu_class:
- case AMDGPU::BI__builtin_amdgpu_classf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
- default:
+ case AMDGPU::BI__builtin_amdgcn_div_fixup:
+ case AMDGPU::BI__builtin_amdgcn_div_fixupf:
+ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
+ case AMDGPU::BI__builtin_amdgcn_trig_preop:
+ case AMDGPU::BI__builtin_amdgcn_trig_preopf:
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
+ case AMDGPU::BI__builtin_amdgcn_rcp:
+ case AMDGPU::BI__builtin_amdgcn_rcpf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
+ case AMDGPU::BI__builtin_amdgcn_rsq:
+ case AMDGPU::BI__builtin_amdgcn_rsqf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
+ case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
+ case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
+ case AMDGPU::BI__builtin_amdgcn_sinf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
+ case AMDGPU::BI__builtin_amdgcn_cosf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
+ case AMDGPU::BI__builtin_amdgcn_log_clampf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
+ case AMDGPU::BI__builtin_amdgcn_ldexp:
+ case AMDGPU::BI__builtin_amdgcn_ldexpf:
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
+ case AMDGPU::BI__builtin_amdgcn_frexp_mant:
+ case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
+ }
+ case AMDGPU::BI__builtin_amdgcn_frexp_exp:
+ case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
+ }
+ case AMDGPU::BI__builtin_amdgcn_fract:
+ case AMDGPU::BI__builtin_amdgcn_fractf:
+ return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
+ case AMDGPU::BI__builtin_amdgcn_lerp:
+ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
+ case AMDGPU::BI__builtin_amdgcn_class:
+ case AMDGPU::BI__builtin_amdgcn_classf:
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
+
+ case AMDGPU::BI__builtin_amdgcn_read_exec: {
+ CallInst *CI = cast<CallInst>(
+ EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
+ CI->setConvergent();
+ return CI;
+ }
+
+ // amdgcn workitem
+ case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
+ return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
+ case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
+ return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
+ case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
+ return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
+
+ // r600 intrinsics
+ case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
+ case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
+ return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
+ case AMDGPU::BI__builtin_r600_read_tidig_x:
+ return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
+ case AMDGPU::BI__builtin_r600_read_tidig_y:
+ return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
+ case AMDGPU::BI__builtin_r600_read_tidig_z:
+ return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
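+
+  // The (0, 1024) bounds passed above presumably become !range metadata on
+  // the emitted workitem/tidig calls, letting the optimizer assume the id
+  // lies in [0, 1024).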
+ default:
return nullptr;
}
}
@@ -7196,6 +7941,17 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+ auto MakeLdg = [&](unsigned IntrinsicID) {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ AlignmentSource AlignSource;
+ clang::CharUnits Align =
+ getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
+ return Builder.CreateCall(
+ CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
+ Ptr->getType()}),
+ {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
+ };
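+
+  // A sketch of what MakeLdg produces for, say, __nvvm_ldg_f(p); the exact
+  // intrinsic mangling is chosen by getIntrinsic from the overload types:
+  //
+  //   %v = call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* %p, i32 4)
+  //
+  // with the i32 operand carrying the natural alignment of the pointee.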
+
switch (BuiltinID) {
case NVPTX::BI__nvvm_atom_add_gen_i:
case NVPTX::BI__nvvm_atom_add_gen_l:
@@ -7264,6 +8020,56 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(FnALAF32, {Ptr, Val});
}
+ case NVPTX::BI__nvvm_atom_inc_gen_ui: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ Value *FnALI32 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
+ return Builder.CreateCall(FnALI32, {Ptr, Val});
+ }
+
+ case NVPTX::BI__nvvm_atom_dec_gen_ui: {
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ Value *FnALD32 =
+ CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
+ return Builder.CreateCall(FnALD32, {Ptr, Val});
+ }
+
+ case NVPTX::BI__nvvm_ldg_c:
+ case NVPTX::BI__nvvm_ldg_c2:
+ case NVPTX::BI__nvvm_ldg_c4:
+ case NVPTX::BI__nvvm_ldg_s:
+ case NVPTX::BI__nvvm_ldg_s2:
+ case NVPTX::BI__nvvm_ldg_s4:
+ case NVPTX::BI__nvvm_ldg_i:
+ case NVPTX::BI__nvvm_ldg_i2:
+ case NVPTX::BI__nvvm_ldg_i4:
+ case NVPTX::BI__nvvm_ldg_l:
+ case NVPTX::BI__nvvm_ldg_ll:
+ case NVPTX::BI__nvvm_ldg_ll2:
+ case NVPTX::BI__nvvm_ldg_uc:
+ case NVPTX::BI__nvvm_ldg_uc2:
+ case NVPTX::BI__nvvm_ldg_uc4:
+ case NVPTX::BI__nvvm_ldg_us:
+ case NVPTX::BI__nvvm_ldg_us2:
+ case NVPTX::BI__nvvm_ldg_us4:
+ case NVPTX::BI__nvvm_ldg_ui:
+ case NVPTX::BI__nvvm_ldg_ui2:
+ case NVPTX::BI__nvvm_ldg_ui4:
+ case NVPTX::BI__nvvm_ldg_ul:
+ case NVPTX::BI__nvvm_ldg_ull:
+ case NVPTX::BI__nvvm_ldg_ull2:
+ // PTX Interoperability section 2.2: "For a vector with an even number of
+ // elements, its alignment is set to number of elements times the alignment
+ // of its member: n*alignof(t)."
+ return MakeLdg(Intrinsic::nvvm_ldg_global_i);
+ case NVPTX::BI__nvvm_ldg_f:
+ case NVPTX::BI__nvvm_ldg_f2:
+ case NVPTX::BI__nvvm_ldg_f4:
+ case NVPTX::BI__nvvm_ldg_d:
+ case NVPTX::BI__nvvm_ldg_d2:
+ return MakeLdg(Intrinsic::nvvm_ldg_global_f);
default:
return nullptr;
}
@@ -7272,9 +8078,9 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
switch (BuiltinID) {
- case WebAssembly::BI__builtin_wasm_memory_size: {
+ case WebAssembly::BI__builtin_wasm_current_memory: {
llvm::Type *ResultType = ConvertType(E->getType());
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
return Builder.CreateCall(Callee);
}
case WebAssembly::BI__builtin_wasm_grow_memory: {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp
new file mode 100644
index 000000000000..ea3b888635c3
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp
@@ -0,0 +1,117 @@
+//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Generates code for built-in CUDA calls which are not runtime-specific.
+// (Runtime-specific codegen lives in CGCUDARuntime.)
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenFunction.h"
+#include "clang/Basic/Builtins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
+ llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
+ llvm::Type::getInt8PtrTy(M.getContext())};
+ llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
+ llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
+
+ if (auto* F = M.getFunction("vprintf")) {
+ // Our CUDA system header declares vprintf with the right signature, so
+ // nobody else should have been able to declare vprintf with a bogus
+ // signature.
+ assert(F->getFunctionType() == VprintfFuncType);
+ return F;
+ }
+
+ // vprintf doesn't already exist; create a declaration and insert it into the
+ // module.
+ return llvm::Function::Create(
+ VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
+}
+
+// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
+// isn't particularly special; it's invoked just like a regular function).
+// vprintf takes two args: A format string, and a pointer to a buffer containing
+// the varargs.
+//
+// For example, the call
+//
+// printf("format string", arg1, arg2, arg3);
+//
+// is converted into something resembling
+//
+// struct Tmp {
+// Arg1 a1;
+// Arg2 a2;
+// Arg3 a3;
+// };
+// char* buf = alloca(sizeof(Tmp));
+// *(Tmp*)buf = {a1, a2, a3};
+// vprintf("format string", buf);
+//
+// buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the
+// args is itself aligned to its preferred alignment.
+//
+// Note that by the time this function runs, E's args have already undergone the
+// standard C vararg promotion (short -> int, float -> double, etc.).
+RValue
+CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue) {
+ assert(getLangOpts().CUDA);
+ assert(getLangOpts().CUDAIsDevice);
+ assert(E->getBuiltinCallee() == Builtin::BIprintf);
+ assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+ const llvm::DataLayout &DL = CGM.getDataLayout();
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+
+ CallArgList Args;
+ EmitCallArgs(Args,
+ E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
+ E->arguments(), E->getDirectCallee(),
+ /* ParamsToSkip = */ 0);
+
+ // We don't know how to emit non-scalar varargs.
+ if (std::any_of(Args.begin() + 1, Args.end(),
+ [](const CallArg &A) { return !A.RV.isScalar(); })) {
+ CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+ return RValue::get(llvm::ConstantInt::get(IntTy, 0));
+ }
+
+ // Construct and fill the args buffer that we'll pass to vprintf.
+ llvm::Value *BufferPtr;
+ if (Args.size() <= 1) {
+ // If there are no args, pass a null pointer to vprintf.
+ BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
+ } else {
+ llvm::SmallVector<llvm::Type *, 8> ArgTypes;
+ for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
+ ArgTypes.push_back(Args[I].RV.getScalarVal()->getType());
+ llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
+ llvm::Value *Alloca = CreateTempAlloca(AllocaTy);
+
+ for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
+ llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
+ llvm::Value *Arg = Args[I].RV.getScalarVal();
+ Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
+ }
+ BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
+ }
+
+ // Invoke vprintf and return.
+ llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
+ return RValue::get(
+ Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr}));
+}
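+
+// As a sketch, printf("%d %f\n", i, d) on the device side becomes an alloca
+// of a struct { i32, double } (the "printf_args" type above), two aligned
+// stores filling it in, and then, with illustrative value names,
+//
+//   %ret = call i32 @vprintf(i8* %fmt, i8* %buf)
+//
+// where %buf is the alloca cast to i8*.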
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
index 045e19b189dc..6a04d4eea784 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
@@ -38,6 +38,7 @@ private:
llvm::Module &TheModule;
/// Keeps track of kernel launch stubs emitted in this module
llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
+ llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
/// Keeps track of variables containing handles of GPU binaries. Populated by
/// ModuleCtorFunction() and used to create corresponding cleanup calls in
/// ModuleDtorFunction()
@@ -47,7 +48,7 @@ private:
llvm::Constant *getLaunchFn() const;
/// Creates a function to register all kernel stubs generated in this module.
- llvm::Function *makeRegisterKernelsFn();
+ llvm::Function *makeRegisterGlobalsFn();
/// Helper function that generates a constant string and returns a pointer to
/// the start of the string. The result of this function can be used anywhere
@@ -68,6 +69,10 @@ public:
CGNVCUDARuntime(CodeGenModule &CGM);
void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
+ void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override {
+ DeviceVars.push_back(std::make_pair(&Var, Flags));
+ }
+
/// Creates module constructor function
llvm::Function *makeModuleCtorFunction() override;
/// Creates module destructor function
@@ -93,10 +98,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
- std::vector<llvm::Type*> Params;
- Params.push_back(VoidPtrTy);
- Params.push_back(SizeTy);
- Params.push_back(SizeTy);
+ llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
Params, false),
"cudaSetupArgument");
@@ -158,19 +160,28 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
CGF.EmitBlock(EndBlock);
}
-/// Creates internal function to register all kernel stubs generated in this
-/// module with the CUDA runtime.
+/// Creates a function that sets up state on the host side for CUDA objects that
+/// have a presence on both the host and device sides. Specifically, registers
+/// the host side of kernel functions and device global variables with the CUDA
+/// runtime.
/// \code
-/// void __cuda_register_kernels(void** GpuBinaryHandle) {
+/// void __cuda_register_globals(void** GpuBinaryHandle) {
/// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
/// ...
/// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
+/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
+/// ...
+/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
/// }
/// \endcode
-llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() {
+llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
+ // No need to register anything.
+ if (EmittedKernels.empty() && DeviceVars.empty())
+ return nullptr;
+
llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
- llvm::GlobalValue::InternalLinkage, "__cuda_register_kernels", &TheModule);
+ llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
llvm::BasicBlock *EntryBB =
llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
CGBuilderTy Builder(CGM, Context);
@@ -178,7 +189,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() {
// void __cudaRegisterFunction(void **, const char *, char *, const char *,
// int, uint3*, uint3*, dim3*, dim3*, int*)
- std::vector<llvm::Type *> RegisterFuncParams = {
+ llvm::Type *RegisterFuncParams[] = {
VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
@@ -186,18 +197,44 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() {
"__cudaRegisterFunction");
// Extract GpuBinaryHandle passed as the first argument passed to
- // __cuda_register_kernels() and generate __cudaRegisterFunction() call for
+ // __cuda_register_globals() and generate __cudaRegisterFunction() call for
// each emitted kernel.
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
for (llvm::Function *Kernel : EmittedKernels) {
llvm::Constant *KernelName = makeConstantString(Kernel->getName());
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
- llvm::Value *args[] = {
+ llvm::Value *Args[] = {
&GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy),
KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr,
NullPtr, NullPtr, NullPtr,
llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
- Builder.CreateCall(RegisterFunc, args);
+ Builder.CreateCall(RegisterFunc, Args);
+ }
+
+ // void __cudaRegisterVar(void **, char *, char *, const char *,
+ // int, int, int, int)
+ llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
+ CharPtrTy, IntTy, IntTy,
+ IntTy, IntTy};
+ llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, RegisterVarParams, false),
+ "__cudaRegisterVar");
+ for (auto &Pair : DeviceVars) {
+ llvm::GlobalVariable *Var = Pair.first;
+ unsigned Flags = Pair.second;
+ llvm::Constant *VarName = makeConstantString(Var->getName());
+ uint64_t VarSize =
+ CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
+ llvm::Value *Args[] = {
+ &GpuBinaryHandlePtr,
+ Builder.CreateBitCast(Var, VoidPtrTy),
+ VarName,
+ VarName,
+ llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
+ llvm::ConstantInt::get(IntTy, VarSize),
+ llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
+ llvm::ConstantInt::get(IntTy, 0)};
+ Builder.CreateCall(RegisterVar, Args);
}
Builder.CreateRetVoid();
@@ -208,15 +245,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() {
/// \code
/// void __cuda_module_ctor(void*) {
/// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
-/// __cuda_register_kernels(Handle0);
+/// __cuda_register_globals(Handle0);
/// ...
/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
-/// __cuda_register_kernels(HandleN);
+/// __cuda_register_globals(HandleN);
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
- // void __cuda_register_kernels(void* handle);
- llvm::Function *RegisterKernelsFunc = makeRegisterKernelsFn();
+ // No need to generate ctors/dtors if there are no GPU binaries.
+ if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
+ return nullptr;
+
+ // void __cuda_register_globals(void* handle);
+ llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
// void ** __cudaRegisterFatBinary(void *);
llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
@@ -259,6 +300,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage,
llvm::ConstantStruct::get(FatbinWrapperTy, Values),
"__cuda_fatbin_wrapper");
+ // NVIDIA's cuobjdump looks for fatbins in this section.
+ FatbinWrapper->setSection(".nvFatBinSegment");
// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
@@ -270,8 +313,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
CGM.getPointerAlign());
- // Call __cuda_register_kernels(GpuBinaryHandle);
- CtorBuilder.CreateCall(RegisterKernelsFunc, RegisterFatbinCall);
+ // Call __cuda_register_globals(GpuBinaryHandle);
+ if (RegisterGlobalsFunc)
+ CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
// Save GpuBinaryHandle so we can unregister it in destructor.
GpuBinaryHandles.push_back(GpuBinaryHandle);
@@ -291,6 +335,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
+ // No need for destructor if we don't have handles to unregister.
+ if (GpuBinaryHandles.empty())
+ return nullptr;
+
// void __cudaUnregisterFatBinary(void ** handle);
llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h
index dcacf9703277..0168f4f9e942 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h
@@ -18,6 +18,7 @@
namespace llvm {
class Function;
+class GlobalVariable;
}
namespace clang {
@@ -37,6 +38,12 @@ protected:
CodeGenModule &CGM;
public:
+ // Global variable properties that must be passed to CUDA runtime.
+ enum DeviceVarFlags {
+ ExternDeviceVar = 0x01, // extern
+ ConstantDeviceVar = 0x02, // __constant__
+ };
+
CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}
virtual ~CGCUDARuntime();
@@ -46,6 +53,7 @@ public:
/// Emits a kernel launch stub.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
+ virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0;
/// Constructs and returns a module initialization function or nullptr if it's
/// not needed. Must be called after all kernels have been emitted.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp
index 6847df9b749b..40f1bc426ff7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp
@@ -164,7 +164,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl,
// members with attribute "AlwaysInline" and expect no reference to
// be generated. It is desirable to reenable this optimisation after
// corresponding LLVM changes.
- Replacements[MangledName] = Aliasee;
+ addReplacement(MangledName, Aliasee);
return false;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
index 3f240b1802b8..9e10ec068e09 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h
@@ -106,6 +106,16 @@ public:
virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; }
+ /// Returns true if the target allows calling a function through a pointer
+ /// with a different signature than the actual function (or equivalently,
+ /// bitcasting a function or function pointer to a different function type).
+ /// In principle in the most general case this could depend on the target, the
+ /// calling convention, and the actual types of the arguments and return
+ /// value. Here it just means whether the signature mismatch could *ever* be
+ /// allowed; in other words, does the target do strict checking of signatures
+ /// for all calls.
+ virtual bool canCallMismatchedFunctionType() const { return true; }
+
/// If the C++ ABI requires the given type be returned in a particular way,
/// this method sets RetAI and returns true.
virtual bool classifyReturnType(CGFunctionInfo &FI) const = 0;
@@ -326,6 +336,12 @@ public:
virtual void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy,
FunctionArgList &Params) = 0;
+ /// Get the ABI-specific "this" parameter adjustment to apply in the prologue
+ /// of a virtual function.
+ virtual CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) {
+ return CharUnits::Zero();
+ }
+
/// Perform ABI-specific "this" parameter adjustment in a virtual function
/// prologue.
virtual llvm::Value *adjustThisParameterInVirtualFunctionPrologue(
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
index 935985049c01..242b5962070a 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp
@@ -14,6 +14,7 @@
#include "CGCall.h"
#include "ABIInfo.h"
+#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CodeGenFunction.h"
@@ -25,9 +26,11 @@
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/CodeGen/SwiftCallingConv.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -39,7 +42,7 @@ using namespace CodeGen;
/***/
-static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) {
+unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
switch (CC) {
default: return llvm::CallingConv::C;
case CC_X86StdCall: return llvm::CallingConv::X86_StdCall;
@@ -55,7 +58,10 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) {
// TODO: Add support for __vectorcall to LLVM.
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
- case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL;
+ case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
+ case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
+ case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
+ case CC_Swift: return llvm::CallingConv::Swift;
}
}
@@ -90,15 +96,25 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
/*instanceMethod=*/false,
/*chainCall=*/false, None,
- FTNP->getExtInfo(), RequiredArgs(0));
+ FTNP->getExtInfo(), {}, RequiredArgs(0));
}
/// Adds the formal parameters in FPT to the given prefix. If any parameter in
/// FPT has pass_object_size attrs, then we'll add parameters for those, too.
static void appendParameterTypes(const CodeGenTypes &CGT,
SmallVectorImpl<CanQualType> &prefix,
- const CanQual<FunctionProtoType> &FPT,
+ SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
+ CanQual<FunctionProtoType> FPT,
const FunctionDecl *FD) {
+ // Fill out paramInfos.
+ if (FPT->hasExtParameterInfos() || !paramInfos.empty()) {
+ assert(paramInfos.size() <= prefix.size());
+ auto protoParamInfos = FPT->getExtParameterInfos();
+ paramInfos.reserve(prefix.size() + protoParamInfos.size());
+ paramInfos.resize(prefix.size());
+ paramInfos.append(protoParamInfos.begin(), protoParamInfos.end());
+ }
+
// Fast path: unknown target.
if (FD == nullptr) {
prefix.append(FPT->param_type_begin(), FPT->param_type_end());
@@ -125,13 +141,17 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
SmallVectorImpl<CanQualType> &prefix,
CanQual<FunctionProtoType> FTP,
const FunctionDecl *FD) {
- RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
+ SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
+ RequiredArgs Required =
+ RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD);
// FIXME: Kill copy.
- appendParameterTypes(CGT, prefix, FTP, FD);
+ appendParameterTypes(CGT, prefix, paramInfos, FTP, FD);
CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
+
return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod,
/*chainCall=*/false, prefix,
- FTP->getExtInfo(), required);
+ FTP->getExtInfo(), paramInfos,
+ Required);
}
/// Arrange the argument and result information for a value of the
@@ -173,6 +193,12 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
if (D->hasAttr<SysVABIAttr>())
return IsWindows ? CC_X86_64SysV : CC_C;
+ if (D->hasAttr<PreserveMostAttr>())
+ return CC_PreserveMost;
+
+ if (D->hasAttr<PreserveAllAttr>())
+ return CC_PreserveAll;
+
return CC_C;
}
@@ -219,16 +245,33 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
return arrangeFreeFunctionType(prototype, MD);
}
+bool CodeGenTypes::inheritingCtorHasParams(
+ const InheritedConstructor &Inherited, CXXCtorType Type) {
+ // Parameters are unnecessary if we're constructing a base class subobject
+ // and the inherited constructor lives in a virtual base.
+ return Type == Ctor_Complete ||
+ !Inherited.getShadowDecl()->constructsVirtualBase() ||
+ !Target.getCXXABI().hasConstructorVariants();
+ }
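+
+// For example, given "struct B { B(int); }; struct D : virtual B { using
+// B::B; };" on an ABI with constructor variants, the base-object variant of
+// D's inherited constructor never constructs the virtual B subobject, so the
+// int parameter need not be forwarded and this returns false.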
+
const CGFunctionInfo &
CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
StructorType Type) {
SmallVector<CanQualType, 16> argTypes;
+ SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
argTypes.push_back(GetThisType(Context, MD->getParent()));
+ bool PassParams = true;
+
GlobalDecl GD;
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
GD = GlobalDecl(CD, toCXXCtorType(Type));
+
+ // A base class inheriting constructor doesn't get forwarded arguments
+ // needed to construct a virtual base (or base class thereof).
+ if (auto Inherited = CD->getInheritedConstructor())
+ PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type));
} else {
auto *DD = dyn_cast<CXXDestructorDecl>(MD);
GD = GlobalDecl(DD, toCXXDtorType(Type));
@@ -237,12 +280,14 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
// Add the formal parameters.
- appendParameterTypes(*this, argTypes, FTP, MD);
+ if (PassParams)
+ appendParameterTypes(*this, argTypes, paramInfos, FTP, MD);
TheCXXABI.buildStructorSignature(MD, Type, argTypes);
RequiredArgs required =
- (MD->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All);
+ (PassParams && MD->isVariadic() ? RequiredArgs(argTypes.size())
+ : RequiredArgs::All);
FunctionType::ExtInfo extInfo = FTP->getExtInfo();
CanQualType resultType = TheCXXABI.HasThisReturn(GD)
@@ -252,7 +297,53 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
: Context.VoidTy;
return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true,
/*chainCall=*/false, argTypes, extInfo,
- required);
+ paramInfos, required);
+}
+
+static SmallVector<CanQualType, 16>
+getArgTypesForCall(ASTContext &ctx, const CallArgList &args) {
+ SmallVector<CanQualType, 16> argTypes;
+ for (auto &arg : args)
+ argTypes.push_back(ctx.getCanonicalParamType(arg.Ty));
+ return argTypes;
+}
+
+static SmallVector<CanQualType, 16>
+getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) {
+ SmallVector<CanQualType, 16> argTypes;
+ for (auto &arg : args)
+ argTypes.push_back(ctx.getCanonicalParamType(arg->getType()));
+ return argTypes;
+}
+
+static void addExtParameterInfosForCall(
+ llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
+ const FunctionProtoType *proto,
+ unsigned prefixArgs,
+ unsigned totalArgs) {
+ assert(proto->hasExtParameterInfos());
+ assert(paramInfos.size() <= prefixArgs);
+ assert(proto->getNumParams() + prefixArgs <= totalArgs);
+
+ // Add default infos for any prefix args that don't already have infos.
+ paramInfos.resize(prefixArgs);
+
+ // Add infos for the prototype.
+ auto protoInfos = proto->getExtParameterInfos();
+ paramInfos.append(protoInfos.begin(), protoInfos.end());
+
+ // Add default infos for the variadic arguments.
+ paramInfos.resize(totalArgs);
+}
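+
+// Worked example: with prefixArgs == 1 (say, an implicit block-pointer
+// argument), a prototype carrying two ExtParameterInfos, and totalArgs == 5,
+// the result is { default, proto[0], proto[1], default, default }: one
+// default entry for the prefix argument and one per trailing variadic
+// argument.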
+
+static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16>
+getExtParameterInfosForCall(const FunctionProtoType *proto,
+ unsigned prefixArgs, unsigned totalArgs) {
+ llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> result;
+ if (proto->hasExtParameterInfos()) {
+ addExtParameterInfosForCall(result, proto, prefixArgs, totalArgs);
+ }
+ return result;
}
/// Arrange a call to a C++ method, passing the given arguments.
@@ -267,7 +358,7 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
CanQual<FunctionProtoType> FPT = GetFormalType(D);
- RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs);
+ RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D);
GlobalDecl GD(D, CtorKind);
CanQualType ResultType = TheCXXABI.HasThisReturn(GD)
? ArgTypes.front()
@@ -276,9 +367,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
: Context.VoidTy;
FunctionType::ExtInfo Info = FPT->getExtInfo();
+ auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs,
+ ArgTypes.size());
return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true,
/*chainCall=*/false, ArgTypes, Info,
- Required);
+ ParamInfos, Required);
}
/// Arrange the argument and result information for the declaration or
@@ -299,7 +392,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
return arrangeLLVMFunctionInfo(
noProto->getReturnType(), /*instanceMethod=*/false,
- /*chainCall=*/false, None, noProto->getExtInfo(), RequiredArgs::All);
+ /*chainCall=*/false, None, noProto->getExtInfo(), {}, RequiredArgs::All);
}
assert(isa<FunctionProtoType>(FTy));
@@ -328,7 +421,7 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
argTys.push_back(Context.getCanonicalParamType(receiverType));
argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
// FIXME: Kill copy?
- for (const auto *I : MD->params()) {
+ for (const auto *I : MD->parameters()) {
argTys.push_back(Context.getCanonicalParamType(I->getType()));
}
@@ -345,7 +438,18 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
return arrangeLLVMFunctionInfo(
GetReturnType(MD->getReturnType()), /*instanceMethod=*/false,
- /*chainCall=*/false, argTys, einfo, required);
+ /*chainCall=*/false, argTys, einfo, {}, required);
+}
+
+const CGFunctionInfo &
+CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType,
+ const CallArgList &args) {
+ auto argTypes = getArgTypesForCall(Context, args);
+ FunctionType::ExtInfo einfo;
+
+ return arrangeLLVMFunctionInfo(
+ GetReturnType(returnType), /*instanceMethod=*/false,
+ /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All);
}
const CGFunctionInfo &
@@ -374,7 +478,7 @@ CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) {
CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
/*chainCall=*/false, ArgTys,
- FTP->getExtInfo(), RequiredArgs(1));
+ FTP->getExtInfo(), {}, RequiredArgs(1));
}
const CGFunctionInfo &
@@ -394,7 +498,8 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true,
/*chainCall=*/false, ArgTys,
- FunctionType::ExtInfo(CC), RequiredArgs::All);
+ FunctionType::ExtInfo(CC), {},
+ RequiredArgs::All);
}
/// Arrange a call as unto a free function, except possibly with an
@@ -408,6 +513,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
bool chainCall) {
assert(args.size() >= numExtraRequiredArgs);
+ llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
+
// In most cases, there are no optional arguments.
RequiredArgs required = RequiredArgs::All;
@@ -417,6 +524,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
if (proto->isVariadic())
required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs);
+ if (proto->hasExtParameterInfos())
+ addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs,
+ args.size());
+
// If we don't have a prototype at all, but we're supposed to
// explicitly use the variadic convention for unprototyped calls,
// treat all of the arguments as required but preserve the nominal
@@ -433,7 +544,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty));
return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()),
/*instanceMethod=*/false, chainCall,
- argTypes, fnType->getExtInfo(), required);
+ argTypes, fnType->getExtInfo(), paramInfos,
+ required);
}
/// Figure out the rules for calling a function with the given formal
@@ -448,7 +560,7 @@ CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args,
chainCall ? 1 : 0, chainCall);
}
-/// A block function call is essentially a free-function call with an
+/// A block function is essentially a free function with an
/// extra implicit argument.
const CGFunctionInfo &
CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args,
@@ -458,54 +570,99 @@ CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args,
}
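
For readers unfamiliar with the blocks ABI, the doc comment above means the block's invoke function receives the block literal itself as parameter 0. A rough model in source terms, not the exact runtime layout:

struct BlockLiteral {
  void *isa;
  int flags;
  int reserved;
  void (*invoke)(BlockLiteral *, int);  // parameter 0 is the block itself
};

void callBlock(BlockLiteral *b, int x) {
  b->invoke(b, x);                      // the extra implicit argument
}
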
const CGFunctionInfo &
-CodeGenTypes::arrangeFreeFunctionCall(QualType resultType,
- const CallArgList &args,
- FunctionType::ExtInfo info,
- RequiredArgs required) {
+CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto,
+ const FunctionArgList &params) {
+ auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size());
+ auto argTypes = getArgTypesForDeclaration(Context, params);
+
+ return arrangeLLVMFunctionInfo(
+ GetReturnType(proto->getReturnType()),
+ /*instanceMethod*/ false, /*chainCall*/ false, argTypes,
+ proto->getExtInfo(), paramInfos,
+ RequiredArgs::forPrototypePlus(proto, 1, nullptr));
+}
+
+const CGFunctionInfo &
+CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType,
+ const CallArgList &args) {
// FIXME: Kill copy.
SmallVector<CanQualType, 16> argTypes;
for (const auto &Arg : args)
argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
return arrangeLLVMFunctionInfo(
GetReturnType(resultType), /*instanceMethod=*/false,
- /*chainCall=*/false, argTypes, info, required);
+ /*chainCall=*/false, argTypes, FunctionType::ExtInfo(),
+ /*paramInfos=*/ {}, RequiredArgs::All);
}
-/// Arrange a call to a C++ method, passing the given arguments.
const CGFunctionInfo &
-CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
- const FunctionProtoType *FPT,
- RequiredArgs required) {
- // FIXME: Kill copy.
- SmallVector<CanQualType, 16> argTypes;
- for (const auto &Arg : args)
- argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
+CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType,
+ const FunctionArgList &args) {
+ auto argTypes = getArgTypesForDeclaration(Context, args);
+
+ return arrangeLLVMFunctionInfo(
+ GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false,
+ argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+}
- FunctionType::ExtInfo info = FPT->getExtInfo();
+const CGFunctionInfo &
+CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
+ ArrayRef<CanQualType> argTypes) {
return arrangeLLVMFunctionInfo(
- GetReturnType(FPT->getReturnType()), /*instanceMethod=*/true,
- /*chainCall=*/false, argTypes, info, required);
+ resultType, /*instanceMethod=*/false, /*chainCall=*/false,
+ argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All);
}
-const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration(
- QualType resultType, const FunctionArgList &args,
- const FunctionType::ExtInfo &info, bool isVariadic) {
+/// Arrange a call to a C++ method, passing the given arguments.
+const CGFunctionInfo &
+CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
+ const FunctionProtoType *proto,
+ RequiredArgs required) {
+ unsigned numRequiredArgs =
+ (proto->isVariadic() ? required.getNumRequiredArgs() : args.size());
+ unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams();
+ auto paramInfos =
+ getExtParameterInfosForCall(proto, numPrefixArgs, args.size());
+
// FIXME: Kill copy.
- SmallVector<CanQualType, 16> argTypes;
- for (auto Arg : args)
- argTypes.push_back(Context.getCanonicalParamType(Arg->getType()));
+ auto argTypes = getArgTypesForCall(Context, args);
- RequiredArgs required =
- (isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All);
+ FunctionType::ExtInfo info = proto->getExtInfo();
return arrangeLLVMFunctionInfo(
- GetReturnType(resultType), /*instanceMethod=*/false,
- /*chainCall=*/false, argTypes, info, required);
+ GetReturnType(proto->getReturnType()), /*instanceMethod=*/true,
+ /*chainCall=*/false, argTypes, info, paramInfos, required);
}
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
return arrangeLLVMFunctionInfo(
getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false,
- None, FunctionType::ExtInfo(), RequiredArgs::All);
+ None, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+}
+
+const CGFunctionInfo &
+CodeGenTypes::arrangeCall(const CGFunctionInfo &signature,
+ const CallArgList &args) {
+ assert(signature.arg_size() <= args.size());
+ if (signature.arg_size() == args.size())
+ return signature;
+
+ SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
+ auto sigParamInfos = signature.getExtParameterInfos();
+ if (!sigParamInfos.empty()) {
+ paramInfos.append(sigParamInfos.begin(), sigParamInfos.end());
+ paramInfos.resize(args.size());
+ }
+
+ auto argTypes = getArgTypesForCall(Context, args);
+
+ assert(signature.getRequiredArgs().allowsOptionalArgs());
+ return arrangeLLVMFunctionInfo(signature.getReturnType(),
+ signature.isInstanceMethod(),
+ signature.isChainCall(),
+ argTypes,
+ signature.getExtInfo(),
+ paramInfos,
+ signature.getRequiredArgs());
}
/// Arrange the argument and result information for an abstract value
@@ -517,25 +674,26 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
bool chainCall,
ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
+ ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
assert(std::all_of(argTypes.begin(), argTypes.end(),
std::mem_fun_ref(&CanQualType::isCanonicalAsParam)));
- unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
-
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
- CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, required,
- resultType, argTypes);
+ CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos,
+ required, resultType, argTypes);
void *insertPos = nullptr;
CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
if (FI)
return *FI;
+ unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
+
// Construct the function info. We co-allocate the ArgInfos.
FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
- resultType, argTypes, required);
+ paramInfos, resultType, argTypes, required);
FunctionInfos.InsertNode(FI, insertPos);
bool inserted = FunctionsBeingProcessed.insert(FI).second;
@@ -543,7 +701,11 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
assert(inserted && "Recursively being processed?");
// Compute ABI information.
- getABIInfo().computeInfo(*FI);
+ if (info.getCC() != CC_Swift) {
+ getABIInfo().computeInfo(*FI);
+ } else {
+ swiftcall::computeABIInfo(CGM, *FI);
+ }
// Loop over all of the computed argument and return value info. If any of
// them are direct or extend without a specified coerce type, specify the
@@ -566,11 +728,16 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
bool instanceMethod,
bool chainCall,
const FunctionType::ExtInfo &info,
+ ArrayRef<ExtParameterInfo> paramInfos,
CanQualType resultType,
ArrayRef<CanQualType> argTypes,
RequiredArgs required) {
- void *buffer = operator new(sizeof(CGFunctionInfo) +
- sizeof(ArgInfo) * (argTypes.size() + 1));
+ assert(paramInfos.empty() || paramInfos.size() == argTypes.size());
+
+ void *buffer =
+ operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>(
+ argTypes.size() + 1, paramInfos.size()));
+
CGFunctionInfo *FI = new(buffer) CGFunctionInfo();
FI->CallingConvention = llvmCC;
FI->EffectiveCallingConvention = llvmCC;
@@ -585,9 +752,12 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->ArgStruct = nullptr;
FI->ArgStructAlign = 0;
FI->NumArgs = argTypes.size();
+ FI->HasExtParameterInfos = !paramInfos.empty();
FI->getArgsBuffer()[0].type = resultType;
for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
FI->getArgsBuffer()[i + 1].type = argTypes[i];
+ for (unsigned i = 0, e = paramInfos.size(); i != e; ++i)
+ FI->getExtParameterInfosBuffer()[i] = paramInfos[i];
return FI;
}
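
The create() above co-allocates the CGFunctionInfo header and its trailing ArgInfo/ExtParameterInfo arrays in a single buffer; totalSizeToAlloc comes from llvm::TrailingObjects, which also accounts for alignment between the arrays. A hand-rolled sketch of the same pattern, simplified and ignoring alignment corner cases:

#include <cstddef>
#include <new>

struct Node {
  unsigned NumArgs;
  unsigned NumInfos;

  // The arrays live immediately after the header in the same allocation.
  int *argsBuffer() { return reinterpret_cast<int *>(this + 1); }
  short *infosBuffer() {
    return reinterpret_cast<short *>(argsBuffer() + NumArgs);
  }

  static Node *create(unsigned numArgs, unsigned numInfos) {
    void *buffer = operator new(sizeof(Node) + numArgs * sizeof(int) +
                                numInfos * sizeof(short));
    Node *N = new (buffer) Node();
    N->NumArgs = numArgs;
    N->NumInfos = numInfos;
    return N;
  }
};
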
@@ -634,7 +804,8 @@ struct RecordExpansion : TypeExpansion {
RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases,
SmallVector<const FieldDecl *, 1> &&Fields)
- : TypeExpansion(TEK_Record), Bases(Bases), Fields(Fields) {}
+ : TypeExpansion(TEK_Record), Bases(std::move(Bases)),
+ Fields(std::move(Fields)) {}
static bool classof(const TypeExpansion *TE) {
return TE->Kind == TEK_Record;
}
@@ -773,7 +944,7 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF,
}
void CodeGenFunction::ExpandTypeFromArgs(
- QualType Ty, LValue LV, SmallVectorImpl<llvm::Argument *>::iterator &AI) {
+ QualType Ty, LValue LV, SmallVectorImpl<llvm::Value *>::iterator &AI) {
assert(LV.isSimple() &&
"Unexpected non-simple lvalue during struct expansion.");
@@ -798,7 +969,7 @@ void CodeGenFunction::ExpandTypeFromArgs(
}
for (auto FD : RExp->Fields) {
// FIXME: What are the right qualifiers here?
- LValue SubLV = EmitLValueForField(LV, FD);
+ LValue SubLV = EmitLValueForFieldInitialization(LV, FD);
ExpandTypeFromArgs(FD->getType(), SubLV, AI);
}
} else if (isa<ComplexExpansion>(Exp.get())) {
@@ -1220,11 +1391,13 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context,
// ignore and inalloca doesn't have matching LLVM parameters.
IRArgs.NumberOfArgs = 0;
break;
- case ABIArgInfo::Expand: {
+ case ABIArgInfo::CoerceAndExpand:
+ IRArgs.NumberOfArgs = AI.getCoerceAndExpandTypeSequence().size();
+ break;
+ case ABIArgInfo::Expand:
IRArgs.NumberOfArgs = getExpansionSize(ArgType, Context);
break;
}
- }
if (IRArgs.NumberOfArgs > 0) {
IRArgs.FirstArgIndex = IRArgNo;
@@ -1323,6 +1496,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
case ABIArgInfo::Ignore:
resultType = llvm::Type::getVoidTy(getLLVMContext());
break;
+
+ case ABIArgInfo::CoerceAndExpand:
+ resultType = retAI.getUnpaddedCoerceAndExpandType();
+ break;
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI, true);
@@ -1390,6 +1567,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
break;
}
+ case ABIArgInfo::CoerceAndExpand: {
+ auto ArgTypesIter = ArgTypes.begin() + FirstIRArg;
+ for (auto EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) {
+ *ArgTypesIter++ = EltTy;
+ }
+ assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs);
+ break;
+ }
+
case ABIArgInfo::Expand:
auto ArgTypesIter = ArgTypes.begin() + FirstIRArg;
getExpandedTypes(it->type, ArgTypesIter);
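
CoerceAndExpand means the ABI chose a coercion struct whose non-padding elements each become a separate IR argument. A sketch in source terms, assuming a hypothetical coercion type with one padding element:

struct Coerced { int a; char pad[4]; long b; };  // hypothetical coercion type

void callee(int a, long b);                      // the expanded IR signature

void passCoerceAndExpand(const Coerced &src) {
  callee(src.a, src.b);                          // the padding element is skipped
}
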
@@ -1450,6 +1636,7 @@ void CodeGenModule::ConstructAttributeList(
const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
+ bool HasAnyX86InterruptAttr = false;
// FIXME: handle sseregparm someday...
if (TargetDecl) {
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1487,6 +1674,7 @@ void CodeGenModule::ConstructAttributeList(
if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
RetAttrs.addAttribute(llvm::Attribute::NonNull);
+ HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>();
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
}
@@ -1526,10 +1714,11 @@ void CodeGenModule::ConstructAttributeList(
}
bool DisableTailCalls =
- CodeGenOpts.DisableTailCalls ||
+ CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr ||
(TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>());
- FuncAttrs.addAttribute("disable-tail-calls",
- llvm::toStringRef(DisableTailCalls));
+ FuncAttrs.addAttribute(
+ "disable-tail-calls",
+ llvm::toStringRef(DisableTailCalls));
FuncAttrs.addAttribute("less-precise-fpmad",
llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
@@ -1543,9 +1732,13 @@ void CodeGenModule::ConstructAttributeList(
llvm::toStringRef(CodeGenOpts.SoftFloat));
FuncAttrs.addAttribute("stack-protector-buffer-size",
llvm::utostr(CodeGenOpts.SSPBufferSize));
+ FuncAttrs.addAttribute("no-signed-zeros-fp-math",
+ llvm::toStringRef(CodeGenOpts.NoSignedZeros));
if (CodeGenOpts.StackRealignment)
FuncAttrs.addAttribute("stackrealign");
+ if (CodeGenOpts.Backchain)
+ FuncAttrs.addAttribute("backchain");
// Add target-cpu and target-features attributes to functions. If
// we have a decl for the function and it has a target attribute then
@@ -1594,6 +1787,18 @@ void CodeGenModule::ConstructAttributeList(
}
}
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all functions and calls in CUDA as convergent
+ // (meaning, they may call an intrinsically convergent op, such as
+ // __syncthreads(), and so can't have certain optimizations applied around
+ // them). LLVM will remove this attribute where it safely can.
+ FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+ // Respect -fcuda-flush-denormals-to-zero.
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+ FuncAttrs.addAttribute("nvptx-f32ftz", "true");
+ }
+
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
QualType RetTy = FI.getReturnType();
@@ -1620,6 +1825,9 @@ void CodeGenModule::ConstructAttributeList(
break;
}
+ case ABIArgInfo::CoerceAndExpand:
+ break;
+
case ABIArgInfo::Expand:
llvm_unreachable("Invalid ABI kind for return argument");
}
@@ -1639,10 +1847,13 @@ void CodeGenModule::ConstructAttributeList(
getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs));
}
+ bool hasUsedSRet = false;
+
// Attach attributes to sret.
if (IRFunctionArgs.hasSRetArg()) {
llvm::AttrBuilder SRETAttrs;
SRETAttrs.addAttribute(llvm::Attribute::StructRet);
+ hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
PAL.push_back(llvm::AttributeSet::get(
@@ -1727,7 +1938,8 @@ void CodeGenModule::ConstructAttributeList(
}
case ABIArgInfo::Ignore:
case ABIArgInfo::Expand:
- continue;
+ case ABIArgInfo::CoerceAndExpand:
+ break;
case ABIArgInfo::InAlloca:
// inalloca disables readnone and readonly.
@@ -1745,6 +1957,41 @@ void CodeGenModule::ConstructAttributeList(
Attrs.addAttribute(llvm::Attribute::NonNull);
}
+ switch (FI.getExtParameterInfo(ArgNo).getABI()) {
+ case ParameterABI::Ordinary:
+ break;
+
+ case ParameterABI::SwiftIndirectResult: {
+ // Add 'sret' if we haven't already used it for something, but
+ // only if the result is void.
+ if (!hasUsedSRet && RetTy->isVoidType()) {
+ Attrs.addAttribute(llvm::Attribute::StructRet);
+ hasUsedSRet = true;
+ }
+
+ // Add 'noalias' in either case.
+ Attrs.addAttribute(llvm::Attribute::NoAlias);
+
+ // Add 'dereferenceable' and 'alignment'.
+ auto PTy = ParamType->getPointeeType();
+ if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
+ auto info = getContext().getTypeInfoInChars(PTy);
+ Attrs.addDereferenceableAttr(info.first.getQuantity());
+ Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(),
+ info.second.getQuantity()));
+ }
+ break;
+ }
+
+ case ParameterABI::SwiftErrorResult:
+ Attrs.addAttribute(llvm::Attribute::SwiftError);
+ break;
+
+ case ParameterABI::SwiftContext:
+ Attrs.addAttribute(llvm::Attribute::SwiftSelf);
+ break;
+ }
+
if (Attrs.hasAttributes()) {
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
@@ -1810,6 +2057,18 @@ static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD,
return nullptr;
}
+namespace {
+ struct CopyBackSwiftError final : EHScopeStack::Cleanup {
+ Address Temp;
+ Address Arg;
+ CopyBackSwiftError(Address temp, Address arg) : Temp(temp), Arg(arg) {}
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ llvm::Value *errorValue = CGF.Builder.CreateLoad(Temp);
+ CGF.Builder.CreateStore(errorValue, Arg);
+ }
+ };
+}
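
The cleanup above implements copy-in/copy-out: LLVM constrains how a swifterror value may be used, so the function body works on an ordinary temporary and the cleanup writes the final value back on exit. Roughly, in source terms with hypothetical names:

struct Error;
void body(Error **err);                 // operates on the unrestricted temp

void callWithSwiftError(Error **swiftErrorArg) {
  Error *temp = *swiftErrorArg;         // copy the incoming value in
  body(&temp);
  *swiftErrorArg = temp;                // copy back on the normal path
}
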
+
void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::Function *Fn,
const FunctionArgList &Args) {
@@ -1835,7 +2094,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI);
// Flattened function arguments.
- SmallVector<llvm::Argument *, 16> FnArgs;
+ SmallVector<llvm::Value *, 16> FnArgs;
FnArgs.reserve(IRFunctionArgs.totalIRArgs());
for (auto &Arg : Fn->args()) {
FnArgs.push_back(&Arg);
@@ -1856,7 +2115,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// Name the struct return parameter.
if (IRFunctionArgs.hasSRetArg()) {
- auto AI = FnArgs[IRFunctionArgs.getSRetArgNo()];
+ auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]);
AI->setName("agg.result");
AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1,
llvm::Attribute::NoAlias));
@@ -1944,8 +2203,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
ArgI.getCoerceToType() == ConvertType(Ty) &&
ArgI.getDirectOffset() == 0) {
assert(NumIRArgs == 1);
- auto AI = FnArgs[FirstIRArg];
- llvm::Value *V = AI;
+ llvm::Value *V = FnArgs[FirstIRArg];
+ auto AI = cast<llvm::Argument>(V);
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
@@ -2014,6 +2273,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
AI->getArgNo() + 1,
llvm::Attribute::NoAlias));
+ // LLVM expects swifterror parameters to be used in very restricted
+ // ways. Copy the value into a less-restricted temporary.
+ if (FI.getExtParameterInfo(ArgNo).getABI()
+ == ParameterABI::SwiftErrorResult) {
+ QualType pointeeTy = Ty->getPointeeType();
+ assert(pointeeTy->isPointerType());
+ Address temp =
+ CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
+ Address arg = Address(V, getContext().getTypeAlignInChars(pointeeTy));
+ llvm::Value *incomingErrorValue = Builder.CreateLoad(arg);
+ Builder.CreateStore(incomingErrorValue, temp);
+ V = temp.getPointer();
+
+ // Push a cleanup to copy the value back at the end of the function.
+ // The convention does not guarantee that the value will be written
+ // back if the function exits with an unwind exception.
+ EHStack.pushCleanup<CopyBackSwiftError>(NormalCleanup, temp, arg);
+ }
+
// Ensure the argument is the correct type.
if (V->getType() != ArgI.getCoerceToType())
V = Builder.CreateBitCast(V, ArgI.getCoerceToType());
@@ -2100,6 +2378,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
break;
}
+ case ABIArgInfo::CoerceAndExpand: {
+ // Reconstruct into a temporary.
+ Address alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg));
+ ArgVals.push_back(ParamValue::forIndirect(alloca));
+
+ auto coercionType = ArgI.getCoerceAndExpandType();
+ alloca = Builder.CreateElementBitCast(alloca, coercionType);
+ auto layout = CGM.getDataLayout().getStructLayout(coercionType);
+
+ unsigned argIndex = FirstIRArg;
+ for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
+ llvm::Type *eltType = coercionType->getElementType(i);
+ if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType))
+ continue;
+
+ auto eltAddr = Builder.CreateStructGEP(alloca, i, layout);
+ auto elt = FnArgs[argIndex++];
+ Builder.CreateStore(elt, eltAddr);
+ }
+ assert(argIndex == FirstIRArg + NumIRArgs);
+ break;
+ }
+
case ABIArgInfo::Expand: {
// If this structure was expanded into multiple arguments then
// we need to create a temporary and reconstruct it from the
@@ -2462,9 +2763,26 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
// In ARC, end functions that return a retainable type with a call
// to objc_autoreleaseReturnValue.
if (AutoreleaseResult) {
+#ifndef NDEBUG
+ // Type::isObjCRetainableType has to be called on a QualType that hasn't
+ // been stripped of its typedefs, so we cannot use RetTy here. Get the
+ // original return type of the FunctionDecl, ObjCMethodDecl, or BlockDecl
+ // from CurCodeDecl or BlockInfo.
+ QualType RT;
+
+ if (auto *FD = dyn_cast<FunctionDecl>(CurCodeDecl))
+ RT = FD->getReturnType();
+ else if (auto *MD = dyn_cast<ObjCMethodDecl>(CurCodeDecl))
+ RT = MD->getReturnType();
+ else if (isa<BlockDecl>(CurCodeDecl))
+ RT = BlockInfo->BlockExpression->getFunctionType()->getReturnType();
+ else
+ llvm_unreachable("Unexpected function/method type");
+
assert(getLangOpts().ObjCAutoRefCount &&
!FI.isReturnsRetained() &&
- RetTy->isObjCRetainableType());
+ RT->isObjCRetainableType());
+#endif
RV = emitAutoreleaseOfResult(*this, RV);
}
@@ -2473,6 +2791,40 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
case ABIArgInfo::Ignore:
break;
+ case ABIArgInfo::CoerceAndExpand: {
+ auto coercionType = RetAI.getCoerceAndExpandType();
+ auto layout = CGM.getDataLayout().getStructLayout(coercionType);
+
+ // Load all of the coerced elements out into results.
+ llvm::SmallVector<llvm::Value*, 4> results;
+ Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType);
+ for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
+ auto coercedEltType = coercionType->getElementType(i);
+ if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType))
+ continue;
+
+ auto eltAddr = Builder.CreateStructGEP(addr, i, layout);
+ auto elt = Builder.CreateLoad(eltAddr);
+ results.push_back(elt);
+ }
+
+ // If we have one result, it's the single direct result type.
+ if (results.size() == 1) {
+ RV = results[0];
+
+ // Otherwise, we need to make a first-class aggregate.
+ } else {
+ // Construct a return type that lacks padding elements.
+ llvm::Type *returnType = RetAI.getUnpaddedCoerceAndExpandType();
+
+ RV = llvm::UndefValue::get(returnType);
+ for (unsigned i = 0, e = results.size(); i != e; ++i) {
+ RV = Builder.CreateInsertValue(RV, results[i], i);
+ }
+ }
+ break;
+ }
+
case ABIArgInfo::Expand:
llvm_unreachable("Invalid ABI kind for return argument");
}
@@ -2536,23 +2888,15 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
QualType type = param->getType();
- // For the most part, we just need to load the alloca, except:
- // 1) aggregate r-values are actually pointers to temporaries, and
- // 2) references to non-scalars are pointers directly to the aggregate.
- // I don't know why references to scalars are different here.
- if (const ReferenceType *ref = type->getAs<ReferenceType>()) {
- if (!hasScalarEvaluationKind(ref->getPointeeType()))
- return args.add(RValue::getAggregate(local), type);
-
- // Locals which are references to scalars are represented
- // with allocas holding the pointer.
- return args.add(RValue::get(Builder.CreateLoad(local)), type);
- }
-
assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
"cannot emit delegate call arguments for inalloca arguments!");
- args.add(convertTempToRValue(local, type, loc), type);
+ // For the most part, we just need to load the alloca, except that
+ // aggregate r-values are actually pointers to temporaries.
+ if (type->isReferenceType())
+ args.add(RValue::get(Builder.CreateLoad(local)), type);
+ else
+ args.add(convertTempToRValue(local, type, loc), type);
}
static bool isProvablyNull(llvm::Value *addr) {
@@ -2863,10 +3207,10 @@ void CodeGenFunction::EmitCallArgs(
size_t CallArgsStart = Args.size();
for (int I = ArgTypes.size() - 1; I >= 0; --I) {
CallExpr::const_arg_iterator Arg = ArgRange.begin() + I;
+ MaybeEmitImplicitObjectSize(I, *Arg);
EmitCallArg(Args, *Arg, ArgTypes[I]);
EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], (*Arg)->getExprLoc(),
CalleeDecl, ParamsToSkip + I);
- MaybeEmitImplicitObjectSize(I, *Arg);
}
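
The attribute serviced by MaybeEmitImplicitObjectSize is pass_object_size, which makes each caller evaluate __builtin_object_size on the argument at the call site and pass the result as a hidden extra argument. A hedged sketch of its use (the helper name is hypothetical):

#include <cstddef>
#include <cstring>

// Inside the callee, __builtin_object_size(buf, 0) folds to the value the
// caller computed; (size_t)-1 means the size was unknown at the call site.
void checkedMemset(char *buf __attribute__((pass_object_size(0))),
                   int c, std::size_t n) {
  std::size_t cap = __builtin_object_size(buf, 0);
  if (cap != static_cast<std::size_t>(-1) && n > cap)
    __builtin_trap();
  std::memset(buf, c, n);
}
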
// Un-reverse the arguments we just evaluated so they match up with the LLVM
@@ -3046,24 +3390,13 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
return EmitRuntimeCall(callee, None, name);
}
-/// Emits a simple call (never an invoke) to the given runtime
-/// function.
-llvm::CallInst *
-CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
- ArrayRef<llvm::Value*> args,
- const llvm::Twine &name) {
- llvm::CallInst *call = Builder.CreateCall(callee, args, name);
- call->setCallingConv(getRuntimeCC());
- return call;
-}
-
// Calls which may throw must have operand bundles indicating which funclet
// they are nested within.
static void
-getBundlesForFunclet(llvm::Value *Callee,
- llvm::Instruction *CurrentFuncletPad,
+getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad,
SmallVectorImpl<llvm::OperandBundleDef> &BundleList) {
- // There is no need for a funclet operand bundle if we aren't inside a funclet.
+ // There is no need for a funclet operand bundle if we aren't inside a
+ // funclet.
if (!CurrentFuncletPad)
return;
@@ -3075,6 +3408,19 @@ getBundlesForFunclet(llvm::Value *Callee,
BundleList.emplace_back("funclet", CurrentFuncletPad);
}
+/// Emits a simple call (never an invoke) to the given runtime function.
+llvm::CallInst *
+CodeGenFunction::EmitRuntimeCall(llvm::Value *callee,
+ ArrayRef<llvm::Value*> args,
+ const llvm::Twine &name) {
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
+ getBundlesForFunclet(callee, CurrentFuncletPad, BundleList);
+
+ llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name);
+ call->setCallingConv(getRuntimeCC());
+ return call;
+}
+
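
The funclet operand bundle matters for Windows-style EH, where cleanups are lowered into cleanuppad funclets; any call emitted inside such a funclet, runtime calls included, must name the enclosing pad in a "funclet" bundle. A source-level example that produces one (sketch; declarations only):

struct Guard { ~Guard(); };
void mayThrow();

void f() {
  Guard g;
  mayThrow();  // if this unwinds, ~Guard() runs inside a cleanup funclet
}
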
/// Emits a call or invoke to the given noreturn runtime function.
void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
ArrayRef<llvm::Value*> args) {
@@ -3098,8 +3444,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
}
}
-/// Emits a call or invoke instruction to the given nullary runtime
-/// function.
+/// Emits a call or invoke instruction to the given nullary runtime function.
llvm::CallSite
CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee,
const Twine &name) {
@@ -3123,13 +3468,16 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee,
ArrayRef<llvm::Value *> Args,
const Twine &Name) {
llvm::BasicBlock *InvokeDest = getInvokeDest();
+ SmallVector<llvm::OperandBundleDef, 1> BundleList;
+ getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList);
llvm::Instruction *Inst;
if (!InvokeDest)
- Inst = Builder.CreateCall(Callee, Args, Name);
+ Inst = Builder.CreateCall(Callee, Args, BundleList, Name);
else {
llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont");
- Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, Name);
+ Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, BundleList,
+ Name);
EmitBlock(ContBB);
}
@@ -3208,7 +3556,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
size_t UnusedReturnSize = 0;
- if (RetAI.isIndirect() || RetAI.isInAlloca()) {
+ if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getValue();
} else {
@@ -3222,12 +3570,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
if (IRFunctionArgs.hasSRetArg()) {
IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer();
- } else {
+ } else if (RetAI.isInAlloca()) {
Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex());
Builder.CreateStore(SRetPtr.getPointer(), Addr);
}
}
+ Address swiftErrorTemp = Address::invalid();
+ Address swiftErrorArg = Address::invalid();
+
assert(CallInfo.arg_size() == CallArgs.size() &&
"Mismatch between function signature & arguments.");
unsigned ArgNo = 0;
@@ -3334,6 +3685,25 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
else
V = Builder.CreateLoad(RV.getAggregateAddress());
+ // Implement swifterror by copying into a new swifterror argument.
+ // We'll write back in the normal path out of the call.
+ if (CallInfo.getExtParameterInfo(ArgNo).getABI()
+ == ParameterABI::SwiftErrorResult) {
+ assert(!swiftErrorTemp.isValid() && "multiple swifterror args");
+
+ QualType pointeeTy = I->Ty->getPointeeType();
+ swiftErrorArg =
+ Address(V, getContext().getTypeAlignInChars(pointeeTy));
+
+ swiftErrorTemp =
+ CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
+ V = swiftErrorTemp.getPointer();
+ cast<llvm::AllocaInst>(V)->setSwiftError(true);
+
+ llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg);
+ Builder.CreateStore(errorValue, swiftErrorTemp);
+ }
+
// We might have to widen integers, but we should never truncate.
if (ArgInfo.getCoerceToType() != V->getType() &&
V->getType()->isIntegerTy())
@@ -3344,6 +3714,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (FirstIRArg < IRFuncTy->getNumParams() &&
V->getType() != IRFuncTy->getParamType(FirstIRArg))
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));
+
IRCallArgs[FirstIRArg] = V;
break;
}
@@ -3402,6 +3773,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
break;
}
+ case ABIArgInfo::CoerceAndExpand: {
+ auto coercionType = ArgInfo.getCoerceAndExpandType();
+ auto layout = CGM.getDataLayout().getStructLayout(coercionType);
+
+ llvm::Value *tempSize = nullptr;
+ Address addr = Address::invalid();
+ if (RV.isAggregate()) {
+ addr = RV.getAggregateAddress();
+ } else {
+ assert(RV.isScalar()); // complex should always just be direct
+
+ llvm::Type *scalarType = RV.getScalarVal()->getType();
+ auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType);
+ auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);
+
+ tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize);
+
+ // Materialize to a temporary.
+ addr = CreateTempAlloca(RV.getScalarVal()->getType(),
+ CharUnits::fromQuantity(std::max(layout->getAlignment(),
+ scalarAlign)));
+ EmitLifetimeStart(scalarSize, addr.getPointer());
+
+ Builder.CreateStore(RV.getScalarVal(), addr);
+ }
+
+ addr = Builder.CreateElementBitCast(addr, coercionType);
+
+ unsigned IRArgPos = FirstIRArg;
+ for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
+ llvm::Type *eltType = coercionType->getElementType(i);
+ if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
+ Address eltAddr = Builder.CreateStructGEP(addr, i, layout);
+ llvm::Value *elt = Builder.CreateLoad(eltAddr);
+ IRCallArgs[IRArgPos++] = elt;
+ }
+ assert(IRArgPos == FirstIRArg + NumIRArgs);
+
+ if (tempSize) {
+ EmitLifetimeEnd(tempSize, addr.getPointer());
+ }
+
+ break;
+ }
+
case ABIArgInfo::Expand:
unsigned IRArgPos = FirstIRArg;
ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos);
@@ -3541,6 +3957,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CS.setAttributes(Attrs);
CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
+ // Insert instrumentation or attach profile metadata at indirect call sites.
+ // For more details, see the comment before the definition of
+ // IPVK_IndirectCallTarget in InstrProfData.inc.
+ if (!CS.getCalledFunction())
+ PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget,
+ CS.getInstruction(), Callee);
+
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
// optimizer it can aggressively ignore unwind edges.
if (CGM.getLangOpts().ObjCAutoRefCount)
@@ -3567,9 +3990,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
llvm::Instruction *CI = CS.getInstruction();
- if (Builder.isNamePreserving() && !CI->getType()->isVoidTy())
+ if (!CI->getType()->isVoidTy())
CI->setName("call");
+ // Perform the swifterror writeback.
+ if (swiftErrorTemp.isValid()) {
+ llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp);
+ Builder.CreateStore(errorResult, swiftErrorArg);
+ }
+
// Emit any writebacks immediately. Arguably this should happen
// after any return-value munging.
if (CallArgs.hasWritebacks())
@@ -3587,6 +4016,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RValue Ret = [&] {
switch (RetAI.getKind()) {
+ case ABIArgInfo::CoerceAndExpand: {
+ auto coercionType = RetAI.getCoerceAndExpandType();
+ auto layout = CGM.getDataLayout().getStructLayout(coercionType);
+
+ Address addr = SRetPtr;
+ addr = Builder.CreateElementBitCast(addr, coercionType);
+
+ assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType());
+ bool requiresExtract = isa<llvm::StructType>(CI->getType());
+
+ unsigned unpaddedIndex = 0;
+ for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
+ llvm::Type *eltType = coercionType->getElementType(i);
+ if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
+ Address eltAddr = Builder.CreateStructGEP(addr, i, layout);
+ llvm::Value *elt = CI;
+ if (requiresExtract)
+ elt = Builder.CreateExtractValue(elt, unpaddedIndex++);
+ else
+ assert(unpaddedIndex == 0);
+ Builder.CreateStore(elt, eltAddr);
+ }
+ // FALLTHROUGH
+ }
+
case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect: {
RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation());
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
index 2e566de6d8ac..7ed891f426aa 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp
@@ -26,6 +26,7 @@
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/Transforms/Utils/SanitizerStats.h"
using namespace clang;
using namespace CodeGen;
@@ -94,7 +95,7 @@ CodeGenModule::getDynamicOffsetAlignment(CharUnits actualBaseAlign,
// unless we someday add some sort of attribute to change the
// assumed alignment of 'this'. So our goal here is pretty much
// just to allow the user to explicitly say that a pointer is
- // under-aligned and then safely access its fields and v-tables.
+ // under-aligned and then safely access its fields and vtables.
if (actualBaseAlign >= expectedBaseAlign) {
return expectedTargetAlign;
}
@@ -745,7 +746,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF,
ArrayRef<VarDecl *> ArrayIndexes;
if (MemberInit->getNumArrayIndices())
- ArrayIndexes = MemberInit->getArrayIndexes();
+ ArrayIndexes = MemberInit->getArrayIndices();
CGF.EmitInitializerForField(Field, LHS, MemberInit->getInit(), ArrayIndexes);
}
@@ -986,7 +987,7 @@ namespace {
CodeGenFunction &CGF;
SanitizerSet OldSanOpts;
};
-}
+} // end anonymous namespace
namespace {
class FieldMemcpyizer {
@@ -1071,7 +1072,6 @@ namespace {
const CXXRecordDecl *ClassDecl;
private:
-
void emitMemcpyIR(Address DestPtr, Address SrcPtr, CharUnits Size) {
llvm::PointerType *DPT = DestPtr.getType();
llvm::Type *DBP =
@@ -1087,13 +1087,12 @@ namespace {
}
void addInitialField(FieldDecl *F) {
- FirstField = F;
- LastField = F;
- FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex());
- LastFieldOffset = FirstFieldOffset;
- LastAddedFieldIndex = F->getFieldIndex();
- return;
- }
+ FirstField = F;
+ LastField = F;
+ FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex());
+ LastFieldOffset = FirstFieldOffset;
+ LastAddedFieldIndex = F->getFieldIndex();
+ }
void addNextField(FieldDecl *F) {
// For the most part, the following invariant will hold:
@@ -1127,7 +1126,6 @@ namespace {
class ConstructorMemcpyizer : public FieldMemcpyizer {
private:
-
/// Get source argument for copy constructor. Returns null if not a copy
/// constructor.
static const VarDecl *getTrivialCopySource(CodeGenFunction &CGF,
@@ -1232,7 +1230,6 @@ namespace {
class AssignmentMemcpyizer : public FieldMemcpyizer {
private:
-
// Returns the memcpyable field copied by the given statement, if one
// exists. Otherwise returns null.
FieldDecl *getMemcpyableField(Stmt *S) {
@@ -1306,7 +1303,6 @@ namespace {
SmallVector<Stmt*, 16> AggregatedStmts;
public:
-
AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD,
FunctionArgList &Args)
: FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]),
@@ -1607,6 +1603,7 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args)
LexicalScope Scope(*this, RootCS->getSourceRange());
+ incrementProfileCounter(RootCS);
AssignmentMemcpyizer AM(*this, AssignOp, Args);
for (auto *I : RootCS->body())
AM.emitAssignment(I);
@@ -1628,6 +1625,7 @@ namespace {
struct CallDtorDeleteConditional final : EHScopeStack::Cleanup {
llvm::Value *ShouldDeleteCondition;
+
public:
CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition)
: ShouldDeleteCondition(ShouldDeleteCondition) {
@@ -1917,7 +1915,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD,
/// \param zeroInitialize true if each element should be
/// zero-initialized before it is constructed
void CodeGenFunction::EmitCXXAggrConstructorCall(
- const CXXConstructorDecl *ctor, const ConstantArrayType *arrayType,
+ const CXXConstructorDecl *ctor, const ArrayType *arrayType,
Address arrayBegin, const CXXConstructExpr *E, bool zeroInitialize) {
QualType elementType;
llvm::Value *numElements =
@@ -2050,6 +2048,62 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
bool ForVirtualBase,
bool Delegating, Address This,
const CXXConstructExpr *E) {
+ CallArgList Args;
+
+ // Push the this ptr.
+ Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
+
+ // If this is a trivial constructor, emit a memcpy now before we lose
+ // the alignment information on the argument.
+ // FIXME: It would be better to preserve alignment information into CallArg.
+ if (isMemcpyEquivalentSpecialMember(D)) {
+ assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
+
+ const Expr *Arg = E->getArg(0);
+ QualType SrcTy = Arg->getType();
+ Address Src = EmitLValue(Arg).getAddress();
+ QualType DestTy = getContext().getTypeDeclType(D->getParent());
+ EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
+ return;
+ }
+
+ // Add the rest of the user-supplied arguments.
+ const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
+ EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor());
+
+ EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args);
+}
+
+static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
+ const CXXConstructorDecl *Ctor,
+ CXXCtorType Type, CallArgList &Args) {
+ // We can't forward a variadic call.
+ if (Ctor->isVariadic())
+ return false;
+
+ if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+ // If the parameters are callee-cleanup, it's not safe to forward.
+ for (auto *P : Ctor->parameters())
+ if (P->getType().isDestructedType())
+ return false;
+
+ // Likewise if they're inalloca.
+ const CGFunctionInfo &Info =
+ CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0);
+ if (Info.usesInAlloca())
+ return false;
+ }
+
+ // Anything else should be OK.
+ return true;
+}
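
Concretely, the cases canEmitDelegateCallArgs rejects arise from C++11 inheriting constructors like these (sketch; the variadic case can never be forwarded in any ABI, while the callee-cleanup case is ABI-dependent):

struct Arg { ~Arg(); };

struct B {
  B(Arg a);                  // callee-cleanup parameter on some ABIs
  B(const char *fmt, ...);   // variadic: '...' cannot be re-forwarded
};

struct D : B {
  using B::B;                // inherited constructors
};
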
+
+void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
+ CXXCtorType Type,
+ bool ForVirtualBase,
+ bool Delegating,
+ Address This,
+ CallArgList &Args) {
const CXXRecordDecl *ClassDecl = D->getParent();
// C++11 [class.mfct.non-static]p2:
@@ -2060,7 +2114,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
This.getPointer(), getContext().getRecordType(ClassDecl));
if (D->isTrivial() && D->isDefaultConstructor()) {
- assert(E->getNumArgs() == 0 && "trivial default ctor with args");
+ assert(Args.size() == 1 && "trivial default ctor with args");
return;
}
@@ -2068,24 +2122,24 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
// union copy constructor, we must emit a memcpy, because the AST does not
// model that copy.
if (isMemcpyEquivalentSpecialMember(D)) {
- assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
+ assert(Args.size() == 2 && "unexpected argcount for trivial ctor");
- const Expr *Arg = E->getArg(0);
- QualType SrcTy = Arg->getType();
- Address Src = EmitLValue(Arg).getAddress();
+ QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType();
+ Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy));
QualType DestTy = getContext().getTypeDeclType(ClassDecl);
EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
return;
}
- CallArgList Args;
-
- // Push the this ptr.
- Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
-
- // Add the rest of the user-supplied arguments.
- const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
- EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor());
+ // Check whether we can actually emit the constructor before trying to do so.
+ if (auto Inherited = D->getInheritedConstructor()) {
+ if (getTypes().inheritingCtorHasParams(Inherited, Type) &&
+ !canEmitDelegateCallArgs(*this, D, Type, Args)) {
+ EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase,
+ Delegating, Args);
+ return;
+ }
+ }
// Insert any ABI-specific implicit constructor arguments.
unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs(
@@ -2115,6 +2169,95 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
EmitVTableAssumptionLoads(ClassDecl, This);
}
+void CodeGenFunction::EmitInheritedCXXConstructorCall(
+ const CXXConstructorDecl *D, bool ForVirtualBase, Address This,
+ bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) {
+ CallArgList Args;
+ CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()),
+ /*NeedsCopy=*/false);
+
+ // Forward the parameters.
+ if (InheritedFromVBase &&
+ CGM.getTarget().getCXXABI().hasConstructorVariants()) {
+ // Nothing to do; this construction is not responsible for constructing
+ // the base class containing the inherited constructor.
+ // FIXME: Can we just pass undef's for the remaining arguments if we don't
+ // have constructor variants?
+ Args.push_back(ThisArg);
+ } else if (!CXXInheritedCtorInitExprArgs.empty()) {
+ // The inheriting constructor was inlined; just inject its arguments.
+ assert(CXXInheritedCtorInitExprArgs.size() >= D->getNumParams() &&
+ "wrong number of parameters for inherited constructor call");
+ Args = CXXInheritedCtorInitExprArgs;
+ Args[0] = ThisArg;
+ } else {
+ // The inheriting constructor was not inlined. Emit delegating arguments.
+ Args.push_back(ThisArg);
+ const auto *OuterCtor = cast<CXXConstructorDecl>(CurCodeDecl);
+ assert(OuterCtor->getNumParams() == D->getNumParams());
+ assert(!OuterCtor->isVariadic() && "should have been inlined");
+
+ for (const auto *Param : OuterCtor->parameters()) {
+ assert(getContext().hasSameUnqualifiedType(
+ OuterCtor->getParamDecl(Param->getFunctionScopeIndex())->getType(),
+ Param->getType()));
+ EmitDelegateCallArg(Args, Param, E->getLocation());
+
+ // Forward __attribute__((pass_object_size)).
+ if (Param->hasAttr<PassObjectSizeAttr>()) {
+ auto *POSParam = SizeArguments[Param];
+ assert(POSParam && "missing pass_object_size value for forwarding");
+ EmitDelegateCallArg(Args, POSParam, E->getLocation());
+ }
+ }
+ }
+
+ EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false,
+ This, Args);
+}
+
+void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
+ const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase,
+ bool Delegating, CallArgList &Args) {
+ InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType));
+
+ // Save the arguments to be passed to the inherited constructor.
+ CXXInheritedCtorInitExprArgs = Args;
+
+ FunctionArgList Params;
+ QualType RetType = BuildFunctionArgList(CurGD, Params);
+ FnRetTy = RetType;
+
+ // Insert any ABI-specific implicit constructor arguments.
+ CGM.getCXXABI().addImplicitConstructorArgs(*this, Ctor, CtorType,
+ ForVirtualBase, Delegating, Args);
+
+ // Emit a simplified prolog. We only need to emit the implicit params.
+ assert(Args.size() >= Params.size() && "too few arguments for call");
+ for (unsigned I = 0, N = Args.size(); I != N; ++I) {
+ if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) {
+ const RValue &RV = Args[I].RV;
+ assert(!RV.isComplex() && "complex indirect params not supported");
+ ParamValue Val = RV.isScalar()
+ ? ParamValue::forDirect(RV.getScalarVal())
+ : ParamValue::forIndirect(RV.getAggregateAddress());
+ EmitParmDecl(*Params[I], Val, I + 1);
+ }
+ }
+
+ // Create a return value slot if the ABI implementation wants one.
+ // FIXME: This is dumb, we should ask the ABI not to try to set the return
+ // value instead.
+ if (!RetType->isVoidType())
+ ReturnValue = CreateIRTemp(RetType, "retval.inhctor");
+
+ CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
+ CXXThisValue = CXXABIThisValue;
+
+ // Directly emit the constructor initializers.
+ EmitCtorPrologue(Ctor, CtorType, Params);
+}
+
void CodeGenFunction::EmitVTableAssumptionLoad(const VPtr &Vptr, Address This) {
llvm::Value *VTableGlobal =
CGM.getCXXABI().getVTableAddressPoint(Vptr.Base, Vptr.VTableClass);
@@ -2147,19 +2290,6 @@ void
CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
Address This, Address Src,
const CXXConstructExpr *E) {
- if (isMemcpyEquivalentSpecialMember(D)) {
- assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
- assert(D->isCopyOrMoveConstructor() &&
- "trivial 1-arg ctor not a copy/move ctor");
- EmitAggregateCopyCtor(This, Src,
- getContext().getTypeDeclType(D->getParent()),
- (*E->arg_begin())->getType());
- return;
- }
- llvm::Value *Callee = CGM.getAddrOfCXXStructor(D, StructorType::Complete);
- assert(D->isInstance() &&
- "Trying to emit a member call expr on a static method!");
-
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
CallArgList Args;
@@ -2177,8 +2307,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(),
/*ParamsToSkip*/ 1);
- EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All),
- Callee, ReturnValueSlot(), Args, D);
+ EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args);
}
void
@@ -2192,21 +2321,17 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor,
assert(I != E && "no parameters to constructor");
// this
- DelegateArgs.add(RValue::get(LoadCXXThis()), (*I)->getType());
+ Address This = LoadCXXThisAddress();
+ DelegateArgs.add(RValue::get(This.getPointer()), (*I)->getType());
++I;
- // vtt
- if (llvm::Value *VTT = GetVTTParameter(GlobalDecl(Ctor, CtorType),
- /*ForVirtualBase=*/false,
- /*Delegating=*/true)) {
- QualType VoidPP = getContext().getPointerType(getContext().VoidPtrTy);
- DelegateArgs.add(RValue::get(VTT), VoidPP);
-
- if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) {
- assert(I != E && "cannot skip vtt parameter, already done with args");
- assert((*I)->getType() == VoidPP && "skipping parameter not of vtt type");
- ++I;
- }
+ // FIXME: The location of the VTT parameter in the parameter list is
+ // specific to the Itanium ABI and shouldn't be hardcoded here.
+ if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) {
+ assert(I != E && "cannot skip vtt parameter, already done with args");
+ assert((*I)->getType()->isPointerType() &&
+ "skipping parameter not of vtt type");
+ ++I;
}
// Explicit arguments.
@@ -2216,11 +2341,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor,
EmitDelegateCallArg(DelegateArgs, param, Loc);
}
- llvm::Value *Callee =
- CGM.getAddrOfCXXStructor(Ctor, getFromCtorType(CtorType));
- EmitCall(CGM.getTypes()
- .arrangeCXXStructorDeclaration(Ctor, getFromCtorType(CtorType)),
- Callee, ReturnValueSlot(), DelegateArgs, Ctor);
+ EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false,
+ /*Delegating=*/true, This, DelegateArgs);
}
namespace {
@@ -2289,7 +2411,7 @@ namespace {
/*Delegating=*/false, Addr);
}
};
-}
+} // end anonymous namespace
void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D,
Address Addr) {
@@ -2487,15 +2609,35 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) {
RD->bases_begin()->getType()->getAsCXXRecordDecl());
}
-void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD,
+void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
+ llvm::Value *VTable,
+ SourceLocation Loc) {
+ if (CGM.getCodeGenOpts().WholeProgramVTables &&
+ CGM.HasHiddenLTOVisibility(RD)) {
+ llvm::Metadata *MD =
+ CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
+ llvm::Value *TypeId =
+ llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD);
+
+ llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
+ llvm::Value *TypeTest =
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test),
+ {CastedVTable, TypeId});
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest);
+ }
+
+ if (SanOpts.has(SanitizerKind::CFIVCall))
+ EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc);
+}
+
+void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXRecordDecl *RD,
llvm::Value *VTable,
CFITypeCheckKind TCK,
SourceLocation Loc) {
- const CXXRecordDecl *ClassDecl = MD->getParent();
if (!SanOpts.has(SanitizerKind::CFICastStrict))
- ClassDecl = LeastDerivedClassWithSameLayout(ClassDecl);
+ RD = LeastDerivedClassWithSameLayout(RD);
- EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc);
+ EmitVTablePtrCheck(RD, VTable, TCK, Loc);
}
void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T,
@@ -2547,26 +2689,41 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
llvm::Value *VTable,
CFITypeCheckKind TCK,
SourceLocation Loc) {
- if (CGM.IsCFIBlacklistedRecord(RD))
+ if (!CGM.getCodeGenOpts().SanitizeCfiCrossDso &&
+ !CGM.HasHiddenLTOVisibility(RD))
+ return;
+
+ std::string TypeName = RD->getQualifiedNameAsString();
+ if (getContext().getSanitizerBlacklist().isBlacklistedType(TypeName))
return;
SanitizerScope SanScope(this);
+ llvm::SanitizerStatKind SSK;
+ switch (TCK) {
+ case CFITCK_VCall:
+ SSK = llvm::SanStat_CFI_VCall;
+ break;
+ case CFITCK_NVCall:
+ SSK = llvm::SanStat_CFI_NVCall;
+ break;
+ case CFITCK_DerivedCast:
+ SSK = llvm::SanStat_CFI_DerivedCast;
+ break;
+ case CFITCK_UnrelatedCast:
+ SSK = llvm::SanStat_CFI_UnrelatedCast;
+ break;
+ case CFITCK_ICall:
+ llvm_unreachable("not expecting CFITCK_ICall");
+ }
+ EmitSanitizerStatReport(SSK);
llvm::Metadata *MD =
CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
- llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD);
+ llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD);
llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
- llvm::Value *BitSetTest =
- Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test),
- {CastedVTable, BitSetName});
-
- if (CGM.getCodeGenOpts().SanitizeCfiCrossDso) {
- if (auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD)) {
- EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedVTable);
- return;
- }
- }
+ llvm::Value *TypeTest = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId});
SanitizerMask M;
switch (TCK) {
@@ -2582,15 +2739,70 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
case CFITCK_UnrelatedCast:
M = SanitizerKind::CFIUnrelatedCast;
break;
+ case CFITCK_ICall:
+ llvm_unreachable("not expecting CFITCK_ICall");
}
llvm::Constant *StaticData[] = {
+ llvm::ConstantInt::get(Int8Ty, TCK),
EmitCheckSourceLocation(Loc),
EmitCheckTypeDescriptor(QualType(RD->getTypeForDecl(), 0)),
- llvm::ConstantInt::get(Int8Ty, TCK),
};
- EmitCheck(std::make_pair(BitSetTest, M), "cfi_bad_type", StaticData,
- CastedVTable);
+
+ auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD);
+ if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) {
+ EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, CastedVTable, StaticData);
+ return;
+ }
+
+ if (CGM.getCodeGenOpts().SanitizeTrap.has(M)) {
+ EmitTrapCheck(TypeTest);
+ return;
+ }
+
+ llvm::Value *AllVtables = llvm::MetadataAsValue::get(
+ CGM.getLLVMContext(),
+ llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
+ llvm::Value *ValidVtable = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, AllVtables});
+ EmitCheck(std::make_pair(TypeTest, M), "cfi_check_fail", StaticData,
+ {CastedVTable, ValidVtable});
+}
+
+bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) {
+ if (!CGM.getCodeGenOpts().WholeProgramVTables ||
+ !SanOpts.has(SanitizerKind::CFIVCall) ||
+ !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) ||
+ !CGM.HasHiddenLTOVisibility(RD))
+ return false;
+
+ std::string TypeName = RD->getQualifiedNameAsString();
+ return !getContext().getSanitizerBlacklist().isBlacklistedType(TypeName);
+}
+
+llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
+ const CXXRecordDecl *RD, llvm::Value *VTable, uint64_t VTableByteOffset) {
+ SanitizerScope SanScope(this);
+
+ EmitSanitizerStatReport(llvm::SanStat_CFI_VCall);
+
+ llvm::Metadata *MD =
+ CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
+ llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD);
+
+ llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
+ llvm::Value *CheckedLoad = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_checked_load),
+ {CastedVTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset),
+ TypeId});
+ llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
+
+ EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall),
+ "cfi_check_fail", nullptr, nullptr);
+
+ return Builder.CreateBitCast(
+ Builder.CreateExtractValue(CheckedLoad, 0),
+ cast<llvm::PointerType>(VTable->getType())->getElementType());
}
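
At a high level, the type-test and checked-load intrinsics verify that an object's vtable pointer is one of the vtables valid for the static type before the virtual call proceeds. A loose model in plain C++ (illustration only: the vptr read below is not portable, and the real check is a constant-time test over LTO-arranged vtable layouts, not a set lookup):

#include <set>

std::set<const void *> validVTablesForA;  // stand-in for the type metadata

struct A { virtual void f() {} };

void checkedVCall(A *a) {
  const void *vptr = *reinterpret_cast<const void *const *>(a);
  if (!validVTablesForA.count(vptr))
    __builtin_trap();                     // the cfi_check_fail / trap path
  a->f();
}
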
// FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do
@@ -2731,7 +2943,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() {
CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
// Add the rest of the parameters.
- for (auto param : BD->params())
+ for (auto param : BD->parameters())
EmitDelegateCallArg(CallArgs, param, param->getLocStart());
assert(!Lambda->isGenericLambda() &&
@@ -2761,7 +2973,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
CallArgs.add(RValue::get(ThisPtr), ThisType);
// Add the rest of the parameters.
- for (auto Param : MD->params())
+ for (auto Param : MD->parameters())
EmitDelegateCallArg(CallArgs, Param, Param->getLocStart());
const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp
index ba7dcf7de6c7..b3278b3b4fef 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp
@@ -112,7 +112,7 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) {
/// Push an entry of the given size onto this protected-scope stack.
char *EHScopeStack::allocate(size_t Size) {
- Size = llvm::RoundUpToAlignment(Size, ScopeStackAlignment);
+ Size = llvm::alignTo(Size, ScopeStackAlignment);
if (!StartOfBuffer) {
unsigned Capacity = 1024;
while (Capacity < Size) Capacity *= 2;
@@ -143,7 +143,7 @@ char *EHScopeStack::allocate(size_t Size) {
}
void EHScopeStack::deallocate(size_t Size) {
- StartOfData += llvm::RoundUpToAlignment(Size, ScopeStackAlignment);
+ StartOfData += llvm::alignTo(Size, ScopeStackAlignment);
}
bool EHScopeStack::containsOnlyLifetimeMarkers(
@@ -157,6 +157,20 @@ bool EHScopeStack::containsOnlyLifetimeMarkers(
return true;
}
+bool EHScopeStack::requiresLandingPad() const {
+ for (stable_iterator si = getInnermostEHScope(); si != stable_end(); ) {
+ // Skip lifetime markers.
+ if (auto *cleanup = dyn_cast<EHCleanupScope>(&*find(si)))
+ if (cleanup->isLifetimeMarker()) {
+ si = cleanup->getEnclosingEHScope();
+ continue;
+ }
+ return true;
+ }
+
+ return false;
+}
+
EHScopeStack::stable_iterator
EHScopeStack::getInnermostActiveNormalCleanup() const {
for (stable_iterator si = getInnermostNormalCleanup(), se = stable_end();
@@ -174,6 +188,7 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) {
bool IsNormalCleanup = Kind & NormalCleanup;
bool IsEHCleanup = Kind & EHCleanup;
bool IsActive = !(Kind & InactiveCleanup);
+ bool IsLifetimeMarker = Kind & LifetimeMarker;
EHCleanupScope *Scope =
new (Buffer) EHCleanupScope(IsNormalCleanup,
IsEHCleanup,
@@ -186,6 +201,8 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) {
InnermostNormalCleanup = stable_begin();
if (IsEHCleanup)
InnermostEHScope = stable_begin();
+ if (IsLifetimeMarker)
+ Scope->setLifetimeMarker();
return Scope->getCleanupBuffer();
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h
index 909f00b05925..98d01b1326c9 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h
@@ -86,11 +86,6 @@ protected:
/// The amount of extra storage needed by the Cleanup.
/// Always a multiple of the scope-stack alignment.
unsigned CleanupSize : 12;
-
- /// The number of fixups required by enclosing scopes (not including
- /// this one). If this is the top cleanup scope, all the fixups
- /// from this index onwards belong to this scope.
- unsigned FixupDepth : 32 - 18 - NumCommonBits; // currently 12
};
class FilterBitFields {
@@ -188,6 +183,7 @@ public:
EHScopeStack::stable_iterator enclosingEHScope)
: EHScope(Catch, enclosingEHScope) {
CatchBits.NumHandlers = numHandlers;
+ assert(CatchBits.NumHandlers == numHandlers && "NumHandlers overflow?");
}
unsigned getNumHandlers() const {
@@ -263,6 +259,11 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope {
};
mutable struct ExtInfo *ExtInfo;
+ /// The number of fixups required by enclosing scopes (not including
+ /// this one). If this is the top cleanup scope, all the fixups
+ /// from this index onwards belong to this scope.
+ unsigned FixupDepth;
+
struct ExtInfo &getExtInfo() {
if (!ExtInfo) ExtInfo = new struct ExtInfo();
return *ExtInfo;
@@ -288,8 +289,9 @@ public:
unsigned cleanupSize, unsigned fixupDepth,
EHScopeStack::stable_iterator enclosingNormal,
EHScopeStack::stable_iterator enclosingEH)
- : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal),
- NormalBlock(nullptr), ActiveFlag(nullptr), ExtInfo(nullptr) {
+ : EHScope(EHScope::Cleanup, enclosingEH),
+ EnclosingNormal(enclosingNormal), NormalBlock(nullptr),
+ ActiveFlag(nullptr), ExtInfo(nullptr), FixupDepth(fixupDepth) {
CleanupBits.IsNormalCleanup = isNormal;
CleanupBits.IsEHCleanup = isEH;
CleanupBits.IsActive = isActive;
@@ -297,7 +299,6 @@ public:
CleanupBits.TestFlagInNormalCleanup = false;
CleanupBits.TestFlagInEHCleanup = false;
CleanupBits.CleanupSize = cleanupSize;
- CleanupBits.FixupDepth = fixupDepth;
assert(CleanupBits.CleanupSize == cleanupSize && "cleanup size overflow");
}
@@ -343,7 +344,7 @@ public:
return CleanupBits.TestFlagInEHCleanup;
}
- unsigned getFixupDepth() const { return CleanupBits.FixupDepth; }
+ unsigned getFixupDepth() const { return FixupDepth; }
EHScopeStack::stable_iterator getEnclosingNormalCleanup() const {
return EnclosingNormal;
}
@@ -451,6 +452,7 @@ public:
EHFilterScope(unsigned numFilters)
: EHScope(Filter, EHScopeStack::stable_end()) {
FilterBits.NumFilters = numFilters;
+ assert(FilterBits.NumFilters == numFilters && "NumFilters overflow");
}
static size_t getSizeForNumFilters(unsigned numFilters) {
@@ -540,7 +542,7 @@ public:
Size = EHPadEndScope::getSize();
break;
}
- Ptr += llvm::RoundUpToAlignment(Size, ScopeStackAlignment);
+ Ptr += llvm::alignTo(Size, ScopeStackAlignment);
return *this;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
index 5df85194878d..0607a5157a6f 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -13,6 +13,7 @@
#include "CGDebugInfo.h"
#include "CGBlocks.h"
+#include "CGRecordLayout.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenFunction.h"
@@ -168,10 +169,10 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
}
// Check namespace.
- if (const NamespaceDecl *NSDecl = dyn_cast<NamespaceDecl>(Context))
+ if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context))
return getOrCreateNameSpace(NSDecl);
- if (const RecordDecl *RDecl = dyn_cast<RecordDecl>(Context))
+ if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
if (!RDecl->isDependentType())
return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl),
getOrCreateMainFile());
@@ -184,30 +185,32 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) {
FunctionTemplateSpecializationInfo *Info =
FD->getTemplateSpecializationInfo();
- if (!Info && FII && !CGM.getCodeGenOpts().EmitCodeView)
+ // Emit the unqualified name in normal operation. LLVM and the debugger can
+ // compute the fully qualified name from the scope chain. If we're only
+ // emitting line table info, there won't be any scope chains, so emit the
+ // fully qualified name here so that stack traces are more accurate.
+ // FIXME: Do this when emitting DWARF as well as when emitting CodeView after
+ // evaluating the size impact.
+ bool UseQualifiedName = DebugKind == codegenoptions::DebugLineTablesOnly &&
+ CGM.getCodeGenOpts().EmitCodeView;
+
+ if (!Info && FII && !UseQualifiedName)
return FII->getName();
- // Otherwise construct human readable name for debug info.
SmallString<128> NS;
llvm::raw_svector_ostream OS(NS);
PrintingPolicy Policy(CGM.getLangOpts());
-
- if (CGM.getCodeGenOpts().EmitCodeView) {
- // Print a fully qualified name like MSVC would.
- Policy.MSVCFormatting = true;
- FD->printQualifiedName(OS, Policy);
- } else {
- // Print the unqualified name with some template arguments. This is what
- // DWARF-based debuggers expect.
+ Policy.MSVCFormatting = CGM.getCodeGenOpts().EmitCodeView;
+ if (!UseQualifiedName)
FD->printName(OS);
- // Add any template specialization args.
- if (Info) {
- const TemplateArgumentList *TArgs = Info->TemplateArguments;
- const TemplateArgument *Args = TArgs->data();
- unsigned NumArgs = TArgs->size();
- TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs,
- Policy);
- }
+ else
+ FD->printQualifiedName(OS, Policy);
+
+ // Add any template specialization args.
+ if (Info) {
+ const TemplateArgumentList *TArgs = Info->TemplateArguments;
+ TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(),
+ Policy);
}
// Copy this name on the side and use its reference.
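
To illustrate the two shapes (a sketch; the exact output depends on the printing policy):

    // namespace N { template <typename T> void f(T) {} }
    // CodeView + line-tables-only: name is "N::f<int>", self-contained for
    // stack traces; otherwise: "f<int>", with "N::" recovered from the
    // scope chain by the debugger.
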
@@ -219,21 +222,18 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) {
llvm::raw_svector_ostream OS(MethodName);
OS << (OMD->isInstanceMethod() ? '-' : '+') << '[';
const DeclContext *DC = OMD->getDeclContext();
- if (const ObjCImplementationDecl *OID =
- dyn_cast<const ObjCImplementationDecl>(DC)) {
+ if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) {
OS << OID->getName();
- } else if (const ObjCInterfaceDecl *OID =
- dyn_cast<const ObjCInterfaceDecl>(DC)) {
+ } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) {
OS << OID->getName();
- } else if (const ObjCCategoryDecl *OC = dyn_cast<ObjCCategoryDecl>(DC)) {
+ } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) {
if (OC->IsClassExtension()) {
OS << OC->getClassInterface()->getName();
} else {
- OS << ((const NamedDecl *)OC)->getIdentifier()->getNameStart() << '('
+ OS << OC->getIdentifier()->getNameStart() << '('
<< OC->getIdentifier()->getNameStart() << ')';
}
- } else if (const ObjCCategoryImplDecl *OCD =
- dyn_cast<const ObjCCategoryImplDecl>(DC)) {
+ } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) {
OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '('
<< OCD->getIdentifier()->getNameStart() << ')';
} else if (isa<ObjCProtocolDecl>(DC)) {
@@ -254,20 +254,56 @@ StringRef CGDebugInfo::getSelectorName(Selector S) {
}
StringRef CGDebugInfo::getClassName(const RecordDecl *RD) {
- // quick optimization to avoid having to intern strings that are already
- // stored reliably elsewhere
- if (!isa<ClassTemplateSpecializationDecl>(RD))
- return RD->getName();
-
- SmallString<128> Name;
- {
+ if (isa<ClassTemplateSpecializationDecl>(RD)) {
+ SmallString<128> Name;
llvm::raw_svector_ostream OS(Name);
RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(),
/*Qualified*/ false);
+
+ // Copy this name on the side and use its reference.
+ return internString(Name);
}
- // Copy this name on the side and use its reference.
- return internString(Name);
+ // quick optimization to avoid having to intern strings that are already
+ // stored reliably elsewhere
+ if (const IdentifierInfo *II = RD->getIdentifier())
+ return II->getName();
+
+ // The CodeView printer in LLVM wants to see the names of unnamed types:
+ // they are used to reconstruct the fully qualified type names.
+ if (CGM.getCodeGenOpts().EmitCodeView) {
+ if (const TypedefNameDecl *D = RD->getTypedefNameForAnonDecl()) {
+ assert(RD->getDeclContext() == D->getDeclContext() &&
+ "Typedef should not be in another decl context!");
+ assert(D->getDeclName().getAsIdentifierInfo() &&
+ "Typedef was not named!");
+ return D->getDeclName().getAsIdentifierInfo()->getName();
+ }
+
+ if (CGM.getLangOpts().CPlusPlus) {
+ StringRef Name;
+
+ ASTContext &Context = CGM.getContext();
+ if (const DeclaratorDecl *DD = Context.getDeclaratorForUnnamedTagDecl(RD))
+ // Anonymous types without a name for linkage purposes have their
+ // declarator mangled in if they have one.
+ Name = DD->getName();
+ else if (const TypedefNameDecl *TND =
+ Context.getTypedefNameForUnnamedTagDecl(RD))
+ // Anonymous types without a name for linkage purposes have their
+ // associated typedef mangled in if they have one.
+ Name = TND->getName();
+
+ if (!Name.empty()) {
+ SmallString<256> UnnamedType("<unnamed-type-");
+ UnnamedType += Name;
+ UnnamedType += '>';
+ return internString(UnnamedType);
+ }
+ }
+ }
+
+ return StringRef();
}
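
The naming paths, sketched on small inputs (C++ and CodeView assumed where noted):

    // struct S {};                  -> "S"                (identifier)
    // typedef struct { int i; } T;  -> "T"                (typedef name)
    // struct { int i; } g;          -> "<unnamed-type-g>" (declarator name;
    //                                  C++ + CodeView only)
    // anything else unnamed         -> ""                 (empty StringRef)
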
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
@@ -383,6 +419,8 @@ void CGDebugInfo::CreateCompileUnit() {
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
} else if (LO.ObjC1) {
LangTag = llvm::dwarf::DW_LANG_ObjC;
+ } else if (LO.RenderScript) {
+ LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
} else if (LO.C99) {
LangTag = llvm::dwarf::DW_LANG_C99;
} else {
@@ -396,16 +434,27 @@ void CGDebugInfo::CreateCompileUnit() {
if (LO.ObjC1)
RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1;
+ llvm::DICompileUnit::DebugEmissionKind EmissionKind;
+ switch (DebugKind) {
+ case codegenoptions::NoDebugInfo:
+ case codegenoptions::LocTrackingOnly:
+ EmissionKind = llvm::DICompileUnit::NoDebug;
+ break;
+ case codegenoptions::DebugLineTablesOnly:
+ EmissionKind = llvm::DICompileUnit::LineTablesOnly;
+ break;
+ case codegenoptions::LimitedDebugInfo:
+ case codegenoptions::FullDebugInfo:
+ EmissionKind = llvm::DICompileUnit::FullDebug;
+ break;
+ }
+
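
In driver terms the mapping is roughly as follows (flag spellings assumed, not taken from this diff):

    // no -g, or locations kept only for optimization remarks -> NoDebug
    // -gline-tables-only                                     -> LineTablesOnly
    // -g (limited) and -g -fstandalone-debug (full)          -> FullDebug
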
// Create new compile unit.
// FIXME - Eliminate TheCU.
TheCU = DBuilder.createCompileUnit(
LangTag, remapDIPath(MainFileName), remapDIPath(getCurrentDirname()),
Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers,
- CGM.getCodeGenOpts().SplitDwarfFile,
- DebugKind <= CodeGenOptions::DebugLineTablesOnly
- ? llvm::DIBuilder::LineTablesOnly
- : llvm::DIBuilder::FullDebug,
- 0 /* DWOid */, DebugKind != CodeGenOptions::LocTrackingOnly);
+ CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -463,39 +512,11 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return SelTy;
}
- case BuiltinType::OCLImage1d:
- return getOrCreateStructPtrType("opencl_image1d_t", OCLImage1dDITy);
- case BuiltinType::OCLImage1dArray:
- return getOrCreateStructPtrType("opencl_image1d_array_t",
- OCLImage1dArrayDITy);
- case BuiltinType::OCLImage1dBuffer:
- return getOrCreateStructPtrType("opencl_image1d_buffer_t",
- OCLImage1dBufferDITy);
- case BuiltinType::OCLImage2d:
- return getOrCreateStructPtrType("opencl_image2d_t", OCLImage2dDITy);
- case BuiltinType::OCLImage2dArray:
- return getOrCreateStructPtrType("opencl_image2d_array_t",
- OCLImage2dArrayDITy);
- case BuiltinType::OCLImage2dDepth:
- return getOrCreateStructPtrType("opencl_image2d_depth_t",
- OCLImage2dDepthDITy);
- case BuiltinType::OCLImage2dArrayDepth:
- return getOrCreateStructPtrType("opencl_image2d_array_depth_t",
- OCLImage2dArrayDepthDITy);
- case BuiltinType::OCLImage2dMSAA:
- return getOrCreateStructPtrType("opencl_image2d_msaa_t",
- OCLImage2dMSAADITy);
- case BuiltinType::OCLImage2dArrayMSAA:
- return getOrCreateStructPtrType("opencl_image2d_array_msaa_t",
- OCLImage2dArrayMSAADITy);
- case BuiltinType::OCLImage2dMSAADepth:
- return getOrCreateStructPtrType("opencl_image2d_msaa_depth_t",
- OCLImage2dMSAADepthDITy);
- case BuiltinType::OCLImage2dArrayMSAADepth:
- return getOrCreateStructPtrType("opencl_image2d_array_msaa_depth_t",
- OCLImage2dArrayMSAADepthDITy);
- case BuiltinType::OCLImage3d:
- return getOrCreateStructPtrType("opencl_image3d_t", OCLImage3dDITy);
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \
+ SingletonId);
+#include "clang/Basic/OpenCLImageTypes.def"
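
For one entry of the .def file the expansion looks roughly like this — the entry's arguments are illustrative, not reproduced from the header:

    // IMAGE_TYPE(image2d, OCLImage2dRO, OCLImage2dRODITy, read_only, ro)
    //   =>
    // case BuiltinType::OCLImage2dRO:
    //   return getOrCreateStructPtrType("opencl_image2d_ro_t",
    //                                   OCLImage2dRODITy);
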
case BuiltinType::OCLSampler:
return DBuilder.createBasicType(
"opencl_sampler_t", CGM.getContext().getTypeSize(BT),
@@ -545,7 +566,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::Half:
case BuiltinType::Float:
case BuiltinType::LongDouble:
+ case BuiltinType::Float128:
case BuiltinType::Double:
+ // FIXME: For targets where long double and __float128 have the same size,
+ // they are currently indistinguishable in the debugger without some
+ // special treatment. However, there is currently no consensus on encoding
+ // and this should be updated once a DWARF encoding exists for distinct
+ // floating point types of the same size.
Encoding = llvm::dwarf::DW_ATE_float;
break;
}
@@ -660,10 +687,6 @@ static SmallString<256> getUniqueTagTypeName(const TagType *Ty,
if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible())
return FullName;
- // Microsoft Mangler does not have support for mangleCXXRTTIName yet.
- if (CGM.getTarget().getCXXABI().isMicrosoft())
- return FullName;
-
// TODO: This is using the RTTI name. Is there a better way to get
// a unique string for a type?
llvm::raw_svector_ostream Out(FullName);
@@ -817,10 +840,10 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
/*qualified*/ false);
TemplateSpecializationType::PrintTemplateArgumentList(
- OS, Ty->getArgs(), Ty->getNumArgs(),
+ OS, Ty->template_arguments(),
CGM.getContext().getPrintingPolicy());
- TypeAliasDecl *AliasDecl = cast<TypeAliasTemplateDecl>(
+ auto *AliasDecl = cast<TypeAliasTemplateDecl>(
Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl();
SourceLocation Loc = AliasDecl->getLocation();
@@ -842,6 +865,39 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
getDeclContextDescriptor(Ty->getDecl()));
}
+static unsigned getDwarfCC(CallingConv CC) {
+ switch (CC) {
+ case CC_C:
+ // Avoid emitting DW_AT_calling_convention if the C convention was used.
+ return 0;
+
+ case CC_X86StdCall:
+ return llvm::dwarf::DW_CC_BORLAND_stdcall;
+ case CC_X86FastCall:
+ return llvm::dwarf::DW_CC_BORLAND_msfastcall;
+ case CC_X86ThisCall:
+ return llvm::dwarf::DW_CC_BORLAND_thiscall;
+ case CC_X86VectorCall:
+ return llvm::dwarf::DW_CC_LLVM_vectorcall;
+ case CC_X86Pascal:
+ return llvm::dwarf::DW_CC_BORLAND_pascal;
+
+ // FIXME: Create new DW_CC_ codes for these calling conventions.
+ case CC_X86_64Win64:
+ case CC_X86_64SysV:
+ case CC_AAPCS:
+ case CC_AAPCS_VFP:
+ case CC_IntelOclBicc:
+ case CC_SpirFunction:
+ case CC_OpenCLKernel:
+ case CC_Swift:
+ case CC_PreserveMost:
+ case CC_PreserveAll:
+ return 0;
+ }
+ return 0;
+}
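
The observable effect, sketched (attribute spelling assumed): only non-default conventions produce a DW_AT_calling_convention on the subroutine type.

    // void f();                          -> no attribute (CC_C returns 0)
    // void __attribute__((stdcall)) g(); -> DW_CC_BORLAND_stdcall
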
+
llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty,
llvm::DIFile *Unit) {
SmallVector<llvm::Metadata *, 16> EltTys;
@@ -853,15 +909,16 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty,
// otherwise emit it as a variadic function.
if (isa<FunctionNoProtoType>(Ty))
EltTys.push_back(DBuilder.createUnspecifiedParameter());
- else if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(Ty)) {
- for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i)
- EltTys.push_back(getOrCreateType(FPT->getParamType(i), Unit));
+ else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) {
+ for (const QualType &ParamType : FPT->param_types())
+ EltTys.push_back(getOrCreateType(ParamType, Unit));
if (FPT->isVariadic())
EltTys.push_back(DBuilder.createUnspecifiedParameter());
}
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
- return DBuilder.createSubroutineType(EltTypeArray);
+ return DBuilder.createSubroutineType(EltTypeArray, 0,
+ getDwarfCC(Ty->getCallConv()));
}
/// Convert an AccessSpecifier into the corresponding DINode flag.
@@ -890,10 +947,38 @@ static unsigned getAccessFlag(AccessSpecifier Access, const RecordDecl *RD) {
llvm_unreachable("unexpected access enumerator");
}
-llvm::DIType *CGDebugInfo::createFieldType(
- StringRef name, QualType type, uint64_t sizeInBitsOverride,
- SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits,
- llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD) {
+llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
+ llvm::DIScope *RecordTy,
+ const RecordDecl *RD) {
+ StringRef Name = BitFieldDecl->getName();
+ QualType Ty = BitFieldDecl->getType();
+ SourceLocation Loc = BitFieldDecl->getLocation();
+ llvm::DIFile *VUnit = getOrCreateFile(Loc);
+ llvm::DIType *DebugType = getOrCreateType(Ty, VUnit);
+
+ // Get the location for the field.
+ llvm::DIFile *File = getOrCreateFile(Loc);
+ unsigned Line = getLineNumber(Loc);
+
+ const CGBitFieldInfo &BitFieldInfo =
+ CGM.getTypes().getCGRecordLayout(RD).getBitFieldInfo(BitFieldDecl);
+ uint64_t SizeInBits = BitFieldInfo.Size;
+ assert(SizeInBits > 0 && "found named 0-width bitfield");
+ unsigned AlignInBits = CGM.getContext().getTypeAlign(Ty);
+ uint64_t StorageOffsetInBits =
+ CGM.getContext().toBits(BitFieldInfo.StorageOffset);
+ uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset;
+ unsigned Flags = getAccessFlag(BitFieldDecl->getAccess(), RD);
+ return DBuilder.createBitFieldMemberType(
+ RecordTy, Name, File, Line, SizeInBits, AlignInBits, OffsetInBits,
+ StorageOffsetInBits, Flags, DebugType);
+}
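
A worked example of the offset arithmetic (a sketch; assumes both fields land in one 32-bit storage unit at byte offset 0):

    // struct S { unsigned a : 3; unsigned b : 5; };
    // For 'b':
    //   SizeInBits          = 5
    //   StorageOffsetInBits = 0
    //   BitFieldInfo.Offset = 3         // bits consumed by 'a'
    //   OffsetInBits        = 0 + 3 = 3 // what DIBuilder records
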
+
+llvm::DIType *
+CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc,
+ AccessSpecifier AS, uint64_t offsetInBits,
+ llvm::DIFile *tunit, llvm::DIScope *scope,
+ const RecordDecl *RD) {
llvm::DIType *debugType = getOrCreateType(type, tunit);
// Get the location for the field.
@@ -906,9 +991,6 @@ llvm::DIType *CGDebugInfo::createFieldType(
TypeInfo TI = CGM.getContext().getTypeInfo(type);
SizeInBits = TI.Width;
AlignInBits = TI.Align;
-
- if (sizeInBitsOverride)
- SizeInBits = sizeInBitsOverride;
}
unsigned flags = getAccessFlag(AS, RD);
@@ -930,19 +1012,15 @@ void CGDebugInfo::CollectRecordLambdaFields(
I != E; ++I, ++Field, ++fieldno) {
const LambdaCapture &C = *I;
if (C.capturesVariable()) {
+ SourceLocation Loc = C.getLocation();
+ assert(!Field->isBitField() && "lambdas don't have bitfield members!");
VarDecl *V = C.getCapturedVar();
- llvm::DIFile *VUnit = getOrCreateFile(C.getLocation());
StringRef VName = V->getName();
- uint64_t SizeInBitsOverride = 0;
- if (Field->isBitField()) {
- SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext());
- assert(SizeInBitsOverride && "found named 0-width bitfield");
- }
- llvm::DIType *fieldType = createFieldType(
- VName, Field->getType(), SizeInBitsOverride, C.getLocation(),
- Field->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy,
- CXXDecl);
- elements.push_back(fieldType);
+ llvm::DIFile *VUnit = getOrCreateFile(Loc);
+ llvm::DIType *FieldType = createFieldType(
+ VName, Field->getType(), Loc, Field->getAccess(),
+ layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
+ elements.push_back(FieldType);
} else if (C.capturesThis()) {
// TODO: Need to handle 'this' in some way by probably renaming the
// this of the lambda class and having a field member of 'this' or
@@ -952,7 +1030,7 @@ void CGDebugInfo::CollectRecordLambdaFields(
llvm::DIFile *VUnit = getOrCreateFile(f->getLocation());
QualType type = f->getType();
llvm::DIType *fieldType = createFieldType(
- "this", type, 0, f->getLocation(), f->getAccess(),
+ "this", type, f->getLocation(), f->getAccess(),
layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
elements.push_back(fieldType);
@@ -1000,24 +1078,23 @@ void CGDebugInfo::CollectRecordNormalField(
if (name.empty() && !type->isRecordType())
return;
- uint64_t SizeInBitsOverride = 0;
+ llvm::DIType *FieldType;
if (field->isBitField()) {
- SizeInBitsOverride = field->getBitWidthValue(CGM.getContext());
- assert(SizeInBitsOverride && "found named 0-width bitfield");
+ FieldType = createBitFieldType(field, RecordTy, RD);
+ } else {
+ FieldType =
+ createFieldType(name, type, field->getLocation(), field->getAccess(),
+ OffsetInBits, tunit, RecordTy, RD);
}
- llvm::DIType *fieldType =
- createFieldType(name, type, SizeInBitsOverride, field->getLocation(),
- field->getAccess(), OffsetInBits, tunit, RecordTy, RD);
-
- elements.push_back(fieldType);
+ elements.push_back(FieldType);
}
void CGDebugInfo::CollectRecordFields(
const RecordDecl *record, llvm::DIFile *tunit,
SmallVectorImpl<llvm::Metadata *> &elements,
llvm::DICompositeType *RecordTy) {
- const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record);
+ const auto *CXXDecl = dyn_cast<CXXRecordDecl>(record);
if (CXXDecl && CXXDecl->isLambda())
CollectRecordLambdaFields(CXXDecl, elements, RecordTy);
@@ -1031,6 +1108,8 @@ void CGDebugInfo::CollectRecordFields(
// the corresponding declarations in the source program.
for (const auto *I : record->decls())
if (const auto *V = dyn_cast<VarDecl>(I)) {
+ if (V->hasAttr<NoDebugAttr>())
+ continue;
// Reuse the existing static member declaration if one exists
auto MI = StaticDataMemberCache.find(V->getCanonicalDecl());
if (MI != StaticDataMemberCache.end()) {
@@ -1112,13 +1191,14 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
Flags |= llvm::DINode::FlagRValueReference;
- return DBuilder.createSubroutineType(EltTypeArray, Flags);
+ return DBuilder.createSubroutineType(EltTypeArray, Flags,
+ getDwarfCC(Func->getCallConv()));
}
/// isFunctionLocalClass - Return true if CXXRecordDecl is defined
/// inside a function.
static bool isFunctionLocalClass(const CXXRecordDecl *RD) {
- if (const CXXRecordDecl *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext()))
+ if (const auto *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext()))
return isFunctionLocalClass(NRD);
if (isa<FunctionDecl>(RD->getDeclContext()))
return true;
@@ -1136,6 +1216,11 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
// Since a single ctor/dtor corresponds to multiple functions, it doesn't
// make sense to give a single ctor/dtor a linkage name.
StringRef MethodLinkageName;
+ // FIXME: 'isFunctionLocalClass' seems like an arbitrary/unintentional
+ // property to use here. It may've been intended to model "is non-external
+ // type" but misses cases of non-function-local but non-external classes such
+ // as those in anonymous namespaces as well as the reverse - external types
+ // that are function local, such as those in (non-local) inline functions.
if (!IsCtorOrDtor && !isFunctionLocalClass(Method->getParent()))
MethodLinkageName = CGM.getMangledName(Method);
@@ -1151,6 +1236,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
llvm::DIType *ContainingType = nullptr;
unsigned Virtuality = 0;
unsigned VIndex = 0;
+ unsigned Flags = 0;
+ int ThisAdjustment = 0;
if (Method->isVirtual()) {
if (Method->isPure())
@@ -1158,26 +1245,45 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
else
Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual;
- // It doesn't make sense to give a virtual destructor a vtable index,
- // since a single destructor has two entries in the vtable.
- // FIXME: Add proper support for debug info for virtual calls in
- // the Microsoft ABI, where we may use multiple vptrs to make a vftable
- // lookup if we have multiple or virtual inheritance.
- if (!isa<CXXDestructorDecl>(Method) &&
- !CGM.getTarget().getCXXABI().isMicrosoft())
- VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method);
+ if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
+ // It doesn't make sense to give a virtual destructor a vtable index,
+ // since a single destructor has two entries in the vtable.
+ if (!isa<CXXDestructorDecl>(Method))
+ VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method);
+ } else {
+ // Emit MS ABI vftable information. There is only one entry for the
+ // deleting dtor.
+ const auto *DD = dyn_cast<CXXDestructorDecl>(Method);
+ GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method);
+ MicrosoftVTableContext::MethodVFTableLocation ML =
+ CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
+ VIndex = ML.Index;
+
+ // CodeView only records the vftable offset in the class that introduces
+ // the virtual method. This is possible because, unlike Itanium, the MS
+ // C++ ABI does not include all virtual methods from non-primary bases in
+ // the vtable for the most derived class. For example, if C inherits from
+ // A and B, C's primary vftable will not include B's virtual methods.
+ if (Method->begin_overridden_methods() == Method->end_overridden_methods())
+ Flags |= llvm::DINode::FlagIntroducedVirtual;
+
+ // The 'this' adjustment accounts for both the virtual and non-virtual
+ // portions of the adjustment. Presumably the debugger only uses it when
+ // it knows the dynamic type of an object.
+ ThisAdjustment = CGM.getCXXABI()
+ .getVirtualFunctionPrologueThisAdjustment(GD)
+ .getQuantity();
+ }
ContainingType = RecordTy;
}
- unsigned Flags = 0;
if (Method->isImplicit())
Flags |= llvm::DINode::FlagArtificial;
Flags |= getAccessFlag(Method->getAccess(), Method->getParent());
- if (const CXXConstructorDecl *CXXC = dyn_cast<CXXConstructorDecl>(Method)) {
+ if (const auto *CXXC = dyn_cast<CXXConstructorDecl>(Method)) {
if (CXXC->isExplicit())
Flags |= llvm::DINode::FlagExplicit;
- } else if (const CXXConversionDecl *CXXC =
- dyn_cast<CXXConversionDecl>(Method)) {
+ } else if (const auto *CXXC = dyn_cast<CXXConversionDecl>(Method)) {
if (CXXC->isExplicit())
Flags |= llvm::DINode::FlagExplicit;
}
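
A small hierarchy makes the CodeView rule concrete (sketch, MS ABI assumed):

    // struct A { virtual void f(); };
    // struct B { virtual void g(); };
    // struct C : A, B { void g() override; };
    //
    // C's primary vftable covers only A, so B::g occupies a slot in B's
    // vftable: B::g has no overridden methods, so it gets
    // FlagIntroducedVirtual, with VIndex naming that slot. C::g's prologue
    // shifts 'this' from C to the B subobject; that delta is ThisAdjustment.
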
@@ -1191,9 +1297,9 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
llvm::DISubprogram *SP = DBuilder.createMethod(
RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
- MethodTy, /*isLocalToUnit=*/false,
- /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags,
- CGM.getLangOpts().Optimize, TParamsArray.get());
+ MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality,
+ VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize,
+ TParamsArray.get());
SPCache[Method->getCanonicalDecl()].reset(SP);
@@ -1246,7 +1352,7 @@ void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile *Unit,
unsigned BFlags = 0;
uint64_t BaseOffset;
- const CXXRecordDecl *Base =
+ const auto *Base =
cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl());
if (BI.isVirtual()) {
@@ -1334,8 +1440,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
llvm::Constant *V = nullptr;
// Special case member data pointer null values since they're actually -1
// instead of zero.
- if (const MemberPointerType *MPT =
- dyn_cast<MemberPointerType>(T.getTypePtr()))
+ if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr()))
// But treat member function pointers as simple zero integers because
// it's easier than having a special case in LLVM's CodeGen. If LLVM
// CodeGen grows handling for values of non-null member function
@@ -1346,7 +1451,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
if (!V)
V = llvm::ConstantInt::get(CGM.Int8Ty, 0);
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy, cast<llvm::Constant>(V)));
+ TheCU, Name, TTy, V));
} break;
case TemplateArgument::Template:
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
@@ -1367,7 +1472,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
assert(V && "Expression in template argument isn't constant");
llvm::DIType *TTy = getOrCreateType(T, Unit);
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
- TheCU, Name, TTy, cast<llvm::Constant>(V->stripPointerCasts())));
+ TheCU, Name, TTy, V->stripPointerCasts()));
} break;
// And the following should never occur:
case TemplateArgument::TemplateExpansion:
@@ -1446,7 +1551,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy,
SourceLocation Loc) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc));
return T;
}
@@ -1458,22 +1563,17 @@ llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D,
llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D,
SourceLocation Loc) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!D.isNull() && "null type");
llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc));
assert(T && "could not create debug info for type");
- // Composite types with UIDs were already retained by DIBuilder
- // because they are only referenced by name in the IR.
- if (auto *CTy = dyn_cast<llvm::DICompositeType>(T))
- if (!CTy->getIdentifier().empty())
- return T;
RetainedTypes.push_back(D.getAsOpaquePtr());
return T;
}
void CGDebugInfo::completeType(const EnumDecl *ED) {
- if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
QualType Ty = CGM.getContext().getEnumType(ED);
void *TyPtr = Ty.getAsOpaquePtr();
@@ -1486,16 +1586,16 @@ void CGDebugInfo::completeType(const EnumDecl *ED) {
}
void CGDebugInfo::completeType(const RecordDecl *RD) {
- if (DebugKind > CodeGenOptions::LimitedDebugInfo ||
+ if (DebugKind > codegenoptions::LimitedDebugInfo ||
!CGM.getLangOpts().CPlusPlus)
completeRequiredType(RD);
}
void CGDebugInfo::completeRequiredType(const RecordDecl *RD) {
- if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
- if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
+ if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
if (CXXDecl->isDynamicClass())
return;
@@ -1509,7 +1609,7 @@ void CGDebugInfo::completeRequiredType(const RecordDecl *RD) {
}
void CGDebugInfo::completeClassData(const RecordDecl *RD) {
- if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
QualType Ty = CGM.getContext().getRecordType(RD);
void *TyPtr = Ty.getAsOpaquePtr();
@@ -1523,23 +1623,38 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) {
static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I,
CXXRecordDecl::method_iterator End) {
- for (; I != End; ++I)
- if (FunctionDecl *Tmpl = I->getInstantiatedFromMemberFunction())
+ for (CXXMethodDecl *MD : llvm::make_range(I, End))
+ if (FunctionDecl *Tmpl = MD->getInstantiatedFromMemberFunction())
if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() &&
- !I->getMemberSpecializationInfo()->isExplicitSpecialization())
+ !MD->getMemberSpecializationInfo()->isExplicitSpecialization())
return true;
return false;
}
-static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind,
- bool DebugTypeExtRefs,
- const RecordDecl *RD,
+/// Does a type definition exist in an imported clang module?
+static bool isDefinedInClangModule(const RecordDecl *RD) {
+ if (!RD || !RD->isFromASTFile())
+ return false;
+ if (!RD->isExternallyVisible() && RD->getName().empty())
+ return false;
+ if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) {
+ assert(CXXDecl->isCompleteDefinition() && "incomplete record definition");
+ if (CXXDecl->getTemplateSpecializationKind() != TSK_Undeclared)
+ // Make sure the instantiation is actually in a module.
+ if (CXXDecl->field_begin() != CXXDecl->field_end())
+ return CXXDecl->field_begin()->isFromASTFile();
+ }
+
+ return true;
+}
+
+static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind,
+ bool DebugTypeExtRefs, const RecordDecl *RD,
const LangOptions &LangOpts) {
- // Does the type exist in an imported clang module?
- if (DebugTypeExtRefs && RD->isFromASTFile() && RD->getDefinition())
- return true;
+ if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition()))
+ return true;
- if (DebugKind > CodeGenOptions::LimitedDebugInfo)
+ if (DebugKind > codegenoptions::LimitedDebugInfo)
return false;
if (!LangOpts.CPlusPlus)
@@ -1548,7 +1663,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind,
if (!RD->isCompleteDefinitionRequired())
return true;
- const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
+ const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
if (!CXXDecl)
return false;
@@ -1557,8 +1672,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind,
return true;
TemplateSpecializationKind Spec = TSK_Undeclared;
- if (const ClassTemplateSpecializationDecl *SD =
- dyn_cast<ClassTemplateSpecializationDecl>(RD))
+ if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD))
Spec = SD->getSpecializationKind();
if (Spec == TSK_ExplicitInstantiationDeclaration &&
@@ -1600,7 +1714,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) {
if (!D || !D->isCompleteDefinition())
return FwdDecl;
- if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
+ if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
CollectContainingType(CXXDecl, FwdDecl);
// Push the struct on region stack.
@@ -1615,7 +1729,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) {
// gdb tests will depend on a certain ordering at printout. The debug
// information offsets are still correct if we merge them all together
// though.
- const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
+ const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD);
if (CXXDecl) {
CollectCXXBases(CXXDecl, DefUnit, EltTys, FwdDecl);
CollectVTableInfo(CXXDecl, DefUnit, EltTys);
@@ -1676,8 +1790,11 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
if (!ID)
return nullptr;
- // Return a forward declaration if this type was imported from a clang module.
- if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition())
+ // Return a forward declaration if this type was imported from a clang module,
+ // and this is not the compile unit with the implementation of the type (which
+ // may contain hidden ivars).
+ if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition() &&
+ !ID->getImplementation())
return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
ID->getName(),
getDeclContextDescriptor(ID), Unit, 0);
@@ -1739,11 +1856,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod,
bool IsRootModule = M ? !M->Parent : true;
if (CreateSkeletonCU && IsRootModule) {
+ // PCH files don't have a signature field in the control block,
+ // but LLVM detects skeleton CUs by looking for a non-zero DWO id.
+ uint64_t Signature = Mod.getSignature() ? Mod.getSignature() : ~1ULL;
llvm::DIBuilder DIB(CGM.getModule());
DIB.createCompileUnit(TheCU->getSourceLanguage(), Mod.getModuleName(),
Mod.getPath(), TheCU->getProducer(), true,
StringRef(), 0, Mod.getASTFile(),
- llvm::DIBuilder::FullDebug, Mod.getSignature());
+ llvm::DICompileUnit::FullDebug, Signature);
DIB.finalize();
}
llvm::DIModule *Parent =
@@ -1942,7 +2062,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
uint64_t Align;
// FIXME: make getTypeAlign() aware of VLAs and incomplete array types
- if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(Ty)) {
+ if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) {
Size = 0;
Align =
CGM.getContext().getTypeAlign(CGM.getContext().getBaseElementType(VAT));
@@ -1975,7 +2095,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
// int x[0];
// };
int64_t Count = -1; // Count == -1 is an unbounded array.
- if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty))
+ if (const auto *CAT = dyn_cast<ConstantArrayType>(Ty))
Count = CAT->getSize().getZExtValue();
// FIXME: Verify this is right for VLAs.
@@ -2003,12 +2123,35 @@ llvm::DIType *CGDebugInfo::CreateType(const RValueReferenceType *Ty,
llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
llvm::DIFile *U) {
- uint64_t Size =
- !Ty->isIncompleteType() ? CGM.getContext().getTypeSize(Ty) : 0;
+ unsigned Flags = 0;
+ uint64_t Size = 0;
+
+ if (!Ty->isIncompleteType()) {
+ Size = CGM.getContext().getTypeSize(Ty);
+
+ // Set the MS inheritance model. There is no flag for the unspecified model.
+ if (CGM.getTarget().getCXXABI().isMicrosoft()) {
+ switch (Ty->getMostRecentCXXRecordDecl()->getMSInheritanceModel()) {
+ case MSInheritanceAttr::Keyword_single_inheritance:
+ Flags |= llvm::DINode::FlagSingleInheritance;
+ break;
+ case MSInheritanceAttr::Keyword_multiple_inheritance:
+ Flags |= llvm::DINode::FlagMultipleInheritance;
+ break;
+ case MSInheritanceAttr::Keyword_virtual_inheritance:
+ Flags |= llvm::DINode::FlagVirtualInheritance;
+ break;
+ case MSInheritanceAttr::Keyword_unspecified_inheritance:
+ break;
+ }
+ }
+ }
+
llvm::DIType *ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U);
if (Ty->isMemberDataPointerType())
return DBuilder.createMemberPointerType(
- getOrCreateType(Ty->getPointeeType(), U), ClassType, Size);
+ getOrCreateType(Ty->getPointeeType(), U), ClassType, Size, /*Align=*/0,
+ Flags);
const FunctionProtoType *FPT =
Ty->getPointeeType()->getAs<FunctionProtoType>();
@@ -2016,7 +2159,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType(
Ty->getClass(), FPT->getTypeQuals())),
FPT, U),
- ClassType, Size);
+ ClassType, Size, /*Align=*/0, Flags);
}
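
Why the model matters, roughly (MS ABI assumed): the in-memory width of a member pointer grows with the inheritance model, so the debugger needs the model to decode the value.

    // int  Single::*dp;    // single inheritance: one integer offset
    // void (Multi::*fp)(); // multiple inheritance: address + this-adjust
    // void (Virt::*vp)();  // virtual inheritance: adds a vbtable entry
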
llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) {
@@ -2048,13 +2191,23 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
// If this is just a forward declaration, construct an appropriately
// marked node and just return it.
if (isImportedFromModule || !ED->getDefinition()) {
+ // Note that it is possible for enums to be created as part of
+ // their own declcontext. In this case a FwdDecl will be created
+ // twice. This doesn't cause a problem because both FwdDecls are
+ // entered into the ReplaceMap: finalize() will replace the first
+ // FwdDecl with the second and then replace the second with
+ // the complete type.
llvm::DIScope *EDContext = getDeclContextDescriptor(ED);
llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
+ llvm::TempDIScope TmpContext(DBuilder.createReplaceableCompositeType(
+ llvm::dwarf::DW_TAG_enumeration_type, "", TheCU, DefUnit, 0));
+
unsigned Line = getLineNumber(ED->getLocation());
StringRef EDName = ED->getName();
llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType(
llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
0, Size, Align, llvm::DINode::FlagFwdDecl, FullName);
+
ReplaceMap.emplace_back(
std::piecewise_construct, std::make_tuple(Ty),
std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
@@ -2168,7 +2321,7 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) {
void CGDebugInfo::completeTemplateDefinition(
const ClassTemplateSpecializationDecl &SD) {
- if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
completeClassData(&SD);
@@ -2220,8 +2373,12 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) {
// option.
FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager());
if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) {
+ // This is a (sub-)module.
auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
return getOrCreateModuleRef(Info, /*SkeletonCU=*/false);
+ } else {
+ // This is the precompiled header being built.
+ return getOrCreateModuleRef(PCHDescriptor, /*SkeletonCU=*/false);
}
}
@@ -2369,11 +2526,34 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, 0,
FullName);
+ // Elements of composite types usually have back references to the type,
+ // creating uniquing cycles. Distinct nodes are more efficient.
+ switch (RealDecl->getTag()) {
+ default:
+ llvm_unreachable("invalid composite type tag");
+
+ case llvm::dwarf::DW_TAG_array_type:
+ case llvm::dwarf::DW_TAG_enumeration_type:
+ // Array elements and most enumeration elements don't have back references,
+ // so they don't tend to be involved in uniquing cycles and there is some
+ // chance of merging them when linking together two modules. Only make
+ // them distinct if they are ODR-uniqued.
+ if (FullName.empty())
+ break;
+
+ case llvm::dwarf::DW_TAG_structure_type:
+ case llvm::dwarf::DW_TAG_union_type:
+ case llvm::dwarf::DW_TAG_class_type:
+ // Immediately resolve to a distinct node.
+ RealDecl =
+ llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl));
+ break;
+ }
+
RegionMap[Ty->getDecl()].reset(RealDecl);
TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl);
- if (const ClassTemplateSpecializationDecl *TSpecial =
- dyn_cast<ClassTemplateSpecializationDecl>(RD))
+ if (const auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD))
DBuilder.replaceArrays(RealDecl, llvm::DINodeArray(),
CollectCXXTemplateParams(TSpecial, DefUnit));
return RealDecl;
@@ -2420,7 +2600,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
llvm::DIScope *&FDContext,
llvm::DINodeArray &TParamsArray,
unsigned &Flags) {
- const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
Name = getFunctionName(FD);
// Use mangled name as linkage name for C/C++ functions.
if (FD->hasPrototype()) {
@@ -2430,13 +2610,12 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
// No need to replicate the linkage name if it isn't different from the
// subprogram name, no need to have it at all unless coverage is enabled or
// debug is set to more than just line tables.
- if (LinkageName == Name ||
- (!CGM.getCodeGenOpts().EmitGcovArcs &&
- !CGM.getCodeGenOpts().EmitGcovNotes &&
- DebugKind <= CodeGenOptions::DebugLineTablesOnly))
+ if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs &&
+ !CGM.getCodeGenOpts().EmitGcovNotes &&
+ DebugKind <= codegenoptions::DebugLineTablesOnly))
LinkageName = StringRef();
- if (DebugKind >= CodeGenOptions::LimitedDebugInfo) {
+ if (DebugKind >= codegenoptions::LimitedDebugInfo) {
if (const NamespaceDecl *NSDecl =
dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext()))
FDContext = getOrCreateNameSpace(NSDecl);
@@ -2513,15 +2692,15 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) {
SmallVector<QualType, 16> ArgTypes;
for (const ParmVarDecl *Parm: FD->parameters())
ArgTypes.push_back(Parm->getType());
- QualType FnType =
- CGM.getContext().getFunctionType(FD->getReturnType(), ArgTypes,
- FunctionProtoType::ExtProtoInfo());
+ CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
+ QualType FnType = CGM.getContext().getFunctionType(
+ FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(),
/* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
TParamsArray.get(), getFunctionDeclaration(FD));
- const FunctionDecl *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
+ const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
std::make_tuple(CanonDecl),
std::make_tuple(SP));
@@ -2553,7 +2732,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
// we would otherwise do to get a type for a pointee. (forward declarations in
// limited debug info, full definitions (if the type definition is available)
// in unlimited debug info)
- if (const TypeDecl *TD = dyn_cast<TypeDecl>(D))
+ if (const auto *TD = dyn_cast<TypeDecl>(D))
return getOrCreateType(CGM.getContext().getTypeDeclType(TD),
getOrCreateFile(TD->getLocation()));
auto I = DeclCache.find(D->getCanonicalDecl());
@@ -2563,7 +2742,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
// No definition for now. Emit a forward definition that might be
// merged with a potential upcoming definition.
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+ if (const auto *FD = dyn_cast<FunctionDecl>(D))
return getFunctionForwardDeclaration(FD);
else if (const auto *VD = dyn_cast<VarDecl>(D))
return getGlobalVariableForwardDeclaration(VD);
@@ -2572,10 +2751,10 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
}
llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) {
- if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly)
return nullptr;
- const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+ const auto *FD = dyn_cast<FunctionDecl>(D);
if (!FD)
return nullptr;
@@ -2584,8 +2763,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) {
auto MI = SPCache.find(FD->getCanonicalDecl());
if (MI == SPCache.end()) {
- if (const CXXMethodDecl *MD =
- dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) {
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) {
return CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()),
cast<llvm::DICompositeType>(S));
}
@@ -2612,14 +2790,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) {
llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
QualType FnType,
llvm::DIFile *F) {
- if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly)
// Create fake but valid subroutine type. Otherwise -verify would fail, and
// subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields.
return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None));
- if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D))
+ if (const auto *Method = dyn_cast<CXXMethodDecl>(D))
return getOrCreateMethodType(Method, F);
- if (const ObjCMethodDecl *OMethod = dyn_cast<ObjCMethodDecl>(D)) {
+
+ const auto *FTy = FnType->getAs<FunctionType>();
+ CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C;
+
+ if (const auto *OMethod = dyn_cast<ObjCMethodDecl>(D)) {
// Add "self" and "_cmd"
SmallVector<llvm::Metadata *, 16> Elts;
@@ -2645,28 +2827,28 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
Elts.push_back(DBuilder.createArtificialType(
getOrCreateType(CGM.getContext().getObjCSelType(), F)));
// Get rest of the arguments.
- for (const auto *PI : OMethod->params())
+ for (const auto *PI : OMethod->parameters())
Elts.push_back(getOrCreateType(PI->getType(), F));
// Variadic methods need a special marker at the end of the type list.
if (OMethod->isVariadic())
Elts.push_back(DBuilder.createUnspecifiedParameter());
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
- return DBuilder.createSubroutineType(EltTypeArray);
+ return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC));
}
// Handle variadic function types; they need an additional
// unspecified parameter.
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+ if (const auto *FD = dyn_cast<FunctionDecl>(D))
if (FD->isVariadic()) {
SmallVector<llvm::Metadata *, 16> EltTys;
EltTys.push_back(getOrCreateType(FD->getReturnType(), F));
- if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType))
- for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i)
- EltTys.push_back(getOrCreateType(FPT->getParamType(i), F));
+ if (const auto *FPT = dyn_cast<FunctionProtoType>(FnType))
+ for (QualType ParamType : FPT->param_types())
+ EltTys.push_back(getOrCreateType(ParamType, F));
EltTys.push_back(DBuilder.createUnspecifiedParameter());
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
- return DBuilder.createSubroutineType(EltTypeArray);
+ return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC));
}
return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F));
@@ -2691,7 +2873,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (!HasDecl) {
// Use llvm function name.
LinkageName = Fn->getName();
- } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ } else if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// If there is a subprogram for this function available then use it.
auto FI = SPCache.find(FD->getCanonicalDecl());
if (FI != SPCache.end()) {
@@ -2704,7 +2886,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
}
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext,
TParamsArray, Flags);
- } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) {
+ } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) {
Name = getObjCMethodName(OMD);
Flags |= llvm::DINode::FlagPrototyped;
} else {
@@ -2712,7 +2894,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
Name = Fn->getName();
Flags |= llvm::DINode::FlagPrototyped;
}
- if (!Name.empty() && Name[0] == '\01')
+ if (Name.startswith("\01"))
Name = Name.substr(1);
if (!HasDecl || D->isImplicit()) {
@@ -2731,7 +2913,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
// are emitted as CU level entities by the backend.
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(),
+ getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
TParamsArray.get(), getFunctionDeclaration(D));
Fn->setSubprogram(SP);
@@ -2739,7 +2921,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
// code for the initialization of globals. Do not record these decls
// as they will overwrite the actual VarDecl Decl in the cache.
if (HasDecl && isa<FunctionDecl>(D))
- DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(SP));
+ DeclCache[D->getCanonicalDecl()].reset(SP);
// Push the function onto the lexical block stack.
LexicalBlockStack.emplace_back(SP);
@@ -2765,7 +2947,7 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
// If there is a DISubprogram for this function available then use it.
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext,
TParamsArray, Flags);
- } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) {
+ } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) {
Name = getObjCMethodName(OMD);
Flags |= llvm::DINode::FlagPrototyped;
} else {
@@ -2783,11 +2965,11 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = 0;
- DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit),
- false /*internalLinkage*/, true /*definition*/,
- ScopeLine, Flags, CGM.getLangOpts().Optimize,
- TParamsArray.get(), getFunctionDeclaration(D));
+ DBuilder.retainType(DBuilder.createFunction(
+ FDContext, Name, LinkageName, Unit, LineNo,
+ getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/,
+ false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ TParamsArray.get(), getFunctionDeclaration(D)));
}
void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
@@ -2820,7 +3002,7 @@ void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder,
Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back()));
- if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
// Create a new lexical block and push it on the stack.
@@ -2834,7 +3016,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder,
// Provide an entry in the line table for the end of the block.
EmitLocation(Builder, Loc);
- if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+ if (DebugKind <= codegenoptions::DebugLineTablesOnly)
return;
LexicalBlockStack.pop_back();
@@ -2896,8 +3078,7 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
CGM.getTarget().getPointerAlign(0))) {
CharUnits FieldOffsetInBytes =
CGM.getContext().toCharUnitsFromBits(FieldOffset);
- CharUnits AlignedOffsetInBytes =
- FieldOffsetInBytes.RoundUpToAlignment(Align);
+ CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align);
CharUnits NumPaddingBytes = AlignedOffsetInBytes - FieldOffsetInBytes;
if (NumPaddingBytes.isPositive()) {
@@ -2930,8 +3111,10 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
llvm::Optional<unsigned> ArgNo,
CGBuilderTy &Builder) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
+ if (VD->hasAttr<NoDebugAttr>())
+ return;
bool Unwritten =
VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) &&
@@ -2969,7 +3152,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
// otherwise it is 'self' or 'this'.
if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1)
Flags |= llvm::DINode::FlagObjectPointer;
- if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage))
+ if (auto *Arg = dyn_cast<llvm::Argument>(Storage))
if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() &&
!VD->getType()->isPointerType())
Expr.push_back(llvm::dwarf::DW_OP_deref);
@@ -3005,10 +3188,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
return;
} else if (isa<VariableArrayType>(VD->getType()))
Expr.push_back(llvm::dwarf::DW_OP_deref);
- } else if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) {
+ } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) {
// If VD is an anonymous union then Storage represents value for
// all union fields.
- const RecordDecl *RD = cast<RecordDecl>(RT->getDecl());
+ const auto *RD = cast<RecordDecl>(RT->getDecl());
if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
// GDB has trouble finding local variables in anonymous unions, so we emit
// artificial local variables for each of the members.
@@ -3056,7 +3239,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage,
void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD,
llvm::Value *Storage,
CGBuilderTy &Builder) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
EmitDeclare(VD, Storage, llvm::None, Builder);
}
@@ -3071,11 +3254,13 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy,
void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder,
const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
if (Builder.GetInsertBlock() == nullptr)
return;
+ if (VD->hasAttr<NoDebugAttr>())
+ return;
bool isByRef = VD->hasAttr<BlocksAttr>();
@@ -3139,7 +3324,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI,
unsigned ArgNo,
CGBuilderTy &Builder) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
EmitDeclare(VD, AI, ArgNo, Builder);
}
@@ -3158,7 +3343,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
unsigned ArgNo,
llvm::Value *LocalAddr,
CGBuilderTy &Builder) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
ASTContext &C = CGM.getContext();
const BlockDecl *blockDecl = block.getBlockDecl();
@@ -3175,25 +3360,25 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
CGM.getDataLayout().getStructLayout(block.StructureType);
SmallVector<llvm::Metadata *, 16> fields;
- fields.push_back(createFieldType("__isa", C.VoidPtrTy, 0, loc, AS_public,
+ fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public,
blockLayout->getElementOffsetInBits(0),
tunit, tunit));
- fields.push_back(createFieldType("__flags", C.IntTy, 0, loc, AS_public,
+ fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public,
blockLayout->getElementOffsetInBits(1),
tunit, tunit));
- fields.push_back(createFieldType("__reserved", C.IntTy, 0, loc, AS_public,
+ fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public,
blockLayout->getElementOffsetInBits(2),
tunit, tunit));
auto *FnTy = block.getBlockExpr()->getFunctionType();
auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar());
- fields.push_back(createFieldType("__FuncPtr", FnPtrType, 0, loc, AS_public,
+ fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public,
blockLayout->getElementOffsetInBits(3),
tunit, tunit));
fields.push_back(createFieldType(
"__descriptor", C.getPointerType(block.NeedsCopyDispose
? C.getBlockDescriptorExtendedType()
: C.getBlockDescriptorType()),
- 0, loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit));
+ loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit));
// We want to sort the captures by offset, not because DWARF
// requires this, but because we're paranoid about debuggers.
@@ -3227,19 +3412,22 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
// Sort by offset.
llvm::array_pod_sort(chunks.begin(), chunks.end());
- for (SmallVectorImpl<BlockLayoutChunk>::iterator i = chunks.begin(),
- e = chunks.end();
- i != e; ++i) {
- uint64_t offsetInBits = i->OffsetInBits;
- const BlockDecl::Capture *capture = i->Capture;
+ for (const BlockLayoutChunk &Chunk : chunks) {
+ uint64_t offsetInBits = Chunk.OffsetInBits;
+ const BlockDecl::Capture *capture = Chunk.Capture;
// If we have a null capture, this must be the C++ 'this' capture.
if (!capture) {
- const CXXMethodDecl *method =
- cast<CXXMethodDecl>(blockDecl->getNonClosureContext());
- QualType type = method->getThisType(C);
+ QualType type;
+ if (auto *Method =
+ cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext()))
+ type = Method->getThisType(C);
+ else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent()))
+ type = QualType(RDecl->getTypeForDecl(), 0);
+ else
+ llvm_unreachable("unexpected block declcontext");
- fields.push_back(createFieldType("this", type, 0, loc, AS_public,
+ fields.push_back(createFieldType("this", type, loc, AS_public,
offsetInBits, tunit, tunit));
continue;
}
@@ -3259,7 +3447,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
DBuilder.createMemberType(tunit, name, tunit, line, PtrInfo.Width,
PtrInfo.Align, offsetInBits, 0, fieldType);
} else {
- fieldType = createFieldType(name, variable->getType(), 0, loc, AS_public,
+ fieldType = createFieldType(name, variable->getType(), loc, AS_public,
offsetInBits, tunit, tunit);
}
fields.push_back(fieldType);
@@ -3328,8 +3516,7 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls(
// Ignore unnamed fields, but recurse into anonymous records.
if (FieldName.empty()) {
- const RecordType *RT = dyn_cast<RecordType>(Field->getType());
- if (RT)
+ if (const auto *RT = dyn_cast<RecordType>(Field->getType()))
GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName,
Var, DContext);
continue;
@@ -3337,14 +3524,16 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls(
// Use VarDecl's Tag, Scope and Line number.
GV = DBuilder.createGlobalVariable(DContext, FieldName, LinkageName, Unit,
LineNo, FieldTy,
- Var->hasInternalLinkage(), Var, nullptr);
+ Var->hasLocalLinkage(), Var, nullptr);
}
return GV;
}
void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
const VarDecl *D) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
+ if (D->hasAttr<NoDebugAttr>())
+ return;
// Create global variable debug descriptor.
llvm::DIFile *Unit = nullptr;
llvm::DIScope *DContext = nullptr;
@@ -3368,21 +3557,23 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
} else {
GV = DBuilder.createGlobalVariable(
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
- Var->hasInternalLinkage(), Var,
+ Var->hasLocalLinkage(), Var,
getOrCreateStaticDataMemberDeclarationOrNull(D));
}
- DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(GV));
+ DeclCache[D->getCanonicalDecl()].reset(GV);
}
void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD,
llvm::Constant *Init) {
- assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
+ assert(DebugKind >= codegenoptions::LimitedDebugInfo);
+ if (VD->hasAttr<NoDebugAttr>())
+ return;
// Create the descriptor for the variable.
llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
StringRef Name = VD->getName();
llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit);
- if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(VD)) {
- const EnumDecl *ED = cast<EnumDecl>(ECD->getDeclContext());
+ if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) {
+ const auto *ED = cast<EnumDecl>(ECD->getDeclContext());
assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?");
Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit);
}
@@ -3400,6 +3591,9 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD,
auto *RD = cast<RecordDecl>(VarD->getDeclContext());
getDeclContextDescriptor(VarD);
// Ensure that the type is retained even though it's otherwise unreferenced.
+ //
+ // FIXME: This is probably unnecessary, since Ty should reference RD
+ // through its scope.
RetainedTypes.push_back(
CGM.getContext().getRecordType(RD).getAsOpaquePtr());
return;
@@ -3423,7 +3617,7 @@ llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
}
void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) {
- if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
+ if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
const NamespaceDecl *NSDecl = UD.getNominatedNamespace();
if (!NSDecl->isAnonymousNamespace() ||
@@ -3436,13 +3630,23 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) {
}
void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
- if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
+ if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
assert(UD.shadow_size() &&
"We shouldn't be codegening an invalid UsingDecl containing no decls");
// Emitting one decl is sufficient - debuggers can detect that this is an
// overloaded name & provide lookup for all the overloads.
const UsingShadowDecl &USD = **UD.shadow_begin();
+
+ // FIXME: Skip functions with undeduced auto return type for now since we
+ // don't currently have the plumbing for separate declarations & definitions
+ // of free functions and mismatched types (auto in the declaration, concrete
+ // return type in the definition).
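+ // A sketch of the case being skipped (hypothetical names):
+ //   namespace N { auto f(); }  // declared; return type not yet deduced
+ //   using N::f;                // getDeducedType() is null here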
+ if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl()))
+ if (const auto *AT =
+ FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType())
+ if (AT->getDeducedType().isNull())
+ return;
if (llvm::DINode *Target =
getDeclarationOrDefinition(USD.getUnderlyingDecl()))
DBuilder.createImportedDeclaration(
@@ -3451,6 +3655,8 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
}
void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
+ if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB)
+ return;
if (Module *M = ID.getImportedModule()) {
auto Info = ExternalASTSource::ASTSourceDescriptor(*M);
DBuilder.createImportedDeclaration(
@@ -3462,13 +3668,13 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
llvm::DIImportedEntity *
CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
- if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
+ if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return nullptr;
auto &VH = NamespaceAliasCache[&NA];
if (VH)
return cast<llvm::DIImportedEntity>(VH);
llvm::DIImportedEntity *R;
- if (const NamespaceAliasDecl *Underlying =
+ if (const auto *Underlying =
dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
// This could cache & dedup here rather than relying on metadata deduping.
R = DBuilder.createImportedDeclaration(
@@ -3557,7 +3763,7 @@ void CGDebugInfo::finalize() {
}
void CGDebugInfo::EmitExplicitCastType(QualType Ty) {
- if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
+ if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h
index a68dd33fa5fe..366dd81ac812 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h
@@ -16,6 +16,7 @@
#include "CGBuilder.h"
#include "clang/AST/Expr.h"
+#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Type.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Frontend/CodeGenOptions.h"
@@ -52,28 +53,20 @@ class CGDebugInfo {
friend class ApplyDebugLocation;
friend class SaveAndRestoreLocation;
CodeGenModule &CGM;
- const CodeGenOptions::DebugInfoKind DebugKind;
+ const codegenoptions::DebugInfoKind DebugKind;
bool DebugTypeExtRefs;
llvm::DIBuilder DBuilder;
llvm::DICompileUnit *TheCU = nullptr;
ModuleMap *ClangModuleMap = nullptr;
+ ExternalASTSource::ASTSourceDescriptor PCHDescriptor;
SourceLocation CurLoc;
llvm::DIType *VTablePtrType = nullptr;
llvm::DIType *ClassTy = nullptr;
llvm::DICompositeType *ObjTy = nullptr;
llvm::DIType *SelTy = nullptr;
- llvm::DIType *OCLImage1dDITy = nullptr;
- llvm::DIType *OCLImage1dArrayDITy = nullptr;
- llvm::DIType *OCLImage1dBufferDITy = nullptr;
- llvm::DIType *OCLImage2dDITy = nullptr;
- llvm::DIType *OCLImage2dArrayDITy = nullptr;
- llvm::DIType *OCLImage2dDepthDITy = nullptr;
- llvm::DIType *OCLImage2dArrayDepthDITy = nullptr;
- llvm::DIType *OCLImage2dMSAADITy = nullptr;
- llvm::DIType *OCLImage2dArrayMSAADITy = nullptr;
- llvm::DIType *OCLImage2dMSAADepthDITy = nullptr;
- llvm::DIType *OCLImage2dArrayMSAADepthDITy = nullptr;
- llvm::DIType *OCLImage3dDITy = nullptr;
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ llvm::DIType *SingletonId = nullptr;
+#include "clang/Basic/OpenCLImageTypes.def"
llvm::DIType *OCLEventDITy = nullptr;
llvm::DIType *OCLClkEventDITy = nullptr;
llvm::DIType *OCLQueueDITy = nullptr;
@@ -107,7 +100,7 @@ class CGDebugInfo {
/// compilation.
std::vector<std::pair<const TagType *, llvm::TrackingMDRef>> ReplaceMap;
- /// Cache of replaceable forward declarartions (functions and
+ /// Cache of replaceable forward declarations (functions and
/// variables) to RAUW at the end of compilation.
std::vector<std::pair<const DeclaratorDecl *, llvm::TrackingMDRef>>
FwdDeclReplaceMap;
@@ -239,11 +232,16 @@ class CGDebugInfo {
llvm::DIFile *F);
llvm::DIType *createFieldType(StringRef name, QualType type,
- uint64_t sizeInBitsOverride, SourceLocation loc,
- AccessSpecifier AS, uint64_t offsetInBits,
- llvm::DIFile *tunit, llvm::DIScope *scope,
+ SourceLocation loc, AccessSpecifier AS,
+ uint64_t offsetInBits, llvm::DIFile *tunit,
+ llvm::DIScope *scope,
const RecordDecl *RD = nullptr);
+ /// Create new bit field member.
+ llvm::DIType *createBitFieldType(const FieldDecl *BitFieldDecl,
+ llvm::DIScope *RecordTy,
+ const RecordDecl *RD);
+
/// Helpers for collecting fields of a record.
/// @{
void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl,
@@ -275,6 +273,8 @@ public:
void finalize();
+ /// Module debugging: Support for building PCMs.
+ /// @{
/// Set the main CU's DwoId field to \p Signature.
void setDwoId(uint64_t Signature);
@@ -283,6 +283,14 @@ public:
/// the module of origin of each Decl.
void setModuleMap(ModuleMap &MMap) { ClangModuleMap = &MMap; }
+ /// When generating debug information for a clang module or
+ /// precompiled header, this descriptor identifies the PCH or
+ /// module of origin of the Decls being emitted.
+ void setPCHDescriptor(ExternalASTSource::ASTSourceDescriptor PCH) {
+ PCHDescriptor = PCH;
+ }
+ /// @}
+
/// Update the current source location. If \arg loc is invalid it is
/// ignored.
void setLocation(SourceLocation Loc);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
index b78e80d79ddd..89407cd70c3d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp
@@ -16,11 +16,13 @@
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGOpenCLRuntime.h"
+#include "CGOpenMPRuntime.h"
#include "CodeGenModule.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
@@ -29,10 +31,10 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
+
using namespace clang;
using namespace CodeGen;
-
void CodeGenFunction::EmitDecl(const Decl &D) {
switch (D.getKind()) {
case Decl::BuiltinTemplate:
@@ -71,6 +73,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::ObjCImplementation:
case Decl::ObjCProperty:
case Decl::ObjCCompatibleAlias:
+ case Decl::PragmaComment:
+ case Decl::PragmaDetectMismatch:
case Decl::AccessSpec:
case Decl::LinkageSpec:
case Decl::ObjCPropertyImpl:
@@ -81,6 +85,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::Captured:
case Decl::ClassScopeFunctionSpecialization:
case Decl::UsingShadow:
+ case Decl::ConstructorUsingShadow:
case Decl::ObjCTypeParam:
llvm_unreachable("Declaration should not be in declstmts!");
case Decl::Function: // void X();
@@ -92,6 +97,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::Label: // __label__ x;
case Decl::Import:
case Decl::OMPThreadPrivate:
+ case Decl::OMPCapturedExpr:
case Decl::Empty:
// None of these decls require codegen support.
return;
@@ -115,6 +121,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
return EmitVarDecl(VD);
}
+ case Decl::OMPDeclareReduction:
+ return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this);
+
case Decl::Typedef: // typedef int X;
case Decl::TypeAlias: { // using X = int; [C++0x]
const TypedefNameDecl &TD = cast<TypedefNameDecl>(D);
@@ -363,8 +372,15 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
llvm::GlobalVariable *var =
cast<llvm::GlobalVariable>(addr->stripPointerCasts());
+
+ // CUDA's local and local static __shared__ variables should not
+ // have any non-empty initializers. This is ensured by Sema.
+ // Whatever initializer such a variable may have when it gets here is
+ // a no-op and should not be emitted.
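+ // Illustrative sketch of the kind of variable this covers:
+ //   __global__ void k() { __shared__ int tile[64]; /* ... */ }
+ // Any initializer Sema allowed on 'tile' is empty, so it is skipped here.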
+ bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ D.hasAttr<CUDASharedAttr>();
// If this value has an initializer, emit it.
- if (D.getInit())
+ if (D.getInit() && !isCudaSharedVar)
var = AddInitializerToStaticVarDecl(D, var);
var->setAlignment(alignment.getQuantity());
@@ -394,7 +410,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
// Emit global variable debug descriptor for static vars.
CGDebugInfo *DI = getDebugInfo();
if (DI &&
- CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) {
+ CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) {
DI->setLocation(D.getLocation());
DI->EmitGlobalVariable(var, &D);
}
@@ -513,20 +529,7 @@ namespace {
CGF.EmitCall(FnInfo, CleanupFn, ReturnValueSlot(), Args);
}
};
-
- /// A cleanup to call @llvm.lifetime.end.
- class CallLifetimeEnd final : public EHScopeStack::Cleanup {
- llvm::Value *Addr;
- llvm::Value *Size;
- public:
- CallLifetimeEnd(Address addr, llvm::Value *size)
- : Addr(addr.getPointer()), Size(size) {}
-
- void Emit(CodeGenFunction &CGF, Flags flags) override {
- CGF.EmitLifetimeEnd(Size, Addr);
- }
- };
-}
+} // end anonymous namespace
/// EmitAutoVarWithLifetime - Does the setup required for an automatic
/// variable with lifetime.
@@ -644,7 +647,6 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF,
}
init = castExpr->getSubExpr();
- continue;
}
return false;
}
@@ -665,10 +667,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
EmitStoreThroughLValue(RValue::get(value), lvalue, true);
return;
}
-
+
if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init))
init = DIE->getExpr();
-
+
// If we're emitting a value with lifetime, we have to do the
// initialization *before* we leave the cleanup scopes.
if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) {
@@ -715,8 +717,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
llvm_unreachable("present but none");
case Qualifiers::OCL_ExplicitNone:
- // nothing to do
- value = EmitScalarExpr(init);
+ value = EmitARCUnsafeUnretainedScalarExpr(init);
break;
case Qualifiers::OCL_Strong: {
@@ -819,7 +820,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init,
}
return true;
}
-
+
if (llvm::ConstantDataSequential *CDS =
dyn_cast<llvm::ConstantDataSequential>(Init)) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
@@ -848,9 +849,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile);
return;
}
-
- if (llvm::ConstantDataSequential *CDS =
- dyn_cast<llvm::ConstantDataSequential>(Init)) {
+
+ if (llvm::ConstantDataSequential *CDS =
+ dyn_cast<llvm::ConstantDataSequential>(Init)) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
llvm::Constant *Elt = CDS->getElementAsConstant(i);
@@ -877,7 +878,6 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc,
}
}
-
/// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset
/// plus some stores to initialize a local variable instead of using a memcpy
/// from a constant global. It is beneficial to use memset if the global is all
@@ -907,18 +907,29 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) {
EmitAutoVarCleanups(emission);
}
+/// shouldEmitLifetimeMarkers - Decide whether we need to emit lifetime
+/// markers.
+static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
+ const LangOptions &LangOpts) {
+ // Asan uses markers for use-after-scope checks.
+ if (CGOpts.SanitizeAddressUseAfterScope)
+ return true;
+
+ // Disable lifetime markers in msan builds.
+ // FIXME: Remove this when msan works with lifetime markers.
+ if (LangOpts.Sanitize.has(SanitizerKind::Memory))
+ return false;
+
+ // For now, only in optimized builds.
+ return CGOpts.OptimizationLevel != 0;
+}
+
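+// Note: when the helper above returns true, each local's storage is
+// bracketed with the LLVM intrinsics; a sketch of the resulting IR:
+//   call void @llvm.lifetime.start(i64 <size>, i8* <addr>)
+//   ...uses of the local...
+//   call void @llvm.lifetime.end(i64 <size>, i8* <addr>)
+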
/// Emit a lifetime.begin marker if some criteria are satisfied.
/// \return a pointer to the temporary size Value if a marker was emitted, null
/// otherwise
llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
llvm::Value *Addr) {
- // For now, only in optimized builds.
- if (CGM.getCodeGenOpts().OptimizationLevel == 0)
- return nullptr;
-
- // Disable lifetime markers in msan builds.
- // FIXME: Remove this when msan works with lifetime markers.
- if (getLangOpts().Sanitize.has(SanitizerKind::Memory))
+ if (!shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), getLangOpts()))
return nullptr;
llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
@@ -1086,8 +1097,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Emit debug info for local var declaration.
if (HaveInsertPoint())
if (CGDebugInfo *DI = getDebugInfo()) {
- if (CGM.getCodeGenOpts().getDebugInfo()
- >= CodeGenOptions::LimitedDebugInfo) {
+ if (CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::LimitedDebugInfo) {
DI->setLocation(D.getLocation());
DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder);
}
@@ -1163,6 +1174,7 @@ bool CodeGenFunction::isTrivialInitializer(const Expr *Init) {
return false;
}
+
void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
assert(emission.Variable && "emission was not valid!");
@@ -1250,7 +1262,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
llvm::GlobalValue::PrivateLinkage,
constant, Name);
GV->setAlignment(Loc.getAlignment().getQuantity());
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
Address SrcPtr = Address(GV, Loc.getAlignment());
if (SrcPtr.getType() != BP)
@@ -1381,13 +1393,10 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
// Make sure we call @llvm.lifetime.end. This needs to happen
// *last*, so the cleanup needs to be pushed *first*.
- if (emission.useLifetimeMarkers()) {
- EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup,
+ if (emission.useLifetimeMarkers())
+ EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker,
emission.getAllocatedAddress(),
emission.getSizeForLifetimeMarkers());
- EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin());
- cleanup.setLifetimeMarker();
- }
// Check the type for a cleanup.
if (QualType::DestructionKind dtorKind = D.getType().isDestructedType())
@@ -1662,7 +1671,7 @@ namespace {
ElementType, ElementAlign, Destroyer);
}
};
-}
+} // end anonymous namespace
/// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy
/// already-constructed elements of the given array. The cleanup
@@ -1731,7 +1740,7 @@ namespace {
CGF.EmitARCRelease(Param, Precise);
}
};
-}
+} // end anonymous namespace
/// Emit an alloca (or GlobalValue depending on target)
/// for the specified parameter and set up LocalDeclMap.
@@ -1852,8 +1861,8 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// Emit debug info for param declaration.
if (CGDebugInfo *DI = getDebugInfo()) {
- if (CGM.getCodeGenOpts().getDebugInfo()
- >= CodeGenOptions::LimitedDebugInfo) {
+ if (CGM.getCodeGenOpts().getDebugInfo() >=
+ codegenoptions::LimitedDebugInfo) {
DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder);
}
}
@@ -1861,3 +1870,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
if (D.hasAttr<AnnotateAttr>())
EmitVarAnnotations(&D, DeclPtr.getPointer());
}
+
+void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
+ CodeGenFunction *CGF) {
+ if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed()))
+ return;
+ getOpenMPRuntime().emitUserDefinedReduction(CGF, D);
+}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
index adba73168797..89d142e44b49 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -86,13 +86,21 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *function;
llvm::Constant *argument;
- // Special-case non-array C++ destructors, where there's a function
- // with the right signature that we can just call.
- const CXXRecordDecl *record = nullptr;
- if (dtorKind == QualType::DK_cxx_destructor &&
- (record = type->getAsCXXRecordDecl())) {
- assert(!record->hasTrivialDestructor());
- CXXDestructorDecl *dtor = record->getDestructor();
+ // Special-case non-array C++ destructors, if they have the right signature.
+ // Under some ABIs, destructors return this instead of void, and cannot be
+ // passed directly to __cxa_atexit if the target does not allow this mismatch.
+ const CXXRecordDecl *Record = type->getAsCXXRecordDecl();
+ bool CanRegisterDestructor =
+ Record && (!CGM.getCXXABI().HasThisReturn(
+ GlobalDecl(Record->getDestructor(), Dtor_Complete)) ||
+ CGM.getCXXABI().canCallMismatchedFunctionType());
+ // If __cxa_atexit is disabled via a flag, a different helper function is
+ // generated elsewhere which uses atexit instead, and it takes the destructor
+ // directly.
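+ // For reference, the Itanium ABI registration hook referred to above is
+ // (declared by the C++ runtime, not emitted here):
+ //   extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);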
+ bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit;
+ if (Record && (CanRegisterDestructor || UsingExternalHelper)) {
+ assert(!Record->hasTrivialDestructor());
+ CXXDestructorDecl *dtor = Record->getDestructor();
function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete);
argument = llvm::ConstantExpr::getBitCast(
@@ -304,6 +312,17 @@ void
CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
llvm::GlobalVariable *Addr,
bool PerformInit) {
+
+ // According to E.2.3.1 in the CUDA 7.5 Programming Guide: __device__,
+ // __constant__ and __shared__ variables defined at namespace scope
+ // that are of class type cannot have a non-empty constructor. All
+ // the checks have been done in Sema by now. Whatever initializers
+ // are allowed are empty and we just need to ignore them here.
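+ // Illustrative sketch of a declaration this early return covers:
+ //   __device__ SomeClass obj;  // namespace scope; dynamic init is dropped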
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+ D->hasAttr<CUDASharedAttr>()))
+ return;
+
// Check if we've already initialized this decl.
auto I = DelayedCXXInitPosition.find(D);
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
@@ -587,8 +606,8 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
getContext().VoidPtrTy);
args.push_back(&dst);
- const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false);
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args);
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, "__cxx_global_array_dtor", FI, VD->getLocation());
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
index fce2e7581962..4a7dc4205e09 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp
@@ -686,8 +686,10 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() {
assert(EHStack.requiresLandingPad());
assert(!EHStack.empty());
- // If exceptions are disabled, there are usually no landingpads. However, when
- // SEH is enabled, functions using SEH still get landingpads.
+ // If exceptions are disabled and SEH is not in use, then there is no invoke
+ // destination. SEH "works" even if exceptions are off. In practice, this
+ // means that C++ destructors and other EH cleanups don't run, which is
+ // consistent with MSVC's behavior.
const LangOptions &LO = CGM.getLangOpts();
if (!LO.Exceptions) {
if (!LO.Borland && !LO.MicrosoftExt)
@@ -1326,11 +1328,13 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() {
TerminateHandler = createBasicBlock("terminate.handler");
Builder.SetInsertPoint(TerminateHandler);
llvm::Value *Exn = nullptr;
+ SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(
+ CurrentFuncletPad);
if (EHPersonality::get(*this).usesFuncletPads()) {
llvm::Value *ParentPad = CurrentFuncletPad;
if (!ParentPad)
ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext());
- Builder.CreateCleanupPad(ParentPad);
+ CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad);
} else {
if (getLangOpts().CPlusPlus)
Exn = getExceptionFromSlot();
@@ -1422,12 +1426,8 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup {
Args.add(RValue::get(FP), ArgTys[1]);
// Arrange a two-arg function info and type.
- FunctionProtoType::ExtProtoInfo EPI;
- const auto *FPT = cast<FunctionProtoType>(
- Context.getFunctionType(Context.VoidTy, ArgTys, EPI));
const CGFunctionInfo &FnInfo =
- CGM.getTypes().arrangeFreeFunctionCall(Args, FPT,
- /*chainCall=*/false);
+ CGM.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, Args);
CGF.EmitCall(FnInfo, OutlinedFinally, ReturnValueSlot(), Args);
}
@@ -1623,14 +1623,13 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
SmallString<128> Name;
{
llvm::raw_svector_ostream OS(Name);
- const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl;
- const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl);
- assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH");
+ const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent;
+ assert(ParentSEHFn && "No CurSEHParent!");
MangleContext &Mangler = CGM.getCXXABI().getMangleContext();
if (IsFilter)
- Mangler.mangleSEHFilterExpression(Parent, OS);
+ Mangler.mangleSEHFilterExpression(ParentSEHFn, OS);
else
- Mangler.mangleSEHFinallyBlock(Parent, OS);
+ Mangler.mangleSEHFinallyBlock(ParentSEHFn, OS);
}
FunctionArgList Args;
@@ -1656,8 +1655,8 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy;
llvm::Function *ParentFn = ParentCGF.CurFn;
- const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
- RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false);
+ const CGFunctionInfo &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Function *Fn = llvm::Function::Create(
@@ -1677,6 +1676,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart());
+ CurSEHParent = ParentCGF.CurSEHParent;
CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn);
EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter);
@@ -1708,12 +1708,6 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
const Stmt *FinallyBlock = Finally.getBlock();
startOutlinedSEHHelper(ParentCGF, false, FinallyBlock);
- // Mark finally block calls as nounwind and noinline to make LLVM's job a
- // little easier.
- // FIXME: Remove these restrictions in the future.
- CurFn->addFnAttr(llvm::Attribute::NoUnwind);
- CurFn->addFnAttr(llvm::Attribute::NoInline);
-
// Emit the original filter expression, convert to i32, and return.
EmitStmt(FinallyBlock);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
index 507ce3d7d0ce..5f3b290d8eb1 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
#include "CGCXXABI.h"
#include "CGCall.h"
+#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -32,6 +33,8 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/SanitizerStats.h"
using namespace clang;
using namespace CodeGen;
@@ -65,8 +68,6 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
/// block.
llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
const Twine &Name) {
- if (!Builder.isNamePreserving())
- return new llvm::AllocaInst(Ty, nullptr, "", AllocaInsertPt);
return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt);
}
@@ -361,9 +362,16 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
ConvertTypeForMem(E->getType())
->getPointerTo(Object.getAddressSpace())),
Object.getAlignment());
- // We should not have emitted the initializer for this temporary as a
- // constant.
- assert(!Var->hasInitializer());
+
+ // createReferenceTemporary will promote the temporary to a global with a
+ // constant initializer if it can. It can only do this to a value of
+ // ARC-manageable type if the value is global and therefore "immune" to
+ // ref-counting operations. Therefore we have no need to emit either a
+ // dynamic initialization or a cleanup and we can just return the address
+ // of the temporary.
+ if (Var->hasInitializer())
+ return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl);
+
Var->setInitializer(CGM.EmitNullConstant(E->getType()));
}
LValue RefTempDst = MakeAddrLValue(Object, M->getType(),
@@ -416,6 +424,23 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true);
}
} else {
+ switch (M->getStorageDuration()) {
+ case SD_Automatic:
+ case SD_FullExpression:
+ if (auto *Size = EmitLifetimeStart(
+ CGM.getDataLayout().getTypeAllocSize(Object.getElementType()),
+ Object.getPointer())) {
+ if (M->getStorageDuration() == SD_Automatic)
+ pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
+ Object, Size);
+ else
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object,
+ Size);
+ }
+ break;
+ default:
+ break;
+ }
EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true);
}
pushTemporaryCleanup(*this, M, E, Object);
@@ -577,7 +602,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
if (Checks.size() > 0) {
llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(Loc),
+ EmitCheckSourceLocation(Loc),
EmitCheckTypeDescriptor(Ty),
llvm::ConstantInt::get(SizeTy, AlignVal),
llvm::ConstantInt::get(Int8Ty, TCK)
@@ -824,7 +849,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
getNaturalPointeeTypeAlignment(E->getType(), Source));
}
- if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
+ if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) &&
+ CE->getCastKind() == CK_BitCast) {
if (auto PT = E->getType()->getAs<PointerType>())
EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(),
/*MayBeNull=*/true,
@@ -1265,10 +1291,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
}
// Atomic operations have to be done on integral types.
- if (Ty->isAtomicType() || typeIsSuitableForInlineAtomic(Ty, Volatile)) {
- LValue lvalue =
+ LValue AtomicLValue =
LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo);
- return EmitAtomicLoad(lvalue, Loc).getScalarVal();
+ if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) {
+ return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
}
llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
@@ -1376,12 +1402,11 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
Value = EmitToMemory(Value, Ty);
+ LValue AtomicLValue =
+ LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo);
if (Ty->isAtomicType() ||
- (!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) {
- EmitAtomicStore(RValue::get(Value),
- LValue::MakeAddr(Addr, Ty, getContext(),
- AlignSource, TBAAInfo),
- isInit);
+ (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) {
+ EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit);
return;
}
@@ -1733,8 +1758,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
unsigned NumSrcElts = VTy->getNumElements();
- unsigned NumDstElts =
- cast<llvm::VectorType>(Vec->getType())->getNumElements();
+ unsigned NumDstElts = Vec->getType()->getVectorNumElements();
if (NumDstElts == NumSrcElts) {
// Use shuffle vector if the src and destination have the same number of
// elements and restore the vector mask since it is on the side it will be
@@ -1947,6 +1971,21 @@ LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr,
return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source);
}
+Address CodeGenFunction::EmitLoadOfPointer(Address Ptr,
+ const PointerType *PtrTy,
+ AlignmentSource *Source) {
+ llvm::Value *Addr = Builder.CreateLoad(Ptr);
+ return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source,
+ /*forPointeeType=*/true));
+}
+
+LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr,
+ const PointerType *PtrTy) {
+ AlignmentSource Source;
+ Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source);
+ return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source);
+}
+
static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
const Expr *E, const VarDecl *VD) {
QualType T = E->getType();
@@ -2066,12 +2105,11 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
if (auto *FD = LambdaCaptureFields.lookup(VD))
return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
else if (CapturedStmtInfo) {
- auto it = LocalDeclMap.find(VD);
- if (it != LocalDeclMap.end()) {
- if (auto RefTy = VD->getType()->getAs<ReferenceType>()) {
- return EmitLoadOfReferenceLValue(it->second, RefTy);
- }
- return MakeAddrLValue(it->second, T);
+ auto I = LocalDeclMap.find(VD);
+ if (I != LocalDeclMap.end()) {
+ if (auto RefTy = VD->getType()->getAs<ReferenceType>())
+ return EmitLoadOfReferenceLValue(I->second, RefTy);
+ return MakeAddrLValue(I->second, T);
}
LValue CapLVal =
EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
@@ -2210,13 +2248,15 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
return LV;
}
- assert(E->getSubExpr()->getType()->isAnyComplexType());
+ QualType T = ExprTy->castAs<ComplexType>()->getElementType();
Address Component =
(E->getOpcode() == UO_Real
? emitAddrOfRealComponent(LV.getAddress(), LV.getType())
: emitAddrOfImagComponent(LV.getAddress(), LV.getType()));
- return MakeAddrLValue(Component, ExprTy, LV.getAlignmentSource());
+ LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource());
+ ElemLV.getQuals().addQualifiers(LV.getQuals());
+ return ElemLV;
}
case UO_PreInc:
case UO_PreDec: {
@@ -2302,7 +2342,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), Descriptor->getType(),
/*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, Descriptor);
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(GV);
// Remember the descriptor for this type.
@@ -2352,7 +2392,33 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) {
PresumedLoc PLoc = getContext().getSourceManager().getPresumedLoc(Loc);
if (PLoc.isValid()) {
- auto FilenameGV = CGM.GetAddrOfConstantCString(PLoc.getFilename(), ".src");
+ StringRef FilenameString = PLoc.getFilename();
+
+ int PathComponentsToStrip =
+ CGM.getCodeGenOpts().EmitCheckPathComponentsToStrip;
+ if (PathComponentsToStrip < 0) {
+ assert(PathComponentsToStrip != INT_MIN);
+ int PathComponentsToKeep = -PathComponentsToStrip;
+ auto I = llvm::sys::path::rbegin(FilenameString);
+ auto E = llvm::sys::path::rend(FilenameString);
+ while (I != E && --PathComponentsToKeep)
+ ++I;
+
+ FilenameString = FilenameString.substr(I - E);
+ } else if (PathComponentsToStrip > 0) {
+ auto I = llvm::sys::path::begin(FilenameString);
+ auto E = llvm::sys::path::end(FilenameString);
+ while (I != E && PathComponentsToStrip--)
+ ++I;
+
+ if (I != E)
+ FilenameString =
+ FilenameString.substr(I - llvm::sys::path::begin(FilenameString));
+ else
+ FilenameString = llvm::sys::path::filename(FilenameString);
+ }
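+ // e.g. with 'a/b/c/file.c': a strip value of 1 keeps 'b/c/file.c', while
+ // a value of -2 keeps only the last two components, 'c/file.c'.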
+
+ auto FilenameGV = CGM.GetAddrOfConstantCString(FilenameString, ".src");
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(
cast<llvm::GlobalVariable>(FilenameGV.getPointer()));
Filename = FilenameGV.getPointer();
@@ -2483,24 +2549,26 @@ void CodeGenFunction::EmitCheck(
Branch->setMetadata(llvm::LLVMContext::MD_prof, Node);
EmitBlock(Handlers);
- // Emit handler arguments and create handler function type.
- llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
- auto *InfoPtr =
- new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false,
- llvm::GlobalVariable::PrivateLinkage, Info);
- InfoPtr->setUnnamedAddr(true);
- CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
-
+ // Handler functions take an i8* pointing to the (handler-specific) static
+ // information block, followed by a sequence of intptr_t arguments
+ // representing operand values.
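+ // A typical runtime handler therefore has a shape like (illustrative,
+ // the names are placeholders):
+ //   void __ubsan_handle_<check>(<Check>Data *Data, uintptr_t Val, ...);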
SmallVector<llvm::Value *, 4> Args;
SmallVector<llvm::Type *, 4> ArgTypes;
Args.reserve(DynamicArgs.size() + 1);
ArgTypes.reserve(DynamicArgs.size() + 1);
- // Handler functions take an i8* pointing to the (handler-specific) static
- // information block, followed by a sequence of intptr_t arguments
- // representing operand values.
- Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy));
- ArgTypes.push_back(Int8PtrTy);
+ // Emit handler arguments and create handler function type.
+ if (!StaticArgs.empty()) {
+ llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
+ auto *InfoPtr =
+ new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false,
+ llvm::GlobalVariable::PrivateLinkage, Info);
+ InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
+ Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy));
+ ArgTypes.push_back(Int8PtrTy);
+ }
+
for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) {
Args.push_back(EmitCheckValue(DynamicArgs[i]));
ArgTypes.push_back(IntPtrTy);
@@ -2532,10 +2600,9 @@ void CodeGenFunction::EmitCheck(
EmitBlock(Cont);
}
-void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond,
- llvm::ConstantInt *TypeId,
- llvm::Value *Ptr) {
- auto &Ctx = getLLVMContext();
+void CodeGenFunction::EmitCfiSlowPathCheck(
+ SanitizerMask Kind, llvm::Value *Cond, llvm::ConstantInt *TypeId,
+ llvm::Value *Ptr, ArrayRef<llvm::Constant *> StaticArgs) {
llvm::BasicBlock *Cont = createBasicBlock("cfi.cont");
llvm::BasicBlock *CheckBB = createBasicBlock("cfi.slowpath");
@@ -2547,19 +2614,122 @@ void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond,
EmitBlock(CheckBB);
- llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction(
- "__cfi_slowpath",
- llvm::FunctionType::get(
- llvm::Type::getVoidTy(Ctx),
- {llvm::Type::getInt64Ty(Ctx),
- llvm::PointerType::getUnqual(llvm::Type::getInt8Ty(Ctx))},
- false));
- llvm::CallInst *CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr});
+ bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind);
+
+ llvm::CallInst *CheckCall;
+ if (WithDiag) {
+ llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
+ auto *InfoPtr =
+ new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false,
+ llvm::GlobalVariable::PrivateLinkage, Info);
+ InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr);
+
+ llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction(
+ "__cfi_slowpath_diag",
+ llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy},
+ false));
+ CheckCall = Builder.CreateCall(
+ SlowPathDiagFn,
+ {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)});
+ } else {
+ llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction(
+ "__cfi_slowpath",
+ llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false));
+ CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr});
+ }
+
CheckCall->setDoesNotThrow();
EmitBlock(Cont);
}
+// This function is basically a switch over the CFI failure kind, which is
+// extracted from CFICheckFailData (1st function argument). Each case is either
+// llvm.trap or a call to one of the two runtime handlers, based on
+// -fsanitize-trap and -fsanitize-recover settings. The default case (an
+// invalid failure kind) traps, though that should never happen. CFICheckFailData
+// can be nullptr if the calling module has -fsanitize-trap behavior for this
+// check kind; in this case __cfi_check_fail traps as well.
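+// The function emitted below accordingly has the signature:
+//   extern "C" void __cfi_check_fail(void *Data, void *Addr);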
+void CodeGenFunction::EmitCfiCheckFail() {
+ SanitizerScope SanScope(this);
+ FunctionArgList Args;
+ ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr,
+ getContext().VoidPtrTy);
+ ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr,
+ getContext().VoidPtrTy);
+ Args.push_back(&ArgData);
+ Args.push_back(&ArgAddr);
+
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, Args);
+
+ llvm::Function *F = llvm::Function::Create(
+ llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy}, false),
+ llvm::GlobalValue::WeakODRLinkage, "__cfi_check_fail", &CGM.getModule());
+ F->setVisibility(llvm::GlobalValue::HiddenVisibility);
+
+ StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args,
+ SourceLocation());
+
+ llvm::Value *Data =
+ EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false,
+ CGM.getContext().VoidPtrTy, ArgData.getLocation());
+ llvm::Value *Addr =
+ EmitLoadOfScalar(GetAddrOfLocalVar(&ArgAddr), /*Volatile=*/false,
+ CGM.getContext().VoidPtrTy, ArgAddr.getLocation());
+
+ // Data == nullptr means the calling module has trap behavior for this check.
+ llvm::Value *DataIsNotNullPtr =
+ Builder.CreateICmpNE(Data, llvm::ConstantPointerNull::get(Int8PtrTy));
+ EmitTrapCheck(DataIsNotNullPtr);
+
+ llvm::StructType *SourceLocationTy =
+ llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr);
+ llvm::StructType *CfiCheckFailDataTy =
+ llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr);
+
+ llvm::Value *V = Builder.CreateConstGEP2_32(
+ CfiCheckFailDataTy,
+ Builder.CreatePointerCast(Data, CfiCheckFailDataTy->getPointerTo(0)), 0,
+ 0);
+ Address CheckKindAddr(V, getIntAlign());
+ llvm::Value *CheckKind = Builder.CreateLoad(CheckKindAddr);
+
+ llvm::Value *AllVtables = llvm::MetadataAsValue::get(
+ CGM.getLLVMContext(),
+ llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
+ llvm::Value *ValidVtable = Builder.CreateZExt(
+ Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test),
+ {Addr, AllVtables}),
+ IntPtrTy);
+
+ const std::pair<int, SanitizerMask> CheckKinds[] = {
+ {CFITCK_VCall, SanitizerKind::CFIVCall},
+ {CFITCK_NVCall, SanitizerKind::CFINVCall},
+ {CFITCK_DerivedCast, SanitizerKind::CFIDerivedCast},
+ {CFITCK_UnrelatedCast, SanitizerKind::CFIUnrelatedCast},
+ {CFITCK_ICall, SanitizerKind::CFIICall}};
+
+ SmallVector<std::pair<llvm::Value *, SanitizerMask>, 5> Checks;
+ for (auto CheckKindMaskPair : CheckKinds) {
+ int Kind = CheckKindMaskPair.first;
+ SanitizerMask Mask = CheckKindMaskPair.second;
+ llvm::Value *Cond =
+ Builder.CreateICmpNE(CheckKind, llvm::ConstantInt::get(Int8Ty, Kind));
+ if (CGM.getLangOpts().Sanitize.has(Mask))
+ EmitCheck(std::make_pair(Cond, Mask), "cfi_check_fail", {},
+ {Data, Addr, ValidVtable});
+ else
+ EmitTrapCheck(Cond);
+ }
+
+ FinishFunction();
+ // The only reference to this function will be created during LTO link.
+ // Make sure it survives until then.
+ CGM.addUsedGlobal(F);
+}
+
void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) {
llvm::BasicBlock *Cont = createBasicBlock("cont");
@@ -2827,22 +2997,55 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
return LV;
}
+static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base,
+ AlignmentSource &AlignSource,
+ QualType BaseTy, QualType ElTy,
+ bool IsLowerBound) {
+ LValue BaseLVal;
+ if (auto *ASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParenImpCasts())) {
+ BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound);
+ if (BaseTy->isArrayType()) {
+ Address Addr = BaseLVal.getAddress();
+ AlignSource = BaseLVal.getAlignmentSource();
+
+ // If the array type was an incomplete type, we need to make sure
+ // the decay ends up being the right type.
+ llvm::Type *NewTy = CGF.ConvertType(BaseTy);
+ Addr = CGF.Builder.CreateElementBitCast(Addr, NewTy);
+
+ // Note that VLA pointers are always decayed, so we don't need to do
+ // anything here.
+ if (!BaseTy->isVariableArrayType()) {
+ assert(isa<llvm::ArrayType>(Addr.getElementType()) &&
+ "Expected pointer to array");
+ Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(),
+ "arraydecay");
+ }
+
+ return CGF.Builder.CreateElementBitCast(Addr,
+ CGF.ConvertTypeForMem(ElTy));
+ }
+ CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource);
+ return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align);
+ }
+ return CGF.EmitPointerWithAlignment(Base, &AlignSource);
+}
+
LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
bool IsLowerBound) {
- LValue Base;
+ QualType BaseTy;
if (auto *ASE =
dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts()))
- Base = EmitOMPArraySectionExpr(ASE, IsLowerBound);
+ BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE);
else
- Base = EmitLValue(E->getBase());
- QualType BaseTy = Base.getType();
- llvm::Value *Idx = nullptr;
+ BaseTy = E->getBase()->getType();
QualType ResultExprTy;
if (auto *AT = getContext().getAsArrayType(BaseTy))
ResultExprTy = AT->getElementType();
else
ResultExprTy = BaseTy->getPointeeType();
- if (IsLowerBound || (!IsLowerBound && E->getColonLoc().isInvalid())) {
+ llvm::Value *Idx = nullptr;
+ if (IsLowerBound || E->getColonLoc().isInvalid()) {
// Requesting the lower or upper bound without a provided length and
// without a ':' for the default length implies length = 1.
// Idx = LowerBound ?: 0;
@@ -2853,9 +3056,9 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
} else
Idx = llvm::ConstantInt::getNullValue(IntPtrTy);
} else {
- // Try to emit length or lower bound as constant. If this is possible, 1 is
- // subtracted from constant length or lower bound. Otherwise, emit LLVM IR
- // (LB + Len) - 1.
+ // Try to emit length or lower bound as constant. If this is possible, 1
+ // is subtracted from constant length or lower bound. Otherwise, emit LLVM
+ // IR (LB + Len) - 1.
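+ // e.g. for the section 'a[2:4]' (LB = 2, Len = 4), the last element
+ // accessed is a[2 + 4 - 1], i.e. a[5].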
auto &C = CGM.getContext();
auto *Length = E->getLength();
llvm::APSInt ConstLength;
@@ -2901,12 +3104,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
Idx = llvm::ConstantInt::get(IntPtrTy, ConstLength + ConstLowerBound);
} else {
// Idx = ArraySize - 1;
- if (auto *VAT = C.getAsVariableArrayType(BaseTy)) {
+ QualType ArrayTy = BaseTy->isPointerType()
+ ? E->getBase()->IgnoreParenImpCasts()->getType()
+ : BaseTy;
+ if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) {
Length = VAT->getSizeExpr();
if (Length->isIntegerConstantExpr(ConstLength, C))
Length = nullptr;
} else {
- auto *CAT = C.getAsConstantArrayType(BaseTy);
+ auto *CAT = C.getAsConstantArrayType(ArrayTy);
ConstLength = CAT->getSize();
}
if (Length) {
@@ -2925,52 +3131,56 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
}
assert(Idx);
- llvm::Value *EltPtr;
- QualType FixedSizeEltType = ResultExprTy;
+ Address EltPtr = Address::invalid();
+ AlignmentSource AlignSource;
if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) {
+ // The base must be a pointer, which is not an aggregate. Emit
+ // it. It needs to be emitted first in case it's what captures
+ // the VLA bounds.
+ Address Base =
+ emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy,
+ VLA->getElementType(), IsLowerBound);
// The element count here is the total number of non-VLA elements.
- llvm::Value *numElements = getVLASize(VLA).first;
- FixedSizeEltType = getFixedSizeElementType(getContext(), VLA);
+ llvm::Value *NumElements = getVLASize(VLA).first;
// Effectively, the multiply by the VLA size is part of the GEP.
// GEP indexes are signed, and scaling an index isn't permitted to
// signed-overflow, so we use the same semantics for our explicit
// multiply. We suppress this if overflow is not undefined behavior.
- if (getLangOpts().isSignedOverflowDefined()) {
- Idx = Builder.CreateMul(Idx, numElements);
- EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx");
- } else {
- Idx = Builder.CreateNSWMul(Idx, numElements);
- EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx");
- }
- } else if (BaseTy->isConstantArrayType()) {
- llvm::Value *ArrayPtr = Base.getPointer();
- llvm::Value *Zero = llvm::ConstantInt::getNullValue(IntPtrTy);
- llvm::Value *Args[] = {Zero, Idx};
-
if (getLangOpts().isSignedOverflowDefined())
- EltPtr = Builder.CreateGEP(ArrayPtr, Args, "arrayidx");
+ Idx = Builder.CreateMul(Idx, NumElements);
else
- EltPtr = Builder.CreateInBoundsGEP(ArrayPtr, Args, "arrayidx");
- } else {
- // The base must be a pointer, which is not an aggregate. Emit it.
- if (getLangOpts().isSignedOverflowDefined())
- EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx");
+ Idx = Builder.CreateNSWMul(Idx, NumElements);
+ EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(),
+ !getLangOpts().isSignedOverflowDefined());
+ } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
+ // If this is A[i] where A is an array, the frontend will have decayed the
+ // base to be an ArrayToPointerDecay implicit cast. While correct, it is
+ // inefficient at -O0 to emit a "gep A, 0, 0" when codegen'ing it, then a
+ // "gep x, i" here. Emit one "gep A, 0, i".
+ assert(Array->getType()->isArrayType() &&
+ "Array to pointer decay must have array source type!");
+ LValue ArrayLV;
+ // For simple multidimensional array indexing, set the 'accessed' flag for
+ // better bounds-checking of the base expression.
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Array))
+ ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true);
else
- EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx");
- }
-
- CharUnits EltAlign =
- Base.getAlignment().alignmentOfArrayElement(
- getContext().getTypeSizeInChars(FixedSizeEltType));
-
- // Limit the alignment to that of the result type.
- LValue LV = MakeAddrLValue(Address(EltPtr, EltAlign), ResultExprTy,
- Base.getAlignmentSource());
+ ArrayLV = EmitLValue(Array);
- LV.getQuals().setAddressSpace(BaseTy.getAddressSpace());
+ // Propagate the alignment from the array itself to the result.
+ EltPtr = emitArraySubscriptGEP(
+ *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
+ ResultExprTy, !getLangOpts().isSignedOverflowDefined());
+ AlignSource = ArrayLV.getAlignmentSource();
+ } else {
+ Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource,
+ BaseTy, ResultExprTy, IsLowerBound);
+ EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy,
+ !getLangOpts().isSignedOverflowDefined());
+ }
- return LV;
+ return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource);
}
LValue CodeGenFunction::
@@ -3508,6 +3718,10 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV,
case TEK_Aggregate:
return FieldLV.asAggregateRValue();
case TEK_Scalar:
+ // This routine is used to load fields one-by-one to perform a copy, so
+ // don't load reference fields.
+ if (FD->getType()->isReferenceType())
+ return RValue::get(FieldLV.getPointer());
return EmitLoadOfLValue(FieldLV, Loc);
}
llvm_unreachable("bad evaluation kind");
@@ -3851,25 +4065,28 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee,
if (SanOpts.has(SanitizerKind::CFIICall) &&
(!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
SanitizerScope SanScope(this);
+ EmitSanitizerStatReport(llvm::SanStat_CFI_ICall);
llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0));
- llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD);
+ llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD);
llvm::Value *CastedCallee = Builder.CreateBitCast(Callee, Int8PtrTy);
- llvm::Value *BitSetTest =
- Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test),
- {CastedCallee, BitSetName});
+ llvm::Value *TypeTest = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedCallee, TypeId});
- auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD);
- if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && TypeId) {
- EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedCallee);
+ auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD);
+ llvm::Constant *StaticData[] = {
+ llvm::ConstantInt::get(Int8Ty, CFITCK_ICall),
+ EmitCheckSourceLocation(E->getLocStart()),
+ EmitCheckTypeDescriptor(QualType(FnType, 0)),
+ };
+ if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) {
+ EmitCfiSlowPathCheck(SanitizerKind::CFIICall, TypeTest, CrossDsoTypeId,
+ CastedCallee, StaticData);
} else {
- llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(E->getLocStart()),
- EmitCheckTypeDescriptor(QualType(FnType, 0)),
- };
- EmitCheck(std::make_pair(BitSetTest, SanitizerKind::CFIICall),
- "cfi_bad_icall", StaticData, CastedCallee);
+ EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIICall),
+ "cfi_check_fail", StaticData,
+ {CastedCallee, llvm::UndefValue::get(IntPtrTy)});
}
}
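Conceptually, the cfi-icall check emitted above asks, at each indirect call site, whether the callee address carries the type identity the call site expects, and reports failure instead of making the call on a mismatch. The sketch below is a standalone model of that idea only: the address-to-id map and the id value 42 are invented for illustration and stand in for the llvm.type.test metadata, not for the real mechanism.

#include <cstdio>
#include <cstdlib>
#include <map>

// Hypothetical registry pairing function addresses with a type id.
static std::map<void *, unsigned> gFuncTypeIds;

static int add(int a, int b) { return a + b; }

int main() {
  using Fn = int (*)(int, int);
  gFuncTypeIds[(void *)&add] = 42;  // 42 models the id for "int(int, int)"

  Fn callee = &add;
  // The check: the callee's recorded id must match the id expected here.
  auto it = gFuncTypeIds.find((void *)callee);
  if (it == gFuncTypeIds.end() || it->second != 42) {
    std::fprintf(stderr, "cfi check failed\n");
    std::abort();
  }
  return callee(1, 2) == 3 ? 0 : 1;
}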
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
index a4547a9982be..6d18843591f3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp
@@ -175,6 +175,7 @@ public:
}
void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E);
void VisitCXXConstructExpr(const CXXConstructExpr *E);
+ void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
void VisitLambdaExpr(LambdaExpr *E);
void VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E);
void VisitExprWithCleanups(ExprWithCleanups *E);
@@ -967,12 +968,9 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
Address ArgValue = Address::invalid();
Address ArgPtr = CGF.EmitVAArg(VE, ArgValue);
+ // If EmitVAArg fails, emit an error.
if (!ArgPtr.isValid()) {
- // If EmitVAArg fails, we fall back to the LLVM instruction.
- llvm::Value *Val = Builder.CreateVAArg(ArgValue.getPointer(),
- CGF.ConvertType(VE->getType()));
- if (!Dest.isIgnored())
- Builder.CreateStore(Val, Dest.getAddress());
+ CGF.ErrorUnsupported(VE, "aggregate va_arg expression");
return;
}
@@ -1001,6 +999,14 @@ AggExprEmitter::VisitCXXConstructExpr(const CXXConstructExpr *E) {
CGF.EmitCXXConstructExpr(E, Slot);
}
+void AggExprEmitter::VisitCXXInheritedCtorInitExpr(
+ const CXXInheritedCtorInitExpr *E) {
+ AggValueSlot Slot = EnsureSlot(E->getType());
+ CGF.EmitInheritedCXXConstructorCall(
+ E->getConstructor(), E->constructsVBase(), Slot.getAddress(),
+ E->inheritedFromVBase(), E);
+}
+
void
AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) {
AggValueSlot Slot = EnsureSlot(E->getType());
@@ -1174,6 +1180,38 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
unsigned NumInitElements = E->getNumInits();
RecordDecl *record = E->getType()->castAs<RecordType>()->getDecl();
+ // We'll need to enter cleanup scopes in case any of the element
+ // initializers throws an exception.
+ SmallVector<EHScopeStack::stable_iterator, 16> cleanups;
+ llvm::Instruction *cleanupDominator = nullptr;
+
+ unsigned curInitIndex = 0;
+
+ // Emit initialization of base classes.
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) {
+ assert(E->getNumInits() >= CXXRD->getNumBases() &&
+ "missing initializer for base class");
+ for (auto &Base : CXXRD->bases()) {
+ assert(!Base.isVirtual() && "should not see vbases here");
+ auto *BaseRD = Base.getType()->getAsCXXRecordDecl();
+ Address V = CGF.GetAddressOfDirectBaseInCompleteClass(
+ Dest.getAddress(), CXXRD, BaseRD,
+ /*isBaseVirtual*/ false);
+ AggValueSlot AggSlot =
+ AggValueSlot::forAddr(V, Qualifiers(),
+ AggValueSlot::IsDestructed,
+ AggValueSlot::DoesNotNeedGCBarriers,
+ AggValueSlot::IsNotAliased);
+ CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot);
+
+ if (QualType::DestructionKind dtorKind =
+ Base.getType().isDestructedType()) {
+ CGF.pushDestroy(dtorKind, V, Base.getType());
+ cleanups.push_back(CGF.EHStack.stable_begin());
+ }
+ }
+ }
+
// Prepare a 'this' for CXXDefaultInitExprs.
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress());
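The base-initialization loop above is what lets IRGen lower C++1z aggregate initialization of base classes from an InitListExpr: the leading initializers fill the base subobjects before the field walk starts. A minimal source-level example that exercises it (compiles with -std=c++17):

struct Base { int x; };
struct Derived : Base { int y; };  // still an aggregate in C++17

int main() {
  // {1} initializes the Base subobject, 2 initializes the field y;
  // the base is emitted via GetAddressOfDirectBaseInCompleteClass first.
  Derived d{{1}, 2};
  return d.x + d.y == 3 ? 0 : 1;
}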
@@ -1207,14 +1245,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
return;
}
- // We'll need to enter cleanup scopes in case any of the member
- // initializers throw an exception.
- SmallVector<EHScopeStack::stable_iterator, 16> cleanups;
- llvm::Instruction *cleanupDominator = nullptr;
-
// Here we iterate over the fields; this makes it simpler to both
// default-initialize fields and skip over unnamed fields.
- unsigned curInitIndex = 0;
for (const auto *field : record->fields()) {
// We're done once we hit the flexible array member.
if (field->getType()->isIncompleteArrayType())
@@ -1320,6 +1352,10 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
CharUnits NumNonZeroBytes = CharUnits::Zero();
unsigned ILEElement = 0;
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(SD))
+ while (ILEElement != CXXRD->getNumBases())
+ NumNonZeroBytes +=
+ GetNumNonZeroBytesInInit(ILE->getInit(ILEElement++), CGF);
for (const auto *Field : SD->fields()) {
// We're done once we hit the flexible array member or run out of
// InitListExpr elements.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
index 604cde76a7b1..eec2aceb88a2 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp
@@ -24,10 +24,11 @@
using namespace clang;
using namespace CodeGen;
-static RequiredArgs commonEmitCXXMemberOrOperatorCall(
- CodeGenFunction &CGF, const CXXMethodDecl *MD, llvm::Value *Callee,
- ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam,
- QualType ImplicitParamTy, const CallExpr *CE, CallArgList &Args) {
+static RequiredArgs
+commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD,
+ llvm::Value *This, llvm::Value *ImplicitParam,
+ QualType ImplicitParamTy, const CallExpr *CE,
+ CallArgList &Args) {
assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) ||
isa<CXXOperatorCallExpr>(CE));
assert(MD->isInstance() &&
@@ -53,7 +54,7 @@ static RequiredArgs commonEmitCXXMemberOrOperatorCall(
}
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
- RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size());
+ RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD);
// And the rest of the call args.
if (CE) {
@@ -76,21 +77,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
CallArgList Args;
RequiredArgs required = commonEmitCXXMemberOrOperatorCall(
- *this, MD, Callee, ReturnValue, This, ImplicitParam, ImplicitParamTy, CE,
- Args);
+ *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args);
return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required),
Callee, ReturnValue, Args, MD);
}
-RValue CodeGenFunction::EmitCXXStructorCall(
- const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue,
- llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy,
- const CallExpr *CE, StructorType Type) {
+RValue CodeGenFunction::EmitCXXDestructorCall(
+ const CXXDestructorDecl *DD, llvm::Value *Callee, llvm::Value *This,
+ llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE,
+ StructorType Type) {
CallArgList Args;
- commonEmitCXXMemberOrOperatorCall(*this, MD, Callee, ReturnValue, This,
- ImplicitParam, ImplicitParamTy, CE, Args);
- return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(MD, Type),
- Callee, ReturnValue, Args, MD);
+ commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam,
+ ImplicitParamTy, CE, Args);
+ return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type),
+ Callee, ReturnValueSlot(), Args, DD);
}
static CXXRecordDecl *getCXXRecord(const Expr *E) {
@@ -259,7 +259,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
if (SanOpts.has(SanitizerKind::CFINVCall) &&
MD->getParent()->isDynamicClass()) {
llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy, MD->getParent());
- EmitVTablePtrCheckForCall(MD, VTable, CFITCK_NVCall, CE->getLocStart());
+ EmitVTablePtrCheckForCall(MD->getParent(), VTable, CFITCK_NVCall,
+ CE->getLocStart());
}
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
@@ -273,7 +274,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
if (MD->isVirtual()) {
This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall(
- *this, MD, This, UseVirtualCall);
+ *this, CalleeDecl, This, UseVirtualCall);
}
return EmitCXXMemberOrOperatorCall(MD, Callee, ReturnValue, This.getPointer(),
@@ -323,10 +324,11 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
// Push the this ptr.
Args.add(RValue::get(ThisPtrForCall), ThisType);
- RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1);
-
+ RequiredArgs required =
+ RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr);
+
// And the rest of the call args
- EmitCallArgs(Args, FPT, E->arguments(), E->getDirectCallee());
+ EmitCallArgs(Args, FPT, E->arguments());
return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required),
Callee, ReturnValue, Args);
}
@@ -369,6 +371,9 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF,
std::vector<CharUnits> VBPtrOffsets =
CGF.CGM.getCXXABI().getVBPtrOffsets(Base);
for (CharUnits VBPtrOffset : VBPtrOffsets) {
+ // Stop before we hit any virtual base pointers located in virtual bases.
+ if (VBPtrOffset >= NVSize)
+ break;
std::pair<CharUnits, CharUnits> LastStore = Stores.pop_back_val();
CharUnits LastStoreOffset = LastStore.first;
CharUnits LastStoreSize = LastStore.second;
@@ -471,8 +476,8 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
}
}
- if (const ConstantArrayType *arrayType
- = getContext().getAsConstantArrayType(E->getType())) {
+ if (const ArrayType *arrayType
+ = getContext().getAsArrayType(E->getType())) {
EmitCXXAggrConstructorCall(CD, arrayType, Dest.getAddress(), E);
} else {
CXXCtorType Type = Ctor_Complete;
@@ -1010,15 +1015,18 @@ void CodeGenFunction::EmitNewArrayInitializer(
if (auto *ILE = dyn_cast<InitListExpr>(Init)) {
if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) {
if (RType->getDecl()->isStruct()) {
- unsigned NumFields = 0;
+ unsigned NumElements = 0;
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RType->getDecl()))
+ NumElements = CXXRD->getNumBases();
for (auto *Field : RType->getDecl()->fields())
if (!Field->isUnnamedBitfield())
- ++NumFields;
- if (ILE->getNumInits() == NumFields)
+ ++NumElements;
+ // FIXME: Recurse into nested InitListExprs.
+ if (ILE->getNumInits() == NumElements)
for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
if (!isa<ImplicitValueInitExpr>(ILE->getInit(i)))
- --NumFields;
- if (ILE->getNumInits() == NumFields && TryMemsetInitialization())
+ --NumElements;
+ if (ILE->getNumInits() == NumElements && TryMemsetInitialization())
return;
}
}
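The counting scheme above starts from the number of bases plus named fields, subtracts one for each explicit (non-ImplicitValueInitExpr) initializer, and tries memset only if the count comes back to the number of initializers, i.e. every element is implicitly value-initialized. A standalone sketch of just that bookkeeping, with the init list modeled as a vector of flags purely for illustration:

#include <cstddef>
#include <vector>

// One flag per initializer: true if it is an implicit value-initialization.
static bool allInitsAreImplicit(const std::vector<bool> &isImplicit) {
  std::size_t NumElements = isImplicit.size();
  for (bool implicit : isImplicit)
    if (!implicit)
      --NumElements;  // an explicit initializer rules out pure zeroing
  return NumElements == isImplicit.size();
}

int main() {
  bool ok = allInitsAreImplicit({true, true, true}) &&
            !allInitsAreImplicit({true, false, true});
  return ok ? 0 : 1;
}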
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
index ee049f1810a2..803b39907dd7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp
@@ -111,7 +111,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) {
// Round up the field offset to the alignment of the field type.
CharUnits AlignedNextFieldOffsetInChars =
- NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment);
+ NextFieldOffsetInChars.alignTo(FieldAlignment);
if (AlignedNextFieldOffsetInChars < FieldOffsetInChars) {
// We need to append padding.
@@ -121,7 +121,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) {
"Did not add enough padding!");
AlignedNextFieldOffsetInChars =
- NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment);
+ NextFieldOffsetInChars.alignTo(FieldAlignment);
}
if (AlignedNextFieldOffsetInChars > FieldOffsetInChars) {
@@ -162,8 +162,8 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field,
if (FieldOffset > NextFieldOffsetInBits) {
// We need to add padding.
CharUnits PadSize = Context.toCharUnitsFromBits(
- llvm::RoundUpToAlignment(FieldOffset - NextFieldOffsetInBits,
- Context.getTargetInfo().getCharAlign()));
+ llvm::alignTo(FieldOffset - NextFieldOffsetInBits,
+ Context.getTargetInfo().getCharAlign()));
AppendPadding(PadSize);
}
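The RoundUpToAlignment-to-alignTo renames throughout this file keep the same contract: round a value up to the next multiple of an alignment. A self-contained equivalent of that contract:

#include <cassert>
#include <cstdint>

// Same contract as llvm::alignTo(Value, Align): the smallest multiple of
// Align that is >= Value. Align must be non-zero.
constexpr std::uint64_t alignTo(std::uint64_t Value, std::uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  assert(alignTo(0, 8) == 0);
  assert(alignTo(1, 8) == 8);
  assert(alignTo(17, 4) == 20);
  return 0;
}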
@@ -334,7 +334,7 @@ void ConstStructBuilder::ConvertStructToPacked() {
CharUnits ElementAlign = CharUnits::fromQuantity(
CGM.getDataLayout().getABITypeAlignment(C->getType()));
CharUnits AlignedElementOffsetInChars =
- ElementOffsetInChars.RoundUpToAlignment(ElementAlign);
+ ElementOffsetInChars.alignTo(ElementAlign);
if (AlignedElementOffsetInChars > ElementOffsetInChars) {
// We need some padding.
@@ -368,7 +368,14 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) {
unsigned FieldNo = 0;
unsigned ElementNo = 0;
-
+
+ // Bail out if we have base classes. We could support these, but they only
+ // arise in C++1z where we will have already constant folded most interesting
+ // cases. FIXME: There are still a few more cases we can handle this way.
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ if (CXXRD->getNumBases())
+ return false;
+
for (RecordDecl::field_iterator Field = RD->field_begin(),
FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) {
// If this is a union, skip all the fields that aren't being initialized.
@@ -508,13 +515,12 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
} else {
// Append tail padding if necessary.
CharUnits LLVMSizeInChars =
- NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment);
+ NextFieldOffsetInChars.alignTo(LLVMStructAlignment);
if (LLVMSizeInChars != LayoutSizeInChars)
AppendTailPadding(LayoutSizeInChars);
- LLVMSizeInChars =
- NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment);
+ LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment);
// Check if we need to convert the struct to a packed struct.
if (NextFieldOffsetInChars <= LayoutSizeInChars &&
@@ -526,8 +532,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
"Converting to packed did not help!");
}
- LLVMSizeInChars =
- NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment);
+ LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment);
assert(LayoutSizeInChars == LLVMSizeInChars &&
"Tail padding mismatch!");
@@ -546,8 +551,9 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
llvm::Constant *Result = llvm::ConstantStruct::get(STy, Elements);
- assert(NextFieldOffsetInChars.RoundUpToAlignment(getAlignment(Result)) ==
- getSizeInChars(Result) && "Size mismatch!");
+ assert(NextFieldOffsetInChars.alignTo(getAlignment(Result)) ==
+ getSizeInChars(Result) &&
+ "Size mismatch!");
return Result;
}
@@ -758,6 +764,12 @@ public:
return Visit(DIE->getExpr());
}
+ llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E) {
+ if (!E->cleanupsHaveSideEffects())
+ return Visit(E->getSubExpr());
+ return nullptr;
+ }
+
llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) {
return Visit(E->GetTemporaryExpr());
}
@@ -1125,6 +1137,13 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter,
unsigned FieldNo = -1;
unsigned ElementNo = 0;
+ // Bail out if we have base classes. We could support these, but they only
+ // arise in C++1z where we will have already constant folded most interesting
+ // cases. FIXME: There are still a few more cases we can handle this way.
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ if (CXXRD->getNumBases())
+ return false;
+
for (FieldDecl *Field : RD->fields()) {
++FieldNo;
@@ -1301,8 +1320,14 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
// Convert to the appropriate type; this could be an lvalue for
// an integer.
- if (isa<llvm::PointerType>(DestTy))
+ if (isa<llvm::PointerType>(DestTy)) {
+ // Convert the integer to a pointer-sized integer before converting it
+ // to a pointer.
+ C = llvm::ConstantExpr::getIntegerCast(
+ C, getDataLayout().getIntPtrType(DestTy),
+ /*isSigned=*/false);
return llvm::ConstantExpr::getIntToPtr(C, DestTy);
+ }
// If the types don't match this should only be a truncate.
if (C->getType() != DestTy)
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
index 268e7967b808..120dacfbb011 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp
@@ -818,7 +818,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
"Splatted expr doesn't match with vector element type?");
// Splat the element across to all elements
- unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements();
+ unsigned NumElements = DstTy->getVectorNumElements();
return Builder.CreateVectorSplat(NumElements, Src, "splat");
}
@@ -984,8 +984,7 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) {
Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
// Vector Mask Case
- if (E->getNumSubExprs() == 2 ||
- (E->getNumSubExprs() == 3 && E->getExpr(2)->getType()->isVectorType())) {
+ if (E->getNumSubExprs() == 2) {
Value *LHS = CGF.EmitScalarExpr(E->getExpr(0));
Value *RHS = CGF.EmitScalarExpr(E->getExpr(1));
Value *Mask;
@@ -993,22 +992,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
llvm::VectorType *LTy = cast<llvm::VectorType>(LHS->getType());
unsigned LHSElts = LTy->getNumElements();
- if (E->getNumSubExprs() == 3) {
- Mask = CGF.EmitScalarExpr(E->getExpr(2));
-
- // Shuffle LHS & RHS into one input vector.
- SmallVector<llvm::Constant*, 32> concat;
- for (unsigned i = 0; i != LHSElts; ++i) {
- concat.push_back(Builder.getInt32(2*i));
- concat.push_back(Builder.getInt32(2*i+1));
- }
-
- Value* CV = llvm::ConstantVector::get(concat);
- LHS = Builder.CreateShuffleVector(LHS, RHS, CV, "concat");
- LHSElts *= 2;
- } else {
- Mask = RHS;
- }
+ Mask = RHS;
llvm::VectorType *MTy = cast<llvm::VectorType>(Mask->getType());
@@ -1366,8 +1350,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
QualType DestTy = CE->getType();
CastKind Kind = CE->getCastKind();
- if (!DestTy->isVoidType())
- TestAndClearIgnoreResultAssign();
+ // These cases are generally not written to ignore the result of
+ // evaluating their sub-expressions, so we clear this now.
+ bool Ignored = TestAndClearIgnoreResultAssign();
// Since almost all cast kinds apply to scalars, this switch doesn't have
// a default case, so the compiler will warn on a missing case. The cases
@@ -1410,7 +1395,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
}
case CK_AddressSpaceConversion: {
Value *Src = Visit(const_cast<Expr*>(E));
- return Builder.CreateAddrSpaceCast(Src, ConvertType(DestTy));
+ // Since the target may map different address spaces in the AST to the same
+ // address space, an address space conversion may end up as a bitcast.
+ return Builder.CreatePointerBitCastOrAddrSpaceCast(Src,
+ ConvertType(DestTy));
}
case CK_AtomicToNonAtomic:
case CK_NonAtomicToAtomic:
@@ -1494,11 +1482,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return CGF.EmitARCRetainScalarExpr(E);
case CK_ARCConsumeObject:
return CGF.EmitObjCConsumeObject(E->getType(), Visit(E));
- case CK_ARCReclaimReturnedObject: {
- llvm::Value *value = Visit(E);
- value = CGF.EmitARCRetainAutoreleasedReturnValue(value);
- return CGF.EmitObjCConsumeObject(E->getType(), value);
- }
+ case CK_ARCReclaimReturnedObject:
+ return CGF.EmitARCReclaimReturnedObject(E, /*allowUnsafe*/ Ignored);
case CK_ARCExtendBlockObject:
return CGF.EmitARCExtendBlockObject(E);
@@ -1544,7 +1529,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
llvm::Type *DstTy = ConvertType(DestTy);
Value *Elt = Visit(const_cast<Expr*>(E));
// Splat the element across to all elements
- unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements();
+ unsigned NumElements = DstTy->getVectorNumElements();
return Builder.CreateVectorSplat(NumElements, Elt, "splat");
}
@@ -1654,13 +1639,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
if (isPre) {
Builder.CreateStore(True, LV.getAddress(), LV.isVolatileQualified())
- ->setAtomic(llvm::SequentiallyConsistent);
+ ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent);
return Builder.getTrue();
}
// For atomic bool increment, we just store true and return it for
// preincrement, do an atomic swap with true for postincrement
- return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
- LV.getPointer(), True, llvm::SequentiallyConsistent);
+ return Builder.CreateAtomicRMW(
+ llvm::AtomicRMWInst::Xchg, LV.getPointer(), True,
+ llvm::AtomicOrdering::SequentiallyConsistent);
}
// Special case for atomic increment / decrement on integers, emit
// atomicrmw instructions. We skip this if we want to be doing overflow
@@ -1677,7 +1663,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::Value *amt = CGF.EmitToMemory(
llvm::ConstantInt::get(ConvertType(type), 1, true), type);
llvm::Value *old = Builder.CreateAtomicRMW(aop,
- LV.getPointer(), amt, llvm::SequentiallyConsistent);
+ LV.getPointer(), amt, llvm::AtomicOrdering::SequentiallyConsistent);
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
value = EmitLoadOfLValue(LV, E->getExprLoc());
@@ -1794,15 +1780,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
amt = llvm::ConstantFP::get(VMContext,
llvm::APFloat(static_cast<double>(amount)));
else {
- // Remaining types are either Half or LongDouble. Convert from float.
+ // Remaining types are Half, LongDouble or __float128. Convert from float.
llvm::APFloat F(static_cast<float>(amount));
bool ignored;
+ const llvm::fltSemantics *FS;
// Don't use getFloatTypeSemantics because Half isn't
// necessarily represented using the "half" LLVM type.
- F.convert(value->getType()->isHalfTy()
- ? CGF.getTarget().getHalfFormat()
- : CGF.getTarget().getLongDoubleFormat(),
- llvm::APFloat::rmTowardZero, &ignored);
+ if (value->getType()->isFP128Ty())
+ FS = &CGF.getTarget().getFloat128Format();
+ else if (value->getType()->isHalfTy())
+ FS = &CGF.getTarget().getHalfFormat();
+ else
+ FS = &CGF.getTarget().getLongDoubleFormat();
+ F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored);
amt = llvm::ConstantFP::get(VMContext, F);
}
value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
@@ -2159,7 +2149,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
E->getExprLoc()),
LHSTy);
Builder.CreateAtomicRMW(aop, LHSLV.getPointer(), amt,
- llvm::SequentiallyConsistent);
+ llvm::AtomicOrdering::SequentiallyConsistent);
return LHSLV;
}
}
@@ -2716,7 +2706,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) {
RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom");
bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) &&
- Ops.Ty->hasSignedIntegerRepresentation();
+ Ops.Ty->hasSignedIntegerRepresentation() &&
+ !CGF.getLangOpts().isSignedOverflowDefined();
bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent);
// OpenCL 6.3j: shift values are effectively % word size of LHS.
if (CGF.getLangOpts().OpenCL)
@@ -2993,15 +2984,17 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
std::tie(LHS, RHS) = CGF.EmitARCStoreAutoreleasing(E);
break;
+ case Qualifiers::OCL_ExplicitNone:
+ std::tie(LHS, RHS) = CGF.EmitARCStoreUnsafeUnretained(E, Ignore);
+ break;
+
case Qualifiers::OCL_Weak:
RHS = Visit(E->getRHS());
LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore);
break;
- // No reason to do any of these differently.
case Qualifiers::OCL_None:
- case Qualifiers::OCL_ExplicitNone:
// __block variables need to have the rhs evaluated first, plus
// this should improve codegen just a little.
RHS = Visit(E->getRHS());
@@ -3366,9 +3359,11 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
llvm::Type *ArgTy = ConvertType(VE->getType());
- // If EmitVAArg fails, we fall back to the LLVM instruction.
- if (!ArgPtr.isValid())
- return Builder.CreateVAArg(ArgValue.getPointer(), ArgTy);
+ // If EmitVAArg fails, emit an error.
+ if (!ArgPtr.isValid()) {
+ CGF.ErrorUnsupported(VE, "va_arg expression");
+ return llvm::UndefValue::get(ArgTy);
+ }
// FIXME Volatility.
llvm::Value *Val = Builder.CreateLoad(ArgPtr);
@@ -3388,50 +3383,48 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
return CGF.EmitBlockLiteral(block);
}
+// Convert a vec3 to vec4, or vice versa.
+static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF,
+ Value *Src, unsigned NumElementsDst) {
+ llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
+ SmallVector<llvm::Constant*, 4> Args;
+ Args.push_back(Builder.getInt32(0));
+ Args.push_back(Builder.getInt32(1));
+ Args.push_back(Builder.getInt32(2));
+ if (NumElementsDst == 4)
+ Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
+ llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+ return Builder.CreateShuffleVector(Src, UnV, Mask);
+}
+
Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
Value *Src = CGF.EmitScalarExpr(E->getSrcExpr());
llvm::Type *DstTy = ConvertType(E->getType());
- // Going from vec4->vec3 or vec3->vec4 is a special case and requires
- // a shuffle vector instead of a bitcast.
llvm::Type *SrcTy = Src->getType();
- if (isa<llvm::VectorType>(DstTy) && isa<llvm::VectorType>(SrcTy)) {
- unsigned numElementsDst = cast<llvm::VectorType>(DstTy)->getNumElements();
- unsigned numElementsSrc = cast<llvm::VectorType>(SrcTy)->getNumElements();
- if ((numElementsDst == 3 && numElementsSrc == 4)
- || (numElementsDst == 4 && numElementsSrc == 3)) {
-
-
- // In the case of going from int4->float3, a bitcast is needed before
- // doing a shuffle.
- llvm::Type *srcElemTy =
- cast<llvm::VectorType>(SrcTy)->getElementType();
- llvm::Type *dstElemTy =
- cast<llvm::VectorType>(DstTy)->getElementType();
-
- if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy())
- || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) {
- // Create a float type of the same size as the source or destination.
- llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy,
- numElementsSrc);
-
- Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast");
- }
-
- llvm::Value *UnV = llvm::UndefValue::get(Src->getType());
-
- SmallVector<llvm::Constant*, 3> Args;
- Args.push_back(Builder.getInt32(0));
- Args.push_back(Builder.getInt32(1));
- Args.push_back(Builder.getInt32(2));
-
- if (numElementsDst == 4)
- Args.push_back(llvm::UndefValue::get(CGF.Int32Ty));
-
- llvm::Constant *Mask = llvm::ConstantVector::get(Args);
+ unsigned NumElementsSrc = isa<llvm::VectorType>(SrcTy) ?
+ cast<llvm::VectorType>(SrcTy)->getNumElements() : 0;
+ unsigned NumElementsDst = isa<llvm::VectorType>(DstTy) ?
+ cast<llvm::VectorType>(DstTy)->getNumElements() : 0;
+
+ // Going from vec3 to non-vec3 is a special case and requires a shuffle
+ // vector to get a vec4, then a bitcast if the target type is different.
+ if (NumElementsSrc == 3 && NumElementsDst != 3) {
+ Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
+ Src = Builder.CreateBitCast(Src, DstTy);
+ Src->setName("astype");
+ return Src;
+ }
- return Builder.CreateShuffleVector(Src, UnV, Mask, "astype");
- }
+ // Going from non-vec3 to vec3 is a special case and requires a bitcast
+ // to vec4 if the original type is not vec4, then a shuffle vector to
+ // get a vec3.
+ if (NumElementsSrc != 3 && NumElementsDst == 3) {
+ auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+ Src = Builder.CreateBitCast(Src, Vec4Ty);
+ Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
+ Src->setName("astype");
+ return Src;
}
return Builder.CreateBitCast(Src, DstTy, "astype");
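Both special cases above reduce to keeping lanes 0..2, with the widening direction filling lane 3 with undef. A scalar model of the two conversions, where std::array stands in for the vector type and 0.0f stands in for the undef lane (both substitutions made purely for illustration):

#include <array>
#include <cassert>

// Widening: the {0, 1, 2, undef} shuffle mask built by ConvertVec3AndVec4.
static std::array<float, 4> vec3ToVec4(const std::array<float, 3> &v) {
  return {v[0], v[1], v[2], 0.0f};
}

// Narrowing: bitcast to vec4 first (a no-op in this scalar model), then
// keep lanes 0..2 (the {0, 1, 2} shuffle mask).
static std::array<float, 3> vec4ToVec3(const std::array<float, 4> &v) {
  return {v[0], v[1], v[2]};
}

int main() {
  std::array<float, 3> v{1.0f, 2.0f, 3.0f};
  assert(vec4ToVec3(vec3ToVec4(v)) == v);
  return 0;
}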
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp
index 0afe7dbb9f1d..51474f16a018 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -19,12 +19,15 @@
using namespace clang::CodeGen;
using namespace llvm;
-static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
+static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
+ llvm::DebugLoc Location) {
if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
- Attrs.UnrollEnable == LoopAttributes::Unspecified)
+ Attrs.UnrollEnable == LoopAttributes::Unspecified &&
+ Attrs.DistributeEnable == LoopAttributes::Unspecified &&
+ !Location)
return nullptr;
SmallVector<Metadata *, 4> Args;
@@ -32,6 +35,10 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
auto TempNode = MDNode::getTemporary(Ctx, None);
Args.push_back(TempNode.get());
+ // If we have a valid debug location for the loop, add it.
+ if (Location)
+ Args.push_back(Location.getAsMDNode());
+
// Setting vectorize.width
if (Attrs.VectorizeWidth > 0) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"),
@@ -78,6 +85,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
Args.push_back(MDNode::get(Ctx, Vals));
}
+ if (Attrs.DistributeEnable != LoopAttributes::Unspecified) {
+ Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt1Ty(Ctx), (Attrs.DistributeEnable ==
+ LoopAttributes::Enable)))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Set the first operand to itself.
MDNode *LoopID = MDNode::get(Ctx, Args);
LoopID->replaceOperandWith(0, LoopID);
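At the source level, the new llvm.loop.distribute.enable operand is driven by the distribute loop hint; a minimal function showing the pragma this hunk encodes (the saxpy kernel is an arbitrary example chosen for illustration):

// With the change above, the pragma below is recorded on the loop's
// !llvm.loop metadata as !{"llvm.loop.distribute.enable", i1 true},
// and the loop's start location is added when a debug location exists.
void saxpy(float *y, const float *x, float a, int n) {
#pragma clang loop distribute(enable)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}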
@@ -87,7 +102,8 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
LoopAttributes::LoopAttributes(bool IsParallel)
: IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
- InterleaveCount(0), UnrollCount(0) {}
+ InterleaveCount(0), UnrollCount(0),
+ DistributeEnable(LoopAttributes::Unspecified) {}
void LoopAttributes::clear() {
IsParallel = false;
@@ -98,37 +114,60 @@ void LoopAttributes::clear() {
UnrollEnable = LoopAttributes::Unspecified;
}
-LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs)
+LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
+ llvm::DebugLoc Location)
: LoopID(nullptr), Header(Header), Attrs(Attrs) {
- LoopID = createMetadata(Header->getContext(), Attrs);
+ LoopID = createMetadata(Header->getContext(), Attrs, Location);
}
-void LoopInfoStack::push(BasicBlock *Header) {
- Active.push_back(LoopInfo(Header, StagedAttrs));
+void LoopInfoStack::push(BasicBlock *Header, llvm::DebugLoc Location) {
+ Active.push_back(LoopInfo(Header, StagedAttrs, Location));
// Clear the attributes so nested loops do not inherit them.
StagedAttrs.clear();
}
void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
- ArrayRef<const clang::Attr *> Attrs) {
+ ArrayRef<const clang::Attr *> Attrs,
+ llvm::DebugLoc Location) {
// Identify loop hint attributes from Attrs.
for (const auto *Attr : Attrs) {
const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
+ const OpenCLUnrollHintAttr *OpenCLHint =
+ dyn_cast<OpenCLUnrollHintAttr>(Attr);
// Skip non-loop-hint attributes
- if (!LH)
+ if (!LH && !OpenCLHint) {
continue;
+ }
- auto *ValueExpr = LH->getValue();
+ LoopHintAttr::OptionType Option = LoopHintAttr::Unroll;
+ LoopHintAttr::LoopHintState State = LoopHintAttr::Disable;
unsigned ValueInt = 1;
- if (ValueExpr) {
- llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx);
- ValueInt = ValueAPS.getSExtValue();
- }
+ // Translate opencl_unroll_hint attribute argument to
+ // equivalent LoopHintAttr enums.
+ // OpenCL v2.0 s6.11.5:
+ // 0 - full unroll (no argument).
+ // 1 - disable unroll.
+ // other positive integer n - unroll by n.
+ if (OpenCLHint) {
+ ValueInt = OpenCLHint->getUnrollHint();
+ if (ValueInt == 0) {
+ State = LoopHintAttr::Full;
+ } else if (ValueInt != 1) {
+ Option = LoopHintAttr::UnrollCount;
+ State = LoopHintAttr::Numeric;
+ }
+ } else if (LH) {
+ auto *ValueExpr = LH->getValue();
+ if (ValueExpr) {
+ llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx);
+ ValueInt = ValueAPS.getSExtValue();
+ }
- LoopHintAttr::OptionType Option = LH->getOption();
- LoopHintAttr::LoopHintState State = LH->getState();
+ Option = LH->getOption();
+ State = LH->getState();
+ }
switch (State) {
case LoopHintAttr::Disable:
switch (Option) {
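The OpenCL translation above is a three-way classification of the hint value. A standalone restatement of that mapping, where the enum below is an invented stand-in for the LoopHintAttr option/state pairs:

#include <cassert>

// OpenCL v2.0 s6.11.5: 0 requests a full unroll, 1 disables unrolling,
// and any other positive n requests unrolling by n.
enum class UnrollKind { Full, Disable, Count };

static UnrollKind classifyOpenCLUnrollHint(unsigned ValueInt) {
  if (ValueInt == 0)
    return UnrollKind::Full;     // LoopHintAttr::Full
  if (ValueInt == 1)
    return UnrollKind::Disable;  // the default State above
  return UnrollKind::Count;      // LoopHintAttr::UnrollCount, Numeric
}

int main() {
  assert(classifyOpenCLUnrollHint(0) == UnrollKind::Full);
  assert(classifyOpenCLUnrollHint(1) == UnrollKind::Disable);
  assert(classifyOpenCLUnrollHint(8) == UnrollKind::Count);
  return 0;
}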
@@ -143,6 +182,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Disable);
break;
+ case LoopHintAttr::Distribute:
+ setDistributeState(false);
+ break;
case LoopHintAttr::UnrollCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
@@ -159,6 +201,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Enable);
break;
+ case LoopHintAttr::Distribute:
+ setDistributeState(true);
+ break;
case LoopHintAttr::UnrollCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
@@ -178,6 +223,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::UnrollCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::Distribute:
llvm_unreachable("Options cannot be used to assume mem safety.");
break;
}
@@ -192,6 +238,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::UnrollCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::Distribute:
llvm_unreachable("Options cannot be used with 'full' hint.");
break;
}
@@ -210,6 +257,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
+ case LoopHintAttr::Distribute:
llvm_unreachable("Options cannot be assigned a value.");
break;
}
@@ -218,7 +266,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
}
/// Stage the attributes.
- push(Header);
+ push(Header, Location);
}
void LoopInfoStack::pop() {
@@ -237,7 +285,7 @@ void LoopInfoStack::InsertHelper(Instruction *I) const {
if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) {
for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i)
if (TI->getSuccessor(i) == L.getHeader()) {
- TI->setMetadata("llvm.loop", L.getLoopID());
+ TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
break;
}
return;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h
index ec3390677fa9..a0111edde5de 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Compiler.h"
@@ -57,13 +58,17 @@ struct LoopAttributes {
/// \brief llvm.unroll.
unsigned UnrollCount;
+
+ /// \brief Value for llvm.loop.distribute.enable metadata.
+ LVEnableState DistributeEnable;
};
/// \brief Information used when generating a structured loop.
class LoopInfo {
public:
/// \brief Construct a new LoopInfo for the loop with entry Header.
- LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs);
+ LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs,
+ llvm::DebugLoc Location);
/// \brief Get the loop id metadata for this loop.
llvm::MDNode *getLoopID() const { return LoopID; }
@@ -95,12 +100,14 @@ public:
/// \brief Begin a new structured loop. The set of staged attributes will be
/// applied to the loop and then cleared.
- void push(llvm::BasicBlock *Header);
+ void push(llvm::BasicBlock *Header,
+ llvm::DebugLoc Location = llvm::DebugLoc());
/// \brief Begin a new structured loop. Stage attributes from the Attrs list.
/// The staged attributes are applied to the loop and then cleared.
void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx,
- llvm::ArrayRef<const Attr *> Attrs);
+ llvm::ArrayRef<const Attr *> Attrs,
+ llvm::DebugLoc Location = llvm::DebugLoc());
/// \brief End the current loop.
void pop();
@@ -126,6 +133,12 @@ public:
Enable ? LoopAttributes::Enable : LoopAttributes::Disable;
}
+ /// \brief Set the next pushed loop as a distribution candidate.
+ void setDistributeState(bool Enable = true) {
+ StagedAttrs.DistributeEnable =
+ Enable ? LoopAttributes::Enable : LoopAttributes::Disable;
+ }
+
/// \brief Set the next pushed loop unroll state.
void setUnrollState(const LoopAttributes::LVEnableState &State) {
StagedAttrs.UnrollEnable = State;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
index 2d5991b71fca..db894ce67470 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp
@@ -590,9 +590,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar,
args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy);
llvm::Value *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction();
- CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(Context.VoidTy, args,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args),
fn, ReturnValueSlot(), args);
}
@@ -856,10 +854,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF,
llvm::Value *copyCppAtomicObjectFn =
CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction();
- CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy,
- args,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ CGF.EmitCall(
+ CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args),
copyCppAtomicObjectFn, ReturnValueSlot(), args);
}
@@ -901,21 +897,29 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
// Currently, all atomic accesses have to be through integer
// types, so there's no point in trying to pick a prettier type.
- llvm::Type *bitcastType =
- llvm::Type::getIntNTy(getLLVMContext(),
- getContext().toBits(strategy.getIvarSize()));
+ uint64_t ivarSize = getContext().toBits(strategy.getIvarSize());
+ llvm::Type *bitcastType = llvm::Type::getIntNTy(getLLVMContext(), ivarSize);
bitcastType = bitcastType->getPointerTo(); // addrspace 0 okay
// Perform an atomic load. This does not impose ordering constraints.
Address ivarAddr = LV.getAddress();
ivarAddr = Builder.CreateBitCast(ivarAddr, bitcastType);
llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load");
- load->setAtomic(llvm::Unordered);
+ load->setAtomic(llvm::AtomicOrdering::Unordered);
// Store that value into the return address. Doing this with a
// bitcast is likely to produce some pretty ugly IR, but it's not
// the *most* terrible thing in the world.
- Builder.CreateStore(load, Builder.CreateBitCast(ReturnValue, bitcastType));
+ llvm::Type *retTy = ConvertType(getterMethod->getReturnType());
+ uint64_t retTySize = CGM.getDataLayout().getTypeSizeInBits(retTy);
+ llvm::Value *ivarVal = load;
+ if (ivarSize > retTySize) {
+ llvm::Type *newTy = llvm::Type::getIntNTy(getLLVMContext(), retTySize);
+ ivarVal = Builder.CreateTrunc(load, newTy);
+ bitcastType = newTy->getPointerTo();
+ }
+ Builder.CreateStore(ivarVal,
+ Builder.CreateBitCast(ReturnValue, bitcastType));
// Make sure we don't do an autorelease.
AutoreleaseResult = false;
@@ -950,8 +954,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
// runtime already should have computed it to build the function.
llvm::Instruction *CallInstruction;
RValue RV = EmitCall(
- getTypes().arrangeFreeFunctionCall(
- propType, args, FunctionType::ExtInfo(), RequiredArgs::All),
+ getTypes().arrangeBuiltinFunctionCall(propType, args),
getPropertyFn, ReturnValueSlot(), args, CGCalleeInfo(),
&CallInstruction);
if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction))
@@ -1015,7 +1018,6 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
AutoreleaseResult = false;
}
- value = Builder.CreateBitCast(value, ConvertType(propType));
value = Builder.CreateBitCast(
value, ConvertType(GetterMethodDecl->getReturnType()));
}
@@ -1067,10 +1069,8 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD,
args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy);
llvm::Value *copyStructFn = CGF.CGM.getObjCRuntime().GetSetStructFunction();
- CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy,
- args,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ CGF.EmitCall(
+ CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args),
copyStructFn, ReturnValueSlot(), args);
}
@@ -1105,10 +1105,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF,
llvm::Value *copyCppAtomicObjectFn =
CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction();
- CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy,
- args,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ CGF.EmitCall(
+ CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args),
copyCppAtomicObjectFn, ReturnValueSlot(), args);
}
@@ -1192,7 +1190,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
// Perform an atomic store. There are no memory ordering requirements.
llvm::StoreInst *store = Builder.CreateStore(load, ivarAddr);
- store->setAtomic(llvm::Unordered);
+ store->setAtomic(llvm::AtomicOrdering::Unordered);
return;
}
@@ -1238,9 +1236,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
if (setOptimizedPropertyFn) {
args.add(RValue::get(arg), getContext().getObjCIdType());
args.add(RValue::get(ivarOffset), getContext().getPointerDiffType());
- EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args),
setOptimizedPropertyFn, ReturnValueSlot(), args);
} else {
args.add(RValue::get(ivarOffset), getContext().getPointerDiffType());
@@ -1251,9 +1247,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
getContext().BoolTy);
// FIXME: We shouldn't need to get the function info here, the runtime
// already should have computed it to build the function.
- EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args),
setPropertyFn, ReturnValueSlot(), args);
}
@@ -1498,6 +1492,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
ArrayType::Normal, 0);
Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr");
+ RunCleanupsScope ForScope(*this);
+
// Emit the collection pointer. In ARC, we do a retain.
llvm::Value *Collection;
if (getLangOpts().ObjCAutoRefCount) {
@@ -1610,9 +1606,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
Args2.add(RValue::get(V), getContext().getObjCIdType());
// FIXME: We shouldn't need to get the function info here, the runtime already
// should have computed it to build the function.
- EmitCall(CGM.getTypes().arrangeFreeFunctionCall(getContext().VoidTy, Args2,
- FunctionType::ExtInfo(),
- RequiredArgs::All),
+ EmitCall(
+ CGM.getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, Args2),
EnumerationMutationFn, ReturnValueSlot(), Args2);
// Otherwise, or if the mutation function returns, just continue.
@@ -1739,10 +1734,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
if (DI)
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
- // Leave the cleanup we entered in ARC.
- if (getLangOpts().ObjCAutoRefCount)
- PopCleanupBlock();
-
+ ForScope.ForceCleanup();
EmitBlock(LoopEnd.getBlock());
}
@@ -1980,20 +1972,14 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value,
return result;
}
-/// Retain the given object which is the result of a function call.
-/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value)
-///
-/// Yes, this function name is one character away from a different
-/// call with completely different semantics.
-llvm::Value *
-CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
+static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
// Fetch the void(void) inline asm which marks that we're going to
- // retain the autoreleased return value.
+ // do something with the autoreleased return value.
llvm::InlineAsm *&marker
- = CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker;
+ = CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker;
if (!marker) {
StringRef assembly
- = CGM.getTargetCodeGenInfo()
+ = CGF.CGM.getTargetCodeGenInfo()
.getARCRetainAutoreleasedReturnValueMarker();
// If we have an empty assembly string, there's nothing to do.
@@ -2001,9 +1987,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
// Otherwise, at -O0, build an inline asm that we're going to call
// in a moment.
- } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+ } else if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) {
llvm::FunctionType *type =
- llvm::FunctionType::get(VoidTy, /*variadic*/false);
+ llvm::FunctionType::get(CGF.VoidTy, /*variadic*/false);
marker = llvm::InlineAsm::get(type, assembly, "", /*sideeffects*/ true);
@@ -2012,25 +1998,50 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
// optimizer to pick up.
} else {
llvm::NamedMDNode *metadata =
- CGM.getModule().getOrInsertNamedMetadata(
+ CGF.CGM.getModule().getOrInsertNamedMetadata(
"clang.arc.retainAutoreleasedReturnValueMarker");
assert(metadata->getNumOperands() <= 1);
if (metadata->getNumOperands() == 0) {
- metadata->addOperand(llvm::MDNode::get(
- getLLVMContext(), llvm::MDString::get(getLLVMContext(), assembly)));
+ auto &ctx = CGF.getLLVMContext();
+ metadata->addOperand(llvm::MDNode::get(ctx,
+ llvm::MDString::get(ctx, assembly)));
}
}
}
// Call the marker asm if we made one, which we do only at -O0.
if (marker)
- Builder.CreateCall(marker);
+ CGF.Builder.CreateCall(marker);
+}
+/// Retain the given object which is the result of a function call.
+/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value)
+///
+/// Yes, this function name is one character away from a different
+/// call with completely different semantics.
+llvm::Value *
+CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
+ emitAutoreleasedReturnValueMarker(*this);
return emitARCValueOperation(*this, value,
- CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
+ CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
"objc_retainAutoreleasedReturnValue");
}
+/// Claim a possibly-autoreleased return value at +0. This is only
+/// valid to do in contexts which do not rely on the retain to keep
+/// the object valid for all of its uses; for example, when
+/// the value is ignored, or when it is being assigned to an
+/// __unsafe_unretained variable.
+///
+/// call i8* \@objc_unsafeClaimAutoreleasedReturnValue(i8* %value)
+llvm::Value *
+CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) {
+ emitAutoreleasedReturnValueMarker(*this);
+ return emitARCValueOperation(*this, value,
+ CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue,
+ "objc_unsafeClaimAutoreleasedReturnValue");
+}
+
/// Release the given object.
/// call void \@objc_release(i8* %value)
void CodeGenFunction::EmitARCRelease(llvm::Value *value,
@@ -2446,25 +2457,22 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type);
}
-static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF,
- llvm::Value *value);
+typedef llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ llvm::Value *value)>
+ ValueTransform;
-/// Given that the given expression is some sort of call (which does
-/// not return retained), emit a retain following it.
-static llvm::Value *emitARCRetainCall(CodeGenFunction &CGF, const Expr *e) {
- llvm::Value *value = CGF.EmitScalarExpr(e);
- return emitARCRetainAfterCall(CGF, value);
-}
-
-static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF,
- llvm::Value *value) {
+/// Insert code immediately after a call.
+static llvm::Value *emitARCOperationAfterCall(CodeGenFunction &CGF,
+ llvm::Value *value,
+ ValueTransform doAfterCall,
+ ValueTransform doFallback) {
if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(value)) {
CGBuilderTy::InsertPoint ip = CGF.Builder.saveIP();
// Place the retain immediately following the call.
CGF.Builder.SetInsertPoint(call->getParent(),
++llvm::BasicBlock::iterator(call));
- value = CGF.EmitARCRetainAutoreleasedReturnValue(value);
+ value = doAfterCall(CGF, value);
CGF.Builder.restoreIP(ip);
return value;
@@ -2474,7 +2482,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF,
// Place the retain at the beginning of the normal destination block.
llvm::BasicBlock *BB = invoke->getNormalDest();
CGF.Builder.SetInsertPoint(BB, BB->begin());
- value = CGF.EmitARCRetainAutoreleasedReturnValue(value);
+ value = doAfterCall(CGF, value);
CGF.Builder.restoreIP(ip);
return value;
@@ -2483,7 +2491,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF,
// the operand.
} else if (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(value)) {
llvm::Value *operand = bitcast->getOperand(0);
- operand = emitARCRetainAfterCall(CGF, operand);
+ operand = emitARCOperationAfterCall(CGF, operand, doAfterCall, doFallback);
bitcast->setOperand(0, operand);
return bitcast;
@@ -2491,7 +2499,46 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF,
} else {
// Retain using the non-block variant: we never need to do a copy
// of a block that's been returned to us.
- return CGF.EmitARCRetainNonBlock(value);
+ return doFallback(CGF, value);
+ }
+}
+
+/// Given that the given expression is some sort of call (which does
+/// not return retained), emit a retain following it.
+static llvm::Value *emitARCRetainCallResult(CodeGenFunction &CGF,
+ const Expr *e) {
+ llvm::Value *value = CGF.EmitScalarExpr(e);
+ return emitARCOperationAfterCall(CGF, value,
+ [](CodeGenFunction &CGF, llvm::Value *value) {
+ return CGF.EmitARCRetainAutoreleasedReturnValue(value);
+ },
+ [](CodeGenFunction &CGF, llvm::Value *value) {
+ return CGF.EmitARCRetainNonBlock(value);
+ });
+}
+
+/// Given that the given expression is some sort of call (which does
+/// not return retained), perform an unsafeClaim following it.
+static llvm::Value *emitARCUnsafeClaimCallResult(CodeGenFunction &CGF,
+ const Expr *e) {
+ llvm::Value *value = CGF.EmitScalarExpr(e);
+ return emitARCOperationAfterCall(CGF, value,
+ [](CodeGenFunction &CGF, llvm::Value *value) {
+ return CGF.EmitARCUnsafeClaimAutoreleasedReturnValue(value);
+ },
+ [](CodeGenFunction &CGF, llvm::Value *value) {
+ return value;
+ });
+}
+
+llvm::Value *CodeGenFunction::EmitARCReclaimReturnedObject(const Expr *E,
+ bool allowUnsafeClaim) {
+ if (allowUnsafeClaim &&
+ CGM.getLangOpts().ObjCRuntime.hasARCUnsafeClaimAutoreleasedReturnValue()) {
+ return emitARCUnsafeClaimCallResult(*this, E);
+ } else {
+ llvm::Value *value = emitARCRetainCallResult(*this, E);
+ return EmitObjCConsumeObject(E->getType(), value);
}
}
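The refactoring above generalizes "insert a retain right after the call" into "insert some transform right after the call", parameterized by a pair of function_ref callbacks: one applied when the value is a direct call result and one used as the fallback. A minimal standalone model of that shape, with std::function and a plain int standing in for llvm::function_ref and llvm::Value (stand-ins chosen for illustration):

#include <cstdio>
#include <functional>

using ValueTransform = std::function<int(int)>;

// Apply doAfterCall when the value comes straight from a call,
// otherwise fall back to the generic transform.
static int afterCall(int value, bool isCallResult,
                     ValueTransform doAfterCall, ValueTransform doFallback) {
  return isCallResult ? doAfterCall(value) : doFallback(value);
}

int main() {
  auto claim  = [](int v) { std::puts("unsafeClaim: take at +0"); return v; };
  auto retain = [](int v) { std::puts("retain: take at +1"); return v; };
  afterCall(1, /*isCallResult=*/true, claim, retain);
  afterCall(1, /*isCallResult=*/false, claim, retain);
  return 0;
}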
@@ -2531,17 +2578,52 @@ static bool shouldEmitSeparateBlockRetain(const Expr *e) {
return true;
}
-/// Try to emit a PseudoObjectExpr at +1.
+namespace {
+/// A CRTP base class for emitting expressions of retainable object
+/// pointer type in ARC.
+template <typename Impl, typename Result> class ARCExprEmitter {
+protected:
+ CodeGenFunction &CGF;
+ Impl &asImpl() { return *static_cast<Impl*>(this); }
+
+ ARCExprEmitter(CodeGenFunction &CGF) : CGF(CGF) {}
+
+public:
+ Result visit(const Expr *e);
+ Result visitCastExpr(const CastExpr *e);
+ Result visitPseudoObjectExpr(const PseudoObjectExpr *e);
+ Result visitBinaryOperator(const BinaryOperator *e);
+ Result visitBinAssign(const BinaryOperator *e);
+ Result visitBinAssignUnsafeUnretained(const BinaryOperator *e);
+ Result visitBinAssignAutoreleasing(const BinaryOperator *e);
+ Result visitBinAssignWeak(const BinaryOperator *e);
+ Result visitBinAssignStrong(const BinaryOperator *e);
+
+ // Minimal implementation:
+ // Result visitLValueToRValue(const Expr *e)
+ // Result visitConsumeObject(const Expr *e)
+ // Result visitExtendBlockObject(const Expr *e)
+ // Result visitReclaimReturnedObject(const Expr *e)
+ // Result visitCall(const Expr *e)
+ // Result visitExpr(const Expr *e)
+ //
+ // Result emitBitCast(Result result, llvm::Type *resultType)
+ // llvm::Value *getValueOfResult(Result result)
+};
+}
+
+/// Try to emit a PseudoObjectExpr under special ARC rules.
///
/// This massively duplicates emitPseudoObjectRValue.
-static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF,
- const PseudoObjectExpr *E) {
+template <typename Impl, typename Result>
+Result
+ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) {
SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques;
// Find the result expression.
const Expr *resultExpr = E->getResultExpr();
assert(resultExpr);
- TryEmitResult result;
+ Result result;
for (PseudoObjectExpr::const_semantics_iterator
i = E->semantics_begin(), e = E->semantics_end(); i != e; ++i) {
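ARCExprEmitter above is the classic CRTP static-dispatch pattern: the base class owns the traversal and forwards the leaf cases to the derived emitter through asImpl(), with no virtual calls. A minimal self-contained sketch of the pattern; the class and case names below are invented:

#include <cstdio>

template <typename Impl, typename Result> class ExprEmitterBase {
protected:
  Impl &asImpl() { return *static_cast<Impl *>(this); }

public:
  // Shared dispatch lives in the base; leaf cases resolve statically.
  Result visit(bool isCast) {
    if (isCast)
      return asImpl().visitCast();
    return asImpl().visitExpr();
  }
};

class RetainEmitter : public ExprEmitterBase<RetainEmitter, int> {
public:
  int visitCast() { std::puts("cast case"); return 1; }
  int visitExpr() { std::puts("default case"); return 0; }
};

int main() {
  RetainEmitter E;
  return E.visit(true) - 1 + E.visit(false);
}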
@@ -2557,8 +2639,9 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF,
// expression, try to evaluate the source as +1.
if (ov == resultExpr) {
assert(!OVMA::shouldBindAsLValue(ov));
- result = tryEmitARCRetainScalarExpr(CGF, ov->getSourceExpr());
- opaqueData = OVMA::bind(CGF, ov, RValue::get(result.getPointer()));
+ result = asImpl().visit(ov->getSourceExpr());
+ opaqueData = OVMA::bind(CGF, ov,
+ RValue::get(asImpl().getValueOfResult(result)));
// Otherwise, just bind it.
} else {
@@ -2569,7 +2652,7 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF,
// Otherwise, if the expression is the result, evaluate it
// and remember the result.
} else if (semantic == resultExpr) {
- result = tryEmitARCRetainScalarExpr(CGF, semantic);
+ result = asImpl().visit(semantic);
// Otherwise, evaluate the expression in an ignored context.
} else {
@@ -2584,146 +2667,240 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF,
return result;
}
-static TryEmitResult
-tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) {
+template <typename Impl, typename Result>
+Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) {
+ switch (e->getCastKind()) {
+
+ // No-op casts don't change the type, so we just ignore them.
+ case CK_NoOp:
+ return asImpl().visit(e->getSubExpr());
+
+ // These casts can change the type.
+ case CK_CPointerToObjCPointerCast:
+ case CK_BlockPointerToObjCPointerCast:
+ case CK_AnyPointerToBlockPointerCast:
+ case CK_BitCast: {
+ llvm::Type *resultType = CGF.ConvertType(e->getType());
+ assert(e->getSubExpr()->getType()->hasPointerRepresentation());
+ Result result = asImpl().visit(e->getSubExpr());
+ return asImpl().emitBitCast(result, resultType);
+ }
+
+ // Handle some casts specially.
+ case CK_LValueToRValue:
+ return asImpl().visitLValueToRValue(e->getSubExpr());
+ case CK_ARCConsumeObject:
+ return asImpl().visitConsumeObject(e->getSubExpr());
+ case CK_ARCExtendBlockObject:
+ return asImpl().visitExtendBlockObject(e->getSubExpr());
+ case CK_ARCReclaimReturnedObject:
+ return asImpl().visitReclaimReturnedObject(e->getSubExpr());
+
+ // Otherwise, use the default logic.
+ default:
+ return asImpl().visitExpr(e);
+ }
+}
+
+template <typename Impl, typename Result>
+Result
+ARCExprEmitter<Impl,Result>::visitBinaryOperator(const BinaryOperator *e) {
+ switch (e->getOpcode()) {
+ case BO_Comma:
+ CGF.EmitIgnoredExpr(e->getLHS());
+ CGF.EnsureInsertPoint();
+ return asImpl().visit(e->getRHS());
+
+ case BO_Assign:
+ return asImpl().visitBinAssign(e);
+
+ default:
+ return asImpl().visitExpr(e);
+ }
+}
+
+template <typename Impl, typename Result>
+Result ARCExprEmitter<Impl,Result>::visitBinAssign(const BinaryOperator *e) {
+ switch (e->getLHS()->getType().getObjCLifetime()) {
+ case Qualifiers::OCL_ExplicitNone:
+ return asImpl().visitBinAssignUnsafeUnretained(e);
+
+ case Qualifiers::OCL_Weak:
+ return asImpl().visitBinAssignWeak(e);
+
+ case Qualifiers::OCL_Autoreleasing:
+ return asImpl().visitBinAssignAutoreleasing(e);
+
+ case Qualifiers::OCL_Strong:
+ return asImpl().visitBinAssignStrong(e);
+
+ case Qualifiers::OCL_None:
+ return asImpl().visitExpr(e);
+ }
+ llvm_unreachable("bad ObjC ownership qualifier");
+}
+
+/// The default rule for __unsafe_unretained emits the RHS recursively,
+/// stores into the unsafe variable, and propagates the result outward.
+template <typename Impl, typename Result>
+Result ARCExprEmitter<Impl,Result>::
+ visitBinAssignUnsafeUnretained(const BinaryOperator *e) {
+ // Recursively emit the RHS.
+ // For __block safety, do this before emitting the LHS.
+ Result result = asImpl().visit(e->getRHS());
+
+ // Perform the store.
+ LValue lvalue =
+ CGF.EmitCheckedLValue(e->getLHS(), CodeGenFunction::TCK_Store);
+ CGF.EmitStoreThroughLValue(RValue::get(asImpl().getValueOfResult(result)),
+ lvalue);
+
+ return result;
+}
+
+template <typename Impl, typename Result>
+Result
+ARCExprEmitter<Impl,Result>::visitBinAssignAutoreleasing(const BinaryOperator *e) {
+ return asImpl().visitExpr(e);
+}
+
+template <typename Impl, typename Result>
+Result
+ARCExprEmitter<Impl,Result>::visitBinAssignWeak(const BinaryOperator *e) {
+ return asImpl().visitExpr(e);
+}
+
+template <typename Impl, typename Result>
+Result
+ARCExprEmitter<Impl,Result>::visitBinAssignStrong(const BinaryOperator *e) {
+ return asImpl().visitExpr(e);
+}
+
+/// The general expression-emission logic.
+template <typename Impl, typename Result>
+Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) {
// We should *never* see a nested full-expression here, because if
// we fail to emit at +1, our caller must not retain after we close
- // out the full-expression.
+ // out the full-expression. This isn't as important in the unsafe
+ // emitter.
assert(!isa<ExprWithCleanups>(e));
- // The desired result type, if it differs from the type of the
- // ultimate opaque expression.
- llvm::Type *resultType = nullptr;
-
- while (true) {
- e = e->IgnoreParens();
-
- // There's a break at the end of this if-chain; anything
- // that wants to keep looping has to explicitly continue.
- if (const CastExpr *ce = dyn_cast<CastExpr>(e)) {
- switch (ce->getCastKind()) {
- // No-op casts don't change the type, so we just ignore them.
- case CK_NoOp:
- e = ce->getSubExpr();
- continue;
-
- case CK_LValueToRValue: {
- TryEmitResult loadResult
- = tryEmitARCRetainLoadOfScalar(CGF, ce->getSubExpr());
- if (resultType) {
- llvm::Value *value = loadResult.getPointer();
- value = CGF.Builder.CreateBitCast(value, resultType);
- loadResult.setPointer(value);
- }
- return loadResult;
- }
+ // Look through parens, __extension__, generic selection, etc.
+ e = e->IgnoreParens();
- // These casts can change the type, so remember that and
- // soldier on. We only need to remember the outermost such
- // cast, though.
- case CK_CPointerToObjCPointerCast:
- case CK_BlockPointerToObjCPointerCast:
- case CK_AnyPointerToBlockPointerCast:
- case CK_BitCast:
- if (!resultType)
- resultType = CGF.ConvertType(ce->getType());
- e = ce->getSubExpr();
- assert(e->getType()->hasPointerRepresentation());
- continue;
-
- // For consumptions, just emit the subexpression and thus elide
- // the retain/release pair.
- case CK_ARCConsumeObject: {
- llvm::Value *result = CGF.EmitScalarExpr(ce->getSubExpr());
- if (resultType) result = CGF.Builder.CreateBitCast(result, resultType);
- return TryEmitResult(result, true);
- }
+ // Handle certain kinds of casts.
+ if (const CastExpr *ce = dyn_cast<CastExpr>(e)) {
+ return asImpl().visitCastExpr(ce);
- // Block extends are net +0. Naively, we could just recurse on
- // the subexpression, but actually we need to ensure that the
- // value is copied as a block, so there's a little filter here.
- case CK_ARCExtendBlockObject: {
- llvm::Value *result; // will be a +0 value
+ // Handle the comma operator.
+ } else if (auto op = dyn_cast<BinaryOperator>(e)) {
+ return asImpl().visitBinaryOperator(op);
- // If we can't safely assume the sub-expression will produce a
- // block-copied value, emit the sub-expression at +0.
- if (shouldEmitSeparateBlockRetain(ce->getSubExpr())) {
- result = CGF.EmitScalarExpr(ce->getSubExpr());
+ // TODO: handle conditional operators here
- // Otherwise, try to emit the sub-expression at +1 recursively.
- } else {
- TryEmitResult subresult
- = tryEmitARCRetainScalarExpr(CGF, ce->getSubExpr());
- result = subresult.getPointer();
-
- // If that produced a retained value, just use that,
- // possibly casting down.
- if (subresult.getInt()) {
- if (resultType)
- result = CGF.Builder.CreateBitCast(result, resultType);
- return TryEmitResult(result, true);
- }
+ // For calls and message sends, use the retained-call logic.
+ // Delegate inits are a special case in that they're the only
+ // returns-retained expression that *isn't* surrounded by
+ // a consume.
+ } else if (isa<CallExpr>(e) ||
+ (isa<ObjCMessageExpr>(e) &&
+ !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) {
+ return asImpl().visitCall(e);
- // Otherwise it's +0.
- }
+ // Look through pseudo-object expressions.
+ } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) {
+ return asImpl().visitPseudoObjectExpr(pseudo);
+ }
- // Retain the object as a block, then cast down.
- result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true);
- if (resultType) result = CGF.Builder.CreateBitCast(result, resultType);
- return TryEmitResult(result, true);
- }
+ return asImpl().visitExpr(e);
+}
- // For reclaims, emit the subexpression as a retained call and
- // skip the consumption.
- case CK_ARCReclaimReturnedObject: {
- llvm::Value *result = emitARCRetainCall(CGF, ce->getSubExpr());
- if (resultType) result = CGF.Builder.CreateBitCast(result, resultType);
- return TryEmitResult(result, true);
- }
+namespace {
- default:
- break;
- }
+/// An emitter for +1 results.
+struct ARCRetainExprEmitter :
+ public ARCExprEmitter<ARCRetainExprEmitter, TryEmitResult> {
- // Skip __extension__.
- } else if (const UnaryOperator *op = dyn_cast<UnaryOperator>(e)) {
- if (op->getOpcode() == UO_Extension) {
- e = op->getSubExpr();
- continue;
- }
+ ARCRetainExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {}
+
+ llvm::Value *getValueOfResult(TryEmitResult result) {
+ return result.getPointer();
+ }
- // For calls and message sends, use the retained-call logic.
- // Delegate inits are a special case in that they're the only
- // returns-retained expression that *isn't* surrounded by
- // a consume.
- } else if (isa<CallExpr>(e) ||
- (isa<ObjCMessageExpr>(e) &&
- !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) {
- llvm::Value *result = emitARCRetainCall(CGF, e);
- if (resultType) result = CGF.Builder.CreateBitCast(result, resultType);
- return TryEmitResult(result, true);
-
- // Look through pseudo-object expressions.
- } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) {
- TryEmitResult result
- = tryEmitARCRetainPseudoObject(CGF, pseudo);
- if (resultType) {
- llvm::Value *value = result.getPointer();
- value = CGF.Builder.CreateBitCast(value, resultType);
- result.setPointer(value);
+ TryEmitResult emitBitCast(TryEmitResult result, llvm::Type *resultType) {
+ llvm::Value *value = result.getPointer();
+ value = CGF.Builder.CreateBitCast(value, resultType);
+ result.setPointer(value);
+ return result;
+ }
+
+ TryEmitResult visitLValueToRValue(const Expr *e) {
+ return tryEmitARCRetainLoadOfScalar(CGF, e);
+ }
+
+ /// For consumptions, just emit the subexpression and thus elide
+ /// the retain/release pair.
+ TryEmitResult visitConsumeObject(const Expr *e) {
+ llvm::Value *result = CGF.EmitScalarExpr(e);
+ return TryEmitResult(result, true);
+ }
+
+ /// Block extends are net +0. Naively, we could just recurse on
+ /// the subexpression, but actually we need to ensure that the
+ /// value is copied as a block, so there's a little filter here.
+ TryEmitResult visitExtendBlockObject(const Expr *e) {
+ llvm::Value *result; // will be a +0 value
+
+ // If we can't safely assume the sub-expression will produce a
+ // block-copied value, emit the sub-expression at +0.
+ if (shouldEmitSeparateBlockRetain(e)) {
+ result = CGF.EmitScalarExpr(e);
+
+ // Otherwise, try to emit the sub-expression at +1 recursively.
+ } else {
+ TryEmitResult subresult = asImpl().visit(e);
+
+ // If that produced a retained value, just use that.
+ if (subresult.getInt()) {
+ return subresult;
}
- return result;
+
+ // Otherwise it's +0.
+ result = subresult.getPointer();
}
- // Conservatively halt the search at any other expression kind.
- break;
+ // Retain the object as a block.
+ result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true);
+ return TryEmitResult(result, true);
}
- // We didn't find an obvious production, so emit what we've got and
- // tell the caller that we didn't manage to retain.
- llvm::Value *result = CGF.EmitScalarExpr(e);
- if (resultType) result = CGF.Builder.CreateBitCast(result, resultType);
- return TryEmitResult(result, false);
+ /// For reclaims, emit the subexpression as a retained call and
+ /// skip the consumption.
+ TryEmitResult visitReclaimReturnedObject(const Expr *e) {
+ llvm::Value *result = emitARCRetainCallResult(CGF, e);
+ return TryEmitResult(result, true);
+ }
+
+ /// When we have an undecorated call, retroactively do a claim.
+ TryEmitResult visitCall(const Expr *e) {
+ llvm::Value *result = emitARCRetainCallResult(CGF, e);
+ return TryEmitResult(result, true);
+ }
+
+ // TODO: maybe special-case visitBinAssignWeak?
+
+ TryEmitResult visitExpr(const Expr *e) {
+ // We didn't find an obvious production, so emit what we've got and
+ // tell the caller that we didn't manage to retain.
+ llvm::Value *result = CGF.EmitScalarExpr(e);
+ return TryEmitResult(result, false);
+ }
+};
+}
+
+static TryEmitResult
+tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) {
+ return ARCRetainExprEmitter(CGF).visit(e);
}
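
The refactoring above turns the old hand-rolled loop into a CRTP visitor: ARCExprEmitter<Impl,Result> owns the shared dispatch, and each concrete emitter overrides individual visit* hooks and picks its own Result type. A minimal standalone sketch of that pattern, with illustrative names rather than clang's classes:

#include <iostream>

template <typename Impl, typename Result>
struct Emitter {
  Impl &asImpl() { return *static_cast<Impl *>(this); }

  // Shared dispatch lives in the base; the hooks resolve statically, so
  // there is no virtual-call overhead.
  Result visit(int kind) {
    if (kind == 0)
      return asImpl().visitCall();
    return asImpl().visitExpr();
  }
};

struct RetainEmitter : Emitter<RetainEmitter, bool> {
  bool visitCall() { return true; }   // produced a +1 value
  bool visitExpr() { return false; }  // fell back; caller must retain
};

int main() {
  RetainEmitter e;
  std::cout << e.visit(0) << ' ' << e.visit(1) << '\n'; // prints: 1 0
}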
static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF,
@@ -2807,6 +2984,96 @@ llvm::Value *CodeGenFunction::EmitObjCThrowOperand(const Expr *expr) {
return EmitScalarExpr(expr);
}
+namespace {
+
+/// An emitter for assigning into an __unsafe_unretained context.
+struct ARCUnsafeUnretainedExprEmitter :
+ public ARCExprEmitter<ARCUnsafeUnretainedExprEmitter, llvm::Value*> {
+
+ ARCUnsafeUnretainedExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {}
+
+ llvm::Value *getValueOfResult(llvm::Value *value) {
+ return value;
+ }
+
+ llvm::Value *emitBitCast(llvm::Value *value, llvm::Type *resultType) {
+ return CGF.Builder.CreateBitCast(value, resultType);
+ }
+
+ llvm::Value *visitLValueToRValue(const Expr *e) {
+ return CGF.EmitScalarExpr(e);
+ }
+
+ /// For consumptions, just emit the subexpression and perform the
+ /// consumption like normal.
+ llvm::Value *visitConsumeObject(const Expr *e) {
+ llvm::Value *value = CGF.EmitScalarExpr(e);
+ return CGF.EmitObjCConsumeObject(e->getType(), value);
+ }
+
+ /// No special logic for block extensions. (This probably can't
+ /// actually happen in this emitter, though.)
+ llvm::Value *visitExtendBlockObject(const Expr *e) {
+ return CGF.EmitARCExtendBlockObject(e);
+ }
+
+ /// For reclaims, perform an unsafeClaim if that's enabled.
+ llvm::Value *visitReclaimReturnedObject(const Expr *e) {
+ return CGF.EmitARCReclaimReturnedObject(e, /*unsafe*/ true);
+ }
+
+ /// When we have an undecorated call, just emit it without adding
+ /// the unsafeClaim.
+ llvm::Value *visitCall(const Expr *e) {
+ return CGF.EmitScalarExpr(e);
+ }
+
+ /// Just do normal scalar emission in the default case.
+ llvm::Value *visitExpr(const Expr *e) {
+ return CGF.EmitScalarExpr(e);
+ }
+};
+}
+
+static llvm::Value *emitARCUnsafeUnretainedScalarExpr(CodeGenFunction &CGF,
+ const Expr *e) {
+ return ARCUnsafeUnretainedExprEmitter(CGF).visit(e);
+}
+
+/// EmitARCUnsafeUnretainedScalarExpr - Semantically equivalent to
+/// immediately releasing the result of EmitARCRetainScalarExpr, but
+/// avoiding any spurious retains, including by performing reclaims
+/// with objc_unsafeClaimAutoreleasedReturnValue.
+llvm::Value *CodeGenFunction::EmitARCUnsafeUnretainedScalarExpr(const Expr *e) {
+ // Look through full-expressions.
+ if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) {
+ enterFullExpression(cleanups);
+ RunCleanupsScope scope(*this);
+ return emitARCUnsafeUnretainedScalarExpr(*this, cleanups->getSubExpr());
+ }
+
+ return emitARCUnsafeUnretainedScalarExpr(*this, e);
+}
+
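As the comment above says, this path is semantically a retain followed by an immediate release, which is a net no-op on the reference count; the emitter therefore skips the pair entirely (and can use objc_unsafeClaimAutoreleasedReturnValue for reclaims). A toy model of that equivalence in plain C++, illustrative only:

#include <cassert>

struct Obj { int refcount = 1; };

Obj *retainThenRelease(Obj *o) {
  ++o->refcount; // what EmitARCRetainScalarExpr would emit...
  --o->refcount; // ...followed by the caller's immediate release
  return o;
}

Obj *unsafeUnretained(Obj *o) { return o; } // the net effect: no traffic

int main() {
  Obj o;
  assert(retainThenRelease(&o)->refcount == unsafeUnretained(&o)->refcount);
}
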
+std::pair<LValue,llvm::Value*>
+CodeGenFunction::EmitARCStoreUnsafeUnretained(const BinaryOperator *e,
+ bool ignored) {
+ // Evaluate the RHS first. If we're ignoring the result, assume
+ // that we can emit at an unsafe +0.
+ llvm::Value *value;
+ if (ignored) {
+ value = EmitARCUnsafeUnretainedScalarExpr(e->getRHS());
+ } else {
+ value = EmitScalarExpr(e->getRHS());
+ }
+
+ // Emit the LHS and perform the store.
+ LValue lvalue = EmitLValue(e->getLHS());
+ EmitStoreOfScalar(value, lvalue);
+
+ return std::pair<LValue,llvm::Value*>(std::move(lvalue), value);
+}
+
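A hypothetical caller sketch, not taken from this patch, showing how the returned pair is consumed when the assignment's value feeds a chained assignment; CGF and assignExpr are assumed to be in scope in a real codegen context:

// Hypothetical usage; the names here are assumptions, not part of
// this change.
std::pair<LValue, llvm::Value *> P =
    CGF.EmitARCStoreUnsafeUnretained(assignExpr, /*ignored=*/false);
LValue lhs = P.first;          // the stored-to lvalue
llvm::Value *value = P.second; // value of the assignment expression
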
std::pair<LValue,llvm::Value*>
CodeGenFunction::EmitARCStoreStrong(const BinaryOperator *e,
bool ignored) {
@@ -2935,8 +3202,8 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
args.push_back(&srcDecl);
- const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All);
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
@@ -3016,8 +3283,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
args.push_back(&srcDecl);
- const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All);
+ const CGFunctionInfo &FI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
index f0af3e924c09..caafef84c333 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -35,11 +35,9 @@
#include "llvm/Support/Compiler.h"
#include <cstdarg>
-
using namespace clang;
using namespace CodeGen;
-
namespace {
/// Class that lazily initialises the runtime function. Avoids inserting the
/// types and the function declaration into a module if they're not used, and
@@ -161,6 +159,7 @@ protected:
/// runtime provides some LLVM passes that can use this to do things like
/// automatic IMP caching and speculative inlining.
unsigned msgSendMDKind;
+
/// Helper function that generates a constant string and returns a pointer to
/// the start of the string. The result of this function can be used anywhere
/// where the C code specifies const char*.
@@ -170,6 +169,7 @@ protected:
return llvm::ConstantExpr::getGetElementPtr(Array.getElementType(),
Array.getPointer(), Zeros);
}
+
/// Emits a linkonce_odr string, whose name is the prefix followed by the
/// string value. This allows the linker to combine the strings between
/// different modules. Used for EH typeinfo names, selector strings, and a
@@ -186,6 +186,7 @@ protected:
return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
ConstStr, Zeros);
}
+
/// Generates a global structure, initialized by the elements in the vector.
/// The element types must match the types of the structure elements in the
/// first argument.
@@ -201,6 +202,7 @@ protected:
GV->setAlignment(Align.getQuantity());
return GV;
}
+
/// Generates a global array. The vector must contain the same number of
/// elements that the array type declares, of the type specified as the array
/// element type.
@@ -216,6 +218,7 @@ protected:
GV->setAlignment(Align.getQuantity());
return GV;
}
+
/// Generates a global array, inferring the array type from the specified
/// element type and the size of the initialiser.
llvm::GlobalVariable *MakeGlobalArray(llvm::Type *Ty,
@@ -227,6 +230,7 @@ protected:
llvm::ArrayType *ArrayTy = llvm::ArrayType::get(Ty, V.size());
return MakeGlobal(ArrayTy, V, Align, Name, linkage);
}
+
/// Returns a property name and encoding string.
llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD,
const Decl *Container) {
@@ -245,6 +249,7 @@ protected:
}
return MakeConstantString(PD->getNameAsString());
}
+
/// Push the property attributes into two structure fields.
void PushPropertyAttributes(std::vector<llvm::Constant*> &Fields,
ObjCPropertyDecl *property, bool isSynthesized=true, bool
@@ -273,6 +278,7 @@ protected:
Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0));
Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0));
}
+
/// Ensures that the value has the required type, by inserting a bitcast if
/// required. This function lets us avoid inserting bitcasts that are
/// redundant.
@@ -284,12 +290,14 @@ protected:
if (V.getType() == Ty) return V;
return B.CreateBitCast(V, Ty);
}
+
// Some zeros used for GEPs in lots of places.
llvm::Constant *Zeros[2];
/// Null pointer value. Mainly used as a terminator in various arrays.
llvm::Constant *NULLPtr;
/// LLVM context.
llvm::LLVMContext &VMContext;
+
private:
/// Placeholder for the class. Lots of things refer to the class before we've
/// actually emitted it. We use this alias as a placeholder, and then replace
@@ -360,7 +368,6 @@ protected:
LazyRuntimeFunction SyncExitFn;
private:
-
/// Function called if fast enumeration detects that the collection is
/// modified during the update.
LazyRuntimeFunction EnumerationMutationFn;
@@ -385,7 +392,7 @@ private:
/// Objective-C 1 property structures when targeting the GCC runtime or it
/// will abort.
const int ProtocolVersion;
-private:
+
/// Generates an instance variable list structure. This is a structure
/// containing a size and an array of structures containing instance variable
/// metadata. This is used purely for introspection in the fragile ABI. In
@@ -393,6 +400,7 @@ private:
llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
ArrayRef<llvm::Constant *> IvarTypes,
ArrayRef<llvm::Constant *> IvarOffsets);
+
/// Generates a method list structure. This is a structure containing a size
/// and an array of structures containing method metadata.
///
@@ -403,23 +411,28 @@ private:
ArrayRef<Selector> MethodSels,
ArrayRef<llvm::Constant *> MethodTypes,
bool isClassMethodList);
+
/// Emits an empty protocol. This is used for \@protocol() where no protocol
/// is found. The runtime will (hopefully) fix up the pointer to refer to the
/// real protocol.
llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName);
+
/// Generates a list of property metadata structures. This follows the same
/// pattern as method and instance variable metadata lists.
llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID,
SmallVectorImpl<Selector> &InstanceMethodSels,
SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes);
+
/// Generates a list of referenced protocols. Classes, categories, and
/// protocols all use this structure.
llvm::Constant *GenerateProtocolList(ArrayRef<std::string> Protocols);
+
/// To ensure that all protocols are seen by the runtime, we add a category on
/// a class defined in the runtime, declaring no methods, but adopting the
/// protocols. This is a horribly ugly hack, but it allows us to collect all
/// of the protocols without changing the ABI.
void GenerateProtocolHolderCategory();
+
/// Generates a class structure.
llvm::Constant *GenerateClassStructure(
llvm::Constant *MetaClass,
@@ -436,25 +449,31 @@ private:
llvm::Constant *StrongIvarBitmap,
llvm::Constant *WeakIvarBitmap,
bool isMeta=false);
+
/// Generates a method list. This is used by protocols to define the required
/// and optional methods.
llvm::Constant *GenerateProtocolMethodList(
ArrayRef<llvm::Constant *> MethodNames,
ArrayRef<llvm::Constant *> MethodTypes);
+
/// Returns a selector with the specified type encoding. An empty string is
/// used to return an untyped selector (with the types field set to NULL).
llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
const std::string &TypeEncoding);
+
/// Returns the variable used to store the offset of an instance variable.
llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *Ivar);
/// Emits a reference to a class. This allows the linker to object if there
/// is no class of the matching name.
+
protected:
void EmitClassRef(const std::string &className);
+
/// Emits a pointer to the named class
virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF,
const std::string &Name, bool isWeak);
+
/// Looks up the method for sending a message to the specified object. This
/// mechanism differs between the GCC and GNU runtimes, so this method must be
/// overridden in subclasses.
@@ -463,6 +482,7 @@ protected:
llvm::Value *cmd,
llvm::MDNode *node,
MessageSendInfo &MSI) = 0;
+
/// Looks up the method for sending a message to a superclass. This
/// mechanism differs between the GCC and GNU runtimes, so this method must
/// be overridden in subclasses.
@@ -470,6 +490,7 @@ protected:
Address ObjCSuper,
llvm::Value *cmd,
MessageSendInfo &MSI) = 0;
+
/// Libobjc2 uses a bitfield representation where small(ish) bitfields are
/// stored in a 64-bit value with the low bit set to 1 and the remaining 63
/// bits set to their values, LSB first, while larger ones are stored in a
@@ -482,6 +503,7 @@ protected:
/// a bitfield with the 64th bit set will be (int64_t)&{ 2, [0, 1<<31] },
/// while a bitfield with the 63rd bit set will be 1<<64.
llvm::Constant *MakeBitField(ArrayRef<bool> bits);
+
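A standalone sketch, not clang's MakeBitField, of the inline encoding the comment describes: up to 63 entries pack into one word, with the low bit set to 1 as the inline marker and the values stored LSB-first above it:

#include <cassert>
#include <cstdint>
#include <vector>

uint64_t encodeSmallBitField(const std::vector<bool> &bits) {
  assert(bits.size() <= 63 && "larger bitfields use the out-of-line form");
  uint64_t value = 1; // low bit 1 marks the inline representation
  for (size_t i = 0; i < bits.size(); ++i)
    if (bits[i])
      value |= uint64_t(1) << (i + 1); // entry i lands at bit i+1
  return value;
}
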
public:
CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
unsigned protocolClassVersion);
@@ -569,11 +591,12 @@ public:
return NULLPtr;
}
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ llvm::GlobalVariable *GetClassGlobal(StringRef Name,
bool Weak = false) override {
return nullptr;
}
};
+
/// Class representing the legacy GCC Objective-C ABI. This is the default when
/// -fobjc-nonfragile-abi is not specified.
///
@@ -590,6 +613,7 @@ class CGObjCGCC : public CGObjCGNU {
/// structure describing the receiver and the class, and a selector as
/// arguments. Returns the IMP for the corresponding method.
LazyRuntimeFunction MsgLookupSuperFn;
+
protected:
llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver,
llvm::Value *cmd, llvm::MDNode *node,
@@ -602,23 +626,26 @@ protected:
imp->setMetadata(msgSendMDKind, node);
return imp.getInstruction();
}
+
llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper,
llvm::Value *cmd, MessageSendInfo &MSI) override {
- CGBuilderTy &Builder = CGF.Builder;
- llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper,
- PtrToObjCSuperTy).getPointer(), cmd};
- return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs);
- }
- public:
- CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) {
- // IMP objc_msg_lookup(id, SEL);
- MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy,
- nullptr);
- // IMP objc_msg_lookup_super(struct objc_super*, SEL);
- MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
- PtrToObjCSuperTy, SelectorTy, nullptr);
- }
+ CGBuilderTy &Builder = CGF.Builder;
+ llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper,
+ PtrToObjCSuperTy).getPointer(), cmd};
+ return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs);
+ }
+
+public:
+ CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) {
+ // IMP objc_msg_lookup(id, SEL);
+ MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy,
+ nullptr);
+ // IMP objc_msg_lookup_super(struct objc_super*, SEL);
+ MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
+ PtrToObjCSuperTy, SelectorTy, nullptr);
+ }
};
+
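The MsgLookupFn/MsgLookupSuperFn members initialised in the constructor above are LazyRuntimeFunction instances, which defer creating the declaration until first use so unused runtime functions never reach the module. A plain-C++ sketch of that lazy-initialisation idea, with no LLVM types:

#include <functional>
#include <utility>

template <typename FnTy>
class LazyFn {
  std::function<FnTy *()> Make; // how to create the declaration
  FnTy *Cached = nullptr;       // created on first use only
public:
  void init(std::function<FnTy *()> F) {
    Make = std::move(F);
    Cached = nullptr;
  }
  FnTy *get() {
    if (!Cached && Make)
      Cached = Make();
    return Cached;
  }
};
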
/// Class used when targeting the new GNUstep runtime ABI.
class CGObjCGNUstep : public CGObjCGNU {
/// The slot lookup function. Returns a pointer to a cacheable structure
@@ -646,8 +673,10 @@ class CGObjCGNUstep : public CGObjCGNU {
  /// Type of a slot structure pointer. This is returned by the various
/// lookup functions.
llvm::Type *SlotTy;
+
public:
llvm::Constant *GetEHType(QualType T) override;
+
protected:
llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver,
llvm::Value *cmd, llvm::MDNode *node,
@@ -689,6 +718,7 @@ class CGObjCGNUstep : public CGObjCGNU {
Receiver = Builder.CreateLoad(ReceiverPtr, true);
return imp;
}
+
llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper,
llvm::Value *cmd,
MessageSendInfo &MSI) override {
@@ -702,6 +732,7 @@ class CGObjCGNUstep : public CGObjCGNU {
return Builder.CreateAlignedLoad(Builder.CreateStructGEP(nullptr, slot, 4),
CGF.getPointerAlign());
}
+
public:
CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) {
const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
@@ -753,6 +784,7 @@ class CGObjCGNUstep : public CGObjCGNU {
CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy,
PtrTy, PtrTy, nullptr);
}
+
llvm::Constant *GetCppAtomicObjectGetFunction() override {
// The optimised functions were added in version 1.7 of the GNUstep
// runtime.
@@ -760,6 +792,7 @@ class CGObjCGNUstep : public CGObjCGNU {
VersionTuple(1, 7));
return CxxAtomicObjectGetFn;
}
+
llvm::Constant *GetCppAtomicObjectSetFunction() override {
// The optimised functions were added in version 1.7 of the GNUstep
// runtime.
@@ -767,6 +800,7 @@ class CGObjCGNUstep : public CGObjCGNU {
VersionTuple(1, 7));
return CxxAtomicObjectSetFn;
}
+
llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
bool copy) override {
// The optimised property functions omit the GC check, and so are not
@@ -821,32 +855,29 @@ protected:
llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper,
llvm::Value *cmd, MessageSendInfo &MSI) override {
- CGBuilderTy &Builder = CGF.Builder;
- llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper.getPointer(),
- PtrToObjCSuperTy), cmd};
+ CGBuilderTy &Builder = CGF.Builder;
+ llvm::Value *lookupArgs[] = {
+ EnforceType(Builder, ObjCSuper.getPointer(), PtrToObjCSuperTy), cmd,
+ };
- if (CGM.ReturnTypeUsesSRet(MSI.CallInfo))
- return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs);
- else
- return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs);
- }
+ if (CGM.ReturnTypeUsesSRet(MSI.CallInfo))
+ return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs);
+ else
+ return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs);
+ }
- llvm::Value *GetClassNamed(CodeGenFunction &CGF,
- const std::string &Name, bool isWeak) override {
+ llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name,
+ bool isWeak) override {
if (isWeak)
return CGObjCGNU::GetClassNamed(CGF, Name, isWeak);
EmitClassRef(Name);
-
std::string SymbolName = "_OBJC_CLASS_" + Name;
-
llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(SymbolName);
-
if (!ClassSymbol)
ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false,
llvm::GlobalValue::ExternalLinkage,
nullptr, SymbolName);
-
return ClassSymbol;
}
@@ -865,7 +896,6 @@ public:
};
} // end anonymous namespace
-
/// Emits a reference to a dummy variable which is emitted with each class.
/// This ensures that a linker error will be generated when trying to link
/// together modules where a referenced class is not defined.
@@ -1021,8 +1051,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
}
llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF,
- const std::string &Name,
- bool isWeak) {
+ const std::string &Name, bool isWeak) {
llvm::Constant *ClassName = MakeConstantString(Name);
// With the incompatible ABI, this will need to be replaced with a direct
// reference to the class symbol. For the compatible nonfragile ABI we are
@@ -1044,15 +1073,48 @@ llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF,
// techniques can modify the name -> class mapping.
llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF,
const ObjCInterfaceDecl *OID) {
- return GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported());
+ auto *Value =
+ GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) {
+ auto DLLStorage = llvm::GlobalValue::DefaultStorageClass;
+ if (OID->hasAttr<DLLExportAttr>())
+ DLLStorage = llvm::GlobalValue::DLLExportStorageClass;
+ else if (OID->hasAttr<DLLImportAttr>())
+ DLLStorage = llvm::GlobalValue::DLLImportStorageClass;
+ ClassSymbol->setDLLStorageClass(DLLStorage);
+ }
+ }
+ return Value;
}
+
llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) {
- return GetClassNamed(CGF, "NSAutoreleasePool", false);
+ auto *Value = GetClassNamed(CGF, "NSAutoreleasePool", false);
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) {
+ IdentifierInfo &II = CGF.CGM.getContext().Idents.get("NSAutoreleasePool");
+ TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
+ DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+
+ const VarDecl *VD = nullptr;
+ for (const auto &Result : DC->lookup(&II))
+ if ((VD = dyn_cast<VarDecl>(Result)))
+ break;
+
+ auto DLLStorage = llvm::GlobalValue::DefaultStorageClass;
+ if (!VD || VD->hasAttr<DLLImportAttr>())
+ DLLStorage = llvm::GlobalValue::DLLImportStorageClass;
+ else if (VD->hasAttr<DLLExportAttr>())
+ DLLStorage = llvm::GlobalValue::DLLExportStorageClass;
+
+ ClassSymbol->setDLLStorageClass(DLLStorage);
+ }
+ }
+ return Value;
}
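
Both COFF paths above derive a DLL storage class from declaration attributes; a helper-style sketch of that selection (pickDLLStorage is illustrative, not a clang API; note the NSAutoreleasePool path inverts the default and imports when no declaration is found):

#include "clang/AST/Attr.h"
#include "clang/AST/DeclBase.h"
#include "llvm/IR/GlobalValue.h"

// Illustrative helper, not part of this patch.
static llvm::GlobalValue::DLLStorageClassTypes
pickDLLStorage(const clang::Decl *D) {
  if (D->hasAttr<clang::DLLExportAttr>())
    return llvm::GlobalValue::DLLExportStorageClass;
  if (D->hasAttr<clang::DLLImportAttr>())
    return llvm::GlobalValue::DLLImportStorageClass;
  return llvm::GlobalValue::DefaultStorageClass;
}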
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
const std::string &TypeEncoding) {
-
SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel];
llvm::GlobalAlias *SelValue = nullptr;
@@ -1247,8 +1309,6 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
}
llvm::Value *cmd = GetSelector(CGF, Sel);
-
-
CallArgList ActualArgs;
ActualArgs.add(RValue::get(EnforceType(Builder, Receiver, IdTy)), ASTIdTy);
@@ -1497,21 +1557,17 @@ GenerateMethodList(StringRef ClassName,
    IMPTy, // Method pointer
nullptr);
std::vector<llvm::Constant*> Methods;
- std::vector<llvm::Constant*> Elements;
for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) {
- Elements.clear();
llvm::Constant *Method =
TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName,
MethodSels[i],
isClassMethodList));
assert(Method && "Can't generate metadata for method that doesn't exist");
llvm::Constant *C = MakeConstantString(MethodSels[i].getAsString());
- Elements.push_back(C);
- Elements.push_back(MethodTypes[i]);
Method = llvm::ConstantExpr::getBitCast(Method,
IMPTy);
- Elements.push_back(Method);
- Methods.push_back(llvm::ConstantStruct::get(ObjCMethodTy, Elements));
+ Methods.push_back(
+ llvm::ConstantStruct::get(ObjCMethodTy, {C, MethodTypes[i], Method}));
}
// Array of method structures
@@ -1554,23 +1610,18 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames,
IntTy,
nullptr);
std::vector<llvm::Constant*> Ivars;
- std::vector<llvm::Constant*> Elements;
for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) {
- Elements.clear();
- Elements.push_back(IvarNames[i]);
- Elements.push_back(IvarTypes[i]);
- Elements.push_back(IvarOffsets[i]);
- Ivars.push_back(llvm::ConstantStruct::get(ObjCIvarTy, Elements));
+ Ivars.push_back(llvm::ConstantStruct::get(
+ ObjCIvarTy, {IvarNames[i], IvarTypes[i], IvarOffsets[i]}));
}
// Array of method structures
llvm::ArrayType *ObjCIvarArrayTy = llvm::ArrayType::get(ObjCIvarTy,
IvarNames.size());
-
- Elements.clear();
- Elements.push_back(llvm::ConstantInt::get(IntTy, (int)IvarNames.size()));
- Elements.push_back(llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars));
+ llvm::Constant *Elements[] = {
+ llvm::ConstantInt::get(IntTy, (int)IvarNames.size()),
+ llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)};
// Structure containing array and array count
llvm::StructType *ObjCIvarListTy = llvm::StructType::get(IntTy,
ObjCIvarArrayTy,
@@ -1682,12 +1733,9 @@ GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames,
PtrToInt8Ty,
nullptr);
std::vector<llvm::Constant*> Methods;
- std::vector<llvm::Constant*> Elements;
for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) {
- Elements.clear();
- Elements.push_back(MethodNames[i]);
- Elements.push_back(MethodTypes[i]);
- Methods.push_back(llvm::ConstantStruct::get(ObjCMethodDescTy, Elements));
+ Methods.push_back(llvm::ConstantStruct::get(
+ ObjCMethodDescTy, {MethodNames[i], MethodTypes[i]}));
}
llvm::ArrayType *ObjCMethodArrayTy = llvm::ArrayType::get(ObjCMethodDescTy,
MethodNames.size());
@@ -1762,17 +1810,13 @@ llvm::Constant *CGObjCGNU::GenerateEmptyProtocol(
MethodList->getType(),
MethodList->getType(),
nullptr);
- std::vector<llvm::Constant*> Elements;
// The isa pointer must be set to a magic number so the runtime knows it's
// the correct layout.
- Elements.push_back(llvm::ConstantExpr::getIntToPtr(
- llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));
- Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name"));
- Elements.push_back(ProtocolList);
- Elements.push_back(MethodList);
- Elements.push_back(MethodList);
- Elements.push_back(MethodList);
- Elements.push_back(MethodList);
+ llvm::Constant *Elements[] = {
+ llvm::ConstantExpr::getIntToPtr(
+ llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy),
+ MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList,
+ MethodList, MethodList, MethodList, MethodList};
return MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(),
".objc_protocol");
}
@@ -1849,7 +1893,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
  // Add all of the property methods that need adding to the method list and
  // to the property metadata list.
- for (auto *property : PD->properties()) {
+ for (auto *property : PD->instance_properties()) {
std::vector<llvm::Constant*> Fields;
Fields.push_back(MakePropertyEncodingString(property, nullptr));
@@ -1920,19 +1964,14 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
PropertyList->getType(),
OptionalPropertyList->getType(),
nullptr);
- std::vector<llvm::Constant*> Elements;
// The isa pointer must be set to a magic number so the runtime knows it's
// the correct layout.
- Elements.push_back(llvm::ConstantExpr::getIntToPtr(
- llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy));
- Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name"));
- Elements.push_back(ProtocolList);
- Elements.push_back(InstanceMethodList);
- Elements.push_back(ClassMethodList);
- Elements.push_back(OptionalInstanceMethodList);
- Elements.push_back(OptionalClassMethodList);
- Elements.push_back(PropertyList);
- Elements.push_back(OptionalPropertyList);
+ llvm::Constant *Elements[] = {
+ llvm::ConstantExpr::getIntToPtr(
+ llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy),
+ MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList,
+ InstanceMethodList, ClassMethodList, OptionalInstanceMethodList,
+ OptionalClassMethodList, PropertyList, OptionalPropertyList};
ExistingProtocols[ProtocolName] =
llvm::ConstantExpr::getBitCast(MakeGlobal(ProtocolTy, Elements,
CGM.getPointerAlign(), ".objc_protocol"), IdTy);
@@ -2058,20 +2097,20 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
E = Protos.end(); I != E; ++I)
Protocols.push_back((*I)->getNameAsString());
- std::vector<llvm::Constant*> Elements;
- Elements.push_back(MakeConstantString(CategoryName));
- Elements.push_back(MakeConstantString(ClassName));
- // Instance method list
- Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList(
- ClassName, CategoryName, InstanceMethodSels, InstanceMethodTypes,
- false), PtrTy));
- // Class method list
- Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList(
- ClassName, CategoryName, ClassMethodSels, ClassMethodTypes, true),
- PtrTy));
- // Protocol list
- Elements.push_back(llvm::ConstantExpr::getBitCast(
- GenerateProtocolList(Protocols), PtrTy));
+ llvm::Constant *Elements[] = {
+ MakeConstantString(CategoryName), MakeConstantString(ClassName),
+ // Instance method list
+ llvm::ConstantExpr::getBitCast(
+ GenerateMethodList(ClassName, CategoryName, InstanceMethodSels,
+ InstanceMethodTypes, false),
+ PtrTy),
+ // Class method list
+ llvm::ConstantExpr::getBitCast(GenerateMethodList(ClassName, CategoryName,
+ ClassMethodSels,
+ ClassMethodTypes, true),
+ PtrTy),
+ // Protocol list
+ llvm::ConstantExpr::getBitCast(GenerateProtocolList(Protocols), PtrTy)};
Categories.push_back(llvm::ConstantExpr::getBitCast(
MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty,
PtrTy, PtrTy, PtrTy, nullptr), Elements, CGM.getPointerAlign()),
@@ -2167,18 +2206,19 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
// Get the class name
ObjCInterfaceDecl *ClassDecl =
- const_cast<ObjCInterfaceDecl *>(OID->getClassInterface());
+ const_cast<ObjCInterfaceDecl *>(OID->getClassInterface());
std::string ClassName = ClassDecl->getNameAsString();
+
// Emit the symbol that is used to generate linker errors if this class is
// referenced in other modules but not declared.
std::string classSymbolName = "__objc_class_name_" + ClassName;
- if (llvm::GlobalVariable *symbol =
- TheModule.getGlobalVariable(classSymbolName)) {
+ if (auto *symbol = TheModule.getGlobalVariable(classSymbolName)) {
symbol->setInitializer(llvm::ConstantInt::get(LongTy, 0));
} else {
new llvm::GlobalVariable(TheModule, LongTy, false,
- llvm::GlobalValue::ExternalLinkage, llvm::ConstantInt::get(LongTy, 0),
- classSymbolName);
+ llvm::GlobalValue::ExternalLinkage,
+ llvm::ConstantInt::get(LongTy, 0),
+ classSymbolName);
}
// Get the size of instances.
@@ -2256,7 +2296,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
MakeGlobalArray(PtrToIntTy, IvarOffsetValues, CGM.getPointerAlign(),
".ivar.offsets");
-
// Collect information about instance methods
SmallVector<Selector, 16> InstanceMethodSels;
SmallVector<llvm::Constant*, 16> InstanceMethodTypes;
@@ -2270,7 +2309,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
llvm::Constant *Properties = GeneratePropertyList(OID, InstanceMethodSels,
InstanceMethodTypes);
-
// Collect information about class methods
SmallVector<Selector, 16> ClassMethodSels;
SmallVector<llvm::Constant*, 16> ClassMethodTypes;
@@ -2343,19 +2381,35 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
++ivarIndex;
}
llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0);
+
  // Generate metaclass for class methods
- llvm::Constant *MetaClassStruct = GenerateClassStructure(NULLPtr,
- NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], GenerateIvarList(
- empty, empty, empty), ClassMethodList, NULLPtr,
- NULLPtr, NULLPtr, ZeroPtr, ZeroPtr, true);
+ llvm::Constant *MetaClassStruct = GenerateClassStructure(
+ NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0],
+ GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr,
+ NULLPtr, ZeroPtr, ZeroPtr, true);
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ auto Storage = llvm::GlobalValue::DefaultStorageClass;
+ if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
+ Storage = llvm::GlobalValue::DLLImportStorageClass;
+ else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
+ Storage = llvm::GlobalValue::DLLExportStorageClass;
+ cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage);
+ }
// Generate the class structure
- llvm::Constant *ClassStruct =
- GenerateClassStructure(MetaClassStruct, SuperClass, 0x11L,
- ClassName.c_str(), nullptr,
- llvm::ConstantInt::get(LongTy, instanceSize), IvarList,
- MethodList, GenerateProtocolList(Protocols), IvarOffsetArray,
- Properties, StrongIvarBitmap, WeakIvarBitmap);
+ llvm::Constant *ClassStruct = GenerateClassStructure(
+ MetaClassStruct, SuperClass, 0x11L, ClassName.c_str(), nullptr,
+ llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList,
+ GenerateProtocolList(Protocols), IvarOffsetArray, Properties,
+ StrongIvarBitmap, WeakIvarBitmap);
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ auto Storage = llvm::GlobalValue::DefaultStorageClass;
+ if (OID->getClassInterface()->hasAttr<DLLImportAttr>())
+ Storage = llvm::GlobalValue::DLLImportStorageClass;
+ else if (OID->getClassInterface()->hasAttr<DLLExportAttr>())
+ Storage = llvm::GlobalValue::DLLExportStorageClass;
+ cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage);
+ }
// Resolve the class aliases, if they exist.
if (ClassPtrAlias) {
@@ -2376,7 +2430,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
Classes.push_back(ClassStruct);
}
-
llvm::Function *CGObjCGNU::ModuleInitFunction() {
// Only emit an ObjC load function if no Objective-C stuff has been called
if (Classes.empty() && Categories.empty() && ConstantStrings.empty() &&
@@ -2651,12 +2704,15 @@ llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic,
llvm::Constant *CGObjCGNU::GetGetStructFunction() {
return GetStructPropertyFn;
}
+
llvm::Constant *CGObjCGNU::GetSetStructFunction() {
return SetStructPropertyFn;
}
+
llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() {
return nullptr;
}
+
llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() {
return nullptr;
}
@@ -2685,7 +2741,6 @@ void CGObjCGNU::EmitTryStmt(CodeGenFunction &CGF,
// In Objective-C++ mode, we actually emit something equivalent to the C++
// exception handler.
EmitTryCatchStmt(CGF, S, EnterCatchFn, ExitCatchFn, ExceptionReThrowFn);
- return ;
}
void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF,
@@ -2800,7 +2855,7 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
// to replace it with the real version for a library. In non-PIC code you
// must compile with the fragile ABI if you want to use ivars from a
// GCC-compiled class.
- if (CGM.getLangOpts().PICLevel || CGM.getLangOpts().PIELevel) {
+ if (CGM.getLangOpts().PICLevel) {
llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule,
Int32Ty, false,
llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess");
@@ -2848,7 +2903,12 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF,
const ObjCIvarDecl *Ivar) {
if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) {
Interface = FindIvarInterface(CGM.getContext(), Interface, Ivar);
- if (RuntimeVersion < 10)
+
+ // The MSVC linker cannot have a single global defined as LinkOnceAnyLinkage
+ // and ExternalLinkage, so create a reference to the ivar global and rely on
+ // the definition being created as part of GenerateClass.
+ if (RuntimeVersion < 10 ||
+ CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment())
return CGF.Builder.CreateZExtOrBitCast(
CGF.Builder.CreateDefaultAlignedLoad(CGF.Builder.CreateAlignedLoad(
ObjCIvarOffsetVariable(Interface, Ivar),
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp
index e30b2875f209..5ab9fc4f9710 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp
@@ -236,17 +236,14 @@ public:
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
// id objc_getProperty (id, SEL, ptrdiff_t, bool)
- SmallVector<CanQualType,4> Params;
CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType());
CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType());
- Params.push_back(IdType);
- Params.push_back(SelType);
- Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
- Params.push_back(Ctx.BoolTy);
+ CanQualType Params[] = {
+ IdType, SelType,
+ Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), Ctx.BoolTy};
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
- IdType, false, false, Params, FunctionType::ExtInfo(),
- RequiredArgs::All));
+ Types.GetFunctionType(
+ Types.arrangeBuiltinFunctionDeclaration(IdType, Params));
return CGM.CreateRuntimeFunction(FTy, "objc_getProperty");
}
@@ -254,19 +251,18 @@ public:
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
// void objc_setProperty (id, SEL, ptrdiff_t, id, bool, bool)
- SmallVector<CanQualType,6> Params;
CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType());
CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType());
- Params.push_back(IdType);
- Params.push_back(SelType);
- Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
- Params.push_back(IdType);
- Params.push_back(Ctx.BoolTy);
- Params.push_back(Ctx.BoolTy);
+ CanQualType Params[] = {
+ IdType,
+ SelType,
+ Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(),
+ IdType,
+ Ctx.BoolTy,
+ Ctx.BoolTy};
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
- Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
- RequiredArgs::All));
+ Types.GetFunctionType(
+ Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params));
return CGM.CreateRuntimeFunction(FTy, "objc_setProperty");
}
@@ -290,9 +286,8 @@ public:
Params.push_back(IdType);
Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
- Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
- RequiredArgs::All));
+ Types.GetFunctionType(
+ Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params));
const char *name;
if (atomic && copy)
name = "objc_setProperty_atomic_copy";
@@ -317,9 +312,8 @@ public:
Params.push_back(Ctx.BoolTy);
Params.push_back(Ctx.BoolTy);
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
- Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
- RequiredArgs::All));
+ Types.GetFunctionType(
+ Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params));
return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct");
}
@@ -336,10 +330,8 @@ public:
Params.push_back(Ctx.VoidPtrTy);
Params.push_back(Ctx.VoidPtrTy);
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, false,
- Params,
- FunctionType::ExtInfo(),
- RequiredArgs::All));
+ Types.GetFunctionType(
+ Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params));
return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic");
}
@@ -350,12 +342,25 @@ public:
SmallVector<CanQualType,1> Params;
Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType()));
llvm::FunctionType *FTy =
- Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
- Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
- RequiredArgs::All));
+ Types.GetFunctionType(
+ Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params));
return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation");
}
+ llvm::Constant *getLookUpClassFn() {
+ CodeGen::CodeGenTypes &Types = CGM.getTypes();
+ ASTContext &Ctx = CGM.getContext();
+ // Class objc_lookUpClass (const char *)
+ SmallVector<CanQualType,1> Params;
+ Params.push_back(
+ Ctx.getCanonicalType(Ctx.getPointerType(Ctx.CharTy.withConst())));
+ llvm::FunctionType *FTy =
+ Types.GetFunctionType(Types.arrangeBuiltinFunctionDeclaration(
+ Ctx.getCanonicalType(Ctx.getObjCClassType()),
+ Params));
+ return CGM.CreateRuntimeFunction(FTy, "objc_lookUpClass");
+ }
+
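For reference, the runtime entry point getLookUpClassFn models has this C-level shape; this is a declaration sketch, the real prototype lives in the Objective-C runtime headers:

extern "C" {
typedef struct objc_class *Class;
// Returns the named class, or a null Class if none is registered.
Class objc_lookUpClass(const char *name);
}
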
/// GcReadWeakFn -- LLVM objc_read_weak (id *src) function.
llvm::Constant *getGcReadWeakFn() {
// id objc_read_weak (id *)
@@ -576,7 +581,6 @@ public:
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(CGM.Int32Ty, params, false),
"objc_exception_match");
-
}
/// SetJmpFn - LLVM _setjmp function.
@@ -600,7 +604,6 @@ public:
/// modern abi
class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper {
public:
-
// MethodListnfABITy - LLVM for struct _method_list_t
llvm::StructType *MethodListnfABITy;
@@ -967,7 +970,8 @@ protected:
llvm::Constant *EmitPropertyList(Twine Name,
const Decl *Container,
const ObjCContainerDecl *OCD,
- const ObjCCommonTypesHelper &ObjCTypes);
+ const ObjCCommonTypesHelper &ObjCTypes,
+ bool IsClassProperty);
/// EmitProtocolMethodTypes - Generate the array of extended method type
/// strings. The return value has type Int8PtrPtrTy.
@@ -981,13 +985,20 @@ protected:
SmallVectorImpl<llvm::Constant*> &Properties,
const Decl *Container,
const ObjCProtocolDecl *Proto,
- const ObjCCommonTypesHelper &ObjCTypes);
+ const ObjCCommonTypesHelper &ObjCTypes,
+ bool IsClassProperty);
/// GetProtocolRef - Return a reference to the internal protocol
/// description, creating an empty one if it has not been
/// defined. The return value has type ProtocolPtrTy.
llvm::Constant *GetProtocolRef(const ObjCProtocolDecl *PD);
+ /// Return a reference to the given Class using runtime calls rather than
+ /// by a symbol reference.
+ llvm::Value *EmitClassRefViaRuntime(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *ID,
+ ObjCCommonTypesHelper &ObjCTypes);
+
public:
/// CreateMetadataVar - Create a global variable with internal
/// linkage for use by the Objective-C runtime.
@@ -1079,7 +1090,8 @@ private:
/// has type ClassExtensionPtrTy.
llvm::Constant *EmitClassExtension(const ObjCImplementationDecl *ID,
CharUnits instanceSize,
- bool hasMRCWeakIvars);
+ bool hasMRCWeakIvars,
+ bool isClassProperty);
/// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy,
/// for the given class.
@@ -1119,9 +1131,8 @@ private:
/// EmitMethodList - Emit the method list for the given
/// implementation. The return value has type MethodListPtrTy.
- llvm::Constant *EmitMethodList(Twine Name,
- const char *Section,
- ArrayRef<llvm::Constant*> Methods);
+ llvm::Constant *EmitMethodList(Twine Name, StringRef Section,
+ ArrayRef<llvm::Constant *> Methods);
/// EmitMethodDescList - Emit a method description list for a list of
/// method declarations.
@@ -1134,9 +1145,8 @@ private:
/// - begin, end: The method list to output.
///
/// The return value has type MethodDescriptionListPtrTy.
- llvm::Constant *EmitMethodDescList(Twine Name,
- const char *Section,
- ArrayRef<llvm::Constant*> Methods);
+ llvm::Constant *EmitMethodDescList(Twine Name, StringRef Section,
+ ArrayRef<llvm::Constant *> Methods);
/// GetOrEmitProtocol - Get the protocol object for the given
/// declaration, emitting it if necessary. The return value has type
@@ -1255,7 +1265,7 @@ public:
/// GetClassGlobal - Return the global variable for the Objective-C
/// class of the given name.
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ llvm::GlobalVariable *GetClassGlobal(StringRef Name,
bool Weak = false) override {
llvm_unreachable("CGObjCMac::GetClassGlobal");
}
@@ -1293,9 +1303,8 @@ private:
/// AddModuleClassList - Add the given list of class pointers to the
/// module with the provided symbol and section names.
- void AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container,
- const char *SymbolName,
- const char *SectionName);
+ void AddModuleClassList(ArrayRef<llvm::GlobalValue *> Container,
+ StringRef SymbolName, StringRef SectionName);
llvm::GlobalVariable * BuildClassRoTInitializer(unsigned flags,
unsigned InstanceStart,
@@ -1314,9 +1323,8 @@ private:
/// EmitMethodList - Emit the method list for the given
/// implementation. The return value has type MethodListnfABITy.
- llvm::Constant *EmitMethodList(Twine Name,
- const char *Section,
- ArrayRef<llvm::Constant*> Methods);
+ llvm::Constant *EmitMethodList(Twine Name, StringRef Section,
+ ArrayRef<llvm::Constant *> Methods);
/// EmitIvarList - Emit the ivar list for the given
/// implementation. If ForClass is true the list of class ivars
/// (i.e. metaclass ivars) is emitted, otherwise the list of
@@ -1357,7 +1365,7 @@ private:
/// GetClassGlobal - Return the global variable for the Objective-C
/// class of the given name.
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ llvm::GlobalVariable *GetClassGlobal(StringRef Name,
bool Weak = false) override;
/// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy,
@@ -1398,13 +1406,9 @@ private:
llvm::Constant *GetInterfaceEHType(const ObjCInterfaceDecl *ID,
bool ForDefinition);
- const char *getMetaclassSymbolPrefix() const {
- return "OBJC_METACLASS_$_";
- }
+ StringRef getMetaclassSymbolPrefix() const { return "OBJC_METACLASS_$_"; }
- const char *getClassSymbolPrefix() const {
- return "OBJC_CLASS_$_";
- }
+ StringRef getClassSymbolPrefix() const { return "OBJC_CLASS_$_"; }
void GetClassSizeInfo(const ObjCImplementationDecl *OID,
uint32_t &InstanceStart,
@@ -1506,12 +1510,15 @@ public:
llvm::Constant *GetSetStructFunction() override {
return ObjCTypes.getCopyStructFn();
}
+
llvm::Constant *GetGetStructFunction() override {
return ObjCTypes.getCopyStructFn();
}
+
llvm::Constant *GetCppAtomicObjectSetFunction() override {
return ObjCTypes.getCppAtomicObjectFunction();
}
+
llvm::Constant *GetCppAtomicObjectGetFunction() override {
return ObjCTypes.getCppAtomicObjectFunction();
}
@@ -1934,7 +1941,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
// Emit a null-check if there's a consumed argument other than the receiver.
bool RequiresNullCheck = false;
if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) {
- for (const auto *ParamDecl : Method->params()) {
+ for (const auto *ParamDecl : Method->parameters()) {
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
if (!nullReturn.NullBB)
nullReturn.init(CGF, Arg0);
@@ -2027,6 +2034,7 @@ namespace {
bool IsDisordered = false;
llvm::SmallVector<IvarInfo, 8> IvarsInfo;
+
public:
IvarLayoutBuilder(CodeGenModule &CGM, CharUnits instanceBegin,
CharUnits instanceEnd, bool forStrongLayout)
@@ -2062,7 +2070,7 @@ namespace {
printf("\n");
}
};
-}
+} // end anonymous namespace
llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM,
const CGBlockInfo &blockInfo) {
@@ -2141,7 +2149,6 @@ void IvarLayoutBuilder::visitBlock(const CGBlockInfo &blockInfo) {
}
}
-
/// getBlockCaptureLifetime - This routine returns the lifetime of the captured
/// block variable for the purpose of block layout metadata generation. FQT is
/// the type of the variable captured in the block.
@@ -2629,7 +2636,6 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
return getBitmapBlockLayout(false);
}
-
llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) {
assert(CGM.getLangOpts().getGC() == LangOptions::NonGC);
@@ -2677,6 +2683,25 @@ llvm::Constant *CGObjCCommonMac::GetProtocolRef(const ObjCProtocolDecl *PD) {
return GetOrEmitProtocolRef(PD);
}
+llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime(
+ CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *ID,
+ ObjCCommonTypesHelper &ObjCTypes) {
+ llvm::Constant *lookUpClassFn = ObjCTypes.getLookUpClassFn();
+
+ llvm::Value *className =
+ CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString())
+ .getPointer();
+ ASTContext &ctx = CGF.CGM.getContext();
+ className =
+ CGF.Builder.CreateBitCast(className,
+ CGF.ConvertType(
+ ctx.getPointerType(ctx.CharTy.withConst())));
+ llvm::CallInst *call = CGF.Builder.CreateCall(lookUpClassFn, className);
+ call->setDoesNotThrow();
+ return call;
+}
+
/*
// Objective-C 1.0 extensions
struct _objc_protocol {
@@ -2798,6 +2823,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) {
struct objc_method_description_list *optional_class_methods;
struct objc_property_list *instance_properties;
const char ** extendedMethodTypes;
+ struct objc_property_list *class_properties;
};
*/
llvm::Constant *
@@ -2816,13 +2842,16 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD,
"__OBJC,__cat_cls_meth,regular,no_dead_strip",
OptClassMethods),
EmitPropertyList("OBJC_$_PROP_PROTO_LIST_" + PD->getName(), nullptr, PD,
- ObjCTypes),
+ ObjCTypes, false),
EmitProtocolMethodTypes("OBJC_PROTOCOL_METHOD_TYPES_" + PD->getName(),
- MethodTypesExt, ObjCTypes)};
+ MethodTypesExt, ObjCTypes),
+ EmitPropertyList("OBJC_$_CLASS_PROP_PROTO_LIST_" + PD->getName(), nullptr,
+ PD, ObjCTypes, true)};
// Return null if no extension bits are used.
if (Values[1]->isNullValue() && Values[2]->isNullValue() &&
- Values[3]->isNullValue() && Values[4]->isNullValue())
+ Values[3]->isNullValue() && Values[4]->isNullValue() &&
+ Values[5]->isNullValue())
return llvm::Constant::getNullValue(ObjCTypes.ProtocolExtensionPtrTy);
llvm::Constant *Init =
@@ -2878,10 +2907,15 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet,
SmallVectorImpl<llvm::Constant *> &Properties,
const Decl *Container,
const ObjCProtocolDecl *Proto,
- const ObjCCommonTypesHelper &ObjCTypes) {
+ const ObjCCommonTypesHelper &ObjCTypes,
+ bool IsClassProperty) {
for (const auto *P : Proto->protocols())
- PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes);
+ PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes,
+ IsClassProperty);
+
for (const auto *PD : Proto->properties()) {
+ if (IsClassProperty != PD->isClassProperty())
+ continue;
if (!PropertySet.insert(PD->getIdentifier()).second)
continue;
llvm::Constant *Prop[] = {
@@ -2907,7 +2941,17 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet,
llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
const Decl *Container,
const ObjCContainerDecl *OCD,
- const ObjCCommonTypesHelper &ObjCTypes) {
+ const ObjCCommonTypesHelper &ObjCTypes,
+ bool IsClassProperty) {
+ if (IsClassProperty) {
+ // Make this entry NULL for OS X with deployment target < 10.11 and for iOS
+ // with deployment target < 9.0.
+ const llvm::Triple &Triple = CGM.getTarget().getTriple();
+ if ((Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 11)) ||
+ (Triple.isiOS() && Triple.isOSVersionLT(9)))
+ return llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
+ }
+
SmallVector<llvm::Constant *, 16> Properties;
llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet;
@@ -2919,10 +2963,15 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD))
for (const ObjCCategoryDecl *ClassExt : OID->known_extensions())
for (auto *PD : ClassExt->properties()) {
+ if (IsClassProperty != PD->isClassProperty())
+ continue;
PropertySet.insert(PD->getIdentifier());
AddProperty(PD);
}
+
for (const auto *PD : OCD->properties()) {
+ if (IsClassProperty != PD->isClassProperty())
+ continue;
// Don't emit duplicate metadata for properties that were already in a
// class extension.
if (!PropertySet.insert(PD->getIdentifier()).second)
@@ -2932,11 +2981,13 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) {
for (const auto *P : OID->all_referenced_protocols())
- PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes);
+ PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes,
+ IsClassProperty);
}
else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) {
for (const auto *P : CD->protocols())
- PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes);
+ PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes,
+ IsClassProperty);
}
// Return null for empty list.
@@ -3001,8 +3052,8 @@ CGObjCMac::GetMethodDescriptionConstant(const ObjCMethodDecl *MD) {
}
llvm::Constant *
-CGObjCMac::EmitMethodDescList(Twine Name, const char *Section,
- ArrayRef<llvm::Constant*> Methods) {
+CGObjCMac::EmitMethodDescList(Twine Name, StringRef Section,
+ ArrayRef<llvm::Constant *> Methods) {
// Return null for empty list.
if (Methods.empty())
return llvm::Constant::getNullValue(ObjCTypes.MethodDescriptionListPtrTy);
@@ -3029,6 +3080,7 @@ CGObjCMac::EmitMethodDescList(Twine Name, const char *Section,
struct _objc_protocol_list *protocols;
uint32_t size; // <rdar://4585769>
struct _objc_property_list *instance_properties;
+ struct _objc_property_list *class_properties;
};
*/
void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
@@ -3055,7 +3107,7 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
// Class methods should always be defined.
ClassMethods.push_back(GetMethodConstant(I));
- llvm::Constant *Values[7];
+ llvm::Constant *Values[8];
Values[0] = GetClassName(OCD->getName());
Values[1] = GetClassName(Interface->getObjCRuntimeNameAsString());
LazySymbols.insert(Interface->getIdentifier());
@@ -3077,9 +3129,12 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
// If there is no category @interface then there can be no properties.
if (Category) {
Values[6] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(),
- OCD, Category, ObjCTypes);
+ OCD, Category, ObjCTypes, false);
+ Values[7] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(),
+ OCD, Category, ObjCTypes, true);
} else {
Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
+ Values[7] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
}
llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategoryTy,
@@ -3274,7 +3329,8 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) {
Values[ 8] = llvm::Constant::getNullValue(ObjCTypes.CachePtrTy);
Values[ 9] = Protocols;
Values[10] = BuildStrongIvarLayout(ID, CharUnits::Zero(), Size);
- Values[11] = EmitClassExtension(ID, Size, hasMRCWeak);
+ Values[11] = EmitClassExtension(ID, Size, hasMRCWeak,
+ false/*isClassProperty*/);
llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy,
Values);
std::string Name("OBJC_CLASS_");
@@ -3338,8 +3394,9 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID,
Values[ 9] = Protocols;
// ivar_layout for metaclass is always NULL.
Values[10] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy);
- // The class extension is always unused for metaclasses.
- Values[11] = llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy);
+ // The class extension is used to store class properties for metaclasses.
+ Values[11] = EmitClassExtension(ID, CharUnits::Zero(), false/*hasMRCWeak*/,
+ true/*isClassProperty*/);
llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy,
Values);
@@ -3413,19 +3470,28 @@ llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) {
*/
llvm::Constant *
CGObjCMac::EmitClassExtension(const ObjCImplementationDecl *ID,
- CharUnits InstanceSize, bool hasMRCWeakIvars) {
+ CharUnits InstanceSize, bool hasMRCWeakIvars,
+ bool isClassProperty) {
uint64_t Size =
CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassExtensionTy);
llvm::Constant *Values[3];
Values[0] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size);
- Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize,
- hasMRCWeakIvars);
- Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(),
- ID, ID->getClassInterface(), ObjCTypes);
+ if (isClassProperty) {
+ llvm::Type *PtrTy = CGM.Int8PtrTy;
+ Values[1] = llvm::Constant::getNullValue(PtrTy);
+ } else
+ Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize,
+ hasMRCWeakIvars);
+ if (isClassProperty)
+ Values[2] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getName(),
+ ID, ID->getClassInterface(), ObjCTypes, true);
+ else
+ Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(),
+ ID, ID->getClassInterface(), ObjCTypes, false);
// Return null if no extension bits are used.
- if (Values[1]->isNullValue() && Values[2]->isNullValue())
+ if ((!Values[1] || Values[1]->isNullValue()) && Values[2]->isNullValue())
return llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy);
llvm::Constant *Init =
@@ -3530,9 +3596,8 @@ llvm::Constant *CGObjCMac::GetMethodConstant(const ObjCMethodDecl *MD) {
return llvm::ConstantStruct::get(ObjCTypes.MethodTy, Method);
}
-llvm::Constant *CGObjCMac::EmitMethodList(Twine Name,
- const char *Section,
- ArrayRef<llvm::Constant*> Methods) {
+llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, StringRef Section,
+ ArrayRef<llvm::Constant *> Methods) {
// Return null for empty list.
if (Methods.empty())
return llvm::Constant::getNullValue(ObjCTypes.MethodListPtrTy);
@@ -3607,6 +3672,7 @@ llvm::Constant *CGObjCMac::GetOptimizedPropertySetFunction(bool atomic,
llvm::Constant *CGObjCMac::GetGetStructFunction() {
return ObjCTypes.getCopyStructFn();
}
+
llvm::Constant *CGObjCMac::GetSetStructFunction() {
return ObjCTypes.getCopyStructFn();
}
@@ -3614,6 +3680,7 @@ llvm::Constant *CGObjCMac::GetSetStructFunction() {
llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() {
return ObjCTypes.getCppAtomicObjectFunction();
}
+
llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() {
return ObjCTypes.getCppAtomicObjectFunction();
}
@@ -3711,7 +3778,7 @@ namespace {
void emitWriteHazard();
void emitHazardsInNewBlocks();
};
-}
+} // end anonymous namespace
/// Create the fragile-ABI read and write hazards based on the current
/// state of the function, which is presumed to be immediately prior
@@ -4332,7 +4399,6 @@ void CGObjCMac::EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF,
llvm::Value *args[] = { src, dst.getPointer() };
CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignWeakFn(),
args, "weakassign");
- return;
}
/// EmitObjCGlobalAssign - Code gen for assigning to a __strong object.
@@ -4358,7 +4424,6 @@ void CGObjCMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF,
else
CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignThreadLocalFn(),
args, "threadlocalassign");
- return;
}
/// EmitObjCIvarAssign - Code gen for assigning to a __strong object.
@@ -4380,7 +4445,6 @@ void CGObjCMac::EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF,
dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy);
llvm::Value *args[] = { src, dst.getPointer(), ivarOffset };
CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignIvarFn(), args);
- return;
}
/// EmitObjCStrongCastAssign - Code gen for assigning to a __strong cast object.
@@ -4401,7 +4465,6 @@ void CGObjCMac::EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF,
llvm::Value *args[] = { src, dst.getPointer() };
CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignStrongCastFn(),
args, "strongassign");
- return;
}
void CGObjCMac::EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF,
@@ -4455,7 +4518,8 @@ enum ImageInfoFlags {
// A flag indicating that the module has no instances of a @synthesize of a
// superclass variable. <rdar://problem/6803242>
eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang.
- eImageInfo_ImageIsSimulated = (1 << 5)
+ eImageInfo_ImageIsSimulated = (1 << 5),
+ eImageInfo_ClassProperties = (1 << 6)
};
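To make the bit layout concrete, a small sketch of how an image-info flags word decomposes (values illustrative, using the enumerators above):

// Illustrative decomposition of an __objc_imageinfo flags word:
unsigned flags = eImageInfo_ImageIsSimulated | eImageInfo_ClassProperties;
bool simulated = flags & eImageInfo_ImageIsSimulated;  // bit 5 set
bool classProps = flags & eImageInfo_ClassProperties;  // bit 6, new in this change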
void CGObjCCommonMac::EmitImageInfo() {
@@ -4507,6 +4571,10 @@ void CGObjCCommonMac::EmitImageInfo() {
Triple.getArch() == llvm::Triple::x86_64))
Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated",
eImageInfo_ImageIsSimulated);
+
+ // Indicate whether we are generating class properties.
+ Mod.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties",
+ eImageInfo_ClassProperties);
}
// struct objc_module {
@@ -4602,6 +4670,11 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF,
llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF,
const ObjCInterfaceDecl *ID) {
+ // If the class has the objc_runtime_visible attribute, we need to
+ // use the Objective-C runtime to get the class.
+ if (ID->hasAttr<ObjCRuntimeVisibleAttr>())
+ return EmitClassRefViaRuntime(CGF, ID, ObjCTypes);
+
return EmitClassRefFromId(CGF, ID->getIdentifier());
}
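A hedged sketch of the source-level trigger for this path (hypothetical class name; the attribute spelling is objc_runtime_visible):

// Objective-C usage that reaches the branch above (hypothetical class name):
//   __attribute__((objc_runtime_visible))
//   @interface HiddenFrameworkClass : NSObject
//   @end
// References to such a class are lowered to a runtime lookup rather than a
// direct load from the class-references section.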
@@ -4954,7 +5027,7 @@ CGObjCCommonMac::BuildIvarLayout(const ObjCImplementationDecl *OMD,
baseOffset = CharUnits::Zero();
}
- baseOffset = baseOffset.RoundUpToAlignment(CGM.getPointerAlign());
+ baseOffset = baseOffset.alignTo(CGM.getPointerAlign());
}
else {
CGM.getContext().DeepCollectObjCIvars(OI, true, ivars);
@@ -5131,9 +5204,8 @@ void CGObjCMac::FinishModule() {
}
CGObjCNonFragileABIMac::CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm)
- : CGObjCCommonMac(cgm),
- ObjCTypes(cgm) {
- ObjCEmptyCacheVar = ObjCEmptyVtableVar = nullptr;
+ : CGObjCCommonMac(cgm), ObjCTypes(cgm), ObjCEmptyCacheVar(nullptr),
+ ObjCEmptyVtableVar(nullptr) {
ObjCABI = 2;
}
@@ -5223,7 +5295,6 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
// struct _objc_cache *
CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache");
CachePtrTy = llvm::PointerType::getUnqual(CacheTy);
-
}
ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
@@ -5256,12 +5327,13 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// struct _objc_method_description_list *optional_class_methods;
// struct _objc_property_list *instance_properties;
// const char ** extendedMethodTypes;
+ // struct _objc_property_list *class_properties;
// }
ProtocolExtensionTy =
llvm::StructType::create("struct._objc_protocol_extension",
IntTy, MethodDescriptionListPtrTy,
MethodDescriptionListPtrTy, PropertyListPtrTy,
- Int8PtrPtrTy, nullptr);
+ Int8PtrPtrTy, PropertyListPtrTy, nullptr);
// struct _objc_protocol_extension *
ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy);
@@ -5359,14 +5431,17 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
// char *class_name;
// struct _objc_method_list *instance_method;
// struct _objc_method_list *class_method;
+ // struct _objc_protocol_list *protocols;
// uint32_t size; // sizeof(struct _objc_category)
// struct _objc_property_list *instance_properties; // category's @property
+ // struct _objc_property_list *class_properties;
// }
CategoryTy =
llvm::StructType::create("struct._objc_category",
Int8PtrTy, Int8PtrTy, MethodListPtrTy,
MethodListPtrTy, ProtocolListPtrTy,
- IntTy, PropertyListPtrTy, nullptr);
+ IntTy, PropertyListPtrTy, PropertyListPtrTy,
+ nullptr);
// Global metadata structures
@@ -5405,7 +5480,6 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm)
llvm::StructType::create("struct._objc_exception_data",
llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize),
StackPtrTy, nullptr);
-
}
ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm)
@@ -5434,6 +5508,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// const uint32_t flags; // = 0
// const char ** extendedMethodTypes;
// const char *demangledName;
+ // const struct _prop_list_t * class_properties;
// }
// Holder for struct _protocol_list_t *
@@ -5446,7 +5521,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
MethodListnfABIPtrTy, MethodListnfABIPtrTy,
MethodListnfABIPtrTy, MethodListnfABIPtrTy,
PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy,
- Int8PtrTy,
+ Int8PtrTy, PropertyListPtrTy,
nullptr);
// struct _protocol_t*
@@ -5539,6 +5614,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// const struct _method_list_t * const class_methods;
// const struct _protocol_list_t * const protocols;
// const struct _prop_list_t * const properties;
+ // const struct _prop_list_t * const class_properties;
+ // const uint32_t size;
// }
CategorynfABITy = llvm::StructType::create("struct._category_t",
Int8PtrTy, ClassnfABIPtrTy,
@@ -5546,6 +5623,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
MethodListnfABIPtrTy,
ProtocolListnfABIPtrTy,
PropertyListPtrTy,
+ PropertyListPtrTy,
+ IntTy,
nullptr);
// New types for nonfragile abi messaging.
@@ -5609,10 +5688,9 @@ llvm::Function *CGObjCNonFragileABIMac::ModuleInitFunction() {
return nullptr;
}
-void CGObjCNonFragileABIMac::
-AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container,
- const char *SymbolName,
- const char *SectionName) {
+void CGObjCNonFragileABIMac::AddModuleClassList(
+ ArrayRef<llvm::GlobalValue *> Container, StringRef SymbolName,
+ StringRef SectionName) {
unsigned NumClasses = Container.size();
if (!NumClasses)
@@ -5814,13 +5892,16 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer(
if (flags & NonFragileABI_Class_Meta) {
Values[ 7] = llvm::Constant::getNullValue(ObjCTypes.IvarListnfABIPtrTy);
Values[ 8] = GetIvarLayoutName(nullptr, ObjCTypes);
- Values[ 9] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
+ Values[ 9] = EmitPropertyList(
+ "\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getObjCRuntimeNameAsString(),
+ ID, ID->getClassInterface(), ObjCTypes, true);
} else {
Values[ 7] = EmitIvarList(ID);
Values[ 8] = BuildWeakIvarLayout(ID, beginInstance, endInstance,
hasMRCWeak);
- Values[ 9] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(),
- ID, ID->getClassInterface(), ObjCTypes);
+ Values[ 9] = EmitPropertyList(
+ "\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(),
+ ID, ID->getClassInterface(), ObjCTypes, false);
}
llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassRonfABITy,
Values);
@@ -5870,8 +5951,9 @@ llvm::GlobalVariable *CGObjCNonFragileABIMac::BuildClassMetaData(
GV->setSection("__DATA, __objc_data");
GV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy));
- if (HiddenVisibility)
- GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ if (!CGM.getTriple().isOSBinFormatCOFF())
+ if (HiddenVisibility)
+ GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
return GV;
}
@@ -5896,49 +5978,60 @@ void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID,
InstanceStart = RL.getFieldOffset(0) / CGM.getContext().getCharWidth();
}
+static llvm::GlobalValue::DLLStorageClassTypes getStorage(CodeGenModule &CGM,
+ StringRef Name) {
+ IdentifierInfo &II = CGM.getContext().Idents.get(Name);
+ TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
+ DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+
+ const VarDecl *VD = nullptr;
+ for (const auto &Result : DC->lookup(&II))
+ if ((VD = dyn_cast<VarDecl>(Result)))
+ break;
+
+ if (!VD)
+ return llvm::GlobalValue::DLLImportStorageClass;
+ if (VD->hasAttr<DLLExportAttr>())
+ return llvm::GlobalValue::DLLExportStorageClass;
+ if (VD->hasAttr<DLLImportAttr>())
+ return llvm::GlobalValue::DLLImportStorageClass;
+ return llvm::GlobalValue::DefaultStorageClass;
+}
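For concreteness, the declarations this lookup is intended to match on COFF targets (a sketch with assumed __declspec spellings):

// What getStorage keys off of (illustrative declarations):
//   __declspec(dllimport) extern void *_objc_empty_cache;  -> DLLImportStorageClass
//   __declspec(dllexport) void *_objc_empty_cache;         -> DLLExportStorageClass
//   plain declaration                                      -> DefaultStorageClass
//   no declaration found                                   -> DLLImportStorageClass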
+
void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) {
- std::string ClassName = ID->getObjCRuntimeNameAsString();
if (!ObjCEmptyCacheVar) {
- ObjCEmptyCacheVar = new llvm::GlobalVariable(
- CGM.getModule(),
- ObjCTypes.CacheTy,
- false,
- llvm::GlobalValue::ExternalLinkage,
- nullptr,
- "_objc_empty_cache");
-
- // Make this entry NULL for any iOS device target, any iOS simulator target,
- // OS X with deployment target 10.9 or later.
+ ObjCEmptyCacheVar =
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.CacheTy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr,
+ "_objc_empty_cache");
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ ObjCEmptyCacheVar->setDLLStorageClass(getStorage(CGM, "_objc_empty_cache"));
+
+ // Only OS X with a deployment target before 10.9 uses the empty vtable symbol.
const llvm::Triple &Triple = CGM.getTarget().getTriple();
- if (Triple.isiOS() || Triple.isWatchOS() ||
- (Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 9)))
- // This entry will be null.
- ObjCEmptyVtableVar = nullptr;
- else
- ObjCEmptyVtableVar = new llvm::GlobalVariable(
- CGM.getModule(),
- ObjCTypes.ImpnfABITy,
- false,
- llvm::GlobalValue::ExternalLinkage,
- nullptr,
- "_objc_empty_vtable");
- }
- assert(ID->getClassInterface() &&
- "CGObjCNonFragileABIMac::GenerateClass - class is 0");
+ if (Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 9))
+ ObjCEmptyVtableVar =
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ImpnfABITy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr,
+ "_objc_empty_vtable");
+ }
+
// FIXME: Is this correct (that meta class size is never computed)?
uint32_t InstanceStart =
CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassnfABITy);
uint32_t InstanceSize = InstanceStart;
uint32_t flags = NonFragileABI_Class_Meta;
- llvm::SmallString<64> ObjCMetaClassName(getMetaclassSymbolPrefix());
- llvm::SmallString<64> ObjCClassName(getClassSymbolPrefix());
- llvm::SmallString<64> TClassName;
llvm::GlobalVariable *SuperClassGV, *IsAGV;
+ StringRef ClassName = ID->getObjCRuntimeNameAsString();
+ const auto *CI = ID->getClassInterface();
+ assert(CI && "CGObjCNonFragileABIMac::GenerateClass - class is 0");
+
// Build the flags for the metaclass.
- bool classIsHidden =
- ID->getClassInterface()->getVisibility() == HiddenVisibility;
+ bool classIsHidden = (CGM.getTriple().isOSBinFormatCOFF())
+ ? !CI->hasAttr<DLLExportAttr>()
+ : CI->getVisibility() == HiddenVisibility;
if (classIsHidden)
flags |= NonFragileABI_Class_Hidden;
@@ -5947,45 +6040,59 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) {
if (ID->hasNonZeroConstructors() || ID->hasDestructors()) {
flags |= NonFragileABI_Class_HasCXXStructors;
if (!ID->hasNonZeroConstructors())
- flags |= NonFragileABI_Class_HasCXXDestructorOnly;
+ flags |= NonFragileABI_Class_HasCXXDestructorOnly;
}
- if (!ID->getClassInterface()->getSuperClass()) {
+ if (!CI->getSuperClass()) {
// class is root
flags |= NonFragileABI_Class_Root;
- TClassName = ObjCClassName;
- TClassName += ClassName;
- SuperClassGV = GetClassGlobal(TClassName.str(),
- ID->getClassInterface()->isWeakImported());
- TClassName = ObjCMetaClassName;
- TClassName += ClassName;
- IsAGV = GetClassGlobal(TClassName.str(),
- ID->getClassInterface()->isWeakImported());
+
+ SuperClassGV = GetClassGlobal((getClassSymbolPrefix() + ClassName).str(),
+ CI->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (CI->hasAttr<DLLImportAttr>())
+ SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+
+ IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + ClassName).str(),
+ CI->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (CI->hasAttr<DLLImportAttr>())
+ IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
} else {
// Has a root. Current class is not a root.
const ObjCInterfaceDecl *Root = ID->getClassInterface();
while (const ObjCInterfaceDecl *Super = Root->getSuperClass())
Root = Super;
- TClassName = ObjCMetaClassName ;
- TClassName += Root->getObjCRuntimeNameAsString();
- IsAGV = GetClassGlobal(TClassName.str(),
+
+ const auto *Super = CI->getSuperClass();
+ StringRef RootClassName = Root->getObjCRuntimeNameAsString();
+ StringRef SuperClassName = Super->getObjCRuntimeNameAsString();
+
+ IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + RootClassName).str(),
Root->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (Root->hasAttr<DLLImportAttr>())
+ IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
// Work on the superclass metadata symbol.
- TClassName = ObjCMetaClassName;
- TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString();
- SuperClassGV = GetClassGlobal(
- TClassName.str(),
- ID->getClassInterface()->getSuperClass()->isWeakImported());
- }
- llvm::GlobalVariable *CLASS_RO_GV = BuildClassRoTInitializer(flags,
- InstanceStart,
- InstanceSize,ID);
- TClassName = ObjCMetaClassName;
- TClassName += ClassName;
- llvm::GlobalVariable *MetaTClass = BuildClassMetaData(
- TClassName.str(), IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden,
- ID->getClassInterface()->isWeakImported());
+ SuperClassGV =
+ GetClassGlobal((getMetaclassSymbolPrefix() + SuperClassName).str(),
+ Super->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (Super->hasAttr<DLLImportAttr>())
+ SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ }
+
+ llvm::GlobalVariable *CLASS_RO_GV =
+ BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID);
+
+ llvm::GlobalVariable *MetaTClass =
+ BuildClassMetaData((getMetaclassSymbolPrefix() + ClassName).str(), IsAGV,
+ SuperClassGV, CLASS_RO_GV, classIsHidden,
+ CI->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (CI->hasAttr<DLLExportAttr>())
+ MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
DefinedMetaClasses.push_back(MetaTClass);
// Metadata for the class
@@ -6006,34 +6113,38 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) {
flags |= NonFragileABI_Class_HasCXXDestructorOnly;
}
- if (hasObjCExceptionAttribute(CGM.getContext(), ID->getClassInterface()))
+ if (hasObjCExceptionAttribute(CGM.getContext(), CI))
flags |= NonFragileABI_Class_Exception;
- if (!ID->getClassInterface()->getSuperClass()) {
+ if (!CI->getSuperClass()) {
flags |= NonFragileABI_Class_Root;
SuperClassGV = nullptr;
} else {
// Has a root. Current class is not a root.
- TClassName = ObjCClassName;
- TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString();
- SuperClassGV = GetClassGlobal(
- TClassName.str(),
- ID->getClassInterface()->getSuperClass()->isWeakImported());
+ const auto *Super = CI->getSuperClass();
+ StringRef SuperClassName = Super->getObjCRuntimeNameAsString();
+
+ SuperClassGV =
+ GetClassGlobal((getClassSymbolPrefix() + SuperClassName).str(),
+ Super->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (Super->hasAttr<DLLImportAttr>())
+ SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
}
+
GetClassSizeInfo(ID, InstanceStart, InstanceSize);
- CLASS_RO_GV = BuildClassRoTInitializer(flags,
- InstanceStart,
- InstanceSize,
- ID);
+ CLASS_RO_GV =
+ BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID);
- TClassName = ObjCClassName;
- TClassName += ClassName;
llvm::GlobalVariable *ClassMD =
- BuildClassMetaData(TClassName.str(), MetaTClass, SuperClassGV, CLASS_RO_GV,
- classIsHidden,
- ID->getClassInterface()->isWeakImported());
+ BuildClassMetaData((getClassSymbolPrefix() + ClassName).str(), MetaTClass,
+ SuperClassGV, CLASS_RO_GV, classIsHidden,
+ CI->isWeakImported());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (CI->hasAttr<DLLExportAttr>())
+ ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
DefinedClasses.push_back(ClassMD);
- ImplementedClasses.push_back(ID->getClassInterface());
+ ImplementedClasses.push_back(CI);
// Determine if this class is also "non-lazy".
if (ImplementationIsNonLazy(ID))
@@ -6041,7 +6152,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) {
// Force the definition of the EHType if necessary.
if (flags & NonFragileABI_Class_Exception)
- GetInterfaceEHType(ID->getClassInterface(), true);
+ GetInterfaceEHType(CI, true);
// Make sure method definition entries are all clear for the next implementation.
MethodDefinitions.clear();
}
@@ -6093,6 +6204,8 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF,
/// const struct _method_list_t * const class_methods;
/// const struct _protocol_list_t * const protocols;
/// const struct _prop_list_t * const properties;
+/// const struct _prop_list_t * const class_properties;
+/// const uint32_t size;
/// }
///
void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
@@ -6107,7 +6220,7 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
llvm::SmallString<64> ExtClassName(getClassSymbolPrefix());
ExtClassName += Interface->getObjCRuntimeNameAsString();
- llvm::Constant *Values[6];
+ llvm::Constant *Values[8];
Values[0] = GetClassName(OCD->getIdentifier()->getName());
// meta-class entry symbol
llvm::GlobalVariable *ClassGV =
@@ -6156,12 +6269,18 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
Category->protocol_begin(),
Category->protocol_end());
Values[5] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(),
- OCD, Category, ObjCTypes);
+ OCD, Category, ObjCTypes, false);
+ Values[6] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(),
+ OCD, Category, ObjCTypes, true);
} else {
Values[4] = llvm::Constant::getNullValue(ObjCTypes.ProtocolListnfABIPtrTy);
Values[5] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
+ Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
}
+ unsigned Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy);
+ Values[7] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size);
+
llvm::Constant *Init =
llvm::ConstantStruct::get(ObjCTypes.CategorynfABITy,
Values);
@@ -6210,9 +6329,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetMethodConstant(
/// }
///
llvm::Constant *
-CGObjCNonFragileABIMac::EmitMethodList(Twine Name,
- const char *Section,
- ArrayRef<llvm::Constant*> Methods) {
+CGObjCNonFragileABIMac::EmitMethodList(Twine Name, StringRef Section,
+ ArrayRef<llvm::Constant *> Methods) {
// Return null for empty list.
if (Methods.empty())
return llvm::Constant::getNullValue(ObjCTypes.MethodListnfABIPtrTy);
@@ -6242,18 +6360,28 @@ CGObjCNonFragileABIMac::EmitMethodList(Twine Name,
llvm::GlobalVariable *
CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID,
const ObjCIvarDecl *Ivar) {
-
const ObjCInterfaceDecl *Container = Ivar->getContainingInterface();
llvm::SmallString<64> Name("OBJC_IVAR_$_");
Name += Container->getObjCRuntimeNameAsString();
Name += ".";
Name += Ivar->getName();
- llvm::GlobalVariable *IvarOffsetGV =
- CGM.getModule().getGlobalVariable(Name);
- if (!IvarOffsetGV)
- IvarOffsetGV = new llvm::GlobalVariable(
- CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false,
- llvm::GlobalValue::ExternalLinkage, nullptr, Name.str());
+ llvm::GlobalVariable *IvarOffsetGV = CGM.getModule().getGlobalVariable(Name);
+ if (!IvarOffsetGV) {
+ IvarOffsetGV =
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.IvarOffsetVarTy,
+ false, llvm::GlobalValue::ExternalLinkage,
+ nullptr, Name.str());
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ bool IsPrivateOrPackage =
+ Ivar->getAccessControl() == ObjCIvarDecl::Private ||
+ Ivar->getAccessControl() == ObjCIvarDecl::Package;
+
+ if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage)
+ IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ else if (ID->hasAttr<DLLImportAttr>())
+ IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ }
+ }
return IvarOffsetGV;
}
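The symbol this creates follows a fixed naming pattern; a sketch with hypothetical names:

// Ivar offset variable naming (hypothetical class/ivar):
//   OBJC_IVAR_$_MyClass.myIvar
// On COFF it is dllexported only for a dllexport class whose ivar is neither
// private nor package-scoped; a dllimport class imports the offset instead.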
@@ -6267,14 +6395,17 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID,
IvarOffsetGV->setAlignment(
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy));
- // FIXME: This matches gcc, but shouldn't the visibility be set on the use as
- // well (i.e., in ObjCIvarOffsetVariable).
- if (Ivar->getAccessControl() == ObjCIvarDecl::Private ||
- Ivar->getAccessControl() == ObjCIvarDecl::Package ||
- ID->getVisibility() == HiddenVisibility)
- IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- else
- IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility);
+ if (!CGM.getTriple().isOSBinFormatCOFF()) {
+ // FIXME: This matches gcc, but shouldn't the visibility be set on the use
+ // as well (i.e., in ObjCIvarOffsetVariable).
+ if (Ivar->getAccessControl() == ObjCIvarDecl::Private ||
+ Ivar->getAccessControl() == ObjCIvarDecl::Package ||
+ ID->getVisibility() == HiddenVisibility)
+ IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ else
+ IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility);
+ }
+
IvarOffsetGV->setSection("__DATA, __objc_ivar");
return IvarOffsetGV;
}
@@ -6361,7 +6492,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef(
const ObjCProtocolDecl *PD) {
llvm::GlobalVariable *&Entry = Protocols[PD->getIdentifier()];
- if (!Entry) {
+ if (!Entry)
// We use the initializer as a marker of whether this is a forward
// reference or not. At module finalization we add the empty
// contents for protocols which were referenced but never defined.
@@ -6370,8 +6501,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef(
false, llvm::GlobalValue::ExternalLinkage,
nullptr,
"\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString());
- Entry->setSection("__DATA,__datacoal_nt,coalesced");
- }
return Entry;
}
@@ -6391,6 +6520,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef(
/// const uint32_t flags; // = 0
/// const char ** extendedMethodTypes;
/// const char *demangledName;
+/// const struct _prop_list_t * class_properties;
/// }
/// @endcode
///
@@ -6442,7 +6572,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
MethodTypesExt.insert(MethodTypesExt.end(),
OptMethodTypesExt.begin(), OptMethodTypesExt.end());
- llvm::Constant *Values[12];
+ llvm::Constant *Values[13];
// isa is NULL
Values[0] = llvm::Constant::getNullValue(ObjCTypes.ObjectPtrTy);
Values[1] = GetClassName(PD->getObjCRuntimeNameAsString());
@@ -6466,8 +6596,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
+ PD->getObjCRuntimeNameAsString(),
"__DATA, __objc_const",
OptClassMethods);
- Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(),
- nullptr, PD, ObjCTypes);
+ Values[7] = EmitPropertyList(
+ "\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(),
+ nullptr, PD, ObjCTypes, false);
uint32_t Size =
CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy);
Values[8] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size);
@@ -6477,6 +6608,10 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
MethodTypesExt, ObjCTypes);
// const char *demangledName;
Values[11] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy);
+
+ Values[12] = EmitPropertyList(
+ "\01l_OBJC_$_CLASS_PROP_LIST_" + PD->getObjCRuntimeNameAsString(),
+ nullptr, PD, ObjCTypes, true);
llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ProtocolnfABITy,
Values);
@@ -6492,7 +6627,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
"\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString());
Entry->setAlignment(
CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABITy));
- Entry->setSection("__DATA,__datacoal_nt,coalesced");
Protocols[PD->getIdentifier()] = Entry;
}
@@ -6640,7 +6774,7 @@ static void appendSelectorForMessageRefTable(std::string &buffer,
}
}
-/// Emit a "v-table" message send. We emit a weak hidden-visibility
+/// Emit a "vtable" message send. We emit a weak hidden-visibility
/// struct, initially containing the selector pointer and a pointer to
/// a "fixup" variant of the appropriate objc_msgSend. To call, we
/// load and call the function pointer, passing the address of the
@@ -6734,7 +6868,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF,
bool requiresnullCheck = false;
if (CGM.getLangOpts().ObjCAutoRefCount && method)
- for (const auto *ParamDecl : method->params()) {
+ for (const auto *ParamDecl : method->parameters()) {
if (ParamDecl->hasAttr<NSConsumedAttr>()) {
if (!nullReturn.NullBB)
nullReturn.init(CGF, arg0);
@@ -6783,7 +6917,7 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF,
}
llvm::GlobalVariable *
-CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name, bool Weak) {
+CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, bool Weak) {
llvm::GlobalValue::LinkageTypes L =
Weak ? llvm::GlobalValue::ExternalWeakLinkage
: llvm::GlobalValue::ExternalLinkage;
@@ -6806,9 +6940,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
llvm::GlobalVariable *&Entry = ClassReferences[II];
if (!Entry) {
- std::string ClassName(
- getClassSymbolPrefix() +
- (ID ? ID->getObjCRuntimeNameAsString() : II->getName()).str());
+ StringRef Name = ID ? ID->getObjCRuntimeNameAsString() : II->getName();
+ std::string ClassName = (getClassSymbolPrefix() + Name).str();
llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, Weak);
Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy,
false, llvm::GlobalValue::PrivateLinkage,
@@ -6822,6 +6955,11 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF,
const ObjCInterfaceDecl *ID) {
+ // If the class has the objc_runtime_visible attribute, we need to
+ // use the Objective-C runtime to get the class.
+ if (ID->hasAttr<ObjCRuntimeVisibleAttr>())
+ return EmitClassRefViaRuntime(CGF, ID, ObjCTypes);
+
return EmitClassRefFromId(CGF, ID->getIdentifier(), ID->isWeakImported(), ID);
}
@@ -7100,27 +7238,28 @@ CGObjCNonFragileABIMac::EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF,
llvm::Constant *
CGObjCNonFragileABIMac::GetEHType(QualType T) {
// There's a particular fixed type info for 'id'.
- if (T->isObjCIdType() ||
- T->isObjCQualifiedIdType()) {
- llvm::Constant *IDEHType =
- CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id");
- if (!IDEHType)
+ if (T->isObjCIdType() || T->isObjCQualifiedIdType()) {
+ auto *IDEHType = CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id");
+ if (!IDEHType) {
IDEHType =
- new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy,
- false,
- llvm::GlobalValue::ExternalLinkage,
- nullptr, "OBJC_EHTYPE_id");
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr,
+ "OBJC_EHTYPE_id");
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ IDEHType->setDLLStorageClass(getStorage(CGM, "OBJC_EHTYPE_id"));
+ }
return IDEHType;
}
// All other types should be Objective-C interface pointer types.
- const ObjCObjectPointerType *PT =
- T->getAs<ObjCObjectPointerType>();
+ const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>();
assert(PT && "Invalid @catch type.");
+
const ObjCInterfaceType *IT = PT->getInterfaceType();
assert(IT && "Invalid @catch type.");
+
return GetInterfaceEHType(IT->getDecl(), false);
-}
+}
void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF,
const ObjCAtTryStmt &S) {
@@ -7153,6 +7292,7 @@ llvm::Constant *
CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID,
bool ForDefinition) {
llvm::GlobalVariable * &Entry = EHTypeReferences[ID->getIdentifier()];
+ StringRef ClassName = ID->getObjCRuntimeNameAsString();
// If we don't need a definition, return the entry if found or check
// if we use an external reference.
@@ -7162,38 +7302,43 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID,
// If this type (or a super class) has the __objc_exception__
// attribute, emit an external reference.
- if (hasObjCExceptionAttribute(CGM.getContext(), ID))
- return Entry =
- new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false,
- llvm::GlobalValue::ExternalLinkage,
- nullptr,
- ("OBJC_EHTYPE_$_" +
- ID->getObjCRuntimeNameAsString()));
+ if (hasObjCExceptionAttribute(CGM.getContext(), ID)) {
+ std::string EHTypeName = ("OBJC_EHTYPE_$_" + ClassName).str();
+ Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy,
+ false, llvm::GlobalValue::ExternalLinkage,
+ nullptr, EHTypeName);
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ if (ID->hasAttr<DLLExportAttr>())
+ Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ else if (ID->hasAttr<DLLImportAttr>())
+ Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ }
+ return Entry;
+ }
}
- // Otherwise we need to either make a new entry or fill in the
- // initializer.
+ // Otherwise we need to either make a new entry or fill in the initializer.
assert((!Entry || !Entry->hasInitializer()) && "Duplicate EHType definition");
- llvm::SmallString<64> ClassName(getClassSymbolPrefix());
- ClassName += ID->getObjCRuntimeNameAsString();
+
std::string VTableName = "objc_ehtype_vtable";
- llvm::GlobalVariable *VTableGV =
- CGM.getModule().getGlobalVariable(VTableName);
- if (!VTableGV)
- VTableGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy,
- false,
- llvm::GlobalValue::ExternalLinkage,
- nullptr, VTableName);
+ auto *VTableGV = CGM.getModule().getGlobalVariable(VTableName);
+ if (!VTableGV) {
+ VTableGV =
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr,
+ VTableName);
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ VTableGV->setDLLStorageClass(getStorage(CGM, VTableName));
+ }
llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2);
-
llvm::Constant *Values[] = {
llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(), VTableGV,
VTableIdx),
GetClassName(ID->getObjCRuntimeNameAsString()),
- GetClassGlobal(ClassName.str())};
- llvm::Constant *Init =
- llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values);
+ GetClassGlobal((getClassSymbolPrefix() + ClassName).str()),
+ };
+ llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values);
llvm::GlobalValue::LinkageTypes L = ForDefinition
? llvm::GlobalValue::ExternalLinkage
@@ -7201,24 +7346,25 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID,
if (Entry) {
Entry->setInitializer(Init);
} else {
- llvm::SmallString<64> EHTYPEName("OBJC_EHTYPE_$_");
- EHTYPEName += ID->getObjCRuntimeNameAsString();
- Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false,
- L,
- Init,
- EHTYPEName.str());
+ Entry =
+ new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, L,
+ Init, ("OBJC_EHTYPE_$_" + ClassName).str());
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ if (hasObjCExceptionAttribute(CGM.getContext(), ID))
+ if (ID->hasAttr<DLLExportAttr>())
+ Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
}
assert(Entry->getLinkage() == L);
- if (ID->getVisibility() == HiddenVisibility)
- Entry->setVisibility(llvm::GlobalValue::HiddenVisibility);
- Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment(
- ObjCTypes.EHTypeTy));
+ if (!CGM.getTriple().isOSBinFormatCOFF())
+ if (ID->getVisibility() == HiddenVisibility)
+ Entry->setVisibility(llvm::GlobalValue::HiddenVisibility);
+
+ const auto &DL = CGM.getDataLayout();
+ Entry->setAlignment(DL.getABITypeAlignment(ObjCTypes.EHTypeTy));
if (ForDefinition)
Entry->setSection("__DATA,__objc_const");
- else
- Entry->setSection("__DATA,__datacoal_nt,coalesced");
return Entry;
}
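The three Values above populate a descriptor whose effective shape is sketched below (field names are descriptive, not the runtime's):

// Approximate layout of an emitted OBJC_EHTYPE_$_<name> global:
struct objc_ehtype_sketch {
  void **vtable;    // &objc_ehtype_vtable[2], per VTableIdx above
  const char *name; // the class's runtime name
  void *cls;        // the OBJC_CLASS_$_<name> global
};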
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
index 7be9ae996040..0caf6d9f210a 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -120,9 +120,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth();
uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign();
uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext());
- CharUnits StorageSize =
- CGF.CGM.getContext().toCharUnitsFromBits(
- llvm::RoundUpToAlignment(BitOffset + BitFieldSize, AlignmentBits));
+ CharUnits StorageSize = CGF.CGM.getContext().toCharUnitsFromBits(
+ llvm::alignTo(BitOffset + BitFieldSize, AlignmentBits));
CharUnits Alignment = CGF.CGM.getContext().toCharUnitsFromBits(AlignmentBits);
// Allocate a new CGBitFieldInfo object to describe this access.
@@ -364,25 +363,15 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method,
llvm::PointerType *signatureType =
CGM.getTypes().GetFunctionType(signature)->getPointerTo();
- // If that's not variadic, there's no need to recompute the ABI
- // arrangement.
- if (!signature.isVariadic())
- return MessageSendInfo(signature, signatureType);
+ const CGFunctionInfo &signatureForCall =
+ CGM.getTypes().arrangeCall(signature, callArgs);
- // Otherwise, there is.
- FunctionType::ExtInfo einfo = signature.getExtInfo();
- const CGFunctionInfo &argsInfo =
- CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, einfo,
- signature.getRequiredArgs());
-
- return MessageSendInfo(argsInfo, signatureType);
+ return MessageSendInfo(signatureForCall, signatureType);
}
// There's no method; just use a default CC.
const CGFunctionInfo &argsInfo =
- CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs,
- FunctionType::ExtInfo(),
- RequiredArgs::All);
+ CGM.getTypes().arrangeUnprototypedObjCMessageSend(resultType, callArgs);
// Derive the signature to call from that.
llvm::PointerType *signatureType =
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h
index 28d88dd10be9..6c330590f7cd 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h
@@ -280,7 +280,7 @@ public:
virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) = 0;
- virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
+ virtual llvm::GlobalVariable *GetClassGlobal(StringRef Name,
bool Weak = false) = 0;
struct MessageSendInfo {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 686678962d3e..38aebea18ed3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -40,46 +40,12 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
default:
llvm_unreachable("Unexpected opencl builtin type!");
return nullptr;
- case BuiltinType::OCLImage1d:
- return llvm::PointerType::get(llvm::StructType::create(
- Ctx, "opencl.image1d_t"), ImgAddrSpc);
- case BuiltinType::OCLImage1dArray:
- return llvm::PointerType::get(llvm::StructType::create(
- Ctx, "opencl.image1d_array_t"), ImgAddrSpc);
- case BuiltinType::OCLImage1dBuffer:
- return llvm::PointerType::get(llvm::StructType::create(
- Ctx, "opencl.image1d_buffer_t"), ImgAddrSpc);
- case BuiltinType::OCLImage2d:
- return llvm::PointerType::get(llvm::StructType::create(
- Ctx, "opencl.image2d_t"), ImgAddrSpc);
- case BuiltinType::OCLImage2dArray:
- return llvm::PointerType::get(llvm::StructType::create(
- Ctx, "opencl.image2d_array_t"), ImgAddrSpc);
- case BuiltinType::OCLImage2dDepth:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.image2d_depth_t"), ImgAddrSpc);
- case BuiltinType::OCLImage2dArrayDepth:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.image2d_array_depth_t"),
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ return llvm::PointerType::get( \
+ llvm::StructType::create(Ctx, "opencl." #ImgType "_" #Suffix "_t"), \
ImgAddrSpc);
- case BuiltinType::OCLImage2dMSAA:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.image2d_msaa_t"), ImgAddrSpc);
- case BuiltinType::OCLImage2dArrayMSAA:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_t"),
- ImgAddrSpc);
- case BuiltinType::OCLImage2dMSAADepth:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.image2d_msaa_depth_t"),
- ImgAddrSpc);
- case BuiltinType::OCLImage2dArrayMSAADepth:
- return llvm::PointerType::get(
- llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_depth_t"),
- ImgAddrSpc);
- case BuiltinType::OCLImage3d:
- return llvm::PointerType::get(llvm::StructType::create(
- Ctx, "opencl.image3d_t"), ImgAddrSpc);
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
return llvm::IntegerType::get(Ctx, 32);
case BuiltinType::OCLEvent:
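The rewrite above relies on the X-macro pattern: OpenCLImageTypes.def expands IMAGE_TYPE once per image type, so every case is generated rather than hand-written. A sketch of one assumed expansion:

// Assumed .def entry: IMAGE_TYPE(image1d, OCLImage1dRO, OCLImage1dROTy, read_only, ro)
// expands in the switch to roughly:
//   case BuiltinType::OCLImage1dRO:
//     return llvm::PointerType::get(
//         llvm::StructType::create(Ctx, "opencl.image1d_ro_t"), ImgAddrSpc);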
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 5cfacacbe01a..6a0edbe0e7a9 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -72,6 +72,8 @@ public:
/// \return LValue for thread id variable. This LValue always has type int32*.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
+ virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
+
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
@@ -82,6 +84,8 @@ public:
return Info->getKind() == CR_OpenMP;
}
+ ~CGOpenMPRegionInfo() override = default;
+
protected:
CGOpenMPRegionKind RegionKind;
RegionCodeGenTy CodeGen;
@@ -90,7 +94,7 @@ protected:
};
/// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
@@ -100,6 +104,7 @@ public:
ThreadIDVar(ThreadIDVar) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
+
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
@@ -120,16 +125,65 @@ private:
};
/// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
+ class UntiedTaskActionTy final : public PrePostActionTy {
+ bool Untied;
+ const VarDecl *PartIDVar;
+ const RegionCodeGenTy UntiedCodeGen;
+ llvm::SwitchInst *UntiedSwitch = nullptr;
+
+ public:
+ UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
+ const RegionCodeGenTy &UntiedCodeGen)
+ : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
+ void Enter(CodeGenFunction &CGF) override {
+ if (Untied) {
+ // Emit task switching point.
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(PartIDVar),
+ PartIDVar->getType()->castAs<PointerType>());
+ auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
+ auto *DoneBB = CGF.createBasicBlock(".untied.done.");
+ UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
+ CGF.EmitBlock(DoneBB);
+ CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+ CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
+ UntiedSwitch->addCase(CGF.Builder.getInt32(0),
+ CGF.Builder.GetInsertBlock());
+ emitUntiedSwitch(CGF);
+ }
+ }
+ void emitUntiedSwitch(CodeGenFunction &CGF) const {
+ if (Untied) {
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(PartIDVar),
+ PartIDVar->getType()->castAs<PointerType>());
+ CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
+ PartIdLVal);
+ UntiedCodeGen(CGF);
+ CodeGenFunction::JumpDest CurPoint =
+ CGF.getJumpDestInCurrentScope(".untied.next.");
+ CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+ CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
+ UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
+ CGF.Builder.GetInsertBlock());
+ CGF.EmitBranchThroughCleanup(CurPoint);
+ CGF.EmitBlock(CurPoint.getBlock());
+ }
+ }
+ unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
+ };
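What UntiedTaskActionTy emits is easiest to see as source-level pseudocode; a minimal C++ sketch (hypothetical; the real codegen builds the equivalent IR basic blocks):

// Shape of an untied task entry after the switch is built:
void task_entry_sketch(int *part_id) {
  switch (*part_id) {            // emitted in Enter()
  case 0: /* first fragment */   // each emitUntiedSwitch() call records the
    *part_id = 1; return;        // next resume point and re-enters here
  case 1: /* second fragment */
    *part_id = 2; return;
  default: return;               // the ".untied.done." block
  }
}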
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
- OpenMPDirectiveKind Kind, bool HasCancel)
+ OpenMPDirectiveKind Kind, bool HasCancel,
+ const UntiedTaskActionTy &Action)
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
- ThreadIDVar(ThreadIDVar) {
+ ThreadIDVar(ThreadIDVar), Action(Action) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
+
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
@@ -140,6 +194,10 @@ public:
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
+ void emitUntiedSwitch(CodeGenFunction &CGF) override {
+ Action.emitUntiedSwitch(CGF);
+ }
+
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
@@ -150,6 +208,8 @@ private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
+ /// Action for emitting code for untied tasks.
+ const UntiedTaskActionTy &Action;
};
/// \brief API for inlined captured statement code generation in OpenMP
@@ -162,12 +222,14 @@ public:
: CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
OldCSI(OldCSI),
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
+
+ /// \brief Retrieve the value of the context parameter.
llvm::Value *getContextValue() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getContextValue();
llvm_unreachable("No context value for inlined OpenMP region");
}
+
void setContextValue(llvm::Value *V) override {
if (OuterRegionInfo) {
OuterRegionInfo->setContextValue(V);
@@ -175,6 +237,7 @@ public:
}
llvm_unreachable("No context value for inlined OpenMP region");
}
+
/// \brief Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
if (OuterRegionInfo)
@@ -183,11 +246,13 @@ public:
// captured variables, we can use the original one.
return nullptr;
}
+
FieldDecl *getThisFieldDecl() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getThisFieldDecl();
return nullptr;
}
+
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
@@ -203,6 +268,11 @@ public:
llvm_unreachable("No helper name for inlined OpenMP construct");
}
+ void emitUntiedSwitch(CodeGenFunction &CGF) override {
+ if (OuterRegionInfo)
+ OuterRegionInfo->emitUntiedSwitch(CGF);
+ }
+
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
static bool classof(const CGCapturedStmtInfo *Info) {
@@ -210,6 +280,8 @@ public:
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
}
+ ~CGOpenMPInlinedRegionInfo() override = default;
+
private:
/// \brief CodeGen info about outer OpenMP region.
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
@@ -221,7 +293,7 @@ private:
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
-class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
const RegionCodeGenTy &CodeGen, StringRef HelperName)
@@ -245,9 +317,75 @@ private:
StringRef HelperName;
};
+static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
+ llvm_unreachable("No codegen for expressions");
+}
+/// \brief API for generation of expressions captured in an innermost OpenMP
+/// region.
+class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
+public:
+ CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
+ : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
+ OMPD_unknown,
+ /*HasCancel=*/false),
+ PrivScope(CGF) {
+ // Make sure the globals captured in the provided statement are local by
+ // using the privatization logic. We assume the same variable is not
+ // captured more than once.
+ for (auto &C : CS.captures()) {
+ if (!C.capturesVariable() && !C.capturesVariableByCopy())
+ continue;
+
+ const VarDecl *VD = C.getCapturedVar();
+ if (VD->isLocalVarDeclOrParm())
+ continue;
+
+ DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ /*RefersToEnclosingVariableOrCapture=*/false,
+ VD->getType().getNonReferenceType(), VK_LValue,
+ SourceLocation());
+ PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
+ return CGF.EmitLValue(&DRE).getAddress();
+ });
+ }
+ (void)PrivScope.Privatize();
+ }
+
+ /// \brief Lookup the captured field decl for a variable.
+ const FieldDecl *lookup(const VarDecl *VD) const override {
+ if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
+ return FD;
+ return nullptr;
+ }
+
+ /// \brief Emit the captured statement body.
+ void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
+ llvm_unreachable("No body for expressions");
+ }
+
+ /// \brief Get a variable or parameter for storing global thread id
+ /// inside OpenMP construct.
+ const VarDecl *getThreadIDVariable() const override {
+ llvm_unreachable("No thread id for expressions");
+ }
+
+ /// \brief Get the name of the capture helper.
+ StringRef getHelperName() const override {
+ llvm_unreachable("No helper name for expressions");
+ }
+
+ static bool classof(const CGCapturedStmtInfo *Info) { return false; }
+
+private:
+ /// Private scope to capture global variables.
+ CodeGenFunction::OMPPrivateScope PrivScope;
+};
+
/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
+ llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
+ FieldDecl *LambdaThisCaptureField = nullptr;
public:
/// \brief Constructs region for combined constructs.
@@ -260,30 +398,306 @@ public:
// Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
+ std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
+ LambdaThisCaptureField = CGF.LambdaThisCaptureField;
+ CGF.LambdaThisCaptureField = nullptr;
}
+
~InlinedOpenMPRegionRAII() {
// Restore original CapturedStmtInfo only if we're done with code emission.
auto *OldCSI =
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
delete CGF.CapturedStmtInfo;
CGF.CapturedStmtInfo = OldCSI;
+ std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
+ CGF.LambdaThisCaptureField = LambdaThisCaptureField;
+ }
+};
+
+/// \brief Values for bit flags used in the ident_t to describe the fields.
+/// All enumerated elements are named and described in accordance with the code
+/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
+enum OpenMPLocationFlags {
+ /// \brief Use trampoline for internal microtask.
+ OMP_IDENT_IMD = 0x01,
+ /// \brief Use c-style ident structure.
+ OMP_IDENT_KMPC = 0x02,
+ /// \brief Atomic reduction option for kmpc_reduce.
+ OMP_ATOMIC_REDUCE = 0x10,
+ /// \brief Explicit 'barrier' directive.
+ OMP_IDENT_BARRIER_EXPL = 0x20,
+ /// \brief Implicit barrier in code.
+ OMP_IDENT_BARRIER_IMPL = 0x40,
+ /// \brief Implicit barrier in 'for' directive.
+ OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
+ /// \brief Implicit barrier in 'sections' directive.
+ OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
+ /// \brief Implicit barrier in 'single' directive.
+ OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
+};
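+// These flags are OR-ed into the ident_t 'flags' field. For example, the
+// implicit barrier at the end of a worksharing 'for' is emitted with
+// OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR, i.e. 0x02 | 0x40 == 0x42
+// (see emitUpdateLocation and emitBarrierCall below).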
+
+/// \brief Describes the ident_t structure that carries a source location.
+/// All descriptions are taken from
+/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
+/// Original structure:
+/// typedef struct ident {
+/// kmp_int32 reserved_1; /**< might be used in Fortran;
+/// see above */
+/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
+/// KMP_IDENT_KMPC identifies this union
+/// member */
+/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
+/// see above */
+///#if USE_ITT_BUILD
+/// /* but currently used for storing
+/// region-specific ITT */
+/// /* contextual information. */
+///#endif /* USE_ITT_BUILD */
+/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
+/// C++ */
+/// char const *psource; /**< String describing the source location.
+/// The string is composed of semi-colon separated
+///                                    fields which describe the source file,
+/// the function and a pair of line numbers that
+/// delimit the construct.
+/// */
+/// } ident_t;
+enum IdentFieldIndex {
+ /// \brief might be used in Fortran
+ IdentField_Reserved_1,
+ /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
+ IdentField_Flags,
+ /// \brief Not really used in Fortran any more
+ IdentField_Reserved_2,
+ /// \brief Source[4] in Fortran, do not use for C++
+ IdentField_Reserved_3,
+ /// \brief String describing the source location. The string is composed of
+ /// semi-colon separated fields which describe the source file, the function
+ /// and a pair of line numbers that delimit the construct.
+ IdentField_PSource
+};
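+// For reference, a psource string has the form ";file;function;line;line;;"
+// per the kmp.h notes above; e.g. the default location used when no debug
+// info is generated is ";unknown;unknown;0;0;;" (illustrative sketch).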
+
+/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
+/// the enum sched_type in kmp.h).
+enum OpenMPSchedType {
+ /// \brief Lower bound for default (unordered) versions.
+ OMP_sch_lower = 32,
+ OMP_sch_static_chunked = 33,
+ OMP_sch_static = 34,
+ OMP_sch_dynamic_chunked = 35,
+ OMP_sch_guided_chunked = 36,
+ OMP_sch_runtime = 37,
+ OMP_sch_auto = 38,
+ /// static with chunk adjustment (e.g., simd)
+ OMP_sch_static_balanced_chunked = 45,
+ /// \brief Lower bound for 'ordered' versions.
+ OMP_ord_lower = 64,
+ OMP_ord_static_chunked = 65,
+ OMP_ord_static = 66,
+ OMP_ord_dynamic_chunked = 67,
+ OMP_ord_guided_chunked = 68,
+ OMP_ord_runtime = 69,
+ OMP_ord_auto = 70,
+ OMP_sch_default = OMP_sch_static,
+ /// \brief dist_schedule types
+ OMP_dist_sch_static_chunked = 91,
+ OMP_dist_sch_static = 92,
+ /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
+ /// Set if the monotonic schedule modifier was present.
+ OMP_sch_modifier_monotonic = (1 << 29),
+ /// Set if the nonmonotonic schedule modifier was present.
+ OMP_sch_modifier_nonmonotonic = (1 << 30),
+};
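+// The modifier bits are OR-ed onto the base schedule value. For example,
+// schedule(nonmonotonic: dynamic, N) is passed to the runtime as
+// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, i.e. 35 | (1 << 30);
+// see addMonoNonMonoModifier below.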
+
+enum OpenMPRTLFunction {
+ /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
+ /// kmpc_micro microtask, ...);
+ OMPRTL__kmpc_fork_call,
+ /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
+ /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
+ OMPRTL__kmpc_threadprivate_cached,
+ /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
+ /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
+ OMPRTL__kmpc_threadprivate_register,
+  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+ OMPRTL__kmpc_global_thread_num,
+ // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *crit);
+ OMPRTL__kmpc_critical,
+ // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *crit, uintptr_t hint);
+ OMPRTL__kmpc_critical_with_hint,
+ // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *crit);
+ OMPRTL__kmpc_end_critical,
+ // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
+ // global_tid);
+ OMPRTL__kmpc_cancel_barrier,
+ // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_barrier,
+ // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_for_static_fini,
+ // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+ // global_tid);
+ OMPRTL__kmpc_serialized_parallel,
+ // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+ // global_tid);
+ OMPRTL__kmpc_end_serialized_parallel,
+ // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 num_threads);
+ OMPRTL__kmpc_push_num_threads,
+ // Call to void __kmpc_flush(ident_t *loc);
+ OMPRTL__kmpc_flush,
+ // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
+ OMPRTL__kmpc_master,
+ // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
+ OMPRTL__kmpc_end_master,
+ // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
+ // int end_part);
+ OMPRTL__kmpc_omp_taskyield,
+ // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
+ OMPRTL__kmpc_single,
+ // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
+ OMPRTL__kmpc_end_single,
+ // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+ // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ // kmp_routine_entry_t *task_entry);
+ OMPRTL__kmpc_omp_task_alloc,
+ // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
+ // new_task);
+ OMPRTL__kmpc_omp_task,
+ // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
+ // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+ // kmp_int32 didit);
+ OMPRTL__kmpc_copyprivate,
+ // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
+ // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
+ OMPRTL__kmpc_reduce,
+ // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
+ // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
+ // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
+ // *lck);
+ OMPRTL__kmpc_reduce_nowait,
+ // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *lck);
+ OMPRTL__kmpc_end_reduce,
+ // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *lck);
+ OMPRTL__kmpc_end_reduce_nowait,
+ // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t * new_task);
+ OMPRTL__kmpc_omp_task_begin_if0,
+ // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+ // kmp_task_t * new_task);
+ OMPRTL__kmpc_omp_task_complete_if0,
+ // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_ordered,
+ // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_end_ordered,
+ // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
+ // global_tid);
+ OMPRTL__kmpc_omp_taskwait,
+ // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_taskgroup,
+ // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_end_taskgroup,
+ // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
+ // int proc_bind);
+ OMPRTL__kmpc_push_proc_bind,
+ // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
+ // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
+ // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
+ OMPRTL__kmpc_omp_task_with_deps,
+ // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
+ // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+ // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
+ OMPRTL__kmpc_omp_wait_deps,
+ // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
+ // global_tid, kmp_int32 cncl_kind);
+ OMPRTL__kmpc_cancellationpoint,
+ // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 cncl_kind);
+ OMPRTL__kmpc_cancel,
+ // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 num_teams, kmp_int32 thread_limit);
+ OMPRTL__kmpc_push_num_teams,
+ // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
+ // microtask, ...);
+ OMPRTL__kmpc_fork_teams,
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+ // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+ // sched, kmp_uint64 grainsize, void *task_dup);
+ OMPRTL__kmpc_taskloop,
+ // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
+ // num_dims, struct kmp_dim *dims);
+ OMPRTL__kmpc_doacross_init,
+ // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
+ OMPRTL__kmpc_doacross_fini,
+ // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
+ // *vec);
+ OMPRTL__kmpc_doacross_post,
+ // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
+ // *vec);
+ OMPRTL__kmpc_doacross_wait,
+
+ //
+ // Offloading related calls
+ //
+ // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+ // *arg_types);
+ OMPRTL__tgt_target,
+ // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ OMPRTL__tgt_target_teams,
+ // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
+ OMPRTL__tgt_register_lib,
+ // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
+ OMPRTL__tgt_unregister_lib,
+ // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ OMPRTL__tgt_target_data_begin,
+ // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ OMPRTL__tgt_target_data_end,
+ // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ OMPRTL__tgt_target_data_update,
+};
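+// Each enumerator is lazily materialized into a declaration by
+// createRuntimeFunction and invoked with the usual location/thread-id pair,
+// e.g. (sketch):
+//   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+//   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);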
+
+/// A cleanup that runs the exit part of a pre|post action in the advanced
+/// codegen sequence for an OpenMP region.
+class CleanupTy final : public EHScopeStack::Cleanup {
+ PrePostActionTy *Action;
+
+public:
+ explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
+ void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ Action->Exit(CGF);
}
};
} // anonymous namespace
-static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
- QualType Ty) {
- AlignmentSource Source;
- CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
- return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
- Ty->getPointeeType(), Source);
+void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ if (PrePostAction) {
+ CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
+ Callback(CodeGen, CGF, *PrePostAction);
+ } else {
+ PrePostActionTy Action;
+ Callback(CodeGen, CGF, Action);
+ }
}
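+// Flow sketch for the operator above: Exit is pushed as a normal-and-EH
+// cleanup before the body callback runs, so it is emitted on every path out
+// of the region; calling Enter remains the responsibility of the individual
+// codegen callbacks.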
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
- return emitLoadOfPointerLValue(CGF,
- CGF.GetAddrOfLocalVar(getThreadIDVariable()),
- getThreadIDVariable()->getType());
+ return CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(getThreadIDVariable()),
+ getThreadIDVariable()->getType()->castAs<PointerType>());
}
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
@@ -295,10 +709,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
// The point of exit cannot be a branch out of the structured block.
// longjmp() and throw() must not violate the entry/exit criteria.
CGF.EHStack.pushTerminate();
- {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- CodeGen(CGF);
- }
+ CodeGen(CGF);
CGF.EHStack.popTerminate();
}
@@ -310,16 +721,11 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
- : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr),
- OffloadEntriesInfoManager(CGM) {
+ : CGM(CGM), OffloadEntriesInfoManager(CGM) {
IdentTy = llvm::StructType::create(
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
CGM.Int8PtrTy /* psource */, nullptr);
- // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
- llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
- llvm::PointerType::getUnqual(CGM.Int32Ty)};
- Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
loadOffloadInfoMetadata();
@@ -329,6 +735,90 @@ void CGOpenMPRuntime::clear() {
InternalVars.clear();
}
+static llvm::Function *
+emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
+ const Expr *CombinerInitializer, const VarDecl *In,
+ const VarDecl *Out, bool IsCombiner) {
+  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
+ auto &C = CGM.getContext();
+ QualType PtrTy = C.getPointerType(Ty).withRestrict();
+ FunctionArgList Args;
+ ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
+ /*Id=*/nullptr, PtrTy);
+ ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
+ /*Id=*/nullptr, PtrTy);
+ Args.push_back(&OmpOutParm);
+ Args.push_back(&OmpInParm);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(
+ FnTy, llvm::GlobalValue::InternalLinkage,
+ IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ CodeGenFunction CGF(CGM);
+ // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
+ // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ CodeGenFunction::OMPPrivateScope Scope(CGF);
+ Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
+ Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
+ return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
+ .getAddress();
+ });
+ Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
+ Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
+ return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
+ .getAddress();
+ });
+ (void)Scope.Privatize();
+ CGF.EmitIgnoredExpr(CombinerInitializer);
+ Scope.ForceCleanup();
+ CGF.FinishFunction();
+ return Fn;
+}
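+// Illustrative sketch: for
+//   #pragma omp declare reduction(mysum : int : omp_out += omp_in)
+// this produces roughly
+//   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in) {
+//     *omp_out += *omp_in;
+//   }
+// with both parameters privatized so the combiner expression can refer to
+// omp_in/omp_out directly.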
+
+void CGOpenMPRuntime::emitUserDefinedReduction(
+ CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
+ if (UDRMap.count(D) > 0)
+ return;
+ auto &C = CGM.getContext();
+ if (!In || !Out) {
+ In = &C.Idents.get("omp_in");
+ Out = &C.Idents.get("omp_out");
+ }
+ llvm::Function *Combiner = emitCombinerOrInitializer(
+ CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
+ cast<VarDecl>(D->lookup(Out).front()),
+ /*IsCombiner=*/true);
+ llvm::Function *Initializer = nullptr;
+ if (auto *Init = D->getInitializer()) {
+ if (!Priv || !Orig) {
+ Priv = &C.Idents.get("omp_priv");
+ Orig = &C.Idents.get("omp_orig");
+ }
+ Initializer = emitCombinerOrInitializer(
+ CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
+ cast<VarDecl>(D->lookup(Priv).front()),
+ /*IsCombiner=*/false);
+ }
+ UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
+ if (CGF) {
+ auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
+ Decls.second.push_back(D);
+ }
+}
+
+std::pair<llvm::Function *, llvm::Function *>
+CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
+ auto I = UDRMap.find(D);
+ if (I != UDRMap.end())
+ return I->second;
+ emitUserDefinedReduction(/*CGF=*/nullptr, D);
+ return UDRMap.lookup(D);
+}
+
// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
return CGM.getPointerAlign();
@@ -337,18 +827,18 @@ static CharUnits getIdentSize(CodeGenModule &CGM) {
assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
-static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
+static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
// All the fields except the last are i32, so this works beautifully.
return unsigned(Field) * CharUnits::fromQuantity(4);
}
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
- CGOpenMPRuntime::IdentFieldIndex Field,
+ IdentFieldIndex Field,
const llvm::Twine &Name = "") {
auto Offset = getOffsetOfIdentField(Field);
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
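+// With the layout above, e.g. IdentField_PSource (index 4) lands at byte
+// offset 4 * 4 == 16, which matches getIdentSize() == 16 + pointer size.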
-llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
+llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
@@ -370,19 +860,39 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ bool Tied, unsigned &NumberOfParts) {
+ auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto *ThreadID = getThreadID(CGF, D.getLocStart());
+ auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
+ llvm::Value *TaskArgs[] = {
+ UpLoc, ThreadID,
+ CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
+ TaskTVar->getType()->castAs<PointerType>())
+ .getPointer()};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ };
+ CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
+ UntiedCodeGen);
+ CodeGen.setAction(Action);
assert(!ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 for tasks");
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+ auto *TD = dyn_cast<OMPTaskDirective>(&D);
CodeGenFunction CGF(CGM, true);
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
InnermostKind,
- cast<OMPTaskDirective>(D).hasCancel());
+ TD ? TD->hasCancel() : false, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- return CGF.GenerateCapturedStmtFunction(*CS);
+ auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
+ if (!Tied)
+ NumberOfParts = Action.getNumberOfParts();
+ return Res;
}
-Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
+Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
CharUnits Align = getIdentAlign(CGM);
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
if (!Entry) {
@@ -399,7 +909,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
auto DefaultOpenMPLocation = new llvm::GlobalVariable(
CGM.getModule(), IdentTy, /*isConstant*/ true,
llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
- DefaultOpenMPLocation->setUnnamedAddr(true);
+ DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
DefaultOpenMPLocation->setAlignment(Align.getQuantity());
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
@@ -415,9 +925,10 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
- OpenMPLocationFlags Flags) {
+ unsigned Flags) {
+ Flags |= OMP_IDENT_KMPC;
// If no debug info is generated - return global default location.
- if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
+ if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
Loc.isInvalid())
return getOrCreateDefaultLocation(Flags).getPointer();
@@ -517,20 +1028,34 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
if (OpenMPLocThreadIDMap.count(CGF.CurFn))
OpenMPLocThreadIDMap.erase(CGF.CurFn);
+ if (FunctionUDRMap.count(CGF.CurFn) > 0) {
+    for (auto *D : FunctionUDRMap[CGF.CurFn]) {
+ UDRMap.erase(D);
+ }
+ FunctionUDRMap.erase(CGF.CurFn);
+ }
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
return llvm::PointerType::getUnqual(IdentTy);
}
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
+ if (!Kmpc_MicroTy) {
+ // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
+ llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
+ llvm::PointerType::getUnqual(CGM.Int32Ty)};
+ Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
+ }
return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
llvm::Constant *
-CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
+CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Constant *RTLFn = nullptr;
- switch (Function) {
+ switch (static_cast<OpenMPRTLFunction>(Function)) {
case OMPRTL__kmpc_fork_call: {
// Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
@@ -927,6 +1452,86 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
break;
}
+ case OMPRTL__kmpc_push_num_teams: {
+    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_teams, kmp_int32 thread_limit);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
+ break;
+ }
+ case OMPRTL__kmpc_fork_teams: {
+ // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
+ // microtask, ...);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ getKmpc_MicroPointerTy()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
+ break;
+ }
+ case OMPRTL__kmpc_taskloop: {
+ // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+ // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+ // sched, kmp_uint64 grainsize, void *task_dup);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.IntTy,
+ CGM.VoidPtrTy,
+ CGM.IntTy,
+ CGM.Int64Ty->getPointerTo(),
+ CGM.Int64Ty->getPointerTo(),
+ CGM.Int64Ty,
+ CGM.IntTy,
+ CGM.IntTy,
+ CGM.Int64Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
+ break;
+ }
+ case OMPRTL__kmpc_doacross_init: {
+ // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
+ // num_dims, struct kmp_dim *dims);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
+ break;
+ }
+ case OMPRTL__kmpc_doacross_fini: {
+ // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
+ break;
+ }
+ case OMPRTL__kmpc_doacross_post: {
+ // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
+ // *vec);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ CGM.Int64Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
+ break;
+ }
+ case OMPRTL__kmpc_doacross_wait: {
+ // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
+ // *vec);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+ CGM.Int64Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
@@ -943,6 +1548,24 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
+ case OMPRTL__tgt_target_teams: {
+ // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo(),
+ CGM.Int32Ty,
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
+ break;
+ }
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
@@ -963,30 +1586,53 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
break;
}
+ case OMPRTL__tgt_target_data_begin: {
+ // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
+ break;
+ }
+ case OMPRTL__tgt_target_data_end: {
+ // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
+ break;
+ }
+ case OMPRTL__tgt_target_data_update: {
+ // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
+ break;
+ }
}
+ assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
}
-static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
- auto &C = CGF.getContext();
- llvm::Value *Size = nullptr;
- auto SizeInChars = C.getTypeSizeInChars(Ty);
- if (SizeInChars.isZero()) {
- // getTypeSizeInChars() returns 0 for a VLA.
- while (auto *VAT = C.getAsVariableArrayType(Ty)) {
- llvm::Value *ArraySize;
- std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
- Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
- }
- SizeInChars = C.getTypeSizeInChars(Ty);
- assert(!SizeInChars.isZero());
- Size = CGF.Builder.CreateNUWMul(
- Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
- } else
- Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
- return Size;
-}
-
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
@@ -1144,9 +1790,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
/*Id=*/nullptr, CGM.getContext().VoidPtrTy);
Args.push_back(&Dst);
- auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
- /*isVariadic=*/false);
+ auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
+ CGM.getContext().VoidPtrTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_ctor_.", FI, Loc);
@@ -1176,14 +1821,16 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
/*Id=*/nullptr, CGM.getContext().VoidPtrTy);
Args.push_back(&Dst);
- auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
- /*isVariadic=*/false);
+ auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
+ CGM.getContext().VoidTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_dtor_.", FI, Loc);
+ auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
SourceLocation());
+ // Create a scope with an artificial location for the body of this function.
+ auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
auto ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
@@ -1251,12 +1898,10 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
// the condition and the dead arm of the if/else.
bool CondConstant;
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- if (CondConstant) {
+ if (CondConstant)
ThenGen(CGF);
- } else {
+ else
ElseGen(CGF);
- }
return;
}
@@ -1269,26 +1914,16 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
// Emit the 'then' code.
CGF.EmitBlock(ThenBlock);
- {
- CodeGenFunction::RunCleanupsScope ThenScope(CGF);
- ThenGen(CGF);
- }
+ ThenGen(CGF);
CGF.EmitBranch(ContBlock);
// Emit the 'else' code if present.
- {
- // There is no need to emit line number for unconditional branch.
- auto NL = ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(ElseBlock);
- }
- {
- CodeGenFunction::RunCleanupsScope ThenScope(CGF);
- ElseGen(CGF);
- }
- {
- // There is no need to emit line number for unconditional branch.
- auto NL = ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBranch(ContBlock);
- }
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ElseBlock);
+ ElseGen(CGF);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBranch(ContBlock);
// Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
@@ -1300,34 +1935,36 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
if (!CGF.HaveInsertPoint())
return;
auto *RTLoc = emitUpdateLocation(CGF, Loc);
- auto &&ThenGen = [this, OutlinedFn, CapturedVars,
- RTLoc](CodeGenFunction &CGF) {
+ auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
+ auto &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
RTLoc,
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
- CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
+ CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
llvm::SmallVector<llvm::Value *, 16> RealArgs;
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
- auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
+ auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
};
- auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
- Loc](CodeGenFunction &CGF) {
- auto ThreadID = getThreadID(CGF, Loc);
+ auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
+ auto ThreadID = RT.getThreadID(CGF, Loc);
// Build calls:
// __kmpc_serialized_parallel(&Loc, GTid);
llvm::Value *Args[] = {RTLoc, ThreadID};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
- Args);
+ CGF.EmitRuntimeCall(
+ RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
// OutlinedFn(&GTid, &zero, CapturedStruct);
- auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+ auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
Address ZeroAddr =
- CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
- /*Name*/ ".zero.addr");
+ CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
+ /*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
@@ -1336,15 +1973,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
- llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
+ llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
+ RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
+ EndArgs);
};
- if (IfCond) {
+ if (IfCond)
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
- } else {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- ThenGen(CGF);
+ else {
+ RegionCodeGenTy ThenRCG(ThenGen);
+ ThenRCG(CGF);
}
}
@@ -1397,20 +2035,39 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
}
namespace {
-template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
- llvm::Value *Callee;
- llvm::Value *Args[N];
+/// Common pre|post action for different OpenMP constructs.
+class CommonActionTy final : public PrePostActionTy {
+ llvm::Value *EnterCallee;
+ ArrayRef<llvm::Value *> EnterArgs;
+ llvm::Value *ExitCallee;
+ ArrayRef<llvm::Value *> ExitArgs;
+ bool Conditional;
+ llvm::BasicBlock *ContBlock = nullptr;
public:
- CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
- : Callee(Callee) {
- assert(CleanupArgs.size() == N);
- std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
+ CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
+ llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
+ bool Conditional = false)
+ : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
+ ExitArgs(ExitArgs), Conditional(Conditional) {}
+ void Enter(CodeGenFunction &CGF) override {
+ llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
+ if (Conditional) {
+ llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
+ auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
+ ContBlock = CGF.createBasicBlock("omp_if.end");
+ // Generate the branch (If-stmt)
+ CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
+ CGF.EmitBlock(ThenBlock);
+ }
}
- void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
- if (!CGF.HaveInsertPoint())
- return;
- CGF.EmitRuntimeCall(Callee, Args);
+ void Done(CodeGenFunction &CGF) {
+ // Emit the rest of blocks/branches
+ CGF.EmitBranch(ContBlock);
+ CGF.EmitBlock(ContBlock, true);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
}
};
} // anonymous namespace
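+// With Conditional=true the Enter call guards the region, producing (sketch):
+//   if (__kmpc_master(&loc, tid)) {      // Enter
+//     <region body>
+//     __kmpc_end_master(&loc, tid);      // Exit
+//   }                                    // Done() closes the omp_if.end block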
@@ -1425,45 +2082,22 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
// Prepare arguments and build a call to __kmpc_critical
if (!CGF.HaveInsertPoint())
return;
- CodeGenFunction::RunCleanupsScope Scope(CGF);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
getCriticalRegionLock(CriticalName)};
+ llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
+ std::end(Args));
if (Hint) {
- llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
- std::end(Args));
- auto *HintVal = CGF.EmitScalarExpr(Hint);
- ArgsWithHint.push_back(
- CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
- ArgsWithHint);
- } else
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
- // Build a call to __kmpc_end_critical
- CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
- NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
- llvm::makeArrayRef(Args));
+ EnterArgs.push_back(CGF.Builder.CreateIntCast(
+ CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
+ }
+ CommonActionTy Action(
+ createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
+ : OMPRTL__kmpc_critical),
+ EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
+ CriticalOpGen.setAction(Action);
emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
-static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
- OpenMPDirectiveKind Kind, SourceLocation Loc,
- const RegionCodeGenTy &BodyOpGen) {
- llvm::Value *CallBool = CGF.EmitScalarConversion(
- IfCond,
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
- CGF.getContext().BoolTy, Loc);
-
- auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
- auto *ContBlock = CGF.createBasicBlock("omp_if.end");
- // Generate the branch (If-stmt)
- CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
- CGF.EmitBlock(ThenBlock);
- CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
- // Emit the rest of bblocks/branches
- CGF.EmitBranch(ContBlock);
- CGF.EmitBlock(ContBlock, true);
-}
-
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
const RegionCodeGenTy &MasterOpGen,
SourceLocation Loc) {
@@ -1475,18 +2109,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
// }
// Prepare arguments and build a call to __kmpc_master
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- auto *IsMaster =
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
- typedef CallEndCleanup<std::extent<decltype(Args)>::value>
- MasterCallEndCleanup;
- emitIfStmt(
- CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
- NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
- llvm::makeArrayRef(Args));
- MasterOpGen(CGF);
- });
+ CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
+ createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
+ /*Conditional=*/true);
+ MasterOpGen.setAction(Action);
+ emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
+ Action.Done(CGF);
}
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
@@ -1498,6 +2126,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
+ if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
@@ -1509,16 +2139,12 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
// TaskgroupOpGen();
// __kmpc_end_taskgroup(ident_t *, gtid);
// Prepare arguments and build a call to __kmpc_taskgroup
- {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
- // Build a call to __kmpc_end_taskgroup
- CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
- NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
- llvm::makeArrayRef(Args));
- emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
- }
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+ CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
+ createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
+ Args);
+ TaskgroupOpGen.setAction(Action);
+ emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, project the address of a
@@ -1549,9 +2175,7 @@ static llvm::Value *emitCopyprivateCopyFunction(
C.VoidPtrTy);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
- FunctionType::ExtInfo EI;
- auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, Args, EI, /*isVariadic=*/false);
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
".omp.copyprivate.copy_func", &CGM.getModule());
@@ -1616,22 +2240,16 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
}
// Prepare arguments and build a call to __kmpc_single
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- auto *IsSingle =
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
- typedef CallEndCleanup<std::extent<decltype(Args)>::value>
- SingleCallEndCleanup;
- emitIfStmt(
- CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
- NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
- llvm::makeArrayRef(Args));
- SingleOpGen(CGF);
- if (DidIt.isValid()) {
- // did_it = 1;
- CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
- }
- });
+ CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
+ createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
+ /*Conditional=*/true);
+ SingleOpGen.setAction(Action);
+ emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
+ if (DidIt.isValid()) {
+ // did_it = 1;
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
+ }
+ Action.Done(CGF);
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
if (DidIt.isValid()) {
@@ -1655,7 +2273,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
auto *CpyFn = emitCopyprivateCopyFunction(
CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
- auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
+ auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
Address CL =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
CGF.VoidPtrTy);
@@ -1681,14 +2299,14 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
// OrderedOpGen();
// __kmpc_end_ordered(ident_t *, gtid);
// Prepare arguments and build a call to __kmpc_ordered
- CodeGenFunction::RunCleanupsScope Scope(CGF);
if (IsThreads) {
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
- // Build a call to __kmpc_end_ordered
- CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
- NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
- llvm::makeArrayRef(Args));
+ CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
+ createRuntimeFunction(OMPRTL__kmpc_end_ordered),
+ Args);
+ OrderedOpGen.setAction(Action);
+ emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
+ return;
}
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
@@ -1700,21 +2318,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
return;
// Build call __kmpc_cancel_barrier(loc, thread_id);
// Build call __kmpc_barrier(loc, thread_id);
- OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
- if (Kind == OMPD_for) {
- Flags =
- static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
- } else if (Kind == OMPD_sections) {
- Flags = static_cast<OpenMPLocationFlags>(Flags |
- OMP_IDENT_BARRIER_IMPL_SECTIONS);
- } else if (Kind == OMPD_single) {
- Flags =
- static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
- } else if (Kind == OMPD_barrier) {
- Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
- } else {
- Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
- }
+ unsigned Flags;
+ if (Kind == OMPD_for)
+ Flags = OMP_IDENT_BARRIER_IMPL_FOR;
+ else if (Kind == OMPD_sections)
+ Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
+ else if (Kind == OMPD_single)
+ Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
+ else if (Kind == OMPD_barrier)
+ Flags = OMP_IDENT_BARRIER_EXPL;
+ else
+ Flags = OMP_IDENT_BARRIER_IMPL;
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
@@ -1745,28 +2359,6 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
-/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
-/// the enum sched_type in kmp.h).
-enum OpenMPSchedType {
- /// \brief Lower bound for default (unordered) versions.
- OMP_sch_lower = 32,
- OMP_sch_static_chunked = 33,
- OMP_sch_static = 34,
- OMP_sch_dynamic_chunked = 35,
- OMP_sch_guided_chunked = 36,
- OMP_sch_runtime = 37,
- OMP_sch_auto = 38,
- /// \brief Lower bound for 'ordered' versions.
- OMP_ord_lower = 64,
- OMP_ord_static_chunked = 65,
- OMP_ord_static = 66,
- OMP_ord_dynamic_chunked = 67,
- OMP_ord_guided_chunked = 68,
- OMP_ord_runtime = 69,
- OMP_ord_auto = 70,
- OMP_sch_default = OMP_sch_static,
-};
-
/// \brief Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked, bool Ordered) {
@@ -1789,12 +2381,26 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
llvm_unreachable("Unexpected runtime schedule");
}
+/// \brief Map the OpenMP distribute schedule to the runtime enumeration.
+static OpenMPSchedType
+getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
+  // Only static is allowed for dist_schedule.
+ return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
+}
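+// e.g. dist_schedule(static) maps to OMP_dist_sch_static (92) and
+// dist_schedule(static, N) to OMP_dist_sch_static_chunked (91).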
+
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked) const {
auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
return Schedule == OMP_sch_static;
}
+bool CGOpenMPRuntime::isStaticNonchunked(
+ OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+ auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_dist_sch_static;
+}
+
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
auto Schedule =
getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
@@ -1802,19 +2408,57 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
return Schedule != OMP_sch_static;
}
+static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
+ OpenMPScheduleClauseModifier M1,
+ OpenMPScheduleClauseModifier M2) {
+ int Modifier = 0;
+ switch (M1) {
+ case OMPC_SCHEDULE_MODIFIER_monotonic:
+ Modifier = OMP_sch_modifier_monotonic;
+ break;
+ case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
+ Modifier = OMP_sch_modifier_nonmonotonic;
+ break;
+ case OMPC_SCHEDULE_MODIFIER_simd:
+ if (Schedule == OMP_sch_static_chunked)
+ Schedule = OMP_sch_static_balanced_chunked;
+ break;
+ case OMPC_SCHEDULE_MODIFIER_last:
+ case OMPC_SCHEDULE_MODIFIER_unknown:
+ break;
+ }
+ switch (M2) {
+ case OMPC_SCHEDULE_MODIFIER_monotonic:
+ Modifier = OMP_sch_modifier_monotonic;
+ break;
+ case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
+ Modifier = OMP_sch_modifier_nonmonotonic;
+ break;
+ case OMPC_SCHEDULE_MODIFIER_simd:
+ if (Schedule == OMP_sch_static_chunked)
+ Schedule = OMP_sch_static_balanced_chunked;
+ break;
+ case OMPC_SCHEDULE_MODIFIER_last:
+ case OMPC_SCHEDULE_MODIFIER_unknown:
+ break;
+ }
+ return Schedule | Modifier;
+}
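+// Example: addMonoNonMonoModifier(OMP_sch_dynamic_chunked,
+//     OMPC_SCHEDULE_MODIFIER_nonmonotonic, OMPC_SCHEDULE_MODIFIER_unknown)
+// returns 35 | (1 << 30); the simd modifier instead rewrites a static chunked
+// schedule to OMP_sch_static_balanced_chunked (45) without setting a bit.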
+
void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
SourceLocation Loc,
- OpenMPScheduleClauseKind ScheduleKind,
+ const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned,
bool Ordered, llvm::Value *UB,
llvm::Value *Chunk) {
if (!CGF.HaveInsertPoint())
return;
OpenMPSchedType Schedule =
- getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
+ getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
assert(Ordered ||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
- Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
+ Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
+ Schedule != OMP_sch_static_balanced_chunked));
// Call __kmpc_dispatch_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
// kmp_int[32|64] lower, kmp_int[32|64] upper,
@@ -1824,59 +2468,94 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
if (Chunk == nullptr)
Chunk = CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
- getThreadID(CGF, Loc),
- CGF.Builder.getInt32(Schedule), // Schedule type
- CGF.Builder.getIntN(IVSize, 0), // Lower
- UB, // Upper
- CGF.Builder.getIntN(IVSize, 1), // Stride
- Chunk // Chunk
+ emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ CGF.Builder.getInt32(addMonoNonMonoModifier(
+ Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
+ CGF.Builder.getIntN(IVSize, 0), // Lower
+ UB, // Upper
+ CGF.Builder.getIntN(IVSize, 1), // Stride
+ Chunk // Chunk
};
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
+static void emitForStaticInitCall(
+ CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
+ llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
+ OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
+ unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
+ Address ST, llvm::Value *Chunk) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ assert(!Ordered);
+ assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
+ Schedule == OMP_dist_sch_static ||
+ Schedule == OMP_dist_sch_static_chunked);
+
+ // Call __kmpc_for_static_init(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ if (Chunk == nullptr) {
+ assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
+ Schedule == OMP_dist_sch_static) &&
+ "expected static non-chunked schedule");
+    // If the Chunk was not specified in the clause, use default value 1.
+ Chunk = CGF.Builder.getIntN(IVSize, 1);
+ } else {
+ assert((Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static_chunked ||
+ Schedule == OMP_dist_sch_static_chunked) &&
+ "expected static chunked schedule");
+ }
+ llvm::Value *Args[] = {
+ UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
+ Schedule, M1, M2)), // Schedule type
+ IL.getPointer(), // &isLastIter
+ LB.getPointer(), // &LB
+ UB.getPointer(), // &UB
+ ST.getPointer(), // &Stride
+ CGF.Builder.getIntN(IVSize, 1), // Incr
+ Chunk // Chunk
+ };
+ CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
+}
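+// For example, schedule(static, 4) with a signed 32-bit IV ends up as a call
+// like (sketch):
+//   __kmpc_for_static_init_4(&loc, tid, 33 /*OMP_sch_static_chunked*/,
+//                            &lastiter, &lb, &ub, &stride, /*incr=*/1,
+//                            /*chunk=*/4);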
+
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
SourceLocation Loc,
- OpenMPScheduleClauseKind ScheduleKind,
+ const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned,
bool Ordered, Address IL, Address LB,
Address UB, Address ST,
llvm::Value *Chunk) {
- if (!CGF.HaveInsertPoint())
- return;
- OpenMPSchedType Schedule =
- getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
- assert(!Ordered);
- assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
- Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
-
- // Call __kmpc_for_static_init(
- // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
- // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
- // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
- // kmp_int[32|64] incr, kmp_int[32|64] chunk);
- if (Chunk == nullptr) {
- assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
- "expected static non-chunked schedule");
- // If the Chunk was not specified in the clause - use default value 1.
- Chunk = CGF.Builder.getIntN(IVSize, 1);
- } else {
- assert((Schedule == OMP_sch_static_chunked ||
- Schedule == OMP_ord_static_chunked) &&
- "expected static chunked schedule");
- }
- llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
- getThreadID(CGF, Loc),
- CGF.Builder.getInt32(Schedule), // Schedule type
- IL.getPointer(), // &isLastIter
- LB.getPointer(), // &LB
- UB.getPointer(), // &UB
- ST.getPointer(), // &Stride
- CGF.Builder.getIntN(IVSize, 1), // Incr
- Chunk // Chunk
- };
- CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
+ OpenMPSchedType ScheduleNum =
+ getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
+ auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
+ auto *ThreadId = getThreadID(CGF, Loc);
+ auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
+ emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
+ ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
+ Ordered, IL, LB, UB, ST, Chunk);
+}
+
+void CGOpenMPRuntime::emitDistributeStaticInit(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
+ bool Ordered, Address IL, Address LB, Address UB, Address ST,
+ llvm::Value *Chunk) {
+ OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
+ auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
+ auto *ThreadId = getThreadID(CGF, Loc);
+ auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
+ emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
+ ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
+ OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
+ UB, ST, Chunk);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
@@ -1884,8 +2563,7 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
// Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
- getThreadID(CGF, Loc)};
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
Args);
}
@@ -1897,8 +2575,7 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
// Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
- getThreadID(CGF, Loc)};
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
@@ -1912,7 +2589,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
// kmp_int[32|64] *p_stride);
llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
+ emitUpdateLocation(CGF, Loc),
+ getThreadID(CGF, Loc),
IL.getPointer(), // &isLastIter
LB.getPointer(), // &Lower
UB.getPointer(), // &Upper
@@ -1991,8 +2669,18 @@ enum KmpTaskTFields {
KmpTaskTRoutine,
/// \brief Partition id for the untied tasks.
KmpTaskTPartId,
- /// \brief Function with call of destructors for private variables.
- KmpTaskTDestructors,
+ /// Function with call of destructors for private variables.
+ Data1,
+ /// Task priority.
+ Data2,
+ /// (Taskloops only) Lower bound.
+ KmpTaskTLowerBound,
+ /// (Taskloops only) Upper bound.
+ KmpTaskTUpperBound,
+ /// (Taskloops only) Stride.
+ KmpTaskTStride,
+ /// (Taskloops only) Is last iteration flag.
+ KmpTaskTLastIter,
};
} // anonymous namespace
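Note: Data1 and Data2 replace the single KmpTaskTDestructors slot because kmp_task_t now carries a two-member union at those positions; which member is live depends on the slot. A sketch of that union (member order per the Priority/Destructors indices used later in this patch; names assumed from libomp's kmp.h):

    union kmp_cmplrdata_t {
      kmp_int32 priority;              // used in data2
      kmp_routine_entry_t destructors; // used in data1
    };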
@@ -2005,11 +2693,11 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
- unsigned ColNum, unsigned Order) {
+ unsigned Order) {
assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
"only required for the device "
"code generation.");
- OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
+ OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
++OffloadingEntriesNum;
}
@@ -2017,30 +2705,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
- unsigned ColNum, llvm::Constant *Addr,
- llvm::Constant *ID) {
+ llvm::Constant *Addr, llvm::Constant *ID) {
// If we are emitting code for a target, the entry is already initialized,
// only has to be registered.
if (CGM.getLangOpts().OpenMPIsDevice) {
- assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
- ColNum) &&
+ assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
"Entry must exist.");
- auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
- [LineNum][ColNum];
+ auto &Entry =
+ OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
assert(Entry.isValid() && "Entry not initialized!");
Entry.setAddress(Addr);
Entry.setID(ID);
return;
} else {
OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
- OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
- Entry;
+ OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
}
}
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
- unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
- unsigned ColNum) const {
+ unsigned DeviceID, unsigned FileID, StringRef ParentName,
+ unsigned LineNum) const {
auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
if (PerDevice == OffloadEntriesTargetRegion.end())
return false;
@@ -2053,11 +2738,8 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
auto PerLine = PerParentName->second.find(LineNum);
if (PerLine == PerParentName->second.end())
return false;
- auto PerColumn = PerLine->second.find(ColNum);
- if (PerColumn == PerLine->second.end())
- return false;
// Fail if this entry is already registered.
- if (PerColumn->second.getAddress() || PerColumn->second.getID())
+ if (PerLine->second.getAddress() || PerLine->second.getID())
return false;
return true;
}
@@ -2069,8 +2751,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
for (auto &F : D.second)
for (auto &P : F.second)
for (auto &L : P.second)
- for (auto &C : L.second)
- Action(D.first, F.first, P.first(), L.first, C.first, C.second);
+ Action(D.first, F.first, P.first(), L.first, L.second);
}
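Note: with the column key gone, the offload-entry table loses one nesting level and an entry is identified by (device, file, parent function, line) alone. The container shape this walk implies, spelled out for illustration (the actual typedef lives in the header and may differ):

    // DeviceID -> FileID -> ParentName -> LineNum -> entry
    typedef llvm::DenseMap<
        unsigned,
        llvm::DenseMap<unsigned,
                       llvm::StringMap<llvm::DenseMap<
                           unsigned, OffloadEntryInfoTargetRegion>>>>
        OffloadEntriesTargetRegionTy;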
/// \brief Create a Ctor/Dtor-like function whose body is emitted through
@@ -2087,9 +2768,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
CodeGenFunction CGF(CGM);
GlobalDecl();
- auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, Args, FunctionType::ExtInfo(),
- /*isVariadic=*/false);
+ auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto *Fn =
CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
@@ -2123,11 +2802,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
+ llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
".omp_offloading.entries_begin");
llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
+ llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
".omp_offloading.entries_end");
// Create all device images
@@ -2139,10 +2818,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
StringRef T = Devices[i].getTriple();
auto *ImgBegin = new llvm::GlobalVariable(
M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T));
+ /*Initializer=*/nullptr,
+ Twine(".omp_offloading.img_start.") + Twine(T));
auto *ImgEnd = new llvm::GlobalVariable(
M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T));
+ /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
llvm::Constant *Dev =
llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
@@ -2160,7 +2840,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
M, DeviceImagesInitTy, /*isConstant=*/true,
llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
".omp_offloading.device_images");
- DeviceImages->setUnnamedAddr(true);
+ DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// This is a Zero array to be used in the creation of the constant expressions
llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
@@ -2190,12 +2870,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
IdentInfo, C.CharTy);
auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
- CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
+ CGM, ".omp_offloading.descriptor_unreg",
+ [&](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
Desc);
});
auto *RegFn = createOffloadingBinaryDescriptorFunction(
- CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
+ CGM, ".omp_offloading.descriptor_reg",
+ [&](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
Desc);
CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
@@ -2203,15 +2885,16 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
return RegFn;
}
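Note: the registration function returned here runs as an ordinary global constructor and arranges the matching teardown through registerGlobalDtor, so at runtime the pair reduces to (call names per the tgt offload runtime referenced above; a sketch, not emitted verbatim):

    // At image load, .omp_offloading.descriptor_reg:
    __tgt_register_lib(&Desc);
    // At image unload, via the registered dtor, .omp_offloading.descriptor_unreg:
    __tgt_unregister_lib(&Desc);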
-void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
- uint64_t Size) {
+void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
+ llvm::Constant *Addr, uint64_t Size) {
+ StringRef Name = Addr->getName();
auto *TgtOffloadEntryType = cast<llvm::StructType>(
CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
llvm::LLVMContext &C = CGM.getModule().getContext();
llvm::Module &M = CGM.getModule();
// Make sure the address has the right type.
- llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy);
+ llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
// Create constant string with the name.
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
@@ -2220,7 +2903,7 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
llvm::GlobalValue::InternalLinkage, StrPtrInit,
".omp_offloading.entry_name");
- Str->setUnnamedAddr(true);
+ Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
// Create the entry struct.
@@ -2236,7 +2919,6 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
// We can't have any padding between symbols, so we need to have 1-byte
// alignment.
Entry->setAlignment(1);
- return;
}
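Note: every entry emitted here must be bit-compatible with what the runtime walks between .omp_offloading.entries_begin and entries_end; the 1-byte alignment exists precisely to forbid padding between consecutive entries. The target-side view of one entry, inferred from the three fields stored above (field names are an assumption; the authoritative definition is in the offload runtime):

    struct __tgt_offload_entry {
      void *addr;  // entry ID for target regions, variable address for globals
      char *name;  // points into .omp_offloading.entry_name
      size_t size; // 0 for target regions, object size for globals
    };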
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
@@ -2272,7 +2954,6 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter = [&](
unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
- unsigned Column,
OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
llvm::SmallVector<llvm::Metadata *, 32> Ops;
// Generate metadata for target regions. Each entry of this metadata
@@ -2282,15 +2963,13 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// - Entry 2 -> File ID of the file where the entry was identified.
// - Entry 3 -> Mangled name of the function where the entry was identified.
// - Entry 4 -> Line in the file where the entry was identified.
- // - Entry 5 -> Column in the file where the entry was identified.
- // - Entry 6 -> Order the entry was created.
+ // - Entry 5 -> Order the entry was created.
// The first element of the metadata node is the kind.
Ops.push_back(getMDInt(E.getKind()));
Ops.push_back(getMDInt(DeviceID));
Ops.push_back(getMDInt(FileID));
Ops.push_back(getMDString(ParentName));
Ops.push_back(getMDInt(Line));
- Ops.push_back(getMDInt(Column));
Ops.push_back(getMDInt(E.getOrder()));
// Save this entry in the right position of the ordered entries array.
@@ -2310,7 +2989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
E)) {
assert(CE->getID() && CE->getAddress() &&
"Entry ID and Addr are invalid!");
- createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
+ createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
} else
llvm_unreachable("Unsupported entry kind.");
}
@@ -2365,7 +3044,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() {
OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
/*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
/*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
- /*Column=*/getMDInt(5), /*Order=*/getMDInt(6));
+ /*Order=*/getMDInt(5));
break;
}
}
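Note: dropping the column shrinks each omp_offload.info tuple by one operand, which is why the reader above takes Order from index 5. One target-region entry now looks like this (layout inferred from the push_back sequence in the emitter):

    // !omp_offload.info node for a target region:
    //   !{i32 Kind, i32 DeviceID, i32 FileID, !"ParentName", i32 Line, i32 Order}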
@@ -2509,21 +3188,45 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
}
static RecordDecl *
-createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
+createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
+ QualType KmpInt32Ty,
QualType KmpRoutineEntryPointerQTy) {
auto &C = CGM.getContext();
// Build struct kmp_task_t {
// void * shareds;
// kmp_routine_entry_t routine;
// kmp_int32 part_id;
- // kmp_routine_entry_t destructors;
+ // kmp_cmplrdata_t data1;
+ // kmp_cmplrdata_t data2;
+ // For taskloops additional fields:
+ // kmp_uint64 lb;
+ // kmp_uint64 ub;
+ // kmp_int64 st;
+ // kmp_int32 liter;
// };
+ auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
+ UD->startDefinition();
+ addFieldToRecordDecl(C, UD, KmpInt32Ty);
+ addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
+ UD->completeDefinition();
+ QualType KmpCmplrdataTy = C.getRecordType(UD);
auto *RD = C.buildImplicitRecord("kmp_task_t");
RD->startDefinition();
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
addFieldToRecordDecl(C, RD, KmpInt32Ty);
- addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+ addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
+ addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
+ if (isOpenMPTaskLoopDirective(Kind)) {
+ QualType KmpUInt64Ty =
+ CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ QualType KmpInt64Ty =
+ CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+ addFieldToRecordDecl(C, RD, KmpUInt64Ty);
+ addFieldToRecordDecl(C, RD, KmpUInt64Ty);
+ addFieldToRecordDecl(C, RD, KmpInt64Ty);
+ addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ }
RD->completeDefinition();
return RD;
}
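Note: spelled as C, the record this builds is the following; the first five fields are common to all tasks and the last four exist only for taskloop directives (names taken from the block comment above, types per the addFieldToRecordDecl calls):

    struct kmp_task_t {
      void *shareds;
      kmp_routine_entry_t routine;
      kmp_int32 part_id;
      kmp_cmplrdata_t data1; // destructors
      kmp_cmplrdata_t data2; // priority
      // Taskloop directives only:
      kmp_uint64 lb;
      kmp_uint64 ub;
      kmp_int64 st;
      kmp_int32 liter;
    };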
@@ -2550,14 +3253,17 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
-/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
+/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
+/// For taskloops:
+/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Value *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
- QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
+ OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
+ QualType KmpTaskTWithPrivatesPtrQTy,
QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
QualType SharedsPtrTy, llvm::Value *TaskFunction,
llvm::Value *TaskPrivatesMap) {
@@ -2569,10 +3275,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
KmpTaskTWithPrivatesPtrQTy.withRestrict());
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
- FunctionType::ExtInfo Info;
auto &TaskEntryFnInfo =
- CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
- /*isVariadic=*/false);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
auto *TaskEntry =
llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
@@ -2583,11 +3287,15 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
+ // tt,
+ // For taskloops:
+ // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
// tt->task_data.shareds);
auto *GtidParam = CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
- LValue TDBase = emitLoadOfPointerLValue(
- CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
+ LValue TDBase = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(&TaskTypeArg),
+ KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
auto *KmpTaskTWithPrivatesQTyRD =
cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
LValue Base =
@@ -2595,7 +3303,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
- auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
+ auto *PartidParam = PartIdLVal.getPointer();
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
@@ -2609,12 +3317,37 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivatesLVal.getPointer(), CGF.VoidPtrTy);
- } else {
+ } else
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+
+ llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
+ TaskPrivatesMap,
+ CGF.Builder
+ .CreatePointerBitCastOrAddrSpaceCast(
+ TDBase.getAddress(), CGF.VoidPtrTy)
+ .getPointer()};
+ SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
+ std::end(CommonArgs));
+ if (isOpenMPTaskLoopDirective(Kind)) {
+ auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
+ auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
+ auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
+ auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
+ auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
+ auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
+ auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
+ auto StLVal = CGF.EmitLValueForField(Base, *StFI);
+ auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
+ auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
+ auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
+ auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+ CallArgs.push_back(LBParam);
+ CallArgs.push_back(UBParam);
+ CallArgs.push_back(StParam);
+ CallArgs.push_back(LIParam);
}
+ CallArgs.push_back(SharedsParam);
- llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
- TaskPrivatesMap, SharedsParam};
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
@@ -2638,8 +3371,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
Args.push_back(&TaskTypeArg);
FunctionType::ExtInfo Info;
auto &DestructorFnInfo =
- CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
- /*isVariadic=*/false);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
auto *DestructorFn =
llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
@@ -2651,8 +3383,9 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
Args);
- LValue Base = emitLoadOfPointerLValue(
- CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
+ LValue Base = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(&TaskTypeArg),
+ KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
auto *KmpTaskTWithPrivatesQTyRD =
cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
@@ -2682,6 +3415,7 @@ static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> FirstprivateVars,
+ ArrayRef<const Expr *> LastprivateVars,
QualType PrivatesQTy,
ArrayRef<PrivateDataTy> Privates) {
auto &C = CGM.getContext();
@@ -2712,10 +3446,18 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
- FunctionType::ExtInfo Info;
+ for (auto *E : LastprivateVars) {
+ Args.push_back(ImplicitParamDecl::Create(
+ C, /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict()));
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ PrivateVarsPos[VD] = Counter;
+ ++Counter;
+ }
auto &TaskPrivatesMapFnInfo =
- CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
- /*isVariadic=*/false);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *TaskPrivatesMapTy =
CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
auto *TaskPrivatesMap = llvm::Function::Create(
@@ -2730,16 +3472,17 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
TaskPrivatesMapFnInfo, Args);
// *privi = &.privates.privi;
- LValue Base = emitLoadOfPointerLValue(
- CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
+ LValue Base = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
+ TaskPrivatesArg.getType()->castAs<PointerType>());
auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
Counter = 0;
for (auto *Field : PrivatesQTyRD->fields()) {
auto FieldLVal = CGF.EmitLValueForField(Base, Field);
auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
- auto RefLoadLVal =
- emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
+ auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
+ RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
++Counter;
}
@@ -2752,23 +3495,199 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1,
return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
}
-void CGOpenMPRuntime::emitTaskCall(
- CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
- bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
- const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
- ArrayRef<const Expr *> PrivateCopies,
- ArrayRef<const Expr *> FirstprivateVars,
- ArrayRef<const Expr *> FirstprivateCopies,
- ArrayRef<const Expr *> FirstprivateInits,
- ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
- if (!CGF.HaveInsertPoint())
- return;
+/// Emit initialization for private variables in task-based directives.
+static void emitPrivatesInit(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ Address KmpTaskSharedsPtr, LValue TDBase,
+ const RecordDecl *KmpTaskTWithPrivatesQTyRD,
+ QualType SharedsTy, QualType SharedsPtrTy,
+ const OMPTaskDataTy &Data,
+ ArrayRef<PrivateDataTy> Privates, bool ForDup) {
+ auto &C = CGF.getContext();
+ auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
+ LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
+ LValue SrcBase;
+ if (!Data.FirstprivateVars.empty()) {
+ SrcBase = CGF.MakeAddrLValue(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
+ SharedsTy);
+ }
+ CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
+ cast<CapturedStmt>(*D.getAssociatedStmt()));
+ FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
+ for (auto &&Pair : Privates) {
+ auto *VD = Pair.second.PrivateCopy;
+ auto *Init = VD->getAnyInitializer();
+ if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
+ !CGF.isTrivialInitializer(Init)))) {
+ LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
+ if (auto *Elem = Pair.second.PrivateElemInit) {
+ auto *OriginalVD = Pair.second.Original;
+ auto *SharedField = CapturesInfo.lookup(OriginalVD);
+ auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
+ SharedRefLValue = CGF.MakeAddrLValue(
+ Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
+ SharedRefLValue.getType(), AlignmentSource::Decl);
+ QualType Type = OriginalVD->getType();
+ if (Type->isArrayType()) {
+ // Initialize firstprivate array.
+ if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
+ // Perform simple memcpy.
+ CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
+ SharedRefLValue.getAddress(), Type);
+ } else {
+ // Initialize firstprivate array using element-by-element
+ // initialization.
+ CGF.EmitOMPAggregateAssign(
+ PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
+ [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
+ Address SrcElement) {
+ // Clean up any temporaries needed by the initialization.
+ CodeGenFunction::OMPPrivateScope InitScope(CGF);
+ InitScope.addPrivate(
+ Elem, [SrcElement]() -> Address { return SrcElement; });
+ (void)InitScope.Privatize();
+ // Emit initialization for single element.
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
+ CGF, &CapturesInfo);
+ CGF.EmitAnyExprToMem(Init, DestElement,
+ Init->getType().getQualifiers(),
+ /*IsInitializer=*/false);
+ });
+ }
+ } else {
+ CodeGenFunction::OMPPrivateScope InitScope(CGF);
+ InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
+ return SharedRefLValue.getAddress();
+ });
+ (void)InitScope.Privatize();
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
+ CGF.EmitExprAsInit(Init, VD, PrivateLValue,
+ /*capturedByInit=*/false);
+ }
+ } else
+ CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
+ }
+ ++FI;
+ }
+}
+
+/// Check if duplication function is required for taskloops.
+static bool checkInitIsRequired(CodeGenFunction &CGF,
+ ArrayRef<PrivateDataTy> Privates) {
+ bool InitRequired = false;
+ for (auto &&Pair : Privates) {
+ auto *VD = Pair.second.PrivateCopy;
+ auto *Init = VD->getAnyInitializer();
+ InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
+ !CGF.isTrivialInitializer(Init));
+ }
+ return InitRequired;
+}
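Note: checkInitIsRequired keeps scanning after the answer is known; an equivalent early-exit formulation, should anyone want to tighten it (a sketch only, not part of this patch):

    return llvm::any_of(Privates, [&CGF](const PrivateDataTy &Pair) {
      auto *Init = Pair.second.PrivateCopy->getAnyInitializer();
      return Init && isa<CXXConstructExpr>(Init) &&
             !CGF.isTrivialInitializer(Init);
    });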
+
+/// Emit task_dup function (for initialization of
+/// private/firstprivate/lastprivate vars and last_iter flag)
+/// \code
+/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
+/// lastpriv) {
+/// // setup lastprivate flag
+/// task_dst->last = lastpriv;
+/// // could be constructor calls here...
+/// }
+/// \endcode
+static llvm::Value *
+emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
+ const OMPExecutableDirective &D,
+ QualType KmpTaskTWithPrivatesPtrQTy,
+ const RecordDecl *KmpTaskTWithPrivatesQTyRD,
+ const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
+ QualType SharedsPtrTy, const OMPTaskDataTy &Data,
+ ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
+ ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
+ ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
+ /*Id=*/nullptr, C.IntTy);
+ Args.push_back(&DstArg);
+ Args.push_back(&SrcArg);
+ Args.push_back(&LastprivArg);
+ auto &TaskDupFnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
+ auto *TaskDup =
+ llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage,
+ ".omp_task_dup.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
+
+ LValue TDBase = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(&DstArg),
+ KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
+ // task_dst->liter = lastpriv;
+ if (WithLastIter) {
+ auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
+ LValue Base = CGF.EmitLValueForField(
+ TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
+ LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
+ llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
+ CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
+ CGF.EmitStoreOfScalar(Lastpriv, LILVal);
+ }
+
+ // Emit initial values for private copies (if any).
+ assert(!Privates.empty());
+ Address KmpTaskSharedsPtr = Address::invalid();
+ if (!Data.FirstprivateVars.empty()) {
+ LValue TDBase = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(&SrcArg),
+ KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
+ LValue Base = CGF.EmitLValueForField(
+ TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
+ KmpTaskSharedsPtr = Address(
+ CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
+ Base, *std::next(KmpTaskTQTyRD->field_begin(),
+ KmpTaskTShareds)),
+ Loc),
+ CGF.getNaturalTypeAlignment(SharedsTy));
+ }
+ emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
+ SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
+ CGF.FinishFunction();
+ return TaskDup;
+}
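Note: the runtime calls this thunk each time __kmpc_taskloop carves a chunk off the iteration space and clones the task descriptor; conceptually (runtime-side pseudocode, an assumption about libomp internals rather than anything this patch emits):

    kmp_task_t *chunk_task = /* runtime clones the original task */ orig_copy;
    if (task_dup)
      task_dup(chunk_task, orig_task, /*lastpriv=*/is_last_chunk);
    // task_dup stores liter and re-runs any non-trivial private ctors.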
+
+/// Checks if destructor function is required to be generated.
+/// \return true if cleanups are required, false otherwise.
+static bool
+checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
+ bool NeedsCleanup = false;
+ auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
+ auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
+ for (auto *FD : PrivateRD->fields()) {
+ NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
+ if (NeedsCleanup)
+ break;
+ }
+ return NeedsCleanup;
+}
+
+CGOpenMPRuntime::TaskResultTy
+CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D,
+ llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const OMPTaskDataTy &Data) {
auto &C = CGM.getContext();
- llvm::SmallVector<PrivateDataTy, 8> Privates;
+ llvm::SmallVector<PrivateDataTy, 4> Privates;
// Aggregate privates and sort them by the alignment.
- auto I = PrivateCopies.begin();
- for (auto *E : PrivateVars) {
+ auto I = Data.PrivateCopies.begin();
+ for (auto *E : Data.PrivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Privates.push_back(std::make_pair(
C.getDeclAlign(VD),
@@ -2776,16 +3695,26 @@ void CGOpenMPRuntime::emitTaskCall(
/*PrivateElemInit=*/nullptr)));
++I;
}
- I = FirstprivateCopies.begin();
- auto IElemInitRef = FirstprivateInits.begin();
- for (auto *E : FirstprivateVars) {
+ I = Data.FirstprivateCopies.begin();
+ auto IElemInitRef = Data.FirstprivateInits.begin();
+ for (auto *E : Data.FirstprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Privates.push_back(std::make_pair(
C.getDeclAlign(VD),
PrivateHelpersTy(
VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
- ++I, ++IElemInitRef;
+ ++I;
+ ++IElemInitRef;
+ }
+ I = Data.LastprivateCopies.begin();
+ for (auto *E : Data.LastprivateVars) {
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Privates.push_back(std::make_pair(
+ C.getDeclAlign(VD),
+ PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+ /*PrivateElemInit=*/nullptr)));
+ ++I;
}
llvm::array_pod_sort(Privates.begin(), Privates.end(),
array_pod_sort_comparator);
@@ -2794,8 +3723,8 @@ void CGOpenMPRuntime::emitTaskCall(
emitKmpRoutineEntryT(KmpInt32Ty);
// Build type kmp_task_t (if not built yet).
if (KmpTaskTQTy.isNull()) {
- KmpTaskTQTy = C.getRecordType(
- createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
+ KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
+ CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
}
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
// Build particular struct kmp_task_t for the given task.
@@ -2806,7 +3735,7 @@ void CGOpenMPRuntime::emitTaskCall(
C.getPointerType(KmpTaskTWithPrivatesQTy);
auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
- auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
+ auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
QualType SharedsPtrTy = C.getPointerType(SharedsTy);
// Emit initial values for private copies (if any).
@@ -2818,7 +3747,8 @@ void CGOpenMPRuntime::emitTaskCall(
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = emitTaskPrivateMappingFunction(
- CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
+ CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
+ FI->getType(), Privates);
TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskPrivatesMap, TaskPrivatesMapTy);
} else {
@@ -2828,8 +3758,9 @@ void CGOpenMPRuntime::emitTaskCall(
// Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
// kmp_task_t *tt);
auto *TaskEntry = emitProxyTaskFunction(
- CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
- KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
+ CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
+ KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
+ TaskPrivatesMap);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
@@ -2837,15 +3768,27 @@ void CGOpenMPRuntime::emitTaskCall(
// Task flags. Format is taken from
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
// description of kmp_tasking_flags struct.
- const unsigned TiedFlag = 0x1;
- const unsigned FinalFlag = 0x2;
- unsigned Flags = Tied ? TiedFlag : 0;
+ enum {
+ TiedFlag = 0x1,
+ FinalFlag = 0x2,
+ DestructorsFlag = 0x8,
+ PriorityFlag = 0x20
+ };
+ unsigned Flags = Data.Tied ? TiedFlag : 0;
+ bool NeedsCleanup = false;
+ if (!Privates.empty()) {
+ NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
+ if (NeedsCleanup)
+ Flags = Flags | DestructorsFlag;
+ }
+ if (Data.Priority.getInt())
+ Flags = Flags | PriorityFlag;
auto *TaskFlags =
- Final.getPointer()
- ? CGF.Builder.CreateSelect(Final.getPointer(),
+ Data.Final.getPointer()
+ ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
CGF.Builder.getInt32(FinalFlag),
CGF.Builder.getInt32(/*C=*/0))
- : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
+ : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
@@ -2875,96 +3818,71 @@ void CGOpenMPRuntime::emitTaskCall(
CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
}
// Emit initial values for private copies (if any).
- bool NeedsCleanup = false;
+ TaskResultTy Result;
if (!Privates.empty()) {
- auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
- auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
- FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
- LValue SharedsBase;
- if (!FirstprivateVars.empty()) {
- SharedsBase = CGF.MakeAddrLValue(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
- SharedsTy);
- }
- CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
- cast<CapturedStmt>(*D.getAssociatedStmt()));
- for (auto &&Pair : Privates) {
- auto *VD = Pair.second.PrivateCopy;
- auto *Init = VD->getAnyInitializer();
- LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
- if (Init) {
- if (auto *Elem = Pair.second.PrivateElemInit) {
- auto *OriginalVD = Pair.second.Original;
- auto *SharedField = CapturesInfo.lookup(OriginalVD);
- auto SharedRefLValue =
- CGF.EmitLValueForField(SharedsBase, SharedField);
- SharedRefLValue = CGF.MakeAddrLValue(
- Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
- SharedRefLValue.getType(), AlignmentSource::Decl);
- QualType Type = OriginalVD->getType();
- if (Type->isArrayType()) {
- // Initialize firstprivate array.
- if (!isa<CXXConstructExpr>(Init) ||
- CGF.isTrivialInitializer(Init)) {
- // Perform simple memcpy.
- CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
- SharedRefLValue.getAddress(), Type);
- } else {
- // Initialize firstprivate array using element-by-element
- // intialization.
- CGF.EmitOMPAggregateAssign(
- PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
- Type, [&CGF, Elem, Init, &CapturesInfo](
- Address DestElement, Address SrcElement) {
- // Clean up any temporaries needed by the initialization.
- CodeGenFunction::OMPPrivateScope InitScope(CGF);
- InitScope.addPrivate(Elem, [SrcElement]() -> Address {
- return SrcElement;
- });
- (void)InitScope.Privatize();
- // Emit initialization for single element.
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
- CGF, &CapturesInfo);
- CGF.EmitAnyExprToMem(Init, DestElement,
- Init->getType().getQualifiers(),
- /*IsInitializer=*/false);
- });
- }
- } else {
- CodeGenFunction::OMPPrivateScope InitScope(CGF);
- InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
- return SharedRefLValue.getAddress();
- });
- (void)InitScope.Privatize();
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
- CGF.EmitExprAsInit(Init, VD, PrivateLValue,
- /*capturedByInit=*/false);
- }
- } else {
- CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
- }
- }
- NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
- ++FI;
+ emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
+ SharedsTy, SharedsPtrTy, Data, Privates,
+ /*ForDup=*/false);
+ if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
+ (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
+ Result.TaskDupFn = emitTaskDupFunction(
+ CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
+ KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
+ /*WithLastIter=*/!Data.LastprivateVars.empty());
}
}
+ // Fields of union "kmp_cmplrdata_t" for destructors and priority.
+ enum { Priority = 0, Destructors = 1 };
// Provide pointer to function with destructors for privates.
- llvm::Value *DestructorFn =
- NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
- KmpTaskTWithPrivatesPtrQTy,
- KmpTaskTWithPrivatesQTy)
- : llvm::ConstantPointerNull::get(
- cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
- LValue Destructor = CGF.EmitLValueForField(
- TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
- CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- DestructorFn, KmpRoutineEntryPtrTy),
- Destructor);
+ auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
+ auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
+ if (NeedsCleanup) {
+ llvm::Value *DestructorFn = emitDestructorsFunction(
+ CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
+ KmpTaskTWithPrivatesQTy);
+ LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
+ LValue DestructorsLV = CGF.EmitLValueForField(
+ Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
+ CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ DestructorFn, KmpRoutineEntryPtrTy),
+ DestructorsLV);
+ }
+ // Set priority.
+ if (Data.Priority.getInt()) {
+ LValue Data2LV = CGF.EmitLValueForField(
+ TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
+ LValue PriorityLV = CGF.EmitLValueForField(
+ Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
+ CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
+ }
+ Result.NewTask = NewTask;
+ Result.TaskEntry = TaskEntry;
+ Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
+ Result.TDBase = TDBase;
+ Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
+ return Result;
+}
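Note: a worked example of the flag word assembled above: a tied task with a priority clause whose privates need destructors ends up with

    unsigned Flags = TiedFlag | DestructorsFlag | PriorityFlag; // 0x1|0x8|0x20 == 0x29

and final(expr) is then OR-ed in at runtime through the select on Data.Final.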
+
+void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D,
+ llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds,
+ const Expr *IfCond,
+ const OMPTaskDataTy &Data) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ TaskResultTy Result =
+ emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
+ llvm::Value *NewTask = Result.NewTask;
+ llvm::Value *TaskEntry = Result.TaskEntry;
+ llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
+ LValue TDBase = Result.TDBase;
+ RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
+ auto &C = CGM.getContext();
// Process list of dependences.
Address DependenciesArray = Address::invalid();
- unsigned NumDependencies = Dependences.size();
+ unsigned NumDependencies = Data.Dependences.size();
if (NumDependencies) {
// Dependence kind for RTL.
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
@@ -2981,18 +3899,18 @@ void CGOpenMPRuntime::emitTaskCall(
addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
KmpDependInfoRD->completeDefinition();
KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
- } else {
+ } else
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
- }
CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
- DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
+ DependenciesArray =
+ CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
for (unsigned i = 0; i < NumDependencies; ++i) {
- const Expr *E = Dependences[i].second;
+ const Expr *E = Data.Dependences[i].second;
auto Addr = CGF.EmitLValue(E);
llvm::Value *Size;
QualType Ty = E->getType();
@@ -3006,7 +3924,7 @@ void CGOpenMPRuntime::emitTaskCall(
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
} else
- Size = getTypeSize(CGF, Ty);
+ Size = CGF.getTypeSize(Ty);
auto Base = CGF.MakeAddrLValue(
CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
KmpDependInfoTy);
@@ -3022,7 +3940,7 @@ void CGOpenMPRuntime::emitTaskCall(
CGF.EmitStoreOfScalar(Size, LenLVal);
// deps[i].flags = <Dependences[i].first>;
RTLDependenceKindTy DepKind;
- switch (Dependences[i].first) {
+ switch (Data.Dependences[i].first) {
case OMPC_DEPEND_in:
DepKind = DepIn;
break;
@@ -3048,8 +3966,6 @@ void CGOpenMPRuntime::emitTaskCall(
// NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
// libcall.
- // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
- // *new_task);
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
@@ -3067,19 +3983,26 @@ void CGOpenMPRuntime::emitTaskCall(
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ThenCodeGen = [this, NumDependencies,
- &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
- // TODO: add check for untied tasks.
+ auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
+ NumDependencies, &TaskArgs,
+ &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
+ if (!Data.Tied) {
+ auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
+ auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
+ CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
+ }
if (NumDependencies) {
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
- DepTaskArgs);
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
} else {
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
TaskArgs);
}
+ // Check if the parent region is untied and build a return for the untied task.
+ if (auto *Region =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
};
- typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
- IfCallEndCleanup;
llvm::Value *DepWaitTaskArgs[6];
if (NumDependencies) {
@@ -3090,40 +4013,111 @@ void CGOpenMPRuntime::emitTaskCall(
DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
- NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
+ auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
+ NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
// is specified.
if (NumDependencies)
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
+ CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
DepWaitTaskArgs);
+ // Call proxy_task_entry(gtid, new_task);
+ auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
+ CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+ };
+
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task);
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
- TaskArgs);
// Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task);
- CGF.EHStack.pushCleanup<IfCallEndCleanup>(
- NormalAndEHCleanup,
- createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
- llvm::makeArrayRef(TaskArgs));
-
- // Call proxy_task_entry(gtid, new_task);
- llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
- CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+ RegionCodeGenTy RCG(CodeGen);
+ CommonActionTy Action(
+ RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
+ RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
};
- if (IfCond) {
+ if (IfCond)
emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
- } else {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- ThenCodeGen(CGF);
+ else {
+ RegionCodeGenTy ThenRCG(ThenCodeGen);
+ ThenRCG(CGF);
}
}
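Note: for `if(0)` the else branch now expresses the begin/complete bracket as a CommonActionTy around the proxy call, so the emitted order is unchanged from the old cleanup-based version (sketch assuming the kmp typedefs; `omp_task_entry` stands for the proxy generated earlier):

    if (ndeps)
      __kmpc_omp_wait_deps(loc, gtid, ndeps, dep_list, 0, nullptr);
    __kmpc_omp_task_begin_if0(loc, gtid, new_task);
    omp_task_entry(gtid, new_task);
    __kmpc_omp_task_complete_if0(loc, gtid, new_task);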
+void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPLoopDirective &D,
+ llvm::Value *TaskFunction,
+ QualType SharedsTy, Address Shareds,
+ const Expr *IfCond,
+ const OMPTaskDataTy &Data) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ TaskResultTy Result =
+ emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
+ // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
+ // libcall.
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
+ // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
+ // sched, kmp_uint64 grainsize, void *task_dup);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *IfVal;
+ if (IfCond) {
+ IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
+ /*isSigned=*/true);
+ } else
+ IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
+
+ LValue LBLVal = CGF.EmitLValueForField(
+ Result.TDBase,
+ *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
+ auto *LBVar =
+ cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
+ CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
+ /*IsInitializer=*/true);
+ LValue UBLVal = CGF.EmitLValueForField(
+ Result.TDBase,
+ *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
+ auto *UBVar =
+ cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
+ CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
+ /*IsInitializer=*/true);
+ LValue StLVal = CGF.EmitLValueForField(
+ Result.TDBase,
+ *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
+ auto *StVar =
+ cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
+ CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
+ /*IsInitializer=*/true);
+ enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+ llvm::Value *TaskArgs[] = {
+ UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
+ UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Data.Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ Result.TaskDupFn
+ ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
+ CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
+}
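Note: a worked example of the sched/grainsize operands: `grainsize(4)` reaches here as a Schedule pair with getInt()==false, so the call passes sched=1 (Grainsize) with the value widened to kmp_uint64; `num_tasks(4)` gives sched=2 (NumTasks); with neither clause it passes sched=0 and a zero value. The resulting call (shape per the comment above; operand names illustrative):

    __kmpc_taskloop(loc, gtid, new_task, /*if_val=*/1, &lb, &ub, st,
                    /*nogroup=*/0, /*sched=*/1, /*grainsize=*/4, task_dup);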
+
/// \brief Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
@@ -3204,6 +4198,26 @@ static void EmitOMPAggregateReduction(
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
+/// Emit reduction combiner. If the combiner is a simple expression emit it as
+/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
+/// UDR combiner function.
+static void emitReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp) {
+ if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
+ if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+ if (auto *DRE =
+ dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
+ if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
+ std::pair<llvm::Function *, llvm::Function *> Reduction =
+ CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
+ RValue Func = RValue::get(Reduction.first);
+ CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
+ CGF.EmitIgnoredExpr(ReductionOp);
+ return;
+ }
+ CGF.EmitIgnoredExpr(ReductionOp);
+}
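Note: the CallExpr-over-OpaqueValueExpr shape matched here is what a user-defined reduction lowers to; for reference, a source-level UDR whose combiner would be fetched through getUserDefinedReduction (example only, not from this change):

    #pragma omp declare reduction(merge : std::vector<int> : omp_out.insert( \
        omp_out.end(), omp_in.begin(), omp_in.end()))
    std::vector<int> v;
    #pragma omp parallel for reduction(merge : v)
    for (int i = 0; i < n; ++i)
      v.push_back(i);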
+
static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
llvm::Type *ArgsType,
ArrayRef<const Expr *> Privates,
@@ -3220,9 +4234,7 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
C.VoidPtrTy);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
- FunctionType::ExtInfo EI;
- auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
- C.VoidTy, Args, EI, /*isVariadic=*/false);
+ auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
".omp.reduction.reduction_func", &CGM.getModule());
@@ -3255,17 +4267,16 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
});
QualType PrivTy = (*IPriv)->getType();
- if (PrivTy->isArrayType()) {
+ if (PrivTy->isVariablyModifiedType()) {
// Get array size and emit VLA type.
++Idx;
Address Elem =
CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
+ auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
+ auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
CodeGenFunction::OpaqueValueMapping OpaqueMap(
- CGF,
- cast<OpaqueValueExpr>(
- CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
- RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
+ CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
CGF.EmitVariablyModifiedType(PrivTy);
}
}
@@ -3278,20 +4289,42 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
// Emit reduction for array section.
auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
- [=](CodeGenFunction &CGF, const Expr *,
- const Expr *,
- const Expr *) { CGF.EmitIgnoredExpr(E); });
+ EmitOMPAggregateReduction(
+ CGF, (*IPriv)->getType(), LHSVar, RHSVar,
+ [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
+ emitReductionCombiner(CGF, E);
+ });
} else
// Emit reduction for array subscript or single variable.
- CGF.EmitIgnoredExpr(E);
- ++IPriv, ++ILHS, ++IRHS;
+ emitReductionCombiner(CGF, E);
+ ++IPriv;
+ ++ILHS;
+ ++IRHS;
}
Scope.ForceCleanup();
CGF.FinishFunction();
return Fn;
}
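Note: the generated combiner receives two type-erased RedList pointers; a variably modified private contributes two consecutive slots (the data pointer, then its element count), which is the extra ++Idx above and the OpaqueValueMapping that feeds the VLA size expression. Conceptually (illustrative C; the real symbol is .omp.reduction.reduction_func):

    void reduction_func(void *lhs, void *rhs) {
      void **lhsArr = (void **)lhs, **rhsArr = (void **)rhs;
      // slot i:   *(T *)lhsArr[i] = combine(*(T *)lhsArr[i], *(T *)rhsArr[i]);
      // VLA slot: lhsArr[i+1] carries the run-time element count as an intptr.
    }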
+static void emitSingleReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp,
+ const Expr *PrivateRef,
+ const DeclRefExpr *LHS,
+ const DeclRefExpr *RHS) {
+ if (PrivateRef->getType()->isArrayType()) {
+ // Emit reduction for array section.
+ auto *LHSVar = cast<VarDecl>(LHS->getDecl());
+ auto *RHSVar = cast<VarDecl>(RHS->getDecl());
+ EmitOMPAggregateReduction(
+ CGF, PrivateRef->getType(), LHSVar, RHSVar,
+ [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
+ emitReductionCombiner(CGF, ReductionOp);
+ });
+ } else
+ // Emit reduction for array subscript or single variable.
+ emitReductionCombiner(CGF, ReductionOp);
+}
+
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> LHSExprs,
@@ -3343,16 +4376,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
for (auto *E : ReductionOps) {
- if ((*IPriv)->getType()->isArrayType()) {
- auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- EmitOMPAggregateReduction(
- CGF, (*IPriv)->getType(), LHSVar, RHSVar,
- [=](CodeGenFunction &CGF, const Expr *, const Expr *,
- const Expr *) { CGF.EmitIgnoredExpr(E); });
- } else
- CGF.EmitIgnoredExpr(E);
- ++IPriv, ++ILHS, ++IRHS;
+ emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
+ ++IPriv;
+ ++ILHS;
+ ++IRHS;
}
return;
}
@@ -3361,7 +4389,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
auto Size = RHSExprs.size();
for (auto *E : Privates) {
- if (E->getType()->isArrayType())
+ if (E->getType()->isVariablyModifiedType())
// Reserve place for array size.
++Size;
}
@@ -3380,20 +4408,18 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
Elem);
- if ((*IPriv)->getType()->isArrayType()) {
+ if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
++Idx;
Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
CGF.getPointerSize());
- CGF.Builder.CreateStore(
- CGF.Builder.CreateIntToPtr(
- CGF.Builder.CreateIntCast(
- CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
- (*IPriv)->getType()))
- .first,
- CGF.SizeTy, /*isSigned=*/false),
- CGF.VoidPtrTy),
- Elem);
+ llvm::Value *Size = CGF.Builder.CreateIntCast(
+ CGF.getVLASize(
+ CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+ .first,
+ CGF.SizeTy, /*isSigned=*/false);
+ CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+ Elem);
}
}
@@ -3407,11 +4433,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
// 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
// RedList, reduce_func, &<lock>);
- auto *IdentTLoc = emitUpdateLocation(
- CGF, Loc,
- static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
+ auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
auto *ThreadId = getThreadID(CGF, Loc);
- auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
+ auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
auto *RL =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
CGF.VoidPtrTy);
@@ -3443,38 +4467,33 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
CGF.EmitBlock(Case1BB);
- {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
- llvm::Value *EndArgs[] = {
- IdentTLoc, // ident_t *<loc>
- ThreadId, // i32 <gtid>
- Lock // kmp_critical_name *&<lock>
- };
- CGF.EHStack
- .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
- NormalAndEHCleanup,
- createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
- : OMPRTL__kmpc_end_reduce),
- llvm::makeArrayRef(EndArgs));
+ // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+ llvm::Value *EndArgs[] = {
+ IdentTLoc, // ident_t *<loc>
+ ThreadId, // i32 <gtid>
+ Lock // kmp_critical_name *&<lock>
+ };
+ auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
for (auto *E : ReductionOps) {
- if ((*IPriv)->getType()->isArrayType()) {
- // Emit reduction for array section.
- auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- EmitOMPAggregateReduction(
- CGF, (*IPriv)->getType(), LHSVar, RHSVar,
- [=](CodeGenFunction &CGF, const Expr *, const Expr *,
- const Expr *) { CGF.EmitIgnoredExpr(E); });
- } else
- // Emit reduction for array subscript or single variable.
- CGF.EmitIgnoredExpr(E);
- ++IPriv, ++ILHS, ++IRHS;
+ emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
+ ++IPriv;
+ ++ILHS;
+ ++IRHS;
}
- }
+ };
+ RegionCodeGenTy RCG(CodeGen);
+ CommonActionTy Action(
+ nullptr, llvm::None,
+ createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
+ : OMPRTL__kmpc_end_reduce),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
CGF.EmitBranch(DefaultBB);
@@ -3487,103 +4506,113 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
CGF.EmitBlock(Case2BB);
- {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- if (!WithNowait) {
- // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
- llvm::Value *EndArgs[] = {
- IdentTLoc, // ident_t *<loc>
- ThreadId, // i32 <gtid>
- Lock // kmp_critical_name *&<lock>
- };
- CGF.EHStack
- .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
- NormalAndEHCleanup,
- createRuntimeFunction(OMPRTL__kmpc_end_reduce),
- llvm::makeArrayRef(EndArgs));
- }
+ auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
auto IPriv = Privates.begin();
for (auto *E : ReductionOps) {
- const Expr *XExpr = nullptr;
- const Expr *EExpr = nullptr;
- const Expr *UpExpr = nullptr;
- BinaryOperatorKind BO = BO_Comma;
- if (auto *BO = dyn_cast<BinaryOperator>(E)) {
- if (BO->getOpcode() == BO_Assign) {
- XExpr = BO->getLHS();
- UpExpr = BO->getRHS();
- }
+ const Expr *XExpr = nullptr;
+ const Expr *EExpr = nullptr;
+ const Expr *UpExpr = nullptr;
+ BinaryOperatorKind BO = BO_Comma;
+ if (auto *BO = dyn_cast<BinaryOperator>(E)) {
+ if (BO->getOpcode() == BO_Assign) {
+ XExpr = BO->getLHS();
+ UpExpr = BO->getRHS();
}
- // Try to emit update expression as a simple atomic.
- auto *RHSExpr = UpExpr;
- if (RHSExpr) {
- // Analyze RHS part of the whole expression.
- if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
- RHSExpr->IgnoreParenImpCasts())) {
- // If this is a conditional operator, analyze its condition for
- // min/max reduction operator.
- RHSExpr = ACO->getCond();
- }
- if (auto *BORHS =
- dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
- EExpr = BORHS->getRHS();
- BO = BORHS->getOpcode();
- }
+ }
+ // Try to emit update expression as a simple atomic.
+ auto *RHSExpr = UpExpr;
+ if (RHSExpr) {
+ // Analyze RHS part of the whole expression.
+ if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
+ RHSExpr->IgnoreParenImpCasts())) {
+ // If this is a conditional operator, analyze its condition for
+ // min/max reduction operator.
+ RHSExpr = ACO->getCond();
}
- if (XExpr) {
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto &&AtomicRedGen = [this, BO, VD, IPriv,
- Loc](CodeGenFunction &CGF, const Expr *XExpr,
- const Expr *EExpr, const Expr *UpExpr) {
- LValue X = CGF.EmitLValue(XExpr);
- RValue E;
- if (EExpr)
- E = CGF.EmitAnyExpr(EExpr);
- CGF.EmitOMPAtomicSimpleUpdateExpr(
- X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
- [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
- CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
- PrivateScope.addPrivate(
- VD, [&CGF, VD, XRValue, Loc]() -> Address {
- Address LHSTemp = CGF.CreateMemTemp(VD->getType());
- CGF.emitOMPSimpleStore(
- CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
- VD->getType().getNonReferenceType(), Loc);
- return LHSTemp;
- });
- (void)PrivateScope.Privatize();
- return CGF.EmitAnyExpr(UpExpr);
- });
- };
- if ((*IPriv)->getType()->isArrayType()) {
- // Emit atomic reduction for array section.
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
- AtomicRedGen, XExpr, EExpr, UpExpr);
- } else
- // Emit atomic reduction for array subscript or single variable.
- AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
- } else {
- // Emit as a critical region.
- auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
- const Expr *, const Expr *) {
- emitCriticalRegion(
- CGF, ".atomic_reduction",
- [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
- };
- if ((*IPriv)->getType()->isArrayType()) {
- auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
- CritRedGen);
- } else
- CritRedGen(CGF, nullptr, nullptr, nullptr);
+ if (auto *BORHS =
+ dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
+ EExpr = BORHS->getRHS();
+ BO = BORHS->getOpcode();
}
- ++ILHS, ++IRHS, ++IPriv;
+ }
+ if (XExpr) {
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ auto &&AtomicRedGen = [BO, VD, IPriv,
+ Loc](CodeGenFunction &CGF, const Expr *XExpr,
+ const Expr *EExpr, const Expr *UpExpr) {
+ LValue X = CGF.EmitLValue(XExpr);
+ RValue E;
+ if (EExpr)
+ E = CGF.EmitAnyExpr(EExpr);
+ CGF.EmitOMPAtomicSimpleUpdateExpr(
+ X, E, BO, /*IsXLHSInRHSPart=*/true,
+ llvm::AtomicOrdering::Monotonic, Loc,
+ [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(
+ VD, [&CGF, VD, XRValue, Loc]() -> Address {
+ Address LHSTemp = CGF.CreateMemTemp(VD->getType());
+ CGF.emitOMPSimpleStore(
+ CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
+ VD->getType().getNonReferenceType(), Loc);
+ return LHSTemp;
+ });
+ (void)PrivateScope.Privatize();
+ return CGF.EmitAnyExpr(UpExpr);
+ });
+ };
+ if ((*IPriv)->getType()->isArrayType()) {
+ // Emit atomic reduction for array section.
+ auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
+ AtomicRedGen, XExpr, EExpr, UpExpr);
+ } else
+ // Emit atomic reduction for array subscript or single variable.
+ AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
+ } else {
+ // Emit as a critical region.
+ auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
+ const Expr *, const Expr *) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
+ RT.emitCriticalRegion(
+ CGF, ".atomic_reduction",
+ [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ emitReductionCombiner(CGF, E);
+ },
+ Loc);
+ };
+ if ((*IPriv)->getType()->isArrayType()) {
+ auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
+ CritRedGen);
+ } else
+ CritRedGen(CGF, nullptr, nullptr, nullptr);
+ }
+ ++ILHS;
+ ++IRHS;
+ ++IPriv;
}
- }
+ };
+ RegionCodeGenTy AtomicRCG(AtomicCodeGen);
+ if (!WithNowait) {
+ // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
+ llvm::Value *EndArgs[] = {
+ IdentTLoc, // ident_t *<loc>
+ ThreadId, // i32 <gtid>
+ Lock // kmp_critical_name *&<lock>
+ };
+ CommonActionTy Action(nullptr, llvm::None,
+ createRuntimeFunction(OMPRTL__kmpc_end_reduce),
+ EndArgs);
+ AtomicRCG.setAction(Action);
+ AtomicRCG(CGF);
+ } else
+ AtomicRCG(CGF);
CGF.EmitBranch(DefaultBB);
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
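Stepping back, the whole lowering follows the dispatch below, restated as a hedged stand-alone sketch; kmpcReduce and kmpcEndReduce are stand-ins for the __kmpc_reduce{_nowait} and __kmpc_end_reduce{_nowait} entry points, whose real signatures also carry loc/gtid/lock arguments.

extern "C" int kmpcReduce();      // stand-in for __kmpc_reduce{_nowait}
extern "C" void kmpcEndReduce();  // stand-in for __kmpc_end_reduce{_nowait}

template <typename Combine, typename AtomicCombine>
void reduceDispatch(Combine combine, AtomicCombine atomicCombine) {
  switch (kmpcReduce()) {
  case 1:             // this thread performs the combination directly
    combine();        // LHS = LHS op RHS for every reduction item
    kmpcEndReduce();
    break;
  case 2:             // combine element-wise via atomics or a critical region
    atomicCombine();
    break;
  default:            // nothing to do on this thread
    break;
  }
}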
@@ -3598,6 +4627,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
// Ignore return result until untied tasks are supported.
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
+ if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
}
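In source terms, the call being emitted corresponds to a bare taskwait; a compilable sketch:

#include <cstdio>

int main() {
#pragma omp task
  std::puts("child task");
#pragma omp taskwait // lowered to __kmpc_omp_taskwait(&loc, gtid)
  std::puts("all children done");
}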
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
@@ -3618,7 +4649,7 @@ enum RTCancelKind {
CancelSections = 3,
CancelTaskgroup = 4
};
-}
+} // anonymous namespace
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
RTCancelKind CancelKind = CancelNoreq;
@@ -3680,14 +4711,15 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
// kmp_int32 cncl_kind);
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
- auto &&ThenGen = [this, Loc, CancelRegion,
- OMPRegionInfo](CodeGenFunction &CGF) {
+ auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
// Ignore return result until untied tasks are supported.
- auto *Result =
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
+ auto *Result = CGF.EmitRuntimeCall(
+ RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
// if (__kmpc_cancel()) {
// __kmpc_cancel_barrier();
// exit from construct;
@@ -3698,7 +4730,7 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
CGF.EmitBlock(ExitBB);
// __kmpc_cancel_barrier();
- emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
+ RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
// exit from construct;
auto CancelDest =
CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
@@ -3706,18 +4738,21 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
};
if (IfCond)
- emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
- else
- ThenGen(CGF);
+ emitOMPIfClause(CGF, IfCond, ThenGen,
+ [](CodeGenFunction &, PrePostActionTy &) {});
+ else {
+ RegionCodeGenTy ThenRCG(ThenGen);
+ ThenRCG(CGF);
+ }
}
}
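The emitted control flow can be restated as ordinary C++; kmpcCancel is a stand-in for __kmpc_cancel, whose real signature takes loc, gtid and the cancel kind computed by getCancellationKind.

extern "C" int kmpcCancel(); // stand-in for __kmpc_cancel

void constructBody() {
  if (kmpcCancel()) { // nonzero: cancellation has been activated
    // __kmpc_cancel_barrier(...) runs here, then control branches to the
    // construct's cancellation destination.
    return;
  }
  // otherwise fall through and keep executing the construct
}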
/// \brief Obtain information that uniquely identifies a target entry. This
-/// consists of the file and device IDs as well as line and column numbers
-/// associated with the relevant entry source location.
+/// consists of the file and device IDs as well as the line number associated
+/// with the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
unsigned &DeviceID, unsigned &FileID,
- unsigned &LineNum, unsigned &ColumnNum) {
+ unsigned &LineNum) {
auto &SM = C.getSourceManager();
@@ -3737,49 +4772,45 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
DeviceID = ID.getDevice();
FileID = ID.getFile();
LineNum = PLoc.getLine();
- ColumnNum = PLoc.getColumn();
- return;
}
void CGOpenMPRuntime::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
- bool IsOffloadEntry) {
-
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
assert(!ParentName.empty() && "Invalid target region parent name!");
- const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
-
- // Emit target region as a standalone region.
- auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
- CGF.EmitStmt(CS.getCapturedStmt());
- };
+ emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
+}
- // Create a unique name for the proxy/entry function that using the source
- // location information of the current target region. The name will be
- // something like:
+void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ // Create a unique name for the entry function using the source location
+ // information of the current target region. The name will be something like:
//
- // .omp_offloading.DD_FFFF.PP.lBB.cCC
+ // __omp_offloading_DD_FFFF_PP_lBB
//
// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
- // mangled name of the function that encloses the target region, BB is the
- // line number of the target region, and CC is the column number of the target
- // region.
+ // mangled name of the function that encloses the target region and BB is the
+ // line number of the target region.
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- unsigned Column;
getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
- Line, Column);
+ Line);
SmallString<64> EntryFnName;
{
llvm::raw_svector_ostream OS(EntryFnName);
- OS << ".omp_offloading" << llvm::format(".%x", DeviceID)
- << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c"
- << Column;
+ OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
+ << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
}
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
CodeGenFunction CGF(CGM, true);
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
@@ -3813,18 +4844,122 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
// Register the information for the entry associated with this target region.
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
- DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
- return;
+ DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
}
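A self-contained sketch of the new entry-name scheme, reusing the format strings from the hunk above; the DeviceID, FileID, line and parent name below are invented for illustration.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  unsigned DeviceID = 0x2a, FileID = 0x1f00d, Line = 42;
  llvm::SmallString<64> EntryFnName;
  llvm::raw_svector_ostream OS(EntryFnName);
  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
     << llvm::format("_%x_", FileID) << "main" << "_l" << Line;
  llvm::outs() << EntryFnName << "\n"; // __omp_offloading_2a_1f00d_main_l42
}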
-void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Value *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device,
- ArrayRef<llvm::Value *> CapturedVars) {
- if (!CGF.HaveInsertPoint())
- return;
+/// Discard all CompoundStmts intervening between two constructs.
+static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
+ while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
+ Body = CS->body_front();
+
+ return Body;
+}
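The helper exists because compound statements may legally sit between the target and teams directives; a compilable sketch of the shape it looks through (assuming -fopenmp):

void work();

void bracedTeams() {
#pragma omp target
  { // two CompoundStmts that ignoreCompoundStmts skips over
    {
#pragma omp teams
      work();
    }
  }
}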
+
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
+
+ assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+ "teams directive expected to be "
+ "emitted only for the host!");
+
+ // FIXME: For the moment we do not support combined directives with target and
+ // teams, so we do not expect to get any num_teams clause in the provided
+ // directive. Once we support that, this assertion can be replaced by the
+ // actual emission of the clause expression.
+ assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
+ "Not expecting clause in directive.");
+
+ // If the current target region has a teams region enclosed, we need to get
+ // the number of teams to pass to the runtime function call. This is done
+  // by generating the expression in an inlined region. This is required because
+ // the expression is captured in the enclosing target environment when the
+ // teams directive is not combined with target.
+
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+ // FIXME: Accommodate other combined directives with teams when they become
+ // available.
+ if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+ ignoreCompoundStmts(CS.getCapturedStmt()))) {
+ if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
+ return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+    // If we have an enclosed teams directive but no num_teams clause, we use
+ // the default value 0.
+ return CGF.Builder.getInt32(0);
+ }
+
+ // No teams associated with the directive.
+ return nullptr;
+}
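The three return paths correspond to the source shapes below (the thread_limit helper that follows works the same way); a compilable sketch, assuming -fopenmp:

void work();

void noTeams() {
#pragma omp target // no enclosed teams: helper returns nullptr
  work();
}

void teamsDefault() {
#pragma omp target
#pragma omp teams // teams without num_teams: helper returns i32 0
  work();
}

void teamsExplicit(int n) {
#pragma omp target
#pragma omp teams num_teams(n) // helper emits n, cast to i32
  work();
}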
+
+/// \brief Emit the thread_limit clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no thread_limit clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
+
+ assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+ "teams directive expected to be "
+ "emitted only for the host!");
+
+ // FIXME: For the moment we do not support combined directives with target and
+ // teams, so we do not expect to get any thread_limit clause in the provided
+ // directive. Once we support that, this assertion can be replaced by the
+ // actual emission of the clause expression.
+ assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
+ "Not expecting clause in directive.");
+
+ // If the current target region has a teams region enclosed, we need to get
+ // the thread limit to pass to the runtime function call. This is done
+  // by generating the expression in an inlined region. This is required because
+ // the expression is captured in the enclosing target environment when the
+ // teams directive is not combined with target.
+
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+ // FIXME: Accommodate other combined directives with teams when they become
+ // available.
+ if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+ ignoreCompoundStmts(CS.getCapturedStmt()))) {
+ if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
+ return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+    // If we have an enclosed teams directive but no thread_limit clause, we use
+ // the default value 0.
+ return CGF.Builder.getInt32(0);
+ }
+
+ // No teams associated with the directive.
+ return nullptr;
+}
+
+namespace {
+// \brief Utility to handle information from clauses associated with a given
+// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
+// It provides a convenient interface to obtain the information and generate
+// code for that information.
+class MappableExprsHandler {
+public:
/// \brief Values for bit flags used to specify the mapping type for
/// offloading.
enum OpenMPOffloadMappingFlags {
@@ -3832,105 +4967,806 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
OMP_MAP_TO = 0x01,
/// \brief Allocate memory on the device and move data from device to host.
OMP_MAP_FROM = 0x02,
- /// \brief The element passed to the device is a pointer.
- OMP_MAP_PTR = 0x20,
+ /// \brief Always perform the requested mapping action on the element, even
+ /// if it was already mapped before.
+ OMP_MAP_ALWAYS = 0x04,
+ /// \brief Delete the element from the device environment, ignoring the
+ /// current reference count associated with the element.
+ OMP_MAP_DELETE = 0x08,
+ /// \brief The element being mapped is a pointer, therefore the pointee
+ /// should be mapped as well.
+ OMP_MAP_IS_PTR = 0x10,
+    /// \brief This flag signals that an argument is the first one relating to
+ /// a map/private clause expression. For some cases a single
+ /// map/privatization results in multiple arguments passed to the runtime
+ /// library.
+ OMP_MAP_FIRST_REF = 0x20,
+ /// \brief This flag signals that the reference being passed is a pointer to
+ /// private data.
+ OMP_MAP_PRIVATE_PTR = 0x80,
/// \brief Pass the element to the device by value.
- OMP_MAP_BYCOPY = 0x80,
+ OMP_MAP_PRIVATE_VAL = 0x100,
};
- enum OpenMPOffloadingReservedDeviceIDs {
- /// \brief Device ID if the device was not defined, runtime should get it
- /// from environment variables in the spec.
- OMP_DEVICEID_UNDEF = -1,
- };
+ typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
+ typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
+
+private:
+ /// \brief Directive from where the map clauses were extracted.
+ const OMPExecutableDirective &Directive;
+
+ /// \brief Function the directive is being generated for.
+ CodeGenFunction &CGF;
+
+ /// \brief Set of all first private variables in the current directive.
+ llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
+
+ llvm::Value *getExprTypeSize(const Expr *E) const {
+ auto ExprTy = E->getType().getCanonicalType();
+
+ // Reference types are ignored for mapping purposes.
+ if (auto *RefTy = ExprTy->getAs<ReferenceType>())
+ ExprTy = RefTy->getPointeeType().getCanonicalType();
+
+ // Given that an array section is considered a built-in type, we need to
+ // do the calculation based on the length of the section instead of relying
+ // on CGF.getTypeSize(E->getType()).
+ if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
+ QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
+ OAE->getBase()->IgnoreParenImpCasts())
+ .getCanonicalType();
+
+ // If there is no length associated with the expression, that means we
+ // are using the whole length of the base.
+ if (!OAE->getLength() && OAE->getColonLoc().isValid())
+ return CGF.getTypeSize(BaseTy);
+
+ llvm::Value *ElemSize;
+ if (auto *PTy = BaseTy->getAs<PointerType>())
+ ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
+ else {
+ auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
+ assert(ATy && "Expecting array type if not a pointer type.");
+ ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
+ }
+
+ // If we don't have a length at this point, that is because we have an
+ // array section with a single element.
+ if (!OAE->getLength())
+ return ElemSize;
+
+ auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
+ LengthVal =
+ CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
+ return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ }
+ return CGF.getTypeSize(ExprTy);
+ }
+
+ /// \brief Return the corresponding bits for a given map clause modifier. Add
+ /// a flag marking the map as a pointer if requested. Add a flag marking the
+ /// map as the first one of a series of maps that relate to the same map
+ /// expression.
+ unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
+ OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
+ bool AddIsFirstFlag) const {
+ unsigned Bits = 0u;
+ switch (MapType) {
+ case OMPC_MAP_alloc:
+ case OMPC_MAP_release:
+      // alloc and release are the default behavior in the runtime library:
+      // if we don't pass any bits, alloc/release is what the runtime is
+      // going to do. Therefore, we don't need to signal anything for these two
+ // type modifiers.
+ break;
+ case OMPC_MAP_to:
+ Bits = OMP_MAP_TO;
+ break;
+ case OMPC_MAP_from:
+ Bits = OMP_MAP_FROM;
+ break;
+ case OMPC_MAP_tofrom:
+ Bits = OMP_MAP_TO | OMP_MAP_FROM;
+ break;
+ case OMPC_MAP_delete:
+ Bits = OMP_MAP_DELETE;
+ break;
+ default:
+ llvm_unreachable("Unexpected map type!");
+ break;
+ }
+ if (AddPtrFlag)
+ Bits |= OMP_MAP_IS_PTR;
+ if (AddIsFirstFlag)
+ Bits |= OMP_MAP_FIRST_REF;
+ if (MapTypeModifier == OMPC_MAP_always)
+ Bits |= OMP_MAP_ALWAYS;
+ return Bits;
+ }
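A worked instance of the bit math, using the flag values from the enum above: an entry for map(always, tofrom: ...) that is itself a pointer dereference combines to 0x17, with OMP_MAP_FIRST_REF (0x20) OR'd in only for the first entry of a capture.

// TO | FROM | ALWAYS | IS_PTR == 0x01 | 0x02 | 0x04 | 0x10
static_assert((0x01 | 0x02 | 0x04 | 0x10) == 0x17,
              "combined map-type bits for an always/tofrom pointer entry");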
+
+ /// \brief Return true if the provided expression is a final array section. A
+  /// final array section is one whose length can't be proved to be one.
+ bool isFinalArraySectionExpression(const Expr *E) const {
+ auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
+
+ // It is not an array section and therefore not a unity-size one.
+ if (!OASE)
+ return false;
+
+    // An array section with no colon always refers to a single element.
+ if (OASE->getColonLoc().isInvalid())
+ return false;
+
+ auto *Length = OASE->getLength();
+
+    // If we don't have a length, we have to check whether the array has size
+    // 1 for this dimension. Also, we should always expect a length if the
+    // base type is a pointer.
+ if (!Length) {
+ auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
+ OASE->getBase()->IgnoreParenImpCasts())
+ .getCanonicalType();
+ if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
+ return ATy->getSize().getSExtValue() != 1;
+ // If we don't have a constant dimension length, we have to consider
+ // the current section as having any size, so it is not necessarily
+      // unitary. If it happens to be unity size, that's the user's fault.
+ return true;
+ }
+
+ // Check if the length evaluates to 1.
+ llvm::APSInt ConstLength;
+ if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
+      return true; // Can have more than size 1.
+
+ return ConstLength.getSExtValue() != 1;
+ }
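Restated in source terms, with the decision reduced to a compile-time predicate (a sketch of the rule, not of the clang API):

// A section is "final" unless its length provably folds to 1.
constexpr bool isFinalSection(long knownLength) { return knownLength != 1; }

static_assert(!isFinalSection(1), "a[i:1] has unit size, so it is not final");
static_assert(isFinalSection(8), "b[0:] over int b[8] is final");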
+
+ /// \brief Generate the base pointers, section pointers, sizes and map type
+ /// bits for the provided map type, map modifier, and expression components.
+ /// \a IsFirstComponent should be set to true if the provided set of
+ /// components is the first associated with a capture.
+ void generateInfoForComponentList(
+ OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
+ MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
+ bool IsFirstComponentList) const {
+
+    // The following summarizes what has to be generated for each map and the
+    // types below. The generated information is expressed in this order:
+ // base pointer, section pointer, size, flags
+ // (to add to the ones that come from the map type and modifier).
+ //
+ // double d;
+ // int i[100];
+ // float *p;
+ //
+ // struct S1 {
+ // int i;
+ // float f[50];
+ // }
+ // struct S2 {
+ // int i;
+ // float f[50];
+ // S1 s;
+ // double *p;
+ // struct S2 *ps;
+ // }
+ // S2 s;
+ // S2 *ps;
+ //
+ // map(d)
+ // &d, &d, sizeof(double), noflags
+ //
+ // map(i)
+ // &i, &i, 100*sizeof(int), noflags
+ //
+ // map(i[1:23])
+ // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
+ //
+ // map(p)
+ // &p, &p, sizeof(float*), noflags
+ //
+ // map(p[1:24])
+ // p, &p[1], 24*sizeof(float), noflags
+ //
+ // map(s)
+ // &s, &s, sizeof(S2), noflags
+ //
+ // map(s.i)
+ // &s, &(s.i), sizeof(int), noflags
+ //
+ // map(s.s.f)
+    //   &s, &(s.s.f), 50*sizeof(float), noflags
+ //
+ // map(s.p)
+ // &s, &(s.p), sizeof(double*), noflags
+ //
+    // map(s.p[:22], s.a, s.b)
+ // &s, &(s.p), sizeof(double*), noflags
+ // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
+ //
+ // map(s.ps)
+ // &s, &(s.ps), sizeof(S2*), noflags
+ //
+ // map(s.ps->s.i)
+ // &s, &(s.ps), sizeof(S2*), noflags
+ // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
+ //
+ // map(s.ps->ps)
+ // &s, &(s.ps), sizeof(S2*), noflags
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ //
+ // map(s.ps->ps->ps)
+ // &s, &(s.ps), sizeof(S2*), noflags
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ //
+ // map(s.ps->ps->s.f[:22])
+ // &s, &(s.ps), sizeof(S2*), noflags
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
+ //
+ // map(ps)
+ // &ps, &ps, sizeof(S2*), noflags
+ //
+ // map(ps->i)
+ // ps, &(ps->i), sizeof(int), noflags
+ //
+ // map(ps->s.f)
+ // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
+ //
+ // map(ps->p)
+ // ps, &(ps->p), sizeof(double*), noflags
+ //
+ // map(ps->p[:22])
+ // ps, &(ps->p), sizeof(double*), noflags
+ // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
+ //
+ // map(ps->ps)
+ // ps, &(ps->ps), sizeof(S2*), noflags
+ //
+ // map(ps->ps->s.i)
+ // ps, &(ps->ps), sizeof(S2*), noflags
+ // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
+ //
+ // map(ps->ps->ps)
+ // ps, &(ps->ps), sizeof(S2*), noflags
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ //
+ // map(ps->ps->ps->ps)
+ // ps, &(ps->ps), sizeof(S2*), noflags
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ //
+ // map(ps->ps->ps->s.f[:22])
+ // ps, &(ps->ps), sizeof(S2*), noflags
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
+ // extra_flag
+
+ // Track if the map information being generated is the first for a capture.
+ bool IsCaptureFirstInfo = IsFirstComponentList;
+
+ // Scan the components from the base to the complete expression.
+ auto CI = Components.rbegin();
+ auto CE = Components.rend();
+ auto I = CI;
+
+ // Track if the map information being generated is the first for a list of
+ // components.
+ bool IsExpressionFirstInfo = true;
+ llvm::Value *BP = nullptr;
+
+ if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
+ // The base is the 'this' pointer. The content of the pointer is going
+ // to be the base of the field being mapped.
+ BP = CGF.EmitScalarExpr(ME->getBase());
+ } else {
+ // The base is the reference to the variable.
+ // BP = &Var.
+ BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
+ .getPointer();
+
+ // If the variable is a pointer and is being dereferenced (i.e. is not
+ // the last component), the base has to be the pointer itself, not its
+ // reference.
+ if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() &&
+ std::next(I) != CE) {
+ auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(
+ BP, I->getAssociatedDeclaration()->getType());
+ BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
+ I->getAssociatedDeclaration()
+ ->getType()
+ ->getAs<PointerType>())
+ .getPointer();
+
+ // We do not need to generate individual map information for the
+ // pointer, it can be associated with the combined storage.
+ ++I;
+ }
+ }
+
+ for (; I != CE; ++I) {
+ auto Next = std::next(I);
+
+ // We need to generate the addresses and sizes if this is the last
+ // component, if the component is a pointer or if it is an array section
+ // whose length can't be proved to be one. If this is a pointer, it
+ // becomes the base address for the following components.
+
+      // A final array section is one whose length can't be proved to be one.
+ bool IsFinalArraySection =
+ isFinalArraySectionExpression(I->getAssociatedExpression());
+
+      // Get information on whether the element is a pointer. Array sections
+      // need special treatment given that they are built-in types.
+ const auto *OASE =
+ dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
+ bool IsPointer =
+ (OASE &&
+ OMPArraySectionExpr::getBaseOriginalType(OASE)
+ .getCanonicalType()
+ ->isAnyPointerType()) ||
+ I->getAssociatedExpression()->getType()->isAnyPointerType();
+
+ if (Next == CE || IsPointer || IsFinalArraySection) {
+
+ // If this is not the last component, we expect the pointer to be
+ // associated with an array expression or member expression.
+ assert((Next == CE ||
+ isa<MemberExpr>(Next->getAssociatedExpression()) ||
+ isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
+ isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
+ "Unexpected expression");
+
+ // Save the base we are currently using.
+ BasePointers.push_back(BP);
+
+ auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
+ auto *Size = getExprTypeSize(I->getAssociatedExpression());
+
+ Pointers.push_back(LB);
+ Sizes.push_back(Size);
+ // We need to add a pointer flag for each map that comes from the
+ // same expression except for the first one. We also need to signal
+        // this map is the first one that relates to the current capture
+ // (there is a set of entries for each capture).
+ Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
+ !IsExpressionFirstInfo,
+ IsCaptureFirstInfo));
+
+ // If we have a final array section, we are done with this expression.
+ if (IsFinalArraySection)
+ break;
+
+ // The pointer becomes the base for the next element.
+ if (Next != CE)
+ BP = LB;
+
+ IsExpressionFirstInfo = false;
+ IsCaptureFirstInfo = false;
+ continue;
+ }
+ }
+ }
+
+ /// \brief Return the adjusted map modifiers if the declaration a capture
+ /// refers to appears in a first-private clause. This is expected to be used
+ /// only with directives that start with 'target'.
+ unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
+ unsigned CurrentModifiers) {
+ assert(Cap.capturesVariable() && "Expected capture by reference only!");
+
+ // A first private variable captured by reference will use only the
+ // 'private ptr' and 'map to' flag. Return the right flags if the captured
+ // declaration is known as first-private in this handler.
+ if (FirstPrivateDecls.count(Cap.getCapturedVar()))
+ return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
+ MappableExprsHandler::OMP_MAP_TO;
+
+ // We didn't modify anything.
+ return CurrentModifiers;
+ }
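The source shape this adjustment targets, as a hedged sketch: an aggregate captured by reference that is also named in a firstprivate clause ends up mapped with OMP_MAP_PRIVATE_PTR | OMP_MAP_TO.

struct Big { int v[64]; };

void useFirstprivate(Big b) {
#pragma omp target firstprivate(b) // by-reference capture + firstprivate
  { (void)b.v[0]; }                // mapped as OMP_MAP_PRIVATE_PTR | OMP_MAP_TO
}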
+
+public:
+ MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
+ : Directive(Dir), CGF(CGF) {
+ // Extract firstprivate clause information.
+ for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
+ for (const auto *D : C->varlists())
+ FirstPrivateDecls.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ }
+
+ /// \brief Generate all the base pointers, section pointers, sizes and map
+ /// types for the extracted mappable expressions.
+ void generateAllInfo(MapValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ BasePointers.clear();
+ Pointers.clear();
+ Sizes.clear();
+ Types.clear();
+
+ struct MapInfo {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ OpenMPMapClauseKind MapTypeModifier;
+ };
+
+    // We have to process the component lists that relate to the same
+ // declaration in a single chunk so that we can generate the map flags
+ // correctly. Therefore, we organize all lists in a map.
+ llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+
+ // Helper function to fill the information map for the different supported
+ // clauses.
+ auto &&InfoGen =
+ [&Info](const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) {
+ const ValueDecl *VD =
+ D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ Info[VD].push_back({L, MapType, MapModifier});
+ };
+
+ for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
+ for (auto L : C->component_lists())
+ InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier());
+ for (auto *C : Directive.getClausesOfKind<OMPToClause>())
+ for (auto L : C->component_lists())
+ InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown);
+ for (auto *C : Directive.getClausesOfKind<OMPFromClause>())
+ for (auto L : C->component_lists())
+ InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown);
+
+ for (auto &M : Info) {
+ // We need to know when we generate information for the first component
+ // associated with a capture, because the mapping flags depend on it.
+ bool IsFirstComponentList = true;
+ for (MapInfo &L : M.second) {
+ assert(!L.Components.empty() &&
+ "Not expecting declaration with no component lists.");
+ generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components,
+ BasePointers, Pointers, Sizes, Types,
+ IsFirstComponentList);
+ IsFirstComponentList = false;
+ }
+ }
+ }
+
+ /// \brief Generate the base pointers, section pointers, sizes and map types
+ /// associated to a given capture.
+ void generateInfoForCapture(const CapturedStmt::Capture *Cap,
+ MapValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ assert(!Cap->capturesVariableArrayType() &&
+ "Not expecting to generate map info for a variable array type!");
+
+ BasePointers.clear();
+ Pointers.clear();
+ Sizes.clear();
+ Types.clear();
+
+ const ValueDecl *VD =
+ Cap->capturesThis()
+ ? nullptr
+ : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
+
+    // We need to know when we are generating information for the first
+    // component associated with a capture, because the mapping flags depend
+    // on it.
+ bool IsFirstComponentList = true;
+ for (auto *C : Directive.getClausesOfKind<OMPMapClause>())
+ for (auto L : C->decl_component_lists(VD)) {
+ assert(L.first == VD &&
+ "We got information for the wrong declaration??");
+ assert(!L.second.empty() &&
+ "Not expecting declaration with no component lists.");
+ generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
+ L.second, BasePointers, Pointers, Sizes,
+ Types, IsFirstComponentList);
+ IsFirstComponentList = false;
+ }
+ }
+
+ /// \brief Generate the default map information for a given capture \a CI,
+ /// record field declaration \a RI and captured value \a CV.
+ void generateDefaultMapInfo(
+ const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV,
+ MappableExprsHandler::MapValuesArrayTy &CurBasePointers,
+ MappableExprsHandler::MapValuesArrayTy &CurPointers,
+ MappableExprsHandler::MapValuesArrayTy &CurSizes,
+ MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) {
+
+ // Do the default mapping.
+ if (CI.capturesThis()) {
+ CurBasePointers.push_back(CV);
+ CurPointers.push_back(CV);
+ const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
+ CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
+ // Default map type.
+ CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM);
+ } else if (CI.capturesVariableByCopy()) {
+ CurBasePointers.push_back(CV);
+ CurPointers.push_back(CV);
+ if (!RI.getType()->isAnyPointerType()) {
+ // We have to signal to the runtime captures passed by value that are
+ // not pointers.
+ CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL);
+ CurSizes.push_back(CGF.getTypeSize(RI.getType()));
+ } else {
+ // Pointers are implicitly mapped with a zero size and no flags
+ // (other than first map that is added for all implicit maps).
+ CurMapTypes.push_back(0u);
+ CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
+ }
+ } else {
+ assert(CI.capturesVariable() && "Expected captured reference.");
+ CurBasePointers.push_back(CV);
+ CurPointers.push_back(CV);
+
+ const ReferenceType *PtrTy =
+ cast<ReferenceType>(RI.getType().getTypePtr());
+ QualType ElementType = PtrTy->getPointeeType();
+ CurSizes.push_back(CGF.getTypeSize(ElementType));
+ // The default map type for a scalar/complex type is 'to' because by
+ // default the value doesn't have to be retrieved. For an aggregate
+ // type, the default is 'tofrom'.
+ CurMapTypes.push_back(ElementType->isAggregateType()
+ ? (MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)
+ : MappableExprsHandler::OMP_MAP_TO);
+
+ // If we have a capture by reference we may need to add the private
+ // pointer flag if the base declaration shows in some first-private
+ // clause.
+ CurMapTypes.back() =
+ adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
+ }
+    // Every default map produces a single argument, so it is always the
+ // first one.
+ CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF;
+ }
+};
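The default rules in generateDefaultMapInfo, restated as source shapes in a sketch that mirrors the comments above:

struct Agg { int x[16]; };

void defaults(int byval, int &scalarRef, Agg &aggRef) {
#pragma omp target
  {
    (void)byval;       // captured by copy: PRIVATE_VAL | FIRST_REF
    (void)scalarRef;   // scalar by reference: TO (value need not come back)
    (void)aggRef.x[0]; // aggregate by reference: TO | FROM
  }
}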
+
+enum OpenMPOffloadingReservedDeviceIDs {
+ /// \brief Device ID if the device was not defined, runtime should get it
+ /// from environment variables in the spec.
+ OMP_DEVICEID_UNDEF = -1,
+};
+} // anonymous namespace
+
+/// \brief Emit the arrays used to pass the captures and map information to the
+/// offloading runtime library. If there is no map or capture information, the
+/// output arrays are set to nullptr.
+static void
+emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray,
+ llvm::Value *&PointersArray, llvm::Value *&SizesArray,
+ llvm::Value *&MapTypesArray,
+ MappableExprsHandler::MapValuesArrayTy &BasePointers,
+ MappableExprsHandler::MapValuesArrayTy &Pointers,
+ MappableExprsHandler::MapValuesArrayTy &Sizes,
+ MappableExprsHandler::MapFlagsArrayTy &MapTypes) {
+ auto &CGM = CGF.CGM;
+ auto &Ctx = CGF.getContext();
+
+ BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr;
+
+ if (unsigned PointerNumVal = BasePointers.size()) {
+ // Detect if we have any capture size requiring runtime evaluation of the
+  // size so that a constant array can eventually be used.
+ bool hasRuntimeEvaluationCaptureSize = false;
+ for (auto *S : Sizes)
+ if (!isa<llvm::Constant>(S)) {
+ hasRuntimeEvaluationCaptureSize = true;
+ break;
+ }
+
+ llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
+ QualType PointerArrayType =
+ Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+
+ BasePointersArray =
+ CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
+ PointersArray =
+ CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
+
+ // If we don't have any VLA types or other types that require runtime
+ // evaluation, we can use a constant array for the map sizes, otherwise we
+ // need to fill up the arrays as we do for the pointers.
+ if (hasRuntimeEvaluationCaptureSize) {
+ QualType SizeArrayType = Ctx.getConstantArrayType(
+ Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ SizesArray =
+ CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
+ } else {
+ // We expect all the sizes to be constant, so we collect them to create
+ // a constant array.
+ SmallVector<llvm::Constant *, 16> ConstSizes;
+ for (auto S : Sizes)
+ ConstSizes.push_back(cast<llvm::Constant>(S));
+
+ auto *SizesArrayInit = llvm::ConstantArray::get(
+ llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
+ auto *SizesArrayGbl = new llvm::GlobalVariable(
+ CGM.getModule(), SizesArrayInit->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+ SizesArrayInit, ".offload_sizes");
+ SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ SizesArray = SizesArrayGbl;
+ }
+
+ // The map types are always constant so we don't need to generate code to
+ // fill arrays. Instead, we create an array constant.
+ llvm::Constant *MapTypesArrayInit =
+ llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
+ auto *MapTypesArrayGbl = new llvm::GlobalVariable(
+ CGM.getModule(), MapTypesArrayInit->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+ MapTypesArrayInit, ".offload_maptypes");
+ MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ MapTypesArray = MapTypesArrayGbl;
+
+ for (unsigned i = 0; i < PointerNumVal; ++i) {
+ llvm::Value *BPVal = BasePointers[i];
+ if (BPVal->getType()->isPointerTy())
+ BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
+ else {
+ assert(BPVal->getType()->isIntegerTy() &&
+ "If not a pointer, the value type must be an integer.");
+ BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
+ }
+ llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
+ 0, i);
+ Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
+ CGF.Builder.CreateStore(BPVal, BPAddr);
+
+ llvm::Value *PVal = Pointers[i];
+ if (PVal->getType()->isPointerTy())
+ PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
+ else {
+ assert(PVal->getType()->isIntegerTy() &&
+ "If not a pointer, the value type must be an integer.");
+ PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
+ }
+ llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
+ i);
+ Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
+ CGF.Builder.CreateStore(PVal, PAddr);
+
+ if (hasRuntimeEvaluationCaptureSize) {
+ llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
+ /*Idx0=*/0,
+ /*Idx1=*/i);
+ Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
+ SAddr);
+ }
+ }
+ }
+}
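A host-side picture of what this function materializes: four parallel arrays, where entry i of each describes the same mapped item. The struct below is only a sketch; the runtime receives the arrays as separate arguments and consumes them positionally.

#include <cstddef>
#include <cstdint>

struct OffloadArrays {
  void **basePtrs;        // .offload_baseptrs: base address per entry
  void **ptrs;            // .offload_ptrs: begin of each mapped section
  std::size_t *sizes;     // .offload_sizes: byte size per entry
  std::int32_t *mapTypes; // .offload_maptypes: OMP_MAP_* flag combinations
  std::int32_t num;       // common length of the four arrays
};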
+/// \brief Emit the arguments to be passed to the runtime library based on the
+/// arrays of pointers, sizes and map types.
+static void emitOffloadingArraysArgument(
+ CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
+ llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
+ llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray,
+ llvm::Value *PointersArray, llvm::Value *SizesArray,
+ llvm::Value *MapTypesArray, unsigned NumElems) {
+ auto &CGM = CGF.CGM;
+ if (NumElems) {
+ BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray,
+ /*Idx0=*/0, /*Idx1=*/0);
+ PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray,
+ /*Idx0=*/0,
+ /*Idx1=*/0);
+ SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray,
+ /*Idx0=*/0, /*Idx1=*/0);
+ MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray,
+ /*Idx0=*/0,
+ /*Idx1=*/0);
+ } else {
+ BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
+ MapTypesArrayArg =
+ llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
+ }
+}
+
+void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ llvm::Value *OutlinedFn,
+ llvm::Value *OutlinedFnID,
+ const Expr *IfCond, const Expr *Device,
+ ArrayRef<llvm::Value *> CapturedVars) {
+ if (!CGF.HaveInsertPoint())
+ return;
assert(OutlinedFn && "Invalid outlined function!");
auto &Ctx = CGF.getContext();
- // Fill up the arrays with the all the captured variables.
- SmallVector<llvm::Value *, 16> BasePointers;
- SmallVector<llvm::Value *, 16> Pointers;
- SmallVector<llvm::Value *, 16> Sizes;
- SmallVector<unsigned, 16> MapTypes;
+ // Fill up the arrays with all the captured variables.
+ MappableExprsHandler::MapValuesArrayTy KernelArgs;
+ MappableExprsHandler::MapValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
- bool hasVLACaptures = false;
+ MappableExprsHandler::MapValuesArrayTy CurBasePointers;
+ MappableExprsHandler::MapValuesArrayTy CurPointers;
+ MappableExprsHandler::MapValuesArrayTy CurSizes;
+ MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
+
+ // Get mappable expression information.
+ MappableExprsHandler MEHandler(D, CGF);
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
auto RI = CS.getCapturedRecordDecl()->field_begin();
- // auto II = CS.capture_init_begin();
auto CV = CapturedVars.begin();
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
CE = CS.capture_end();
CI != CE; ++CI, ++RI, ++CV) {
StringRef Name;
QualType Ty;
- llvm::Value *BasePointer;
- llvm::Value *Pointer;
- llvm::Value *Size;
- unsigned MapType;
- // VLA sizes are passed to the outlined region by copy.
+ CurBasePointers.clear();
+ CurPointers.clear();
+ CurSizes.clear();
+ CurMapTypes.clear();
+
+ // VLA sizes are passed to the outlined region by copy and do not have map
+    // information associated with them.
if (CI->capturesVariableArrayType()) {
- BasePointer = Pointer = *CV;
- Size = getTypeSize(CGF, RI->getType());
+ CurBasePointers.push_back(*CV);
+ CurPointers.push_back(*CV);
+ CurSizes.push_back(CGF.getTypeSize(RI->getType()));
// Copy to the device as an argument. No need to retrieve it.
- MapType = OMP_MAP_BYCOPY;
- hasVLACaptures = true;
- } else if (CI->capturesThis()) {
- BasePointer = Pointer = *CV;
- const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
- Size = getTypeSize(CGF, PtrTy->getPointeeType());
- // Default map type.
- MapType = OMP_MAP_TO | OMP_MAP_FROM;
- } else if (CI->capturesVariableByCopy()) {
- MapType = OMP_MAP_BYCOPY;
- if (!RI->getType()->isAnyPointerType()) {
- // If the field is not a pointer, we need to save the actual value and
- // load it as a void pointer.
- auto DstAddr = CGF.CreateMemTemp(
- Ctx.getUIntPtrType(),
- Twine(CI->getCapturedVar()->getName()) + ".casted");
- LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
-
- auto *SrcAddrVal = CGF.EmitScalarConversion(
- DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
- Ctx.getPointerType(RI->getType()), SourceLocation());
- LValue SrcLV =
- CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
-
- // Store the value using the source type pointer.
- CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
-
- // Load the value using the destination type pointer.
- BasePointer = Pointer =
- CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
- } else {
- MapType |= OMP_MAP_PTR;
- BasePointer = Pointer = *CV;
- }
- Size = getTypeSize(CGF, RI->getType());
+ CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
+ MappableExprsHandler::OMP_MAP_FIRST_REF);
} else {
- assert(CI->capturesVariable() && "Expected captured reference.");
- BasePointer = Pointer = *CV;
-
- const ReferenceType *PtrTy =
- cast<ReferenceType>(RI->getType().getTypePtr());
- QualType ElementType = PtrTy->getPointeeType();
- Size = getTypeSize(CGF, ElementType);
- // The default map type for a scalar/complex type is 'to' because by
- // default the value doesn't have to be retrieved. For an aggregate type,
- // the default is 'tofrom'.
- MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
- : OMP_MAP_TO;
- if (ElementType->isAnyPointerType())
- MapType |= OMP_MAP_PTR;
+ // If we have any information in the map clause, we use it, otherwise we
+ // just do a default mapping.
+ MEHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers,
+ CurSizes, CurMapTypes);
+ if (CurBasePointers.empty())
+ MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
+ CurPointers, CurSizes, CurMapTypes);
}
-
- BasePointers.push_back(BasePointer);
- Pointers.push_back(Pointer);
- Sizes.push_back(Size);
- MapTypes.push_back(MapType);
+ // We expect to have at least an element of information for this capture.
+ assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
+ assert(CurBasePointers.size() == CurPointers.size() &&
+ CurBasePointers.size() == CurSizes.size() &&
+ CurBasePointers.size() == CurMapTypes.size() &&
+ "Inconsistent map information sizes!");
+
+ // The kernel args are always the first elements of the base pointers
+ // associated with a capture.
+ KernelArgs.push_back(CurBasePointers.front());
+ // We need to append the results of this capture to what we already have.
+ BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+ Pointers.append(CurPointers.begin(), CurPointers.end());
+ Sizes.append(CurSizes.begin(), CurSizes.end());
+ MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
}
  // Keep track of whether the host function has to be executed.
@@ -3943,128 +5779,22 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
OffloadError);
// Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
- hasVLACaptures, Device, OutlinedFnID, OffloadError,
- OffloadErrorQType](CodeGenFunction &CGF) {
- unsigned PointerNumVal = BasePointers.size();
- llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
+ auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
+ OutlinedFnID, OffloadError, OffloadErrorQType,
+ &D](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
+ // Emit the offloading arrays.
llvm::Value *BasePointersArray;
llvm::Value *PointersArray;
llvm::Value *SizesArray;
llvm::Value *MapTypesArray;
-
- if (PointerNumVal) {
- llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
- QualType PointerArrayType = Ctx.getConstantArrayType(
- Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
-
- BasePointersArray =
- CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
- PointersArray =
- CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
-
- // If we don't have any VLA types, we can use a constant array for the map
- // sizes, otherwise we need to fill up the arrays as we do for the
- // pointers.
- if (hasVLACaptures) {
- QualType SizeArrayType = Ctx.getConstantArrayType(
- Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
- SizesArray =
- CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
- } else {
- // We expect all the sizes to be constant, so we collect them to create
- // a constant array.
- SmallVector<llvm::Constant *, 16> ConstSizes;
- for (auto S : Sizes)
- ConstSizes.push_back(cast<llvm::Constant>(S));
-
- auto *SizesArrayInit = llvm::ConstantArray::get(
- llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
- auto *SizesArrayGbl = new llvm::GlobalVariable(
- CGM.getModule(), SizesArrayInit->getType(),
- /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
- SizesArrayInit, ".offload_sizes");
- SizesArrayGbl->setUnnamedAddr(true);
- SizesArray = SizesArrayGbl;
- }
-
- // The map types are always constant so we don't need to generate code to
- // fill arrays. Instead, we create an array constant.
- llvm::Constant *MapTypesArrayInit =
- llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
- auto *MapTypesArrayGbl = new llvm::GlobalVariable(
- CGM.getModule(), MapTypesArrayInit->getType(),
- /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
- MapTypesArrayInit, ".offload_maptypes");
- MapTypesArrayGbl->setUnnamedAddr(true);
- MapTypesArray = MapTypesArrayGbl;
-
- for (unsigned i = 0; i < PointerNumVal; ++i) {
-
- llvm::Value *BPVal = BasePointers[i];
- if (BPVal->getType()->isPointerTy())
- BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
- else {
- assert(BPVal->getType()->isIntegerTy() &&
- "If not a pointer, the value type must be an integer.");
- BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
- }
- llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
- BasePointersArray, 0, i);
- Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
- CGF.Builder.CreateStore(BPVal, BPAddr);
-
- llvm::Value *PVal = Pointers[i];
- if (PVal->getType()->isPointerTy())
- PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
- else {
- assert(PVal->getType()->isIntegerTy() &&
- "If not a pointer, the value type must be an integer.");
- PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
- }
- llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
- 0, i);
- Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
- CGF.Builder.CreateStore(PVal, PAddr);
-
- if (hasVLACaptures) {
- llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
- /*Idx0=*/0,
- /*Idx1=*/i);
- Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
- CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
- Sizes[i], CGM.SizeTy, /*isSigned=*/true),
- SAddr);
- }
- }
-
- BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
- /*Idx0=*/0, /*Idx1=*/0);
- PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
- /*Idx0=*/0,
- /*Idx1=*/0);
- SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
- /*Idx0=*/0, /*Idx1=*/0);
- MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
- /*Idx0=*/0,
- /*Idx1=*/0);
-
- } else {
- BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
- SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
- MapTypesArray =
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
- }
+ emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, BasePointers, Pointers, Sizes,
+ MapTypes);
+ emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray,
+ SizesArray, MapTypesArray, BasePointersArray,
+ PointersArray, SizesArray, MapTypesArray,
+ BasePointers.size());
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
@@ -4082,23 +5812,48 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *DeviceID;
if (Device)
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGM.Int32Ty, /*isSigned=*/true);
+ CGF.Int32Ty, /*isSigned=*/true);
else
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
- llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
- PointersArray, SizesArray, MapTypesArray};
- auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
- OffloadingArgs);
+ // Emit the number of elements in the offloading arrays.
+ llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
+
+ // Return value of the runtime offloading call.
+ llvm::Value *Return;
+
+ auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
+ auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
+
+  // If NumTeams is defined, this means that we have an enclosed teams
+  // region. Therefore we also expect ThreadLimit to be defined. These two
+  // values should be defined in the presence of a teams directive,
+  // regardless of whether any clauses are associated with it. If the user
+  // uses teams without clauses, these two values will be the defaults that
+  // should be passed to the runtime library - a 32-bit integer with the
+  // value zero.
+ if (NumTeams) {
+ assert(ThreadLimit && "Thread limit expression should be available along "
+ "with number of teams.");
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, OutlinedFnID, PointerNum,
+ BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, NumTeams, ThreadLimit};
+ Return = CGF.EmitRuntimeCall(
+ RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+ } else {
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
+ PointersArray, SizesArray, MapTypesArray};
+ Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
+ OffloadingArgs);
+ }
CGF.EmitStoreOfScalar(Return, OffloadError);
};
// Notify that the host version must be executed.
- auto &&ElseGen = [this, OffloadError,
- OffloadErrorQType](CodeGenFunction &CGF) {
- CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
+ auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
OffloadError);
};
@@ -4107,15 +5862,15 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// regardless of the conditional in the if clause if, e.g., the user does not
// specify target triples.
if (OutlinedFnID) {
- if (IfCond) {
+ if (IfCond)
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
- } else {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- ThenGen(CGF);
+ else {
+ RegionCodeGenTy ThenRCG(ThenGen);
+ ThenRCG(CGF);
}
} else {
- CodeGenFunction::RunCleanupsScope Scope(CGF);
- ElseGen(CGF);
+ RegionCodeGenTy ElseRCG(ElseGen);
+ ElseRCG(CGF);
}
// Check the error code and execute the host version if required.
@@ -4126,11 +5881,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
CGF.EmitBlock(OffloadFailedBlock);
- CGF.Builder.CreateCall(OutlinedFn, BasePointers);
+ CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
CGF.EmitBranch(OffloadContBlock);
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
- return;
}
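Taken together, the branches above reduce to roughly the following host-side
pseudocode. This is only a sketch of the emitted control flow; names such as
offload_error, args_base and host_fallback are illustrative, not identifiers
the compiler produces.

    // Hypothetical C-level view of the generated target-call sequence.
    int32_t offload_error = -1;                  // default: run host version
    if (outlined_fn_id) {                        // device code was generated
      if (!has_if_clause || if_cond) {
        offload_error = has_enclosed_teams       // enclosed 'teams' region?
            ? __tgt_target_teams(device_id, outlined_fn_id, arg_num,
                                 args_base, args, arg_sizes, arg_types,
                                 num_teams, thread_limit)
            : __tgt_target(device_id, outlined_fn_id, arg_num, args_base,
                           args, arg_sizes, arg_types);
      }
    }
    if (offload_error)                           // offload failed or skipped
      host_fallback(kernel_args);                // the outlined host function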
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
@@ -4148,26 +5902,27 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- unsigned Column;
getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
- FileID, Line, Column);
+ FileID, Line);
// Is this a target region that should not be emitted as an entry point? If
// so, just signal we are done with this target region.
- if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(
- DeviceID, FileID, ParentName, Line, Column))
+ if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
+ ParentName, Line))
return;
llvm::Function *Fn;
llvm::Constant *Addr;
- emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
- /*isOffloadEntry=*/true);
+ std::tie(Fn, Addr) =
+ CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
+ CGM, cast<OMPTargetDirective>(*E), ParentName,
+ /*isOffloadEntry=*/true);
assert(Fn && Addr && "Target region emission failed.");
return;
}
if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
- if (!E->getAssociatedStmt())
+ if (!E->hasAssociatedStmt())
return;
scanForTargetRegionsFunctions(
@@ -4183,8 +5938,6 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
// Keep looking for target regions recursively.
for (auto *II : S->children())
scanForTargetRegionsFunctions(II, ParentName);
-
- return;
}
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
@@ -4249,3 +6002,594 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
// compilation unit.
return createOffloadingBinaryDescriptorRegistration();
}
+
+void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ auto *RTLoc = emitUpdateLocation(CGF, Loc);
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+
+ // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
+ llvm::Value *Args[] = {
+ RTLoc,
+ CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
+ CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
+ llvm::SmallVector<llvm::Value *, 16> RealArgs;
+ RealArgs.append(std::begin(Args), std::end(Args));
+ RealArgs.append(CapturedVars.begin(), CapturedVars.end());
+
+ auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
+ CGF.EmitRuntimeCall(RTLFn, RealArgs);
+}
+
+void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
+ const Expr *NumTeams,
+ const Expr *ThreadLimit,
+ SourceLocation Loc) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ auto *RTLoc = emitUpdateLocation(CGF, Loc);
+
+ llvm::Value *NumTeamsVal =
+ (NumTeams)
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
+ CGF.CGM.Int32Ty, /* isSigned = */ true)
+ : CGF.Builder.getInt32(0);
+
+ llvm::Value *ThreadLimitVal =
+ (ThreadLimit)
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
+ CGF.CGM.Int32Ty, /* isSigned = */ true)
+ : CGF.Builder.getInt32(0);
+
+  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
+ llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
+ ThreadLimitVal};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
+ PushNumTeamsArgs);
+}
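As an illustration (hypothetical user code), a directive such as

    #pragma omp teams num_teams(4) thread_limit(8)

lowers to a call equivalent to __kmpc_push_num_teams(&loc, gtid, 4, 8); when
either clause is missing, the null checks above substitute the 32-bit default
of zero for the corresponding argument.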
+
+void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ const Expr *IfCond,
+ const Expr *Device,
+ const RegionCodeGenTy &CodeGen) {
+
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ llvm::Value *BasePointersArray = nullptr;
+ llvm::Value *PointersArray = nullptr;
+ llvm::Value *SizesArray = nullptr;
+ llvm::Value *MapTypesArray = nullptr;
+ unsigned NumOfPtrs = 0;
+
+ // Generate the code for the opening of the data environment. Capture all the
+ // arguments of the runtime call by reference because they are used in the
+ // closing of the region.
+ auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray,
+ &SizesArray, &MapTypesArray, Device,
+ &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
+ // Fill up the arrays with all the mapped variables.
+ MappableExprsHandler::MapValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
+
+ // Get map clause information.
+ MappableExprsHandler MCHandler(D, CGF);
+ MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
+ NumOfPtrs = BasePointers.size();
+
+ // Fill up the arrays and create the arguments.
+ emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, BasePointers, Pointers, Sizes,
+ MapTypes);
+
+ llvm::Value *BasePointersArrayArg = nullptr;
+ llvm::Value *PointersArrayArg = nullptr;
+ llvm::Value *SizesArrayArg = nullptr;
+ llvm::Value *MapTypesArrayArg = nullptr;
+ emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
+ SizesArrayArg, MapTypesArrayArg,
+ BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, NumOfPtrs);
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID = nullptr;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int32Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+
+ // Emit the number of elements in the offloading arrays.
+ auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
+
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, PointerNum, BasePointersArrayArg,
+ PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
+ auto &RT = CGF.CGM.getOpenMPRuntime();
+ CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
+ OffloadingArgs);
+ };
+
+ // Generate code for the closing of the data region.
+ auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray,
+ &MapTypesArray, Device,
+ &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) {
+ assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray &&
+ NumOfPtrs && "Invalid data environment closing arguments.");
+
+ llvm::Value *BasePointersArrayArg = nullptr;
+ llvm::Value *PointersArrayArg = nullptr;
+ llvm::Value *SizesArrayArg = nullptr;
+ llvm::Value *MapTypesArrayArg = nullptr;
+ emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
+ SizesArrayArg, MapTypesArrayArg,
+ BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, NumOfPtrs);
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID = nullptr;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int32Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+
+ // Emit the number of elements in the offloading arrays.
+ auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs);
+
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, PointerNum, BasePointersArrayArg,
+ PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
+ auto &RT = CGF.CGM.getOpenMPRuntime();
+ CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
+ OffloadingArgs);
+ };
+
+ // In the event we get an if clause, we don't have to take any action on the
+ // else side.
+ auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
+
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen);
+ } else {
+ RegionCodeGenTy BeginThenRCG(BeginThenGen);
+ BeginThenRCG(CGF);
+ }
+
+ CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen);
+
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen);
+ } else {
+ RegionCodeGenTy EndThenRCG(EndThenGen);
+ EndThenRCG(CGF);
+ }
+}
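Conceptually, a structured data region such as this hypothetical snippet

    #pragma omp target data map(tofrom: a[0:n]) if(cond) device(dev)
    { /* enclosed region */ }

is bracketed by the two calls generated above, roughly:

    if (cond) __tgt_target_data_begin(dev, 1, base_ptrs, ptrs, sizes, types);
    /* enclosed region, emitted via emitInlinedDirective */
    if (cond) __tgt_target_data_end(dev, 1, base_ptrs, ptrs, sizes, types);

The offloading arrays are captured by reference in BeginThenGen precisely so
that EndThenGen can reuse the buffers created when the region was opened;
without a device clause the first argument is OMP_DEVICEID_UNDEF.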
+
+void CGOpenMPRuntime::emitTargetDataStandAloneCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
+ const Expr *Device) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ assert((isa<OMPTargetEnterDataDirective>(D) ||
+ isa<OMPTargetExitDataDirective>(D) ||
+ isa<OMPTargetUpdateDirective>(D)) &&
+ "Expecting either target enter, exit data, or update directives.");
+
+ // Generate the code for the opening of the data environment.
+ auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
+ // Fill up the arrays with all the mapped variables.
+ MappableExprsHandler::MapValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
+
+ // Get map clause information.
+ MappableExprsHandler MEHandler(D, CGF);
+ MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
+
+ llvm::Value *BasePointersArrayArg = nullptr;
+ llvm::Value *PointersArrayArg = nullptr;
+ llvm::Value *SizesArrayArg = nullptr;
+ llvm::Value *MapTypesArrayArg = nullptr;
+
+ // Fill up the arrays and create the arguments.
+ emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg,
+ SizesArrayArg, MapTypesArrayArg, BasePointers,
+ Pointers, Sizes, MapTypes);
+ emitOffloadingArraysArgument(
+ CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
+ MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg,
+ MapTypesArrayArg, BasePointers.size());
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID = nullptr;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int32Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+
+ // Emit the number of elements in the offloading arrays.
+ auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
+
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, PointerNum, BasePointersArrayArg,
+ PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
+
+ auto &RT = CGF.CGM.getOpenMPRuntime();
+ // Select the right runtime function call for each expected standalone
+ // directive.
+ OpenMPRTLFunction RTLFn;
+ switch (D.getDirectiveKind()) {
+ default:
+ llvm_unreachable("Unexpected standalone target data directive.");
+ break;
+ case OMPD_target_enter_data:
+ RTLFn = OMPRTL__tgt_target_data_begin;
+ break;
+ case OMPD_target_exit_data:
+ RTLFn = OMPRTL__tgt_target_data_end;
+ break;
+ case OMPD_target_update:
+ RTLFn = OMPRTL__tgt_target_data_update;
+ break;
+ }
+ CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
+ };
+
+ // In the event we get an if clause, we don't have to take any action on the
+ // else side.
+ auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
+
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+ } else {
+ RegionCodeGenTy ThenGenRCG(ThenGen);
+ ThenGenRCG(CGF);
+ }
+}
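Each standalone directive thus maps to exactly one runtime call; for example
(hypothetical user code):

    #pragma omp target enter data map(to: a[0:n])   // __tgt_target_data_begin
    #pragma omp target exit data map(from: a[0:n])  // __tgt_target_data_end
    #pragma omp target update from(a[0:n])          // __tgt_target_data_update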
+
+namespace {
+ /// Kind of parameter in a function with 'declare simd' directive.
+ enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
+ /// Attribute set of the parameter.
+ struct ParamAttrTy {
+ ParamKindTy Kind = Vector;
+ llvm::APSInt StrideOrArg;
+ llvm::APSInt Alignment;
+ };
+} // namespace
+
+static unsigned evaluateCDTSize(const FunctionDecl *FD,
+ ArrayRef<ParamAttrTy> ParamAttrs) {
+ // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
+ // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
+  // of that clause. The VLEN value must be a power of 2.
+  // Otherwise the notion of the function's "characteristic data type" (CDT)
+ // is used to compute the vector length.
+ // CDT is defined in the following order:
+ // a) For non-void function, the CDT is the return type.
+ // b) If the function has any non-uniform, non-linear parameters, then the
+ // CDT is the type of the first such parameter.
+  //   c) If the CDT determined by a) or b) above is a struct, union, or class
+  //      type which is passed by value (except for the type that maps to the
+  //      built-in complex data type), the characteristic data type is int.
+ // d) If none of the above three cases is applicable, the CDT is int.
+ // The VLEN is then determined based on the CDT and the size of vector
+ // register of that ISA for which current vector version is generated. The
+ // VLEN is computed using the formula below:
+ // VLEN = sizeof(vector_register) / sizeof(CDT),
+  // where the vector register size is specified in section 3.2.1 "Registers
+  // and the Stack Frame" of the original AMD64 ABI document.
+ QualType RetType = FD->getReturnType();
+ if (RetType.isNull())
+ return 0;
+ ASTContext &C = FD->getASTContext();
+ QualType CDT;
+ if (!RetType.isNull() && !RetType->isVoidType())
+ CDT = RetType;
+ else {
+ unsigned Offset = 0;
+ if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
+ if (ParamAttrs[Offset].Kind == Vector)
+ CDT = C.getPointerType(C.getRecordType(MD->getParent()));
+ ++Offset;
+ }
+ if (CDT.isNull()) {
+ for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
+ if (ParamAttrs[I + Offset].Kind == Vector) {
+ CDT = FD->getParamDecl(I)->getType();
+ break;
+ }
+ }
+ }
+ }
+ if (CDT.isNull())
+ CDT = C.IntTy;
+ CDT = CDT->getCanonicalTypeUnqualified();
+ if (CDT->isRecordType() || CDT->isUnionType())
+ CDT = C.IntTy;
+ return C.getTypeSize(CDT);
+}
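A worked example of the rules above, using the register sizes from the ISA
table in the next function:

    #pragma omp declare simd        // no simdlen clause
    double f(double x);             // CDT = return type double = 64 bits
    // SSE  (128-bit registers): VLEN = 128 / 64 = 2
    // AVX and AVX2 (256-bit):   VLEN = 256 / 64 = 4
    // AVX512 (512-bit):         VLEN = 512 / 64 = 8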
+
+static void
+emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
+ llvm::APSInt VLENVal,
+ ArrayRef<ParamAttrTy> ParamAttrs,
+ OMPDeclareSimdDeclAttr::BranchStateTy State) {
+ struct ISADataTy {
+ char ISA;
+ unsigned VecRegSize;
+ };
+  ISADataTy ISAData[] = {
+      {'b', 128}, // SSE
+      {'c', 256}, // AVX
+      {'d', 256}, // AVX2
+      {'e', 512}, // AVX512
+  };
+ llvm::SmallVector<char, 2> Masked;
+ switch (State) {
+ case OMPDeclareSimdDeclAttr::BS_Undefined:
+ Masked.push_back('N');
+ Masked.push_back('M');
+ break;
+ case OMPDeclareSimdDeclAttr::BS_Notinbranch:
+ Masked.push_back('N');
+ break;
+ case OMPDeclareSimdDeclAttr::BS_Inbranch:
+ Masked.push_back('M');
+ break;
+ }
+ for (auto Mask : Masked) {
+ for (auto &Data : ISAData) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << "_ZGV" << Data.ISA << Mask;
+ if (!VLENVal) {
+ Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
+ evaluateCDTSize(FD, ParamAttrs));
+ } else
+ Out << VLENVal;
+ for (auto &ParamAttr : ParamAttrs) {
+      switch (ParamAttr.Kind) {
+ case LinearWithVarStride:
+ Out << 's' << ParamAttr.StrideOrArg;
+ break;
+ case Linear:
+ Out << 'l';
+ if (!!ParamAttr.StrideOrArg)
+ Out << ParamAttr.StrideOrArg;
+ break;
+ case Uniform:
+ Out << 'u';
+ break;
+ case Vector:
+ Out << 'v';
+ break;
+ }
+ if (!!ParamAttr.Alignment)
+ Out << 'a' << ParamAttr.Alignment;
+ }
+ Out << '_' << Fn->getName();
+ Fn->addFnAttr(Out.str());
+ }
+ }
+}
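For the f(double) example above, with an unspecified branch state both the
unmasked 'N' and masked 'M' variants are requested for every ISA, so the
function would be annotated with attributes spelled:

    _ZGVbN2v_f  _ZGVbM2v_f   // SSE,    VLEN 2
    _ZGVcN4v_f  _ZGVcM4v_f   // AVX,    VLEN 4
    _ZGVdN4v_f  _ZGVdM4v_f   // AVX2,   VLEN 4
    _ZGVeN8v_f  _ZGVeM8v_f   // AVX512, VLEN 8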
+
+void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
+ llvm::Function *Fn) {
+ ASTContext &C = CGM.getContext();
+ FD = FD->getCanonicalDecl();
+ // Map params to their positions in function decl.
+ llvm::DenseMap<const Decl *, unsigned> ParamPositions;
+ if (isa<CXXMethodDecl>(FD))
+ ParamPositions.insert({FD, 0});
+ unsigned ParamPos = ParamPositions.size();
+ for (auto *P : FD->parameters()) {
+ ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
+ ++ParamPos;
+ }
+ for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
+ llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
+ // Mark uniform parameters.
+ for (auto *E : Attr->uniforms()) {
+ E = E->IgnoreParenImpCasts();
+ unsigned Pos;
+ if (isa<CXXThisExpr>(E))
+ Pos = ParamPositions[FD];
+ else {
+ auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+ ->getCanonicalDecl();
+ Pos = ParamPositions[PVD];
+ }
+ ParamAttrs[Pos].Kind = Uniform;
+ }
+ // Get alignment info.
+ auto NI = Attr->alignments_begin();
+ for (auto *E : Attr->aligneds()) {
+ E = E->IgnoreParenImpCasts();
+ unsigned Pos;
+ QualType ParmTy;
+ if (isa<CXXThisExpr>(E)) {
+ Pos = ParamPositions[FD];
+ ParmTy = E->getType();
+ } else {
+ auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+ ->getCanonicalDecl();
+ Pos = ParamPositions[PVD];
+ ParmTy = PVD->getType();
+ }
+ ParamAttrs[Pos].Alignment =
+ (*NI) ? (*NI)->EvaluateKnownConstInt(C)
+ : llvm::APSInt::getUnsigned(
+ C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
+ .getQuantity());
+ ++NI;
+ }
+ // Mark linear parameters.
+ auto SI = Attr->steps_begin();
+ auto MI = Attr->modifiers_begin();
+ for (auto *E : Attr->linears()) {
+ E = E->IgnoreParenImpCasts();
+ unsigned Pos;
+ if (isa<CXXThisExpr>(E))
+ Pos = ParamPositions[FD];
+ else {
+ auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
+ ->getCanonicalDecl();
+ Pos = ParamPositions[PVD];
+ }
+ auto &ParamAttr = ParamAttrs[Pos];
+ ParamAttr.Kind = Linear;
+ if (*SI) {
+ if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
+ Expr::SE_AllowSideEffects)) {
+          // dyn_cast is required here: cast<> would assert on a type
+          // mismatch instead of returning null, making these checks no-ops.
+          if (auto *DRE = dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
+            if (auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
+ ParamAttr.Kind = LinearWithVarStride;
+ ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
+ ParamPositions[StridePVD->getCanonicalDecl()]);
+ }
+ }
+ }
+ }
+ ++SI;
+ ++MI;
+ }
+ llvm::APSInt VLENVal;
+ if (const Expr *VLEN = Attr->getSimdlen())
+ VLENVal = VLEN->EvaluateKnownConstInt(C);
+ OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
+ if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
+ CGM.getTriple().getArch() == llvm::Triple::x86_64)
+ emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
+ }
+}
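Combining the clause handling above with the mangling scheme, a hypothetical
x86-64 declaration such as

    #pragma omp declare simd uniform(a) linear(i: 4) aligned(p: 32) notinbranch
    void g(float *a, int i, double *p);

yields 'u' for a, 'l4' for i, and 'v' plus 'a32' for p. The return type is
void, so the CDT is the type of the first vector parameter (the 64-bit pointer
p), giving unmasked-only names such as _ZGVbN2ul4va32_g, _ZGVcN4ul4va32_g,
_ZGVdN4ul4va32_g and _ZGVeN8ul4va32_g.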
+
+namespace {
+/// Cleanup action for doacross support.
+class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
+public:
+ static const int DoacrossFinArgs = 2;
+
+private:
+ llvm::Value *RTLFn;
+ llvm::Value *Args[DoacrossFinArgs];
+
+public:
+ DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
+ : RTLFn(RTLFn) {
+ assert(CallArgs.size() == DoacrossFinArgs);
+ std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
+ }
+ void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ CGF.EmitRuntimeCall(RTLFn, Args);
+ }
+};
+} // namespace
+
+void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
+ const OMPLoopDirective &D) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ ASTContext &C = CGM.getContext();
+ QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
+ RecordDecl *RD;
+ if (KmpDimTy.isNull()) {
+ // Build struct kmp_dim { // loop bounds info casted to kmp_int64
+ // kmp_int64 lo; // lower
+ // kmp_int64 up; // upper
+ // kmp_int64 st; // stride
+ // };
+ RD = C.buildImplicitRecord("kmp_dim");
+ RD->startDefinition();
+ addFieldToRecordDecl(C, RD, Int64Ty);
+ addFieldToRecordDecl(C, RD, Int64Ty);
+ addFieldToRecordDecl(C, RD, Int64Ty);
+ RD->completeDefinition();
+ KmpDimTy = C.getRecordType(RD);
+ } else
+ RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
+
+ Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
+ CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
+ enum { LowerFD = 0, UpperFD, StrideFD };
+ // Fill dims with data.
+ LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
+ // dims.upper = num_iterations;
+ LValue UpperLVal =
+ CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
+ llvm::Value *NumIterVal = CGF.EmitScalarConversion(
+ CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
+ Int64Ty, D.getNumIterations()->getExprLoc());
+ CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
+ // dims.stride = 1;
+ LValue StrideLVal =
+ CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
+ StrideLVal);
+
+ // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
+ // kmp_int32 num_dims, struct kmp_dim * dims);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
+ getThreadID(CGF, D.getLocStart()),
+ llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ DimsAddr.getPointer(), CGM.VoidPtrTy)};
+
+ llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
+ CGF.EmitRuntimeCall(RTLFn, Args);
+ llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
+ emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
+ llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
+ CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
+ llvm::makeArrayRef(FiniArgs));
+}
+
+void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
+ const OMPDependClause *C) {
+ QualType Int64Ty =
+      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
+ const Expr *CounterVal = C->getCounterValue();
+ assert(CounterVal);
+ llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
+ CounterVal->getType(), Int64Ty,
+ CounterVal->getExprLoc());
+ Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
+ CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
+ getThreadID(CGF, C->getLocStart()),
+ CntAddr.getPointer()};
+ llvm::Value *RTLFn;
+ if (C->getDependencyKind() == OMPC_DEPEND_source)
+ RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
+ else {
+ assert(C->getDependencyKind() == OMPC_DEPEND_sink);
+ RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
+ }
+ CGF.EmitRuntimeCall(RTLFn, Args);
+}
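Putting the two doacross entry points together, a hypothetical loop like

    #pragma omp for ordered(1)
    for (int i = 0; i < n; ++i) {
      #pragma omp ordered depend(sink: i - 1)   // __kmpc_doacross_wait
      /* ... cross-iteration work ... */
      #pragma omp ordered depend(source)        // __kmpc_doacross_post
    }

is framed by __kmpc_doacross_init on entry, passing a single kmp_dim
{lo, up, st} = {0, n, 1}, and by the __kmpc_doacross_fini cleanup pushed
above when the loop region is left.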
+
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
index b3256375d9b6..270de8dd505e 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -14,18 +14,19 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
+#include "CGValue.h"
#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
namespace llvm {
class ArrayType;
class Constant;
-class Function;
class FunctionType;
class GlobalVariable;
class StructType;
@@ -36,222 +37,116 @@ class Value;
namespace clang {
class Expr;
class GlobalDecl;
+class OMPDependClause;
class OMPExecutableDirective;
+class OMPLoopDirective;
class VarDecl;
+class OMPDeclareReductionDecl;
+class IdentifierInfo;
namespace CodeGen {
class Address;
class CodeGenFunction;
class CodeGenModule;
-typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy;
+/// A basic class for pre|post-actions used in advanced codegen sequences for
+/// OpenMP regions.
+class PrePostActionTy {
+public:
+ explicit PrePostActionTy() {}
+ virtual void Enter(CodeGenFunction &CGF) {}
+ virtual void Exit(CodeGenFunction &CGF) {}
+ virtual ~PrePostActionTy() {}
+};
-class CGOpenMPRuntime {
-private:
- enum OpenMPRTLFunction {
- /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
- /// kmpc_micro microtask, ...);
- OMPRTL__kmpc_fork_call,
- /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
- /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
- OMPRTL__kmpc_threadprivate_cached,
- /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
- /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
- OMPRTL__kmpc_threadprivate_register,
- // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
- OMPRTL__kmpc_global_thread_num,
- // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit);
- OMPRTL__kmpc_critical,
- // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
- // global_tid, kmp_critical_name *crit, uintptr_t hint);
- OMPRTL__kmpc_critical_with_hint,
- // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *crit);
- OMPRTL__kmpc_end_critical,
- // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_cancel_barrier,
- // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_barrier,
- // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_for_static_fini,
- // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_serialized_parallel,
- // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_end_serialized_parallel,
- // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_threads);
- OMPRTL__kmpc_push_num_threads,
- // Call to void __kmpc_flush(ident_t *loc);
- OMPRTL__kmpc_flush,
- // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_master,
- // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_end_master,
- // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
- // int end_part);
- OMPRTL__kmpc_omp_taskyield,
- // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_single,
- // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
- OMPRTL__kmpc_end_single,
- // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
- // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- // kmp_routine_entry_t *task_entry);
- OMPRTL__kmpc_omp_task_alloc,
- // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
- // new_task);
- OMPRTL__kmpc_omp_task,
- // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
- // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
- // kmp_int32 didit);
- OMPRTL__kmpc_copyprivate,
- // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
- // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
- OMPRTL__kmpc_reduce,
- // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
- // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
- // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
- // *lck);
- OMPRTL__kmpc_reduce_nowait,
- // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *lck);
- OMPRTL__kmpc_end_reduce,
- // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
- // kmp_critical_name *lck);
- OMPRTL__kmpc_end_reduce_nowait,
- // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
- // kmp_task_t * new_task);
- OMPRTL__kmpc_omp_task_begin_if0,
- // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
- // kmp_task_t * new_task);
- OMPRTL__kmpc_omp_task_complete_if0,
- // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_ordered,
- // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_end_ordered,
- // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
- // global_tid);
- OMPRTL__kmpc_omp_taskwait,
- // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_taskgroup,
- // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_end_taskgroup,
- // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
- // int proc_bind);
- OMPRTL__kmpc_push_proc_bind,
- // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
- // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
- // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
- OMPRTL__kmpc_omp_task_with_deps,
- // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
- // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
- // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
- OMPRTL__kmpc_omp_wait_deps,
- // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
- // global_tid, kmp_int32 cncl_kind);
- OMPRTL__kmpc_cancellationpoint,
- // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
- // kmp_int32 cncl_kind);
- OMPRTL__kmpc_cancel,
-
- //
- // Offloading related calls
- //
- // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
- // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
- // *arg_types);
- OMPRTL__tgt_target,
- // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
- OMPRTL__tgt_register_lib,
- // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
- OMPRTL__tgt_unregister_lib,
- };
+/// Class that provides a way to call a simple version of codegen for an
+/// OpenMP region, or an advanced one with possible pre|post-actions in
+/// codegen.
+class RegionCodeGenTy final {
+ intptr_t CodeGen;
+ typedef void (*CodeGenTy)(intptr_t, CodeGenFunction &, PrePostActionTy &);
+ CodeGenTy Callback;
+ mutable PrePostActionTy *PrePostAction;
+ RegionCodeGenTy() = delete;
+ RegionCodeGenTy &operator=(const RegionCodeGenTy &) = delete;
+ template <typename Callable>
+ static void CallbackFn(intptr_t CodeGen, CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ return (*reinterpret_cast<Callable *>(CodeGen))(CGF, Action);
+ }
- /// \brief Values for bit flags used in the ident_t to describe the fields.
- /// All enumeric elements are named and described in accordance with the code
- /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
- enum OpenMPLocationFlags {
- /// \brief Use trampoline for internal microtask.
- OMP_IDENT_IMD = 0x01,
- /// \brief Use c-style ident structure.
- OMP_IDENT_KMPC = 0x02,
- /// \brief Atomic reduction option for kmpc_reduce.
- OMP_ATOMIC_REDUCE = 0x10,
- /// \brief Explicit 'barrier' directive.
- OMP_IDENT_BARRIER_EXPL = 0x20,
- /// \brief Implicit barrier in code.
- OMP_IDENT_BARRIER_IMPL = 0x40,
- /// \brief Implicit barrier in 'for' directive.
- OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
- /// \brief Implicit barrier in 'sections' directive.
- OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
- /// \brief Implicit barrier in 'single' directive.
- OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
- };
+public:
+ template <typename Callable>
+ RegionCodeGenTy(
+ Callable &&CodeGen,
+ typename std::enable_if<
+ !std::is_same<typename std::remove_reference<Callable>::type,
+ RegionCodeGenTy>::value>::type * = nullptr)
+ : CodeGen(reinterpret_cast<intptr_t>(&CodeGen)),
+ Callback(CallbackFn<typename std::remove_reference<Callable>::type>),
+ PrePostAction(nullptr) {}
+ void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; }
+ void operator()(CodeGenFunction &CGF) const;
+};
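A minimal sketch of how these two types compose (the body of operator(),
which dispatches through CallbackFn, lives in the .cpp file and is not part
of this diff; the action subclass is hypothetical):

    struct MyAction final : PrePostActionTy {
      void Enter(CodeGenFunction &CGF) override { /* emit a prologue */ }
      void Exit(CodeGenFunction &CGF) override { /* emit an epilogue */ }
    };
    auto &&Gen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      /* ... emit the region body ... */
      Action.Exit(CGF);
    };
    RegionCodeGenTy RCG(Gen); // type-erases Gen
    MyAction A;
    RCG.setAction(A);         // optional pre|post hooks
    RCG(CGF);                 // conceptually ends up invoking Gen(CGF, A)

Note the lifetime constraint: the constructor stores only
reinterpret_cast<intptr_t>(&CodeGen), so the callable must outlive every
invocation of the RegionCodeGenTy object.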
+
+struct OMPTaskDataTy final {
+ SmallVector<const Expr *, 4> PrivateVars;
+ SmallVector<const Expr *, 4> PrivateCopies;
+ SmallVector<const Expr *, 4> FirstprivateVars;
+ SmallVector<const Expr *, 4> FirstprivateCopies;
+ SmallVector<const Expr *, 4> FirstprivateInits;
+ SmallVector<const Expr *, 4> LastprivateVars;
+ SmallVector<const Expr *, 4> LastprivateCopies;
+ SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences;
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule;
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Priority;
+ unsigned NumberOfParts = 0;
+ bool Tied = true;
+ bool Nogroup = false;
+};
+
+class CGOpenMPRuntime {
+protected:
CodeGenModule &CGM;
+
+ /// \brief Creates offloading entry for the provided entry ID \a ID,
+ /// address \a Addr and size \a Size.
+ virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+ uint64_t Size);
+
+ /// \brief Helper to emit outlined function for 'target' directive.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// \param CodeGen Lambda codegen specific to an accelerator device.
+/// An outlined function may not be an entry if, e.g., the if clause always
+ /// evaluates to false.
+ virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
+
+private:
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
- llvm::Constant *DefaultOpenMPPSource;
+ llvm::Constant *DefaultOpenMPPSource = nullptr;
/// \brief Map of flags and corresponding default locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
- Address getOrCreateDefaultLocation(OpenMPLocationFlags Flags);
+ Address getOrCreateDefaultLocation(unsigned Flags);
-public:
- /// \brief Describes ident structure that describes a source location.
- /// All descriptions are taken from
- /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
- /// Original structure:
- /// typedef struct ident {
- /// kmp_int32 reserved_1; /**< might be used in Fortran;
- /// see above */
- /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
- /// KMP_IDENT_KMPC identifies this union
- /// member */
- /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
- /// see above */
- ///#if USE_ITT_BUILD
- /// /* but currently used for storing
- /// region-specific ITT */
- /// /* contextual information. */
- ///#endif /* USE_ITT_BUILD */
- /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
- /// C++ */
- /// char const *psource; /**< String describing the source location.
- /// The string is composed of semi-colon separated
- // fields which describe the source file,
- /// the function and a pair of line numbers that
- /// delimit the construct.
- /// */
- /// } ident_t;
- enum IdentFieldIndex {
- /// \brief might be used in Fortran
- IdentField_Reserved_1,
- /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
- IdentField_Flags,
- /// \brief Not really used in Fortran any more
- IdentField_Reserved_2,
- /// \brief Source[4] in Fortran, do not use for C++
- IdentField_Reserved_3,
- /// \brief String describing the source location. The string is composed of
- /// semi-colon separated fields which describe the source file, the function
- /// and a pair of line numbers that delimit the construct.
- IdentField_PSource
- };
-private:
- llvm::StructType *IdentTy;
+ llvm::StructType *IdentTy = nullptr;
/// \brief Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
OpenMPDebugLocMapTy OpenMPDebugLocMap;
/// \brief The type for a microtask which gets passed to __kmpc_fork_call().
/// Original representation is:
/// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...);
- llvm::FunctionType *Kmpc_MicroTy;
+ llvm::FunctionType *Kmpc_MicroTy = nullptr;
/// \brief Stores debug location and ThreadID for the function.
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
@@ -261,6 +156,20 @@ private:
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
OpenMPLocThreadIDMapTy;
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap;
+ /// Map of UDRs and corresponding combiner/initializer.
+ typedef llvm::DenseMap<const OMPDeclareReductionDecl *,
+ std::pair<llvm::Function *, llvm::Function *>>
+ UDRMapTy;
+ UDRMapTy UDRMap;
+ /// Map of functions and locally defined UDRs.
+ typedef llvm::DenseMap<llvm::Function *,
+ SmallVector<const OMPDeclareReductionDecl *, 4>>
+ FunctionUDRMapTy;
+ FunctionUDRMapTy FunctionUDRMap;
+ IdentifierInfo *In = nullptr;
+ IdentifierInfo *Out = nullptr;
+ IdentifierInfo *Priv = nullptr;
+ IdentifierInfo *Orig = nullptr;
/// \brief Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@@ -272,7 +181,7 @@ private:
llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
InternalVars;
/// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
- llvm::Type *KmpRoutineEntryPtrTy;
+ llvm::Type *KmpRoutineEntryPtrTy = nullptr;
QualType KmpRoutineEntryPtrQTy;
/// \brief Type typedef struct kmp_task {
/// void * shareds; /**< pointer to block of pointers to
@@ -293,6 +202,12 @@ private:
/// } flags;
/// } kmp_depend_info_t;
QualType KmpDependInfoTy;
+ /// struct kmp_dim { // loop bounds info casted to kmp_int64
+ /// kmp_int64 lo; // lower
+ /// kmp_int64 up; // upper
+ /// kmp_int64 st; // stride
+ /// };
+ QualType KmpDimTy;
/// \brief Type struct __tgt_offload_entry{
/// void *addr; // Pointer to the offload entry info.
/// // (function or global)
@@ -402,30 +317,27 @@ private:
/// \brief Initialize target region entry.
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
- unsigned ColNum, unsigned Order);
+ unsigned Order);
/// \brief Register target region entry.
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
- unsigned ColNum, llvm::Constant *Addr,
+ llvm::Constant *Addr,
llvm::Constant *ID);
/// \brief Return true if a target region entry with the provided
/// information exists.
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
- StringRef ParentName, unsigned LineNum,
- unsigned ColNum) const;
+ StringRef ParentName, unsigned LineNum) const;
/// \brief Applies action \a Action on all registered entries.
typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned,
- unsigned, OffloadEntryInfoTargetRegion &)>
+ OffloadEntryInfoTargetRegion &)>
OffloadTargetRegionEntryInfoActTy;
void actOnTargetRegionEntriesInfo(
const OffloadTargetRegionEntryInfoActTy &Action);
private:
// Storage for target region entries kind. The storage is to be indexed by
- // file ID, device ID, parent function name, lane number, and column number.
+ // file ID, device ID, parent function name and line number.
typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion>
- OffloadEntriesTargetRegionPerColumn;
- typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerColumn>
OffloadEntriesTargetRegionPerLine;
typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine>
OffloadEntriesTargetRegionPerParentName;
@@ -442,10 +354,6 @@ private:
/// compilation unit. The function that does the registration is returned.
llvm::Function *createOffloadingBinaryDescriptorRegistration();
- /// \brief Creates offloading entry for the provided address \a Addr,
- /// name \a Name and size \a Size.
- void createOffloadEntry(llvm::Constant *Addr, StringRef Name, uint64_t Size);
-
/// \brief Creates all the offload entries in the current compilation unit
/// along with the associated metadata.
void createOffloadEntriesAndInfoMetadata();
@@ -476,7 +384,7 @@ private:
/// \param Flags Flags for OpenMP location.
///
llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPLocationFlags Flags = OMP_IDENT_KMPC);
+ unsigned Flags = 0);
/// \brief Returns pointer to ident_t type.
llvm::Type *getIdentTyPointerTy();
@@ -487,7 +395,7 @@ private:
/// \brief Returns specified OpenMP runtime function.
/// \param Function OpenMP runtime function.
/// \return Specified function.
- llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function);
+ llvm::Constant *createRuntimeFunction(unsigned Function);
/// \brief Returns __kmpc_for_static_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
@@ -530,7 +438,7 @@ private:
const llvm::Twine &Name);
/// \brief Set of threadprivate variables with the generated initializer.
- llvm::DenseSet<const VarDecl *> ThreadPrivateWithDefinition;
+ llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition;
/// \brief Emits initialization code for the threadprivate variables.
/// \param VDAddr Address of the global variable \a VD.
@@ -549,11 +457,52 @@ private:
///
llvm::Value *getCriticalRegionLock(StringRef CriticalName);
+ struct TaskResultTy {
+ llvm::Value *NewTask = nullptr;
+ llvm::Value *TaskEntry = nullptr;
+ llvm::Value *NewTaskNewTaskTTy = nullptr;
+ LValue TDBase;
+ RecordDecl *KmpTaskTQTyRD = nullptr;
+ llvm::Value *TaskDupFn = nullptr;
+ };
+ /// Emit task region for the task directive. The task region is emitted in
+ /// several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructions function to field destructions of the
+ /// resulting structure kmp_task_t.
+ /// \param D Current task directive.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references to the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \p
+ /// TaskFunction.
+  /// \param Data Additional data for task generation like tiedness, final
+  /// state, list of privates, etc.
+ TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D,
+ llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const OMPTaskDataTy &Data);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
virtual void clear();
+ /// Emit code for the specified user defined reduction construct.
+ virtual void emitUserDefinedReduction(CodeGenFunction *CGF,
+ const OMPDeclareReductionDecl *D);
+ /// Get combiner/initializer for the specified user-defined reduction, if any.
+ virtual std::pair<llvm::Function *, llvm::Function *>
+ getUserDefinedReduction(const OMPDeclareReductionDecl *D);
/// \brief Emits outlined function for the specified OpenMP parallel directive
/// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
/// kmp_int32 BoundID, struct context_vars*).
@@ -562,22 +511,30 @@ public:
/// \param InnermostKind Kind of innermost directive (for simple directives it
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
- virtual llvm::Value *emitParallelOutlinedFunction(
+ virtual llvm::Value *emitParallelOrTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
/// \brief Emits outlined function for the OpenMP task directive \a D. This
- /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
- /// PartID, struct context_vars*).
+ /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
+ /// TaskT).
/// \param D OpenMP directive.
/// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param PartIDVar Variable for partition id in the current OpenMP untied
+ /// task region.
+ /// \param TaskTVar Variable for task_t argument.
/// \param InnermostKind Kind of innermost directive (for simple directives it
/// is a directive itself, for combined - its innermost directive).
/// \param CodeGen Code generation sequence for the \a D directive.
+ /// \param Tied true if task is generated for tied task, false otherwise.
+ /// \param NumberOfParts Number of parts in untied task. Ignored for tied
+ /// tasks.
///
virtual llvm::Value *emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
+ const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ bool Tied, unsigned &NumberOfParts);
/// \brief Cleans up references to the objects in finished function.
///
@@ -664,6 +621,14 @@ public:
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
bool Chunked) const;
+ /// \brief Check if the specified \a ScheduleKind is static non-chunked.
+ /// This kind of distribute directive is emitted without outer loop.
+ /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
/// \brief Check if the specified \a ScheduleKind is dynamic.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -671,9 +636,9 @@ public:
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const;
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPScheduleClauseKind SchedKind,
- unsigned IVSize, bool IVSigned,
- bool Ordered, llvm::Value *UB,
+ const OpenMPScheduleTy &ScheduleKind,
+ unsigned IVSize, bool IVSigned, bool Ordered,
+ llvm::Value *UB,
llvm::Value *Chunk = nullptr);
/// \brief Call the appropriate runtime routine to initialize it before start
@@ -685,7 +650,7 @@ public:
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
- /// \param SchedKind Schedule kind, specified by the 'schedule' clause.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the iteration variable.
/// \param Ordered true if loop is ordered, false otherwise.
@@ -701,12 +666,36 @@ public:
/// For the default (nullptr) value, a chunk of 1 will be used.
///
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPScheduleClauseKind SchedKind,
+ const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned, bool Ordered,
- Address IL, Address LB,
- Address UB, Address ST,
+ Address IL, Address LB, Address UB, Address ST,
llvm::Value *Chunk = nullptr);
+  /// \brief Call the appropriate runtime routine to initialize it before start
+  /// of the distribute loop.
+  ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
+ /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+ /// \param Ordered true if loop is ordered, false otherwise.
+ /// \param IL Address of the output variable in which the flag of the
+ /// last iteration is returned.
+ /// \param LB Address of the output variable in which the lower iteration
+ /// number is returned.
+ /// \param UB Address of the output variable in which the upper iteration
+ /// number is returned.
+ /// \param ST Address of the output variable in which the stride value is
+  /// returned, necessary to generate the static_chunked scheduled loop.
+ /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
+  /// For the default (nullptr) value, a chunk of 1 will be used.
+ ///
+ virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDistScheduleClauseKind SchedKind,
+ unsigned IVSize, bool IVSigned,
+ bool Ordered, Address IL, Address LB,
+ Address UB, Address ST,
+ llvm::Value *Chunk = nullptr);
+
/// \brief Call the appropriate runtime routine to notify that we finished
/// iteration of the ordered loop with the dynamic scheduling.
///
@@ -807,12 +796,6 @@ public:
/// kmp_task_t *new_task), where new_task is a resulting structure from
/// previous items.
/// \param D Current task directive.
- /// \param Tied true if the task is tied (the task is tied to the thread that
- /// can suspend its task region), false - untied (the task is not tied to any
- /// thread).
- /// \param Final Contains either constant bool value, or llvm::Value * of i1
- /// type for final clause. If the value is true, the task forces all of its
- /// child tasks to become final and included tasks.
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
/// /*part_id*/, captured_struct */*__context*/);
/// \param SharedsTy A type which contains references to the shared variables.
@@ -820,29 +803,47 @@ public:
/// TaskFunction.
/// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
/// otherwise.
- /// \param PrivateVars List of references to private variables for the task
- /// directive.
- /// \param PrivateCopies List of private copies for each private variable in
- /// \p PrivateVars.
- /// \param FirstprivateVars List of references to private variables for the
- /// task directive.
- /// \param FirstprivateCopies List of private copies for each private variable
- /// in \p FirstprivateVars.
- /// \param FirstprivateInits List of references to auto generated variables
- /// used for initialization of a single array element. Used if firstprivate
- /// variable is of array type.
- /// \param Dependences List of dependences for the 'task' construct, including
- /// original expression and dependency type.
- virtual void emitTaskCall(
- CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
- bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+  /// \param Data Additional data for task generation like tiedness, final
+  /// state, list of privates, etc.
+ virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPExecutableDirective &D,
+ llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const Expr *IfCond,
+ const OMPTaskDataTy &Data);
+
+ /// Emit task region for the taskloop directive. The taskloop region is
+ /// emitted in several steps:
+ /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+ /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+ /// function:
+ /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+ /// TaskFunction(gtid, tt->part_id, tt->shareds);
+ /// return 0;
+ /// }
+ /// 2. Copy a list of shared variables to field shareds of the resulting
+ /// structure kmp_task_t returned by the previous call (if any).
+ /// 3. Copy a pointer to destructions function to field destructions of the
+ /// resulting structure kmp_task_t.
+ /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t
+ /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int
+  /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup), where
+  /// new_task is a resulting structure from previous items.
+ /// \param D Current task directive.
+ /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+ /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references to the shared variables.
+ /// \param Shareds Context with the list of shared variables from the \p
+ /// TaskFunction.
+ /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+ /// otherwise.
+  /// \param Data Additional data for task generation like tiedness, final
+  /// state, list of privates, etc.
+ virtual void emitTaskLoopCall(
+ CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
- const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
- ArrayRef<const Expr *> PrivateCopies,
- ArrayRef<const Expr *> FirstprivateVars,
- ArrayRef<const Expr *> FirstprivateCopies,
- ArrayRef<const Expr *> FirstprivateInits,
- ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences);
+ const Expr *IfCond, const OMPTaskDataTy &Data);
/// \brief Emit code for the directive that does not require outlining.
///
@@ -926,13 +927,15 @@ public:
/// \param OutlinedFn Outlined function value to be defined by this call.
/// \param OutlinedFnID Outlined function ID value to be defined by this call.
/// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// \param CodeGen Code generation sequence for the \a D directive.
/// An outlined function may not be an entry if, e.g., the if clause always
/// evaluates to false.
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
- bool IsOffloadEntry);
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
/// \brief Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device; in the event of
@@ -972,6 +975,68 @@ public:
/// was emitted in the current module and return the function that registers
/// it.
virtual llvm::Function *emitRegistrationFunction();
+
+ /// \brief Emits code for teams call of the \a OutlinedFn with
+ /// variables captured in a record which address is stored in \a
+ /// CapturedStruct.
+ /// \param OutlinedFn Outlined function to be run by team masters. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ ///
+ virtual void emitTeamsCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars);
+
+ /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code
+ /// for num_teams clause.
+ /// \param NumTeams An integer expression for the number of teams.
+ /// \param ThreadLimit An integer expression for the thread limit.
+ virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+ const Expr *ThreadLimit, SourceLocation Loc);
+
+ /// \brief Emit the target data mapping code associated with \a D.
+ /// \param D Directive to emit.
+ /// \param IfCond Expression evaluated in if clause associated with the target
+ /// directive, or null if no if clause is used.
+ /// \param Device Expression evaluated in device clause associated with the
+ /// target directive, or null if no device clause is used.
+ /// \param CodeGen Function that emits the enclosed region.
+ virtual void emitTargetDataCalls(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ const Expr *IfCond, const Expr *Device,
+ const RegionCodeGenTy &CodeGen);
+
+ /// \brief Emit the data mapping/movement code associated with the directive
+ /// \a D that should be of the form 'target [{enter|exit} data | update]'.
+ /// \param D Directive to emit.
+ /// \param IfCond Expression evaluated in if clause associated with the target
+ /// directive, or null if no if clause is used.
+ /// \param Device Expression evaluated in device clause associated with the
+ /// target directive, or null if no device clause is used.
+ virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ const Expr *IfCond,
+ const Expr *Device);
+
+ /// Marks function \a Fn with properly mangled versions of vector functions.
+ /// \param FD Function marked as 'declare simd'.
+ /// \param Fn LLVM function that must be marked with 'declare simd'
+ /// attributes.
+ virtual void emitDeclareSimdFunction(const FunctionDecl *FD,
+ llvm::Function *Fn);
+
+ /// Emit initialization for doacross loop nesting support.
+ /// \param D Loop-based construct used in doacross nesting construct.
+ virtual void emitDoacrossInit(CodeGenFunction &CGF,
+ const OMPLoopDirective &D);
+
+ /// Emit code for doacross ordered directive with 'depend' clause.
+ /// \param C 'depend' clause with 'sink|source' dependency kind.
+ virtual void emitDoacrossOrdered(CodeGenFunction &CGF,
+ const OMPDependClause *C);
};
} // namespace CodeGen
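
For orientation, the two doacross entry points declared above service loop nests like the following hypothetical one: emitDoacrossInit handles the ordered(n) loop-based construct itself, and emitDoacrossOrdered handles each depend(sink)/depend(source) directive.

void wavefront(int n, float *a) {
#pragma omp parallel for ordered(1)
  for (int i = 1; i < n; ++i) {
#pragma omp ordered depend(sink : i - 1) // wait until iteration i-1 is done
    a[i] += a[i - 1];
#pragma omp ordered depend(source)       // mark iteration i as done
  }
}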
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
new file mode 100644
index 000000000000..d64f6df72012
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -0,0 +1,396 @@
+//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a class for OpenMP runtime code generation specialized to NVPTX
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGOpenMPRuntimeNVPTX.h"
+#include "clang/AST/DeclOpenMP.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+/// \brief Get the GPU warp size.
+llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
+ llvm::None, "nvptx_warp_size");
+}
+
+/// \brief Get the id of the current thread on the GPU.
+llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
+ llvm::None, "nvptx_tid");
+}
+
+/// \brief Get the maximum number of threads in a block of the GPU.
+llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
+ llvm::None, "nvptx_num_threads");
+}
+
+/// \brief Get barrier to synchronize all threads in a block.
+void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ Bld.CreateCall(llvm::Intrinsic::getDeclaration(
+ &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
+}
+
+/// \brief Synchronize all GPU threads in a block.
+void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
+ getNVPTXCTABarrier(CGF);
+}
+
+/// \brief Get the thread id of the OMP master thread.
+/// The master thread id is the first thread (lane) of the last warp in the
+/// GPU block. Warp size is assumed to be some power of 2.
+/// Thread id is 0 indexed.
+/// E.g.: if NumThreads is 33, master id is 32.
+/// If NumThreads is 64, master id is 32.
+/// If NumThreads is 1024, master id is 992.
+llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
+
+ // We assume that the warp size is a power of 2.
+ llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
+
+ return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+ Bld.CreateNot(Mask), "master_tid");
+}
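
The bit trick above rounds NumThreads - 1 down to a multiple of the warp size. A scalar model, checked against the examples in the comment (warp size 32 assumed):

static unsigned masterThreadID(unsigned numThreads, unsigned warpSize) {
  // First lane of the last warp: round (numThreads - 1) down to a
  // multiple of warpSize, which must be a power of two.
  return (numThreads - 1) & ~(warpSize - 1);
}
// masterThreadID(33, 32) == 32, masterThreadID(64, 32) == 32,
// masterThreadID(1024, 32) == 992.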
+
+namespace {
+enum OpenMPRTLFunctionNVPTX {
+ /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
+ /// kmp_int32 thread_limit);
+ OMPRTL_NVPTX__kmpc_kernel_init,
+};
+
+// NVPTX Address space
+enum ADDRESS_SPACE {
+ ADDRESS_SPACE_SHARED = 3,
+};
+} // namespace
+
+CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
+ CodeGenModule &CGM)
+ : WorkerFn(nullptr), CGFI(nullptr) {
+ createWorkerFunction(CGM);
+}
+
+void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
+ CodeGenModule &CGM) {
+ // Create a worker function with no arguments.
+ CGFI = &CGM.getTypes().arrangeNullaryFunction();
+
+ WorkerFn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
+ /* placeholder */ "_worker", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
+ WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ WorkerFn->addFnAttr(llvm::Attribute::NoInline);
+}
+
+void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
+ //
+ // Initialize master-worker control state in shared memory.
+ //
+
+ auto DL = CGM.getDataLayout();
+ ActiveWorkers = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
+ llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
+ llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
+ ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
+
+ WorkID = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
+ llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
+ llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
+ WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
+}
+
+void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
+ auto &Ctx = CGM.getContext();
+
+ CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
+ emitWorkerLoop(CGF, WST);
+ CGF.FinishFunction();
+}
+
+void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
+ WorkerFunctionState &WST) {
+ //
+ // The workers enter this loop and wait for parallel work from the master.
+ // When the master encounters a parallel region it sets up the work and the
+ // variable arguments, and wakes up the workers. The workers first check if
+ // they are required for the parallel region, i.e., within the # of requested
+ // parallel threads. The activated workers load the variable arguments and
+ // execute the parallel work.
+ //
+
+ CGBuilderTy &Bld = CGF.Builder;
+
+ llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work");
+ llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers");
+ llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel");
+ llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel");
+ llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+
+ CGF.EmitBranch(AwaitBB);
+
+ // Workers wait for work from master.
+ CGF.EmitBlock(AwaitBB);
+ // Wait for parallel work
+ syncCTAThreads(CGF);
+ // On termination condition (workid == 0), exit loop.
+ llvm::Value *ShouldTerminate = Bld.CreateICmpEQ(
+ Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()),
+ llvm::Constant::getNullValue(WorkID->getType()->getElementType()),
+ "should_terminate");
+ Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
+
+ // Activate requested workers.
+ CGF.EmitBlock(SelectWorkersBB);
+ llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+ llvm::Value *ActiveThread = Bld.CreateICmpSLT(
+ ThreadID,
+ Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()),
+ "active_thread");
+ Bld.CreateCondBr(ActiveThread, ExecuteBB, BarrierBB);
+
+ // Signal start of parallel region.
+ CGF.EmitBlock(ExecuteBB);
+ // TODO: Add parallel work.
+
+ // Signal end of parallel region.
+ CGF.EmitBlock(TerminateBB);
+ CGF.EmitBranch(BarrierBB);
+
+ // All active and inactive workers wait at a barrier after parallel region.
+ CGF.EmitBlock(BarrierBB);
+ // Barrier after parallel region.
+ syncCTAThreads(CGF);
+ CGF.EmitBranch(AwaitBB);
+
+ // Exit target region.
+ CGF.EmitBlock(ExitBB);
+}
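
The control flow emitted above, rendered as plain C++ for readability; barrier() stands in for the nvvm barrier0 intrinsic, and the two externs model the __tgt_work_id / __omp_num_threads shared-memory globals created by initializeEnvironment.

extern void barrier();                     // ~ llvm.nvvm.barrier0
extern volatile unsigned long long workID; // ~ __tgt_work_id
extern volatile int activeWorkers;         // ~ __omp_num_threads

void workerLoop(int tid) {
  for (;;) {
    barrier();                 // .await.work: sleep until the master signals
    if (workID == 0)           // termination condition set by the footer
      break;                   // -> .exit
    if (tid < activeWorkers) { // .select.workers
      // .execute.parallel: run the work item (still a TODO in this change).
    }
    barrier();                 // .barrier.parallel: all workers rejoin
  }
}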
+
+// Setup NVPTX threads for master-worker OpenMP scheme.
+void CGOpenMPRuntimeNVPTX::emitEntryHeader(CodeGenFunction &CGF,
+ EntryFunctionState &EST,
+ WorkerFunctionState &WST) {
+ CGBuilderTy &Bld = CGF.Builder;
+
+ // Get the master thread id.
+ llvm::Value *MasterID = getMasterThreadID(CGF);
+ // Current thread's identifier.
+ llvm::Value *ThreadID = getNVPTXThreadID(CGF);
+
+ // Setup BBs in entry function.
+ llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker");
+ llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
+ llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ // The head (master thread) marches on while its body of companion threads in
+ // the warp go to sleep.
+ llvm::Value *ShouldDie =
+ Bld.CreateICmpUGT(ThreadID, MasterID, "excess_in_master_warp");
+ Bld.CreateCondBr(ShouldDie, EST.ExitBB, WorkerCheckBB);
+
+ // Select worker threads...
+ CGF.EmitBlock(WorkerCheckBB);
+ llvm::Value *IsWorker = Bld.CreateICmpULT(ThreadID, MasterID, "is_worker");
+ Bld.CreateCondBr(IsWorker, WorkerBB, MasterBB);
+
+ // ... and send to worker loop, awaiting parallel invocation.
+ CGF.EmitBlock(WorkerBB);
+ CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None);
+ CGF.EmitBranch(EST.ExitBB);
+
+ // Only master thread executes subsequent serial code.
+ CGF.EmitBlock(MasterBB);
+
+ // First action in sequential region:
+ // Initialize the state of the OpenMP runtime library on the GPU.
+ llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init),
+ Args);
+}
+
+void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF,
+ EntryFunctionState &EST) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
+ CGF.EmitBranch(TerminateBB);
+
+ CGF.EmitBlock(TerminateBB);
+ // Signal termination condition.
+ Bld.CreateAlignedStore(
+ llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID,
+ WorkID->getAlignment());
+ // Barrier to terminate worker threads.
+ syncCTAThreads(CGF);
+ // Master thread jumps to exit point.
+ CGF.EmitBranch(EST.ExitBB);
+
+ CGF.EmitBlock(EST.ExitBB);
+}
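
Header and footer together bracket a target kernel roughly as follows (a sketch only; workerLoop is as rendered after emitWorkerLoop above, and masterID comes from getMasterThreadID):

extern void workerLoop(int tid);
extern void barrier();
extern volatile unsigned long long workID;

void kernelEntry(int tid, int masterID) {
  if (tid > masterID)     // excess lanes in the master warp exit at once
    return;
  if (tid < masterID) {   // workers park in the worker loop
    workerLoop(tid);
    return;
  }
  // Master only: __kmpc_kernel_init(0, tid), then the sequential region.
  // Footer: release the workers, then leave through the same exit block.
  workID = 0;             // signal the termination condition
  barrier();              // let the workers observe it
}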
+
+/// \brief Returns specified OpenMP runtime function for the current OpenMP
+/// implementation. Specialized for the NVPTX device.
+/// \param Function OpenMP runtime function.
+/// \return Specified function.
+llvm::Constant *
+CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
+ llvm::Constant *RTLFn = nullptr;
+ switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
+ case OMPRTL_NVPTX__kmpc_kernel_init: {
+ // Build void __kmpc_kernel_init(kmp_int32 omp_handle,
+ // kmp_int32 thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
+ break;
+ }
+ }
+ return RTLFn;
+}
+
+void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
+ llvm::Constant *Addr,
+ uint64_t Size) {
+ auto *F = dyn_cast<llvm::Function>(Addr);
+ // TODO: Add support for global variables on the device after declare target
+ // support.
+ if (!F)
+ return;
+ llvm::Module *M = F->getParent();
+ llvm::LLVMContext &Ctx = M->getContext();
+
+ // Get "nvvm.annotations" metadata node
+ llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
+
+ llvm::Metadata *MDVals[] = {
+ llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"),
+ llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
+ // Append metadata to nvvm.annotations
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+}
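
The effect in the emitted module is one nvvm.annotations operand per target entry, which is what tells the NVPTX backend to emit the function as a kernel. Illustratively (entry name shortened):

// !nvvm.annotations = !{!0}
// !0 = !{void (...)* @<target entry>, !"kernel", i32 1}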
+
+void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ if (!IsOffloadEntry) // Nothing to do.
+ return;
+
+ assert(!ParentName.empty() && "Invalid target region parent name!");
+
+ EntryFunctionState EST;
+ WorkerFunctionState WST(CGM);
+
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ CGOpenMPRuntimeNVPTX &RT;
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
+ CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
+
+ public:
+ NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+ CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
+ : RT(RT), EST(EST), WST(WST) {}
+ void Enter(CodeGenFunction &CGF) override {
+ RT.emitEntryHeader(CGF, EST, WST);
+ }
+ void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); }
+ } Action(*this, EST, WST);
+ CodeGen.setAction(Action);
+ emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
+
+ // Create the worker function
+ emitWorkerFunction(WST);
+
+ // Now change the name of the worker function to correspond to this target
+ // region's entry function.
+ WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
+}
+
+CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
+ : CGOpenMPRuntime(CGM), ActiveWorkers(nullptr), WorkID(nullptr) {
+ if (!CGM.getLangOpts().OpenMPIsDevice)
+ llvm_unreachable("OpenMP NVPTX can only handle device code.");
+
+ // Called once per module during initialization.
+ initializeEnvironment();
+}
+
+void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
+ const Expr *NumTeams,
+ const Expr *ThreadLimit,
+ SourceLocation Loc) {}
+
+llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+
+ llvm::Function *OutlinedFun = nullptr;
+ if (isa<OMPTeamsDirective>(D)) {
+ llvm::Value *OutlinedFunVal =
+ CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
+ D, ThreadIDVar, InnermostKind, CodeGen);
+ OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ } else
+ llvm_unreachable("parallel directive is not yet supported for nvptx "
+ "backend.");
+
+ return OutlinedFun;
+}
+
+void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ Address ZeroAddr =
+ CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
+ /*Name*/ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
new file mode 100644
index 000000000000..a6c64b2f6d67
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -0,0 +1,179 @@
+//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a class for OpenMP runtime code generation specialized to NVPTX
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
+#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
+
+#include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
+#include "llvm/IR/CallSite.h"
+
+namespace clang {
+namespace CodeGen {
+
+class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+public:
+ class EntryFunctionState {
+ public:
+ llvm::BasicBlock *ExitBB;
+
+ EntryFunctionState() : ExitBB(nullptr) {}
+ };
+
+ class WorkerFunctionState {
+ public:
+ llvm::Function *WorkerFn;
+ const CGFunctionInfo *CGFI;
+
+ WorkerFunctionState(CodeGenModule &CGM);
+
+ private:
+ void createWorkerFunction(CodeGenModule &CGM);
+ };
+
+ /// \brief Helper for target entry function. Guide the master and worker
+ /// threads to their respective locations.
+ void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ WorkerFunctionState &WST);
+
+ /// \brief Signal termination of OMP execution.
+ void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
+private:
+ //
+ // NVPTX calls.
+ //
+
+ /// \brief Get the GPU warp size.
+ llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
+
+ /// \brief Get the id of the current thread on the GPU.
+ llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
+
+ /// \brief Get the maximum number of threads in a block of the GPU.
+ llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
+
+ /// \brief Get barrier to synchronize all threads in a block.
+ void getNVPTXCTABarrier(CodeGenFunction &CGF);
+
+ /// \brief Synchronize all GPU threads in a block.
+ void syncCTAThreads(CodeGenFunction &CGF);
+
+ //
+ // OMP calls.
+ //
+
+ /// \brief Get the thread id of the OMP master thread.
+ /// The master thread id is the first thread (lane) of the last warp in the
+ /// GPU block. Warp size is assumed to be some power of 2.
+ /// Thread id is 0 indexed.
+ /// E.g.: if NumThreads is 33, master id is 32.
+ /// If NumThreads is 64, master id is 32.
+ /// If NumThreads is 1024, master id is 992.
+ llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
+
+ //
+ // Private state and methods.
+ //
+
+ // Master-worker control state.
+ // Number of requested OMP threads in parallel region.
+ llvm::GlobalVariable *ActiveWorkers;
+ // Outlined function for the workers to execute.
+ llvm::GlobalVariable *WorkID;
+
+ /// \brief Initialize master-worker control state.
+ void initializeEnvironment();
+
+ /// \brief Emit the worker function for the current target region.
+ void emitWorkerFunction(WorkerFunctionState &WST);
+
+ /// \brief Helper for worker function. Emit body of worker loop.
+ void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
+
+ /// \brief Returns specified OpenMP runtime function for the current OpenMP
+ /// implementation. Specialized for the NVPTX device.
+ /// \param Function OpenMP runtime function.
+ /// \return Specified function.
+ llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+
+ //
+ // Base class overrides.
+ //
+
+ /// \brief Creates offloading entry for the provided entry ID \a ID,
+ /// address \a Addr and size \a Size.
+ void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+ uint64_t Size) override;
+
+ /// \brief Emit outlined function for 'target' directive on the NVPTX
+ /// device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
/// An outlined function may not be an entry if, e.g., the if clause always
+ /// evaluates to false.
+ void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+public:
+ explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+
+ /// \brief This function ought to emit, in the general case, a call to
+ /// the OpenMP runtime function kmpc_push_num_teams. On the NVPTX backend it
+ /// is not needed, as these numbers are obtained through the PTX grid and
+ /// block configuration.
+ /// \param NumTeams An integer expression for the number of teams.
+ /// \param ThreadLimit An integer expression for the thread limit.
+ void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+ const Expr *ThreadLimit, SourceLocation Loc) override;
+
+ /// \brief Emits an outlined function for the specified OpenMP parallel
+ /// directive, but an inlined function for the teams directive
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// \brief Emits code for teams call of the \a OutlinedFn with
+ /// variables captured in a record whose address is stored in \a
+ /// CapturedStruct.
+ /// \param OutlinedFn Outlined function to be run by team masters. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ ///
+ void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) override;
+};
+
+} // namespace CodeGen
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h
index d4ad33e3345e..7b9c27d1d772 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h
@@ -11,7 +11,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGRECORDLAYOUT_H
#include "clang/AST/CharUnits.h"
-#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DerivedTypes.h"
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 375b59c5cb33..7d530a278fbf 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -121,7 +121,7 @@ struct CGRecordLowering {
/// \brief Wraps llvm::Type::getIntNTy with some implicit arguments.
llvm::Type *getIntNType(uint64_t NumBits) {
return llvm::Type::getIntNTy(Types.getLLVMContext(),
- (unsigned)llvm::RoundUpToAlignment(NumBits, 8));
+ (unsigned)llvm::alignTo(NumBits, 8));
}
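
llvm::alignTo is the renamed RoundUpToAlignment; the rounding is unchanged, so the bitfield storage units come out the same. A scalar model of the rounding used here:

static unsigned roundBitsUpToBytes(unsigned numBits) {
  return (numBits + 7) / 8 * 8; // alignTo(numBits, 8): 1 -> 8, 17 -> 24 (i24)
}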
/// \brief Gets an llvm type of size NumBytes and alignment 1.
llvm::Type *getByteArrayType(CharUnits NumBytes) {
@@ -555,7 +555,7 @@ void CGRecordLowering::clipTailPadding() {
if (Member->Offset < Tail) {
assert(Prior->Kind == MemberInfo::Field && !Prior->FD &&
"Only storage fields have tail padding!");
- Prior->Data = getByteArrayType(bitsToCharUnits(llvm::RoundUpToAlignment(
+ Prior->Data = getByteArrayType(bitsToCharUnits(llvm::alignTo(
cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8)));
}
if (Member->Data)
@@ -609,8 +609,8 @@ void CGRecordLowering::insertPadding() {
CharUnits Offset = Member->Offset;
assert(Offset >= Size);
// Insert padding if we need to.
- if (Offset != Size.RoundUpToAlignment(Packed ? CharUnits::One() :
- getAlignment(Member->Data)))
+ if (Offset !=
+ Size.alignTo(Packed ? CharUnits::One() : getAlignment(Member->Data)))
Padding.push_back(std::make_pair(Size, Offset - Size));
Size = Offset + getSize(Member->Data);
}
@@ -842,7 +842,7 @@ void CGRecordLayout::print(raw_ostream &OS) const {
OS << "]>\n";
}
-void CGRecordLayout::dump() const {
+LLVM_DUMP_METHOD void CGRecordLayout::dump() const {
print(llvm::errs());
}
@@ -855,6 +855,6 @@ void CGBitFieldInfo::print(raw_ostream &OS) const {
<< " StorageOffset:" << StorageOffset.getQuantity() << ">";
}
-void CGBitFieldInfo::dump() const {
+LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const {
print(llvm::errs());
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
index cc4fa2ec5972..d815863e929d 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
@@ -256,15 +256,45 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
case Stmt::OMPTargetDataDirectiveClass:
EmitOMPTargetDataDirective(cast<OMPTargetDataDirective>(*S));
break;
+ case Stmt::OMPTargetEnterDataDirectiveClass:
+ EmitOMPTargetEnterDataDirective(cast<OMPTargetEnterDataDirective>(*S));
+ break;
+ case Stmt::OMPTargetExitDataDirectiveClass:
+ EmitOMPTargetExitDataDirective(cast<OMPTargetExitDataDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelDirectiveClass:
+ EmitOMPTargetParallelDirective(cast<OMPTargetParallelDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelForDirectiveClass:
+ EmitOMPTargetParallelForDirective(cast<OMPTargetParallelForDirective>(*S));
+ break;
case Stmt::OMPTaskLoopDirectiveClass:
EmitOMPTaskLoopDirective(cast<OMPTaskLoopDirective>(*S));
break;
case Stmt::OMPTaskLoopSimdDirectiveClass:
EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S));
break;
-case Stmt::OMPDistributeDirectiveClass:
+ case Stmt::OMPDistributeDirectiveClass:
EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S));
- break;
+ break;
+ case Stmt::OMPTargetUpdateDirectiveClass:
+ EmitOMPTargetUpdateDirective(cast<OMPTargetUpdateDirective>(*S));
+ break;
+ case Stmt::OMPDistributeParallelForDirectiveClass:
+ EmitOMPDistributeParallelForDirective(
+ cast<OMPDistributeParallelForDirective>(*S));
+ break;
+ case Stmt::OMPDistributeParallelForSimdDirectiveClass:
+ EmitOMPDistributeParallelForSimdDirective(
+ cast<OMPDistributeParallelForSimdDirective>(*S));
+ break;
+ case Stmt::OMPDistributeSimdDirectiveClass:
+ EmitOMPDistributeSimdDirective(cast<OMPDistributeSimdDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelForSimdDirectiveClass:
+ EmitOMPTargetParallelForSimdDirective(
+ cast<OMPTargetParallelForSimdDirective>(*S));
+ break;
}
}
@@ -542,13 +572,17 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// unequal to 0. The condition must be a scalar type.
LexicalScope ConditionScope(*this, S.getCond()->getSourceRange());
+ if (S.getInit())
+ EmitStmt(S.getInit());
+
if (S.getConditionVariable())
EmitAutoVarDecl(*S.getConditionVariable());
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
bool CondConstant;
- if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant)) {
+ if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant,
+ S.isConstexpr())) {
// Figure out which block (then or else) is executed.
const Stmt *Executed = S.getThen();
const Stmt *Skipped = S.getElse();
@@ -557,7 +591,7 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// If the skipped block has no labels in it, just emit the executed block.
// This avoids emitting dead code and simplifies the CFG substantially.
- if (!ContainsLabel(Skipped)) {
+ if (S.isConstexpr() || !ContainsLabel(Skipped)) {
if (CondConstant)
incrementProfileCounter(&S);
if (Executed) {
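
The two additions to EmitIfStmt in the hunks above correspond to C++17 source forms like these (illustrative):

int classify(int v) {
  if (int r = v * 2; r > 10)         // S.getInit() is emitted before the
    return r;                        // condition variable and condition
  if constexpr (sizeof(void *) == 8) // S.isConstexpr(): the dead arm is
    return 8;                        // skipped even if it contains labels
  return 0;
}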
@@ -617,7 +651,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
EmitBlock(LoopHeader.getBlock());
- LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs);
+ LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs,
+ Builder.getCurrentDebugLocation());
// Create an exit block for when the condition fails, which will
// also become the break target.
@@ -708,7 +743,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
- LoopStack.push(LoopBody, CGM.getContext(), DoAttrs);
+ LoopStack.push(LoopBody, CGM.getContext(), DoAttrs,
+ Builder.getCurrentDebugLocation());
EmitBlockWithFallThrough(LoopBody, &S);
{
@@ -760,6 +796,8 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
LexicalScope ForScope(*this, S.getSourceRange());
+ llvm::DebugLoc DL = Builder.getCurrentDebugLocation();
+
// Evaluate the first part before the loop.
if (S.getInit())
EmitStmt(S.getInit());
@@ -771,7 +809,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
llvm::BasicBlock *CondBlock = Continue.getBlock();
EmitBlock(CondBlock);
- LoopStack.push(CondBlock, CGM.getContext(), ForAttrs);
+ LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, DL);
// If the for loop doesn't have an increment we can just use the
// condition as the continue block. Otherwise we'll need to create
@@ -856,9 +894,12 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
LexicalScope ForScope(*this, S.getSourceRange());
+ llvm::DebugLoc DL = Builder.getCurrentDebugLocation();
+
// Evaluate the first pieces before the loop.
EmitStmt(S.getRangeStmt());
- EmitStmt(S.getBeginEndStmt());
+ EmitStmt(S.getBeginStmt());
+ EmitStmt(S.getEndStmt());
// Start the loop with a block that tests the condition.
// If there's an increment, the continue scope will be overwritten
@@ -866,7 +907,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
EmitBlock(CondBlock);
- LoopStack.push(CondBlock, CGM.getContext(), ForAttrs);
+ LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, DL);
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
@@ -1147,7 +1188,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
// If the body of the case is just a 'break', try to not emit an empty block.
// If we're profiling or we're not optimizing, leave the block in for better
// debug and coverage analysis.
- if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
+ if (!CGM.getCodeGenOpts().hasProfileClangInstr() &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isa<BreakStmt>(S.getSubStmt())) {
JumpDest Block = BreakContinueStack.back().BreakBlock;
@@ -1194,7 +1235,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
if (SwitchWeights)
SwitchWeights->push_back(getProfileCount(NextCase));
- if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
+ if (CGM.getCodeGenOpts().hasProfileClangInstr()) {
CaseDest = createBasicBlock("sw.bb");
EmitBlockWithFallThrough(CaseDest, &S);
}
@@ -1208,6 +1249,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
}
void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) {
+ // If there is no enclosing switch instance that we're aware of, then this
+ // default statement can be elided. This situation only happens when we've
+ // constant-folded the switch.
+ if (!SwitchInsn) {
+ EmitStmt(S.getSubStmt());
+ return;
+ }
+
llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
assert(DefaultBlock->empty() &&
"EmitDefaultStmt: Default block already defined?");
@@ -1274,6 +1323,10 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
// Handle this as two cases: we might be looking for the SwitchCase (if so
// the skipped statements must be skippable) or we might already have it.
CompoundStmt::const_body_iterator I = CS->body_begin(), E = CS->body_end();
+ bool StartedInLiveCode = FoundCase;
+ unsigned StartSize = ResultStmts.size();
+
+ // If we've not found the case yet, scan through looking for it.
if (Case) {
// Keep track of whether we see a skipped declaration. The code could be
// using the declaration even if it is skipped, so we can't optimize out
@@ -1283,7 +1336,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
// If we're looking for the case, just see if we can skip each of the
// substatements.
for (; Case && I != E; ++I) {
- HadSkippedDecl |= isa<DeclStmt>(*I);
+ HadSkippedDecl |= CodeGenFunction::mightAddDeclToScope(*I);
switch (CollectStatementsForCase(*I, Case, FoundCase, ResultStmts)) {
case CSFC_Failure: return CSFC_Failure;
@@ -1319,11 +1372,19 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
break;
}
}
+
+ if (!FoundCase)
+ return CSFC_Success;
+
+ assert(!HadSkippedDecl && "fallthrough after skipping decl");
}
// If we have statements in our range, then we know that the statements are
// live and need to be added to the set of statements we're tracking.
+ bool AnyDecls = false;
for (; I != E; ++I) {
+ AnyDecls |= CodeGenFunction::mightAddDeclToScope(*I);
+
switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) {
case CSFC_Failure: return CSFC_Failure;
case CSFC_FallThrough:
@@ -1341,7 +1402,24 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
}
}
- return Case ? CSFC_Success : CSFC_FallThrough;
+ // If we're about to fall out of a scope without hitting a 'break;', we
+ // can't perform the optimization if there were any decls in that scope
+ // (we'd lose their end-of-lifetime).
+ if (AnyDecls) {
+ // If the entire compound statement was live, there's one more thing we
+ // can try before giving up: emit the whole thing as a single statement.
+ // We can do that unless the statement contains a 'break;'.
+ // FIXME: Such a break must be at the end of a construct within this one.
+ // We could emit this by just ignoring the BreakStmts entirely.
+ if (StartedInLiveCode && !CodeGenFunction::containsBreak(S)) {
+ ResultStmts.resize(StartSize);
+ ResultStmts.push_back(S);
+ } else {
+ return CSFC_Failure;
+ }
+ }
+
+ return CSFC_FallThrough;
}
// Okay, this is some other statement that we don't handle explicitly, like a
@@ -1438,6 +1516,9 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
incrementProfileCounter(Case);
RunCleanupsScope ExecutedScope(*this);
+ if (S.getInit())
+ EmitStmt(S.getInit());
+
// Emit the condition variable if needed inside the entire cleanup scope
// used by this special case for constant folded switches.
if (S.getConditionVariable())
@@ -1465,6 +1546,10 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog");
RunCleanupsScope ConditionScope(*this);
+
+ if (S.getInit())
+ EmitStmt(S.getInit());
+
if (S.getConditionVariable())
EmitAutoVarDecl(*S.getConditionVariable());
llvm::Value *CondV = EmitScalarExpr(S.getCond());
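
Both paths above, the constant-folded special case and the general one, now emit a C++17 switch init-statement before the condition variable, e.g. (illustrative):

int bucket(int x) {
  switch (int v = x & 3; v) { // S.getInit() runs inside the cleanup scope
  case 0:
    return 0;
  default:
    return v;
  }
}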
@@ -1537,16 +1622,13 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// If the switch has a condition wrapped by __builtin_unpredictable,
// create metadata that specifies that the switch is unpredictable.
// Don't bother if not optimizing because that metadata would not be used.
- if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
- if (const CallExpr *Call = dyn_cast<CallExpr>(S.getCond())) {
- const Decl *TargetDecl = Call->getCalleeDecl();
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
- if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
- llvm::MDBuilder MDHelper(getLLVMContext());
- SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
- MDHelper.createUnpredictable());
- }
- }
+ auto *Call = dyn_cast<CallExpr>(S.getCond());
+ if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
+ auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
+ if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
+ llvm::MDBuilder MDHelper(getLLVMContext());
+ SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
+ MDHelper.createUnpredictable());
}
}
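
The restructured check matches exactly one shape, shown below; when optimizing, the emitted switch instruction is tagged with !unpredictable metadata.

int dispatch(long k) {
  switch (__builtin_unpredictable(k)) { // call must be the condition itself
  case 0:
    return 1;
  default:
    return 2;
  }
}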
@@ -2035,6 +2117,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::ConstantAsMetadata::get(Loc)));
}
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all inline asm blocks in CUDA as convergent
+ // (meaning, they may call an intrinsically convergent op, such as bar.sync,
+ // and so can't have certain optimizations applied around them).
+ Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ llvm::Attribute::Convergent);
+ }
+
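
A device-side illustration of why the attribute is needed: the asm below hides a barrier, which must not be duplicated or moved across divergent control flow (hypothetical kernel code, compiled with clang's CUDA support).

__device__ void blockBarrier() {
  asm volatile("bar.sync 0;" ::: "memory"); // convergent: placement matters
}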
// Extract all of the register value results from the asm.
std::vector<llvm::Value*> RegResults;
if (ResultRegTypes.size() == 1) {
@@ -2147,8 +2237,7 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
// Create the function declaration.
FunctionType::ExtInfo ExtInfo;
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
- /*IsVariadic=*/false);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Function *F =
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d0ee891441d7..d214340bdafe 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -11,15 +11,117 @@
//
//===----------------------------------------------------------------------===//
+#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/DeclOpenMP.h"
+#include "llvm/IR/CallSite.h"
using namespace clang;
using namespace CodeGen;
+namespace {
+/// Lexical scope for OpenMP executable constructs that handles correct codegen
+/// for captured expressions.
+class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
+ void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ for (const auto *C : S.clauses()) {
+ if (auto *CPI = OMPClauseWithPreInit::get(C)) {
+ if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>())
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
+ }
+ }
+ }
+ }
+ }
+ }
+ CodeGenFunction::OMPPrivateScope InlinedShareds;
+
+ static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
+ return CGF.LambdaCaptureFields.lookup(VD) ||
+ (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
+ (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
+ }
+
+public:
+ OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ bool AsInlined = false)
+ : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
+ InlinedShareds(CGF) {
+ emitPreInitStmt(CGF, S);
+ if (AsInlined) {
+ if (S.hasAssociatedStmt()) {
+ auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ for (auto &C : CS->captures()) {
+ if (C.capturesVariable() || C.capturesVariableByCopy()) {
+ auto *VD = C.getCapturedVar();
+ DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ isCapturedVar(CGF, VD) ||
+ (CGF.CapturedStmtInfo &&
+ InlinedShareds.isGlobalVarCaptured(VD)),
+ VD->getType().getNonReferenceType(), VK_LValue,
+ SourceLocation());
+ InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
+ return CGF.EmitLValue(&DRE).getAddress();
+ });
+ }
+ }
+ (void)InlinedShareds.Privatize();
+ }
+ }
+ }
+};
+
+/// Private scope for OpenMP loop-based directives that supports capturing
+/// of expressions used in the loop statement.
+class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
+ void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
+ if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
+ if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
+ for (const auto *I : PreInits->decls())
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ }
+ }
+ }
+
+public:
+ OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
+ : CodeGenFunction::RunCleanupsScope(CGF) {
+ emitPreInitStmt(CGF, S);
+ }
+};
+
+} // namespace
+
+llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
+ auto &C = getContext();
+ llvm::Value *Size = nullptr;
+ auto SizeInChars = C.getTypeSizeInChars(Ty);
+ if (SizeInChars.isZero()) {
+ // getTypeSizeInChars() returns 0 for a VLA.
+ while (auto *VAT = C.getAsVariableArrayType(Ty)) {
+ llvm::Value *ArraySize;
+ std::tie(ArraySize, Ty) = getVLASize(VAT);
+ Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
+ }
+ SizeInChars = C.getTypeSizeInChars(Ty);
+ if (SizeInChars.isZero())
+ return llvm::ConstantInt::get(SizeTy, /*V=*/0);
+ Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
+ } else
+ Size = CGM.getSize(SizeInChars);
+ return Size;
+}
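
For a VLA, the loop above multiplies out the variable dimensions before applying the element size. A scalar model of the same computation for a hypothetical double a[n][m]:

#include <cstddef>

size_t vlaSizeBytes(size_t n, size_t m) {
  size_t numElements = n * m;          // one multiply per VLA level (NUW)
  return numElements * sizeof(double); // times the element size in chars
}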
+
void CodeGenFunction::GenerateOpenMPCapturedVars(
const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
const RecordDecl *RD = S.getCapturedRecordDecl();
@@ -34,10 +136,33 @@ void CodeGenFunction::GenerateOpenMPCapturedVars(
CapturedVars.push_back(Val);
} else if (CurCap->capturesThis())
CapturedVars.push_back(CXXThisValue);
- else if (CurCap->capturesVariableByCopy())
- CapturedVars.push_back(
- EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal());
- else {
+ else if (CurCap->capturesVariableByCopy()) {
+ llvm::Value *CV =
+ EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
+
+ // If the field is not a pointer, we need to save the actual value
+ // and load it as a void pointer.
+ if (!CurField->getType()->isAnyPointerType()) {
+ auto &Ctx = getContext();
+ auto DstAddr = CreateMemTemp(
+ Ctx.getUIntPtrType(),
+ Twine(CurCap->getCapturedVar()->getName()) + ".casted");
+ LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
+
+ auto *SrcAddrVal = EmitScalarConversion(
+ DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
+ Ctx.getPointerType(CurField->getType()), SourceLocation());
+ LValue SrcLV =
+ MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
+
+ // Store the value using the source type pointer.
+ EmitStoreThroughLValue(RValue::get(CV), SrcLV);
+
+ // Load the value using the destination type pointer.
+ CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
+ }
+ CapturedVars.push_back(CV);
+ } else {
assert(CurCap->capturesVariable() && "Expected capture by reference.");
CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
}
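
A host-side model of the store/load pair above: a by-copy capture narrower than a pointer is written through its own type into a uintptr-sized slot and read back as uintptr, so every by-copy capture travels as one pointer-width value. This sketch assumes, as the codegen does, that the value fits in the slot.

#include <cstdint>
#include <cstring>

uintptr_t packCapture(float v) {
  uintptr_t slot = 0;
  std::memcpy(&slot, &v, sizeof v); // store through the source type
  return slot;                      // reload through the destination type
}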
@@ -107,8 +232,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
assert(I->capturesVariableArrayType());
II = &getContext().Idents.get("vla");
}
- if (ArgType->isVariablyModifiedType())
- ArgType = getContext().getVariableArrayDecayedType(ArgType);
+ if (ArgType->isVariablyModifiedType()) {
+ bool IsReference = ArgType->isLValueReferenceType();
+ ArgType =
+ getContext().getCanonicalParamType(ArgType.getNonReferenceType());
+ if (IsReference && !ArgType->isPointerType()) {
+ ArgType = getContext().getLValueReferenceType(
+ ArgType, /*SpelledAsLValue=*/false);
+ }
+ }
Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
FD->getLocation(), II, ArgType));
++I;
@@ -120,8 +252,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
// Create the function declaration.
FunctionType::ExtInfo ExtInfo;
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
- /*IsVariadic=*/false);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Function *F = llvm::Function::Create(
@@ -141,7 +272,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
// use the value that we get from the arguments.
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt]));
- ++Cnt, ++I;
+ ++Cnt;
+ ++I;
continue;
}
@@ -162,8 +294,14 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
QualType VarTy = Var->getType();
Address ArgAddr = ArgLVal.getAddress();
if (!VarTy->isReferenceType()) {
- ArgAddr = EmitLoadOfReference(
- ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
+ if (ArgLVal.getType()->isLValueReferenceType()) {
+ ArgAddr = EmitLoadOfReference(
+ ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
+ } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
+ assert(ArgLVal.getType()->isPointerType());
+ ArgAddr = EmitLoadOfPointer(
+ ArgAddr, ArgLVal.getType()->castAs<PointerType>());
+ }
}
setAddrOfLocalVar(
Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
@@ -172,17 +310,17 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
"Not expecting a captured pointer.");
auto *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
- setAddrOfLocalVar(I->getCapturedVar(),
- castValueFromUintptr(*this, FD->getType(),
- Args[Cnt]->getName(), ArgLVal,
- VarTy->isReferenceType()));
+ setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
+ Args[Cnt]->getName(), ArgLVal,
+ VarTy->isReferenceType()));
} else {
// If 'this' is captured, load it into CXXThisValue.
assert(I->capturesThis());
CXXThisValue =
EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
}
- ++Cnt, ++I;
+ ++Cnt;
+ ++I;
}
PGO.assignRegionCounters(GlobalDecl(CD), F);
@@ -256,12 +394,77 @@ void CodeGenFunction::EmitOMPAggregateAssign(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
+/// Check if the combiner is a call to a UDR combiner and, if so, return the
+/// UDR decl used for reduction.
+static const OMPDeclareReductionDecl *
+getReductionInit(const Expr *ReductionOp) {
+ if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
+ if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+ if (auto *DRE =
+ dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
+ if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
+ return DRD;
+ return nullptr;
+}
+
+static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
+ const OMPDeclareReductionDecl *DRD,
+ const Expr *InitOp,
+ Address Private, Address Original,
+ QualType Ty) {
+ if (DRD->getInitializer()) {
+ std::pair<llvm::Function *, llvm::Function *> Reduction =
+ CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
+ auto *CE = cast<CallExpr>(InitOp);
+ auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
+ const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+ const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
+ auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
+ auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
+ [=]() -> Address { return Private; });
+ PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
+ [=]() -> Address { return Original; });
+ (void)PrivateScope.Privatize();
+ RValue Func = RValue::get(Reduction.second);
+ CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
+ CGF.EmitIgnoredExpr(InitOp);
+ } else {
+ llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
+ auto *GV = new llvm::GlobalVariable(
+ CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, Init, ".init");
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
+ RValue InitRVal;
+ switch (CGF.getEvaluationKind(Ty)) {
+ case TEK_Scalar:
+ InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
+ break;
+ case TEK_Complex:
+ InitRVal =
+ RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
+ break;
+ case TEK_Aggregate:
+ InitRVal = RValue::getAggregate(LV.getAddress());
+ break;
+ }
+ OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
+ CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+}
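
These two helpers service user-defined reductions. The source construct looks like the following (illustrative); the initializer clause is what emitInitWithReductionInitializer evaluates, and when a UDR has no initializer the private copy is zero-filled instead.

#pragma omp declare reduction(merge : int : omp_out += omp_in) \
    initializer(omp_priv = 0)

int sum(const int *a, int n) {
  int s = 0;
#pragma omp parallel for reduction(merge : s)
  for (int i = 0; i < n; ++i)
    s += a[i];
  return s;
}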
+
/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
+/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
- QualType Type, const Expr *Init) {
+ QualType Type, const Expr *Init,
+ Address SrcAddr = Address::invalid()) {
+ auto *DRD = getReductionInit(Init);
// Perform element-by-element initialization.
QualType ElementTy;
@@ -270,7 +473,13 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
DestAddr =
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
+ if (DRD)
+ SrcAddr =
+ CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
+ llvm::Value *SrcBegin = nullptr;
+ if (DRD)
+ SrcBegin = SrcAddr.getPointer();
auto DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
@@ -287,6 +496,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
+ llvm::PHINode *SrcElementPHI = nullptr;
+ Address SrcElementCurrent = Address::invalid();
+ if (DRD) {
+ SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
+ "omp.arraycpy.srcElementPast");
+ SrcElementPHI->addIncoming(SrcBegin, EntryBB);
+ SrcElementCurrent =
+ Address(SrcElementPHI,
+ SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+ }
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
@@ -297,8 +516,19 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
// Emit copy.
{
CodeGenFunction::RunCleanupsScope InitScope(CGF);
- CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
- /*IsInitializer=*/false);
+ if (DRD && (DRD->getInitializer() || !Init)) {
+ emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
+ SrcElementCurrent, ElementTy);
+ } else
+ CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+
+ if (DRD) {
+ // Shift the address forward by one element.
+ auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
+ SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+ SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the address forward by one element.
@@ -356,24 +586,42 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return false;
+ bool FirstprivateIsLastprivate = false;
+ llvm::DenseSet<const VarDecl *> Lastprivates;
+ for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
+ for (const auto *D : C->varlists())
+ Lastprivates.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ }
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
+ CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto InitsRef = C->inits().begin();
for (auto IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
- if (EmittedAsFirstprivate.count(OrigVD) == 0) {
- EmittedAsFirstprivate.insert(OrigVD);
+ bool ThisFirstprivateIsLastprivate =
+ Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
+ auto *CapFD = CapturesInfo.lookup(OrigVD);
+ auto *FD = CapturedStmtInfo->lookup(OrigVD);
+ if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
+ !FD->getType()->isReferenceType()) {
+ EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
+ ++IRef;
+ ++InitsRef;
+ continue;
+ }
+ FirstprivateIsLastprivate =
+ FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
+ if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
bool IsRegistered;
- DeclRefExpr DRE(
- const_cast<VarDecl *>(OrigVD),
- /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
- OrigVD) != nullptr,
- (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
+ (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
Address OriginalAddr = EmitLValue(&DRE).getAddress();
- QualType Type = OrigVD->getType();
+ QualType Type = VD->getType();
if (Type->isArrayType()) {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
@@ -420,10 +668,11 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
// Silence the warning about unused variable.
(void)IsRegistered;
}
- ++IRef, ++InitsRef;
+ ++IRef;
+ ++InitsRef;
}
}
- return !EmittedAsFirstprivate.empty();
+ return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
void CodeGenFunction::EmitOMPPrivateClause(
@@ -470,7 +719,6 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
QualType Type = VD->getType();
if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
-
// Get the address of the master variable. If we are emitting code with
// TLS support, the address is passed from the master as field in the
// captured declaration.
@@ -525,15 +773,27 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
if (!HaveInsertPoint())
return false;
bool HasAtLeastOneLastprivate = false;
+ llvm::DenseSet<const VarDecl *> SIMDLCVs;
+ if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+ auto *LoopDirective = cast<OMPLoopDirective>(&D);
+ for (auto *C : LoopDirective->counters()) {
+ SIMDLCVs.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
+ }
+ }
llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
HasAtLeastOneLastprivate = true;
+ if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
+ break;
auto IRef = C->varlist_begin();
auto IDestRef = C->destination_exprs().begin();
for (auto *IInit : C->private_copies()) {
// Keep the address of the original variable for future update at the end
// of the loop.
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ // Taskloops do not require additional initialization; it is done by the
+ // runtime support library.
if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
@@ -547,27 +807,28 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
// Check if the variable is also a firstprivate: in this case IInit is
// not generated. Initialization of this variable will happen in codegen
// for 'firstprivate' clause.
- if (IInit) {
+ if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered =
- PrivateScope.addPrivate(OrigVD, [&]() -> Address {
- // Emit private VarDecl with copy init.
- EmitDecl(*VD);
- return GetAddrOfLocalVar(VD);
- });
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
+ // Emit private VarDecl with copy init.
+ EmitDecl(*VD);
+ return GetAddrOfLocalVar(VD);
+ });
assert(IsRegistered &&
"lastprivate var already registered as private");
(void)IsRegistered;
}
}
- ++IRef, ++IDestRef;
+ ++IRef;
+ ++IDestRef;
}
}
return HasAtLeastOneLastprivate;
}
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
- const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
+ const OMPExecutableDirective &D, bool NoFinals,
+ llvm::Value *IsLastIterCond) {
if (!HaveInsertPoint())
return;
// Emit the following code:
@@ -584,16 +845,20 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
EmitBlock(ThenBB);
}
- llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
+ llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
+ llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
auto IC = LoopDirective->counters().begin();
for (auto F : LoopDirective->finals()) {
- auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl();
- LoopCountersAndUpdates[D] = F;
+ auto *D =
+ cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
+ if (NoFinals)
+ AlreadyEmittedVars.insert(D);
+ else
+ LoopCountersAndUpdates[D] = F;
++IC;
}
}
- llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
auto IRef = C->varlist_begin();
auto ISrcRef = C->source_exprs().begin();
@@ -606,8 +871,8 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
// If the lastprivate variable is a loop control variable of a loop-based
// directive, update its value before copying it back to the original
// variable.
- if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
- EmitIgnoredExpr(UpExpr);
+ if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
+ EmitIgnoredExpr(FinalExpr);
auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
// Get the address of the original variable.
@@ -624,11 +889,61 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
++ISrcRef;
++IDestRef;
}
+ if (auto *PostUpdate = C->getPostUpdateExpr())
+ EmitIgnoredExpr(PostUpdate);
}
if (IsLastIterCond)
EmitBlock(DoneBB, /*IsFinished=*/true);
}
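A sketch of why EmitOMPLastprivateClauseFinal consults the loop counters' finals() before the copy-out (illustrative, not from the patch): a lastprivate loop control variable must end up with the value it would have after sequential execution.

```cpp
// Compile with: clang++ -fopenmp lastpriv.cpp
#include <cstdio>

int main() {
  int i;
  #pragma omp parallel for lastprivate(i)
  for (i = 0; i < 100; ++i) { /* body */ }
  // The final expression updates the private counter to its post-loop
  // value before copy-out, so i == 100 here, as in sequential code.
  std::printf("i = %d\n", i);
  return 0;
}
```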
+static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+ LValue BaseLV, llvm::Value *Addr) {
+ Address Tmp = Address::invalid();
+ Address TopTmp = Address::invalid();
+ Address MostTopTmp = Address::invalid();
+ BaseTy = BaseTy.getNonReferenceType();
+ while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+ !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+ Tmp = CGF.CreateMemTemp(BaseTy);
+ if (TopTmp.isValid())
+ CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
+ else
+ MostTopTmp = Tmp;
+ TopTmp = Tmp;
+ BaseTy = BaseTy->getPointeeType();
+ }
+ llvm::Type *Ty = BaseLV.getPointer()->getType();
+ if (Tmp.isValid())
+ Ty = Tmp.getElementType();
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
+ if (Tmp.isValid()) {
+ CGF.Builder.CreateStore(Addr, Tmp);
+ return MostTopTmp;
+ }
+ return Address(Addr, BaseLV.getAlignment());
+}
+
+static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+ LValue BaseLV) {
+ BaseTy = BaseTy.getNonReferenceType();
+ while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+ !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+ if (auto *PtrTy = BaseTy->getAs<PointerType>())
+ BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
+ else {
+ BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
+ BaseTy->castAs<ReferenceType>());
+ }
+ BaseTy = BaseTy->getPointeeType();
+ }
+ return CGF.MakeAddrLValue(
+ Address(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
+ BaseLV.getAlignment()),
+ BaseLV.getType(), BaseLV.getAlignmentSource());
+}
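The two helpers above walk pointer/reference chains so that a reduction can be declared on an array section whose base is reached through a pointer (or reference) rather than the array itself. A minimal OpenMP 4.5 pattern they support, with illustrative names:

```cpp
// Compile with: clang++ -fopenmp redsec.cpp
#include <cstdio>

int main() {
  int data[8] = {0};
  int *p = data; // the section's base is a pointer, not the array
  #pragma omp parallel for reduction(+ : p[0:8])
  for (int i = 0; i < 800; ++i)
    p[i % 8] += 1;
  std::printf("p[0] = %d\n", p[0]); // each element ends up at 100
  return 0;
}
```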
+
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
@@ -638,10 +953,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
auto IPriv = C->privates().begin();
+ auto IRed = C->reduction_ops().begin();
for (auto IRef : C->varlists()) {
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
+ auto *DRD = getReductionInit(*IRed);
if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
auto *Base = OASE->getBase()->IgnoreParenImpCasts();
while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
@@ -654,21 +971,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
auto OASELValueUB =
EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
auto OriginalBaseLValue = EmitLValue(DE);
- auto BaseLValue = OriginalBaseLValue;
- auto *Zero = Builder.getInt64(/*C=*/0);
- llvm::SmallVector<llvm::Value *, 4> Indexes;
- Indexes.push_back(Zero);
- auto *ItemTy =
- OASELValueLB.getPointer()->getType()->getPointerElementType();
- auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
- while (Ty != ItemTy) {
- Indexes.push_back(Zero);
- Ty = Ty->getPointerElementType();
- }
- BaseLValue = MakeAddrLValue(
- Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
- OASELValueLB.getAlignment()),
- OASELValueLB.getType(), OASELValueLB.getAlignmentSource());
+ LValue BaseLValue =
+ loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
+ OriginalBaseLValue);
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
@@ -676,8 +981,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
});
// Emit reduction copy.
bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB,
- OriginalBaseLValue]() -> Address {
+ OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
+ OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
// captured region.
@@ -695,15 +1000,17 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
auto Emission = EmitAutoVarAlloca(*PrivateVD);
auto Addr = Emission.getAllocatedAddress();
auto *Init = PrivateVD->getInit();
- EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init);
+ EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
+ DRD ? *IRed : Init,
+ OASELValueLB.getAddress());
EmitAutoVarCleanups(Emission);
// Emit private VarDecl with reduction init.
auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
OASELValueLB.getPointer());
auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
- Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- Ptr, OriginalBaseLValue.getPointer()->getType());
- return Address(Ptr, OriginalBaseLValue.getAlignment());
+ return castToBase(*this, OrigVD->getType(),
+ OASELValueLB.getType(), OriginalBaseLValue,
+ Ptr);
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
@@ -719,21 +1026,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
auto *OrigVD = cast<VarDecl>(DE->getDecl());
auto ASELValue = EmitLValue(ASE);
auto OriginalBaseLValue = EmitLValue(DE);
- auto BaseLValue = OriginalBaseLValue;
- auto *Zero = Builder.getInt64(/*C=*/0);
- llvm::SmallVector<llvm::Value *, 4> Indexes;
- Indexes.push_back(Zero);
- auto *ItemTy =
- ASELValue.getPointer()->getType()->getPointerElementType();
- auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType();
- while (Ty != ItemTy) {
- Indexes.push_back(Zero);
- Ty = Ty->getPointerElementType();
- }
- BaseLValue = MakeAddrLValue(
- Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes),
- ASELValue.getAlignment()),
- ASELValue.getType(), ASELValue.getAlignmentSource());
+ LValue BaseLValue = loadToBegin(
+ *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
@@ -741,49 +1035,114 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
});
// Emit reduction copy.
bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, PrivateVD, BaseLValue, ASELValue,
- OriginalBaseLValue]() -> Address {
+ OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
+ OriginalBaseLValue, DRD, IRed]() -> Address {
// Emit private VarDecl with reduction init.
- EmitDecl(*PrivateVD);
- auto Addr = GetAddrOfLocalVar(PrivateVD);
+ AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
+ auto Addr = Emission.getAllocatedAddress();
+ if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
+ emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
+ ASELValue.getAddress(),
+ ASELValue.getType());
+ } else
+ EmitAutoVarInit(Emission);
+ EmitAutoVarCleanups(Emission);
auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
ASELValue.getPointer());
auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
- Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- Ptr, OriginalBaseLValue.getPointer()->getType());
- return Address(Ptr, OriginalBaseLValue.getAlignment());
+ return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
+ OriginalBaseLValue, Ptr);
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
+ return Builder.CreateElementBitCast(
+ GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
+ "rhs.begin");
});
} else {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address {
+ QualType Type = PrivateVD->getType();
+ if (getContext().getAsArrayType(Type)) {
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
IRef->getType(), VK_LValue, IRef->getExprLoc());
- return EmitLValue(&DRE).getAddress();
- });
- // Emit reduction copy.
- bool IsRegistered =
- PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address {
- // Emit private VarDecl with reduction init.
- EmitDecl(*PrivateVD);
- return GetAddrOfLocalVar(PrivateVD);
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
- });
+ Address OriginalAddr = EmitLValue(&DRE).getAddress();
+ PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
+ LHSVD]() -> Address {
+ OriginalAddr = Builder.CreateElementBitCast(
+ OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
+ return OriginalAddr;
+ });
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
+ if (Type->isVariablyModifiedType()) {
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
+ *this, cast<OpaqueValueExpr>(
+ getContext()
+ .getAsVariableArrayType(PrivateVD->getType())
+ ->getSizeExpr()),
+ RValue::get(
+ getTypeSize(OrigVD->getType().getNonReferenceType())));
+ EmitVariablyModifiedType(Type);
+ }
+ auto Emission = EmitAutoVarAlloca(*PrivateVD);
+ auto Addr = Emission.getAllocatedAddress();
+ auto *Init = PrivateVD->getInit();
+ EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
+ DRD ? *IRed : Init, OriginalAddr);
+ EmitAutoVarCleanups(Emission);
+ return Emission.getAllocatedAddress();
+ });
+ assert(IsRegistered && "private var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
+ return Builder.CreateElementBitCast(
+ GetAddrOfLocalVar(PrivateVD),
+ ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
+ });
+ } else {
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ Address OriginalAddr = Address::invalid();
+ PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
+ &OriginalAddr]() -> Address {
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ IRef->getType(), VK_LValue, IRef->getExprLoc());
+ OriginalAddr = EmitLValue(&DRE).getAddress();
+ return OriginalAddr;
+ });
+ // Emit reduction copy.
+ bool IsRegistered = PrivateScope.addPrivate(
+ OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
+ // Emit private VarDecl with reduction init.
+ AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
+ auto Addr = Emission.getAllocatedAddress();
+ if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
+ emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
+ OriginalAddr,
+ PrivateVD->getType());
+ } else
+ EmitAutoVarInit(Emission);
+ EmitAutoVarCleanups(Emission);
+ return Addr;
+ });
+ assert(IsRegistered && "private var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
+ return GetAddrOfLocalVar(PrivateVD);
+ });
+ }
}
- ++ILHS, ++IRHS, ++IPriv;
+ ++ILHS;
+ ++IRHS;
+ ++IPriv;
+ ++IRed;
}
}
}
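The DRD/IRed plumbing threaded through this function supports user-defined reductions: getReductionInit() retrieves the initializer of a matching `#pragma omp declare reduction`, and the private copies are then seeded from it instead of from a compiler-generated default. An illustrative (not patch-provided) use:

```cpp
// Compile with: clang++ -fopenmp udr.cpp
#include <climits>
#include <cstdio>

// The initializer clause below is what getReductionInit() digs out;
// omp_priv names the private copy, omp_in/omp_out the combiner operands.
#pragma omp declare reduction(imax : int : omp_out = \
    omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = INT_MIN)

int main() {
  int best = 0;
  #pragma omp parallel for reduction(imax : best)
  for (int i = 0; i < 1000; ++i)
    best = i > best ? i : best;
  std::printf("best = %d\n", best); // 999
  return 0;
}
```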
@@ -816,15 +1175,39 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
}
}
+static void emitPostUpdateForReductionClause(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ llvm::BasicBlock *DoneBB = nullptr;
+ for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+ if (auto *PostUpdate = C->getPostUpdateExpr()) {
+ if (!DoneBB) {
+ if (auto *Cond = CondGen(CGF)) {
+ // If the first post-update expression is found, emit the conditional
+ // block if it was requested.
+ auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
+ DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
+ CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
+ CGF.EmitBlock(ThenBB);
+ }
+ }
+ CGF.EmitIgnoredExpr(PostUpdate);
+ }
+ }
+ if (DoneBB)
+ CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+}
+
static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- llvm::SmallVector<llvm::Value *, 16> CapturedVars;
- CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
- S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ auto OutlinedFn =
+     CGF.CGM.getOpenMPRuntime().emitParallelOrTeamsOutlinedFunction(
+         S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
@@ -833,7 +1216,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
CGF, NumThreads, NumThreadsClause->getLocStart());
}
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
- CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+ CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
CGF.CGM.getOpenMPRuntime().emitProcBindClause(
CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
}
@@ -845,22 +1228,24 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
break;
}
}
+
+ OMPLexicalScope Scope(CGF, S);
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
CapturedVars, IfCond);
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
// Emit parallel region as a standalone region.
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPPrivateScope PrivateScope(CGF);
bool Copyins = CGF.EmitOMPCopyinClause(S);
- bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- if (Copyins || Firstprivates) {
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ if (Copyins) {
// Emit implicit barrier to synchronize threads and avoid data races on
- // initialization of firstprivate variables or propagation master's thread
- // values of threadprivate variables to local instances of that variables
- // of all other implicit threads.
+ // propagation of the master thread's values of threadprivate variables to
+ // the local instances of those variables in all other implicit threads.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@@ -872,6 +1257,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitOMPReductionClauseFinal(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
+ emitPostUpdateForReductionClause(
+ *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
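emitCommonOMPParallelDirective above lowers the num_threads, proc_bind and if clauses before emitting the parallel call; a small illustrative source pattern (names and values are arbitrary):

```cpp
// Compile with: clang++ -fopenmp par.cpp
#include <cstdio>
#include <omp.h>

int main() {
  bool parallelize = true;
  // The three clauses map onto the three clause emissions above.
  #pragma omp parallel num_threads(4) proc_bind(close) if(parallelize)
  std::printf("thread %d of %d\n", omp_get_thread_num(),
              omp_get_num_threads());
  return 0;
}
```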
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
@@ -883,9 +1270,8 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
}
// Update the linear variables.
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
- for (auto U : C->updates()) {
+ for (auto *U : C->updates())
EmitIgnoredExpr(U);
- }
}
// On a continue in the body, jump to the end.
@@ -908,7 +1294,7 @@ void CodeGenFunction::EmitOMPInnerLoop(
// Start the loop with a block that tests the condition.
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
- LoopStack.push(CondBlock);
+ LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
@@ -950,19 +1336,21 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
return;
// Emit inits for the linear variables.
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
- for (auto Init : C->inits()) {
+ for (auto *Init : C->inits()) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
- auto *OrigVD = cast<VarDecl>(
- cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl());
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
- VD->getInit()->getType(), VK_LValue,
- VD->getInit()->getExprLoc());
- AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
- EmitExprAsInit(&DRE, VD,
- MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
- /*capturedByInit=*/false);
- EmitAutoVarCleanups(Emission);
+ if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
+ AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
+ auto *OrigVD = cast<VarDecl>(Ref->getDecl());
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ VD->getInit()->getType(), VK_LValue,
+ VD->getInit()->getExprLoc());
+ EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
+ VD->getType()),
+ /*capturedByInit=*/false);
+ EmitAutoVarCleanups(Emission);
+ } else
+ EmitVarDecl(*VD);
}
// Emit the linear steps for the linear clauses.
// If a step is not constant, it is pre-calculated before the loop.
@@ -975,27 +1363,42 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
}
}
-static void emitLinearClauseFinal(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
- if (!CGF.HaveInsertPoint())
+void CodeGenFunction::EmitOMPLinearClauseFinal(
+ const OMPLoopDirective &D,
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
+ if (!HaveInsertPoint())
return;
+ llvm::BasicBlock *DoneBB = nullptr;
// Emit the final values of the linear variables.
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
auto IC = C->varlist_begin();
- for (auto F : C->finals()) {
+ for (auto *F : C->finals()) {
+ if (!DoneBB) {
+ if (auto *Cond = CondGen(*this)) {
+ // If the first post-update expression is found, emit the conditional
+ // block if it was requested.
+ auto *ThenBB = createBasicBlock(".omp.linear.pu");
+ DoneBB = createBasicBlock(".omp.linear.pu.done");
+ Builder.CreateCondBr(Cond, ThenBB, DoneBB);
+ EmitBlock(ThenBB);
+ }
+ }
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
- Address OrigAddr = CGF.EmitLValue(&DRE).getAddress();
- CodeGenFunction::OMPPrivateScope VarScope(CGF);
- VarScope.addPrivate(OrigVD,
- [OrigAddr]() -> Address { return OrigAddr; });
+ Address OrigAddr = EmitLValue(&DRE).getAddress();
+ CodeGenFunction::OMPPrivateScope VarScope(*this);
+ VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
(void)VarScope.Privatize();
- CGF.EmitIgnoredExpr(F);
+ EmitIgnoredExpr(F);
++IC;
}
+ if (auto *PostUpdate = C->getPostUpdateExpr())
+ EmitIgnoredExpr(PostUpdate);
}
+ if (DoneBB)
+ EmitBlock(DoneBB, /*IsFinished=*/true);
}
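EmitOMPLinearClauseFinal writes each linear variable's final value back to its original storage, optionally under a "did the last iteration run here" guard supplied by CondGen. An illustrative source pattern:

```cpp
// Compile with: clang++ -fopenmp linear.cpp
#include <cstdio>

int main() {
  int j = 0;
  #pragma omp simd linear(j : 2) // j advances by 2 every iteration
  for (int i = 0; i < 100; ++i)
    j += 2;
  // The final expression emitted above stores the post-loop value,
  // so j == 200 after the construct.
  std::printf("j = %d\n", j);
  return 0;
}
```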
static void emitAlignedClause(CodeGenFunction &CGF,
@@ -1031,25 +1434,34 @@ static void emitAlignedClause(CodeGenFunction &CGF,
}
}
-static void emitPrivateLoopCounters(CodeGenFunction &CGF,
- CodeGenFunction::OMPPrivateScope &LoopScope,
- ArrayRef<Expr *> Counters,
- ArrayRef<Expr *> PrivateCounters) {
- if (!CGF.HaveInsertPoint())
+void CodeGenFunction::EmitOMPPrivateLoopCounters(
+ const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
+ if (!HaveInsertPoint())
return;
- auto I = PrivateCounters.begin();
- for (auto *E : Counters) {
+ auto I = S.private_counters().begin();
+ for (auto *E : S.counters()) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
- Address Addr = Address::invalid();
- (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
+ (void)LoopScope.addPrivate(VD, [&]() -> Address {
// Emit var without initialization.
- auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD);
- CGF.EmitAutoVarCleanups(VarEmission);
- Addr = VarEmission.getAllocatedAddress();
- return Addr;
+ if (!LocalDeclMap.count(PrivateVD)) {
+ auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
+ EmitAutoVarCleanups(VarEmission);
+ }
+ DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
+ /*RefersToEnclosingVariableOrCapture=*/false,
+ (*I)->getType(), VK_LValue, (*I)->getExprLoc());
+ return EmitLValue(&DRE).getAddress();
});
- (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; });
+ if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
+ VD->hasGlobalStorage()) {
+ (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
+ DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
+ E->getType(), VK_LValue, E->getExprLoc());
+ return EmitLValue(&DRE).getAddress();
+ });
+ }
++I;
}
}
@@ -1061,8 +1473,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
return;
{
CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
- emitPrivateLoopCounters(CGF, PreCondScope, S.counters(),
- S.private_counters());
+ CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
(void)PreCondScope.Privatize();
// Get initial values of real counters.
for (auto I : S.inits()) {
@@ -1073,25 +1484,35 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}
-static void
-emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
- CodeGenFunction::OMPPrivateScope &PrivateScope) {
- if (!CGF.HaveInsertPoint())
+void CodeGenFunction::EmitOMPLinearClause(
+ const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
+ if (!HaveInsertPoint())
return;
+ llvm::DenseSet<const VarDecl *> SIMDLCVs;
+ if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+ auto *LoopDirective = cast<OMPLoopDirective>(&D);
+ for (auto *C : LoopDirective->counters()) {
+ SIMDLCVs.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
+ }
+ }
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
auto CurPrivate = C->privates().begin();
for (auto *E : C->varlists()) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
- bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
- // Emit private VarDecl with copy init.
- CGF.EmitVarDecl(*PrivateVD);
- return CGF.GetAddrOfLocalVar(PrivateVD);
- });
- assert(IsRegistered && "linear var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
+ if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
+ bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
+ // Emit private VarDecl with copy init.
+ EmitVarDecl(*PrivateVD);
+ return GetAddrOfLocalVar(PrivateVD);
+ });
+ assert(IsRegistered && "linear var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ } else
+ EmitVarDecl(*PrivateVD);
++CurPrivate;
}
}
@@ -1132,17 +1553,39 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
-void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
+void CodeGenFunction::EmitOMPSimdFinal(
+ const OMPLoopDirective &D,
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
if (!HaveInsertPoint())
return;
+ llvm::BasicBlock *DoneBB = nullptr;
auto IC = D.counters().begin();
+ auto IPC = D.private_counters().begin();
for (auto F : D.finals()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
- if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) {
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
- (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
- Address OrigAddr = EmitLValue(&DRE).getAddress();
+ auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
+ auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
+ if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
+ OrigVD->hasGlobalStorage() || CED) {
+ if (!DoneBB) {
+ if (auto *Cond = CondGen(*this)) {
+ // If the first post-update expression is found, emit the conditional
+ // block if it was requested.
+ auto *ThenBB = createBasicBlock(".omp.final.then");
+ DoneBB = createBasicBlock(".omp.final.done");
+ Builder.CreateCondBr(Cond, ThenBB, DoneBB);
+ EmitBlock(ThenBB);
+ }
+ }
+ Address OrigAddr = Address::invalid();
+ if (CED)
+ OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
+ else {
+ DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
+ /*RefersToEnclosingVariableOrCapture=*/false,
+ (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
+ OrigAddr = EmitLValue(&DRE).getAddress();
+ }
OMPPrivateScope VarScope(*this);
VarScope.addPrivate(OrigVD,
[OrigAddr]() -> Address { return OrigAddr; });
@@ -1150,12 +1593,15 @@ void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) {
EmitIgnoredExpr(F);
}
++IC;
+ ++IPC;
}
- emitLinearClauseFinal(*this, D);
+ if (DoneBB)
+ EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
// if (PreCond) {
// for (IV in 0..LastIteration) BODY;
// <Final counter/linear vars updates>;
@@ -1198,15 +1644,14 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
emitAlignedClause(CGF, S);
CGF.EmitOMPLinearClauseInit(S);
- bool HasLastprivateClause;
{
OMPPrivateScope LoopScope(CGF);
- emitPrivateLoopCounters(CGF, LoopScope, S.counters(),
- S.private_counters());
- emitPrivateLinearVars(CGF, S, LoopScope);
+ CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+ CGF.EmitOMPLinearClause(S, LoopScope);
CGF.EmitOMPPrivateClause(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
- HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
+ bool HasLastprivateClause =
+ CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
S.getInc(),
@@ -1215,104 +1660,42 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGF.EmitStopPoint(&S);
},
[](CodeGenFunction &) {});
+ CGF.EmitOMPSimdFinal(
+ S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
// Emit final copy of the lastprivate variables at the end of loops.
- if (HasLastprivateClause) {
- CGF.EmitOMPLastprivateClauseFinal(S);
- }
+ if (HasLastprivateClause)
+ CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
CGF.EmitOMPReductionClauseFinal(S);
+ emitPostUpdateForReductionClause(
+ CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
- CGF.EmitOMPSimdFinal(S);
+ CGF.EmitOMPLinearClauseFinal(
+ S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
// Emit: if (PreCond) - end.
if (ContBlock) {
CGF.EmitBranch(ContBlock);
CGF.EmitBlock(ContBlock, true);
}
};
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
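The simd lowering above (emitAlignedClause feeding EmitOMPSimdInit, plus simdlen/safelen handling) maps the simd clauses onto loop metadata and alignment assumptions. A sketch of the source side, with illustrative names:

```cpp
// Compile with: clang++ -fopenmp simd.cpp
#include <cstdio>

int main() {
  alignas(32) float a[1024], b[1024];
  for (int i = 0; i < 1024; ++i) { a[i] = 1.0f; b[i] = 2.0f; }
  // safelen bounds the legal vector length; aligned lets the vectorizer
  // assume 32-byte alignment of a and b.
  #pragma omp simd safelen(8) aligned(a, b : 32)
  for (int i = 0; i < 1024; ++i)
    a[i] += b[i];
  std::printf("a[0] = %f\n", a[0]); // 3.0
  return 0;
}
```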
-void CodeGenFunction::EmitOMPForOuterLoop(
- OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic,
+void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
auto &RT = CGM.getOpenMPRuntime();
- // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
- const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
-
- assert((Ordered ||
- !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
- "static non-chunked schedule does not need outer loop");
-
- // Emit outer loop.
- //
- // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
- // When schedule(dynamic,chunk_size) is specified, the iterations are
- // distributed to threads in the team in chunks as the threads request them.
- // Each thread executes a chunk of iterations, then requests another chunk,
- // until no chunks remain to be distributed. Each chunk contains chunk_size
- // iterations, except for the last chunk to be distributed, which may have
- // fewer iterations. When no chunk_size is specified, it defaults to 1.
- //
- // When schedule(guided,chunk_size) is specified, the iterations are assigned
- // to threads in the team in chunks as the executing threads request them.
- // Each thread executes a chunk of iterations, then requests another chunk,
- // until no chunks remain to be assigned. For a chunk_size of 1, the size of
- // each chunk is proportional to the number of unassigned iterations divided
- // by the number of threads in the team, decreasing to 1. For a chunk_size
- // with value k (greater than 1), the size of each chunk is determined in the
- // same way, with the restriction that the chunks do not contain fewer than k
- // iterations (except for the last chunk to be assigned, which may have fewer
- // than k iterations).
- //
- // When schedule(auto) is specified, the decision regarding scheduling is
- // delegated to the compiler and/or runtime system. The programmer gives the
- // implementation the freedom to choose any possible mapping of iterations to
- // threads in the team.
- //
- // When schedule(runtime) is specified, the decision regarding scheduling is
- // deferred until run time, and the schedule and chunk size are taken from the
- // run-sched-var ICV. If the ICV is set to auto, the schedule is
- // implementation defined
- //
- // while(__kmpc_dispatch_next(&LB, &UB)) {
- // idx = LB;
- // while (idx <= UB) { BODY; ++idx;
- // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
- // } // inner loop
- // }
- //
- // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
- // When schedule(static, chunk_size) is specified, iterations are divided into
- // chunks of size chunk_size, and the chunks are assigned to the threads in
- // the team in a round-robin fashion in the order of the thread number.
- //
- // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
- // while (idx <= UB) { BODY; ++idx; } // inner loop
- // LB = LB + ST;
- // UB = UB + ST;
- // }
- //
-
const Expr *IVExpr = S.getIterationVariable();
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- if (DynamicOrOrdered) {
- llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
- RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind,
- IVSize, IVSigned, Ordered, UBVal, Chunk);
- } else {
- RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
- IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk);
- }
-
auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
// Start the loop with a block that tests the condition.
auto CondBlock = createBasicBlock("omp.dispatch.cond");
EmitBlock(CondBlock);
- LoopStack.push(CondBlock);
+ LoopStack.push(CondBlock, Builder.getCurrentDebugLocation());
llvm::Value *BoolCondVal = nullptr;
if (!DynamicOrOrdered) {
@@ -1323,8 +1706,8 @@ void CodeGenFunction::EmitOMPForOuterLoop(
// IV < UB
BoolCondVal = EvaluateExprAsBool(S.getCond());
} else {
- BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
- IL, LB, UB, ST);
+ BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
+ LB, UB, ST);
}
// If there are any cleanups between here and the loop-exit scope,
@@ -1384,8 +1767,167 @@ void CodeGenFunction::EmitOMPForOuterLoop(
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
- if (!DynamicOrOrdered)
- RT.emitForStaticFinish(*this, S.getLocEnd());
+ auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
+ if (!DynamicOrOrdered)
+ CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
+ };
+ OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
+}
+
+void CodeGenFunction::EmitOMPForOuterLoop(
+ const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
+ const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ auto &RT = CGM.getOpenMPRuntime();
+
+ // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
+ const bool DynamicOrOrdered =
+ Ordered || RT.isDynamic(ScheduleKind.Schedule);
+
+ assert((Ordered ||
+ !RT.isStaticNonchunked(ScheduleKind.Schedule,
+ /*Chunked=*/Chunk != nullptr)) &&
+ "static non-chunked schedule does not need outer loop");
+
+ // Emit outer loop.
+ //
+ // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+ // When schedule(dynamic,chunk_size) is specified, the iterations are
+ // distributed to threads in the team in chunks as the threads request them.
+ // Each thread executes a chunk of iterations, then requests another chunk,
+ // until no chunks remain to be distributed. Each chunk contains chunk_size
+ // iterations, except for the last chunk to be distributed, which may have
+ // fewer iterations. When no chunk_size is specified, it defaults to 1.
+ //
+ // When schedule(guided,chunk_size) is specified, the iterations are assigned
+ // to threads in the team in chunks as the executing threads request them.
+ // Each thread executes a chunk of iterations, then requests another chunk,
+ // until no chunks remain to be assigned. For a chunk_size of 1, the size of
+ // each chunk is proportional to the number of unassigned iterations divided
+ // by the number of threads in the team, decreasing to 1. For a chunk_size
+ // with value k (greater than 1), the size of each chunk is determined in the
+ // same way, with the restriction that the chunks do not contain fewer than k
+ // iterations (except for the last chunk to be assigned, which may have fewer
+ // than k iterations).
+ //
+ // When schedule(auto) is specified, the decision regarding scheduling is
+ // delegated to the compiler and/or runtime system. The programmer gives the
+ // implementation the freedom to choose any possible mapping of iterations to
+ // threads in the team.
+ //
+ // When schedule(runtime) is specified, the decision regarding scheduling is
+ // deferred until run time, and the schedule and chunk size are taken from the
+ // run-sched-var ICV. If the ICV is set to auto, the schedule is
+ // implementation defined
+ //
+ // while(__kmpc_dispatch_next(&LB, &UB)) {
+ // idx = LB;
+ // while (idx <= UB) { BODY; ++idx;
+ // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
+ // } // inner loop
+ // }
+ //
+ // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+ // When schedule(static, chunk_size) is specified, iterations are divided into
+ // chunks of size chunk_size, and the chunks are assigned to the threads in
+ // the team in a round-robin fashion in the order of the thread number.
+ //
+ // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
+ // while (idx <= UB) { BODY; ++idx; } // inner loop
+ // LB = LB + ST;
+ // UB = UB + ST;
+ // }
+ //
+
+ const Expr *IVExpr = S.getIterationVariable();
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+
+ if (DynamicOrOrdered) {
+ llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
+ RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
+ IVSigned, Ordered, UBVal, Chunk);
+ } else {
+ RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
+ Ordered, IL, LB, UB, ST, Chunk);
+ }
+
+ EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
+ ST, IL, Chunk);
+}
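The dispatch loop sketched in the comment above corresponds to the dynamic-family schedules; at the source level (illustrative, not from the patch):

```cpp
// Compile with: clang++ -fopenmp sched.cpp
#include <cstdio>
#include <omp.h>

int main() {
  // Each thread repeatedly requests chunks of 4 iterations from the
  // runtime (the __kmpc_dispatch_next loop in the pseudo-code above).
  #pragma omp parallel for schedule(dynamic, 4)
  for (int i = 0; i < 64; ++i)
    std::printf("iteration %d on thread %d\n", i, omp_get_thread_num());
  return 0;
}
```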
+
+void CodeGenFunction::EmitOMPDistributeOuterLoop(
+ OpenMPDistScheduleClauseKind ScheduleKind,
+ const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+
+ auto &RT = CGM.getOpenMPRuntime();
+
+ // Emit outer loop.
+ // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
+ // dynamic.
+ //
+
+ const Expr *IVExpr = S.getIterationVariable();
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+
+ RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
+ IVSize, IVSigned, /* Ordered = */ false,
+ IL, LB, UB, ST, Chunk);
+
+ EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
+ S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
+}
+
+void CodeGenFunction::EmitOMPDistributeParallelForDirective(
+ const OMPDistributeParallelForDirective &S) {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this, OMPD_distribute_parallel_for,
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
+ /*HasCancel=*/false);
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ });
+}
+
+void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
+ const OMPDistributeParallelForSimdDirective &S) {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this, OMPD_distribute_parallel_for_simd,
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ });
+}
+
+void CodeGenFunction::EmitOMPDistributeSimdDirective(
+ const OMPDistributeSimdDirective &S) {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this, OMPD_distribute_simd,
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ });
+}
+
+void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
+ const OMPTargetParallelForSimdDirective &S) {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this, OMPD_target_parallel_for_simd,
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ CGF.EmitStmt(
+ cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ });
}
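The four handlers above are still placeholders that emit the captured statement inline; the source constructs they correspond to look like the following (illustrative only; actual offloading depends on how the toolchain is configured and may fall back to the host):

```cpp
// Compile with: clang++ -fopenmp combined.cpp
#include <cstdio>

int main() {
  int a[128];
  // Combined construct: distribute the iteration space across teams,
  // then run a parallel for (optionally simd) within each team.
  #pragma omp target teams distribute parallel for
  for (int i = 0; i < 128; ++i)
    a[i] = i;
  std::printf("a[64] = %d\n", a[64]); // 64
  return 0;
}
```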
/// \brief Emit a helper variable and return corresponding lvalue.
@@ -1408,42 +1950,6 @@ namespace {
};
} // namespace
-static std::pair<llvm::Value * /*Chunk*/, ScheduleKindModifiersTy>
-emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
- bool OuterRegion) {
- // Detect the loop schedule kind and chunk.
- auto ScheduleKind = OMPC_SCHEDULE_unknown;
- OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown;
- OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
- llvm::Value *Chunk = nullptr;
- if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
- ScheduleKind = C->getScheduleKind();
- M1 = C->getFirstScheduleModifier();
- M2 = C->getSecondScheduleModifier();
- if (const auto *Ch = C->getChunkSize()) {
- if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
- if (OuterRegion) {
- const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
- CGF.EmitVarDecl(*ImpVar);
- CGF.EmitStoreThroughLValue(
- CGF.EmitAnyExpr(Ch),
- CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
- ImpVar->getType()));
- } else {
- Ch = ImpRef;
- }
- }
- if (!C->getHelperChunkSize() || !OuterRegion) {
- Chunk = CGF.EmitScalarExpr(Ch);
- Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
- S.getIterationVariable()->getType(),
- S.getLocStart());
- }
- }
- }
- return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2));
-}
-
bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
@@ -1464,6 +1970,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
bool HasLastprivateClause;
// Check pre-condition.
{
+ OMPLoopScope PreInitScope(*this, S);
// Skip the entire loop if we don't meet the precondition.
// If the condition constant folds and can be elided, avoid emitting the
// whole loop.
@@ -1481,24 +1988,34 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
incrementProfileCounter(&S);
}
+ bool Ordered = false;
+ if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
+ if (OrderedClause->getNumForLoops())
+ RT.emitDoacrossInit(*this, S);
+ else
+ Ordered = true;
+ }
+
+ llvm::DenseSet<const Expr *> EmittedFinals;
emitAlignedClause(*this, S);
EmitOMPLinearClauseInit(S);
+ // Emit helper vars inits.
+ LValue LB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+ LValue ST =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
+ LValue IL =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
+
// Emit 'then' code.
{
- // Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
- LValue ST =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
- LValue IL =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
-
OMPPrivateScope LoopScope(*this);
if (EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
- // initialization of firstprivate variables.
+ // initialization of firstprivate variables and post-update of
+ // lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@@ -1506,28 +2023,31 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
EmitOMPPrivateClause(S, LoopScope);
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPReductionClauseInit(S, LoopScope);
- emitPrivateLoopCounters(*this, LoopScope, S.counters(),
- S.private_counters());
- emitPrivateLinearVars(*this, S, LoopScope);
+ EmitOMPPrivateLoopCounters(S, LoopScope);
+ EmitOMPLinearClause(S, LoopScope);
(void)LoopScope.Privatize();
// Detect the loop schedule kind and chunk.
- llvm::Value *Chunk;
- OpenMPScheduleClauseKind ScheduleKind;
- auto ScheduleInfo =
- emitScheduleClause(*this, S, /*OuterRegion=*/false);
- Chunk = ScheduleInfo.first;
- ScheduleKind = ScheduleInfo.second.Kind;
- const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1;
- const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2;
+ llvm::Value *Chunk = nullptr;
+ OpenMPScheduleTy ScheduleKind;
+ if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
+ ScheduleKind.Schedule = C->getScheduleKind();
+ ScheduleKind.M1 = C->getFirstScheduleModifier();
+ ScheduleKind.M2 = C->getSecondScheduleModifier();
+ if (const auto *Ch = C->getChunkSize()) {
+ Chunk = EmitScalarExpr(Ch);
+ Chunk = EmitScalarConversion(Chunk, Ch->getType(),
+ S.getIterationVariable()->getType(),
+ S.getLocStart());
+ }
+ }
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr;
// OpenMP 4.5, 2.7.1 Loop Construct, Description.
// If the static schedule kind is specified or if the ordered clause is
// specified, and if no monotonic modifier is specified, the effect will
// be as if the monotonic modifier was specified.
- if (RT.isStaticNonchunked(ScheduleKind,
+ if (RT.isStaticNonchunked(ScheduleKind.Schedule,
/* Chunked */ Chunk != nullptr) &&
!Ordered) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
@@ -1557,28 +2077,46 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
[](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
- RT.emitForStaticFinish(*this, S.getLocStart());
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
+ };
+ OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
} else {
- const bool IsMonotonic = Ordered ||
- ScheduleKind == OMPC_SCHEDULE_static ||
- ScheduleKind == OMPC_SCHEDULE_unknown ||
- M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
- M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
+ const bool IsMonotonic =
+ Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
+ ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
+ ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
+ ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
LB.getAddress(), UB.getAddress(), ST.getAddress(),
IL.getAddress(), Chunk);
}
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ EmitOMPSimdFinal(S,
+ [&](CodeGenFunction &CGF) -> llvm::Value * {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ });
+ }
EmitOMPReductionClauseFinal(S);
+ // Emit post-update of the reduction variables if IsLastIter != 0.
+ emitPostUpdateForReductionClause(
+ *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ });
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivateClause)
EmitOMPLastprivateClauseFinal(
- S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
- }
- if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- EmitOMPSimdFinal(S);
+ S, isOpenMPSimdDirective(S.getDirectiveKind()),
+ Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
}
+ EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ });
// We're now done with the loop, so jump to the continuation block.
if (ContBlock) {
EmitBranch(ContBlock);
@@ -1589,13 +2127,17 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
}
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
bool HasLastprivates = false;
- auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
- CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
- S.hasCancel());
+ {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
+ S.hasCancel());
+ }
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
@@ -1604,12 +2146,15 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
bool HasLastprivates = false;
- auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
+ PrePostActionTy &) {
HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
};
- CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+ {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+ }
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
@@ -1626,12 +2171,12 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
return LVal;
}
-OpenMPDirectiveKind
-CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
+void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
auto *CS = dyn_cast<CompoundStmt>(Stmt);
bool HasLastprivates = false;
- auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
+ PrePostActionTy &) {
auto &C = CGF.CGM.getContext();
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Emit helper vars inits.
@@ -1697,7 +2242,8 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CodeGenFunction::OMPPrivateScope LoopScope(CGF);
if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
- // initialization of firstprivate variables.
+ // initialization of firstprivate variables and post-update of lastprivate
+ // variables.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
@@ -1708,8 +2254,10 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
(void)LoopScope.Privatize();
// Emit static non-chunked loop.
+ OpenMPScheduleTy ScheduleKind;
+ ScheduleKind.Schedule = OMPC_SCHEDULE_static;
CGF.CGM.getOpenMPRuntime().emitForStaticInit(
- CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
+ CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
/*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
UB.getAddress(), ST.getAddress());
// UB = min(UB, GlobalUB);
@@ -1723,14 +2271,24 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
[](CodeGenFunction &) {});
// Tell the runtime we are done.
- CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
+ };
+ CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
CGF.EmitOMPReductionClauseFinal(S);
+ // Emit post-update of the reduction variables if IsLastIter != 0.
+ emitPostUpdateForReductionClause(
+ CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ });
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivates)
CGF.EmitOMPLastprivateClauseFinal(
- S, CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart())));
+ S, /*NoFinals=*/false,
+ CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart())));
};
bool HasCancel = false;
@@ -1738,6 +2296,7 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
HasCancel = OSD->hasCancel();
else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
HasCancel = OPSD->hasCancel();
+ OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
HasCancel);
// Emit barrier for lastprivates only if 'sections' directive has 'nowait'
@@ -1749,23 +2308,25 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
OMPD_unknown);
}
- return OMPD_sections;
}
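EmitSections lowers a sections region as a static-scheduled loop over section IDs with a switch dispatching to each section body; the source form (illustrative):

```cpp
// Compile with: clang++ -fopenmp sections.cpp
#include <cstdio>

int main() {
  #pragma omp parallel sections
  {
    #pragma omp section
    std::printf("section 0\n"); // becomes case 0 of the emitted switch
    #pragma omp section
    std::printf("section 1\n"); // becomes case 1
  }
  return 0;
}
```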
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
- OpenMPDirectiveKind EmittedAs = EmitSections(S);
+ {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ EmitSections(S);
+ }
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>()) {
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+ OMPD_sections);
}
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
S.hasCancel());
}
@@ -1776,8 +2337,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
llvm::SmallVector<const Expr *, 8> SrcExprs;
llvm::SmallVector<const Expr *, 8> AssignmentOps;
// Check if there are any 'copyprivate' clauses associated with this
- // 'single'
- // construct.
+ // 'single' construct.
// Build a list of copyprivate variables along with helper expressions
// (<source>, <destination>, <destination>=<source> expressions)
for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
@@ -1788,24 +2348,24 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
AssignmentOps.append(C->assignment_ops().begin(),
C->assignment_ops().end());
}
- LexicalScope Scope(*this, S.getSourceRange());
// Emit code for 'single' region along with 'copyprivate' clauses
- bool HasFirstprivates;
- auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
- CodeGenFunction::OMPPrivateScope SingleScope(CGF);
- HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ OMPPrivateScope SingleScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
CGF.EmitOMPPrivateClause(S, SingleScope);
(void)SingleScope.Privatize();
-
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
- CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
- CopyprivateVars, DestExprs, SrcExprs,
- AssignmentOps);
+ {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
+ CopyprivateVars, DestExprs,
+ SrcExprs, AssignmentOps);
+ }
// Emit an implicit barrier at the end (to avoid data race on firstprivate
// init or if no 'nowait' clause was specified and no 'copyprivate' clause).
- if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) &&
- CopyprivateVars.empty()) {
+ if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(),
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
@@ -1813,21 +2373,23 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
}
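
The barrier condition above relies on 'copyprivate' being synchronizing by itself, so the explicit barrier is emitted only when there is neither a 'nowait' clause nor any copyprivate variable. A minimal sketch of the construct (illustrative names):

    // single.cpp -- 'copyprivate' broadcasts the executing thread's value
    void broadcast() {
      int x = 0;
      #pragma omp parallel private(x)
      {
        #pragma omp single copyprivate(x)
        x = 42; // after the construct, every thread's private x is 42
      }
    }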
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
Expr *Hint = nullptr;
if (auto *HintClause = S.getSingleClause<OMPHintClause>())
Hint = HintClause->getHint();
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitCriticalRegion(*this,
S.getDirectiveName().getAsString(),
CodeGen, S.getLocStart(), Hint);
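
A sketch of the construct whose hint expression getHint() extracts above (OpenMP 4.5 spelling, assuming the lock-hint constants from omp.h; a hint requires a named 'critical'):

    // critical.cpp -- named 'critical' with a contention hint
    #include <omp.h>
    int counter = 0;
    void bump() {
      #pragma omp critical(bump_lock) hint(omp_lock_hint_contended)
      ++counter;
    }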
@@ -1837,9 +2399,8 @@ void CodeGenFunction::EmitOMPParallelForDirective(
const OMPParallelForDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
- LexicalScope Scope(*this, S.getSourceRange());
- (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
CGF.EmitOMPWorksharingLoop(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
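
The OMPCancelStackRAII above scopes cancellation for the combined construct. A sketch of what it guards (cancellation must also be enabled at run time, e.g. via OMP_CANCELLATION=true):

    // cancel_for.cpp -- 'cancel for' exits through the cancel stack's block
    void search(int n, const int *a, int key) {
      #pragma omp parallel for
      for (int i = 0; i < n; ++i) {
        if (a[i] == key) {
          #pragma omp cancel for
        }
      }
    }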
@@ -1849,9 +2410,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
const OMPParallelForSimdDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for simd' directive.
- LexicalScope Scope(*this, S.getSourceRange());
- (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPWorksharingLoop(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
@@ -1861,93 +2420,148 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'sections' directive.
- LexicalScope Scope(*this, S.getSourceRange());
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
- (void)CGF.EmitSections(S);
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitSections(S);
};
emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
}
-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+ const RegionCodeGenTy &BodyGen,
+ const TaskGenTy &TaskGen,
+ OMPTaskDataTy &Data) {
// Emit outlined function for task construct.
- LexicalScope Scope(*this, S.getSourceRange());
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto *I = CS->getCapturedDecl()->param_begin();
auto *PartId = std::next(I);
+ auto *TaskT = std::next(I, 4);
+ // Check if the task is final
+ if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
+ // If the condition constant folds and can be elided, try to avoid emitting
+ // the condition and the dead arm of the if/else.
+ auto *Cond = Clause->getCondition();
+ bool CondConstant;
+ if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+ Data.Final.setInt(CondConstant);
+ else
+ Data.Final.setPointer(EvaluateExprAsBool(Cond));
+ } else {
+ // By default the task is not final.
+ Data.Final.setInt(/*IntVal=*/false);
+ }
+ // Check if the task has 'priority' clause.
+ if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
+ // Runtime currently does not support codegen for priority clause argument.
+ // TODO: Add codegen for priority clause arg when runtime lib supports it.
+ auto *Prio = Clause->getPriority();
+ Data.Priority.setInt(Prio);
+ Data.Priority.setPointer(EmitScalarConversion(
+ EmitScalarExpr(Prio), Prio->getType(),
+ getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ Prio->getExprLoc()));
+ }
// The first function argument for tasks is a thread id, the second one is a
// part id (0 for tied tasks, >=0 for untied tasks).
llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
// Get list of private variables.
- llvm::SmallVector<const Expr *, 8> PrivateVars;
- llvm::SmallVector<const Expr *, 8> PrivateCopies;
for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
auto IRef = C->varlist_begin();
for (auto *IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
- PrivateVars.push_back(*IRef);
- PrivateCopies.push_back(IInit);
+ Data.PrivateVars.push_back(*IRef);
+ Data.PrivateCopies.push_back(IInit);
}
++IRef;
}
}
EmittedAsPrivate.clear();
// Get list of firstprivate variables.
- llvm::SmallVector<const Expr *, 8> FirstprivateVars;
- llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
- llvm::SmallVector<const Expr *, 8> FirstprivateInits;
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto IElemInitRef = C->inits().begin();
for (auto *IInit : C->private_copies()) {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
- FirstprivateVars.push_back(*IRef);
- FirstprivateCopies.push_back(IInit);
- FirstprivateInits.push_back(*IElemInitRef);
+ Data.FirstprivateVars.push_back(*IRef);
+ Data.FirstprivateCopies.push_back(IInit);
+ Data.FirstprivateInits.push_back(*IElemInitRef);
}
- ++IRef, ++IElemInitRef;
+ ++IRef;
+ ++IElemInitRef;
}
}
- // Build list of dependences.
- llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
- Dependences;
- for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
- for (auto *IRef : C->varlists()) {
- Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+ // Get list of lastprivate variables (for taskloops).
+ llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ auto IRef = C->varlist_begin();
+ auto ID = C->destination_exprs().begin();
+ for (auto *IInit : C->private_copies()) {
+ auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+ if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+ Data.LastprivateVars.push_back(*IRef);
+ Data.LastprivateCopies.push_back(IInit);
+ }
+ LastprivateDstsOrigs.insert(
+ {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
+ cast<DeclRefExpr>(*IRef)});
+ ++IRef;
+ ++ID;
}
}
- auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
- CodeGenFunction &CGF) {
+ // Build list of dependences.
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>())
+ for (auto *IRef : C->varlists())
+ Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+ auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
- auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
OMPPrivateScope Scope(CGF);
- if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
+ if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
+ !Data.LastprivateVars.empty()) {
auto *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
auto *PrivatesPtr = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
// Map privates.
- llvm::SmallVector<std::pair<const VarDecl *, Address>, 16>
- PrivatePtrs;
+ llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
- for (auto *E : PrivateVars) {
+ for (auto *E : Data.PrivateVars) {
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ Address PrivatePtr = CGF.CreateMemTemp(
+ CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
+ PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+ CallArgs.push_back(PrivatePtr.getPointer());
+ }
+ for (auto *E : Data.FirstprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
- CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
+ CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
+ ".firstpriv.ptr.addr");
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
- for (auto *E : FirstprivateVars) {
+ for (auto *E : Data.LastprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
- CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
+ CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
+ ".lastpriv.ptr.addr");
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.EmitRuntimeCall(CopyFn, CallArgs);
+ for (auto &&Pair : LastprivateDstsOrigs) {
+ auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
+ DeclRefExpr DRE(
+ const_cast<VarDecl *>(OrigVD),
+ /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
+ OrigVD) != nullptr,
+ Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
+ Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
+ return CGF.EmitLValue(&DRE).getAddress();
+ });
+ }
for (auto &&Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
@@ -1955,30 +2569,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
}
}
(void)Scope.Privatize();
- if (*PartId) {
- // TODO: emit code for untied tasks.
- }
- CGF.EmitStmt(CS->getCapturedStmt());
+
+ Action.Enter(CGF);
+ BodyGen(CGF);
};
- auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
- S, *I, OMPD_task, CodeGen);
- // Check if we should emit tied or untied task.
- bool Tied = !S.getSingleClause<OMPUntiedClause>();
- // Check if the task is final
- llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
- if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
- // If the condition constant folds and can be elided, try to avoid emitting
- // the condition and the dead arm of the if/else.
- auto *Cond = Clause->getCondition();
- bool CondConstant;
- if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
- Final.setInt(CondConstant);
- else
- Final.setPointer(EvaluateExprAsBool(Cond));
- } else {
- // By default the task is not final.
- Final.setInt(/*IntVal=*/false);
- }
+ auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+ S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
+ Data.NumberOfParts);
+ OMPLexicalScope Scope(*this, S);
+ TaskGen(*this, OutlinedFn, Data);
+}
+
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+ // Emit outlined function for task construct.
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
@@ -1988,10 +2593,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
break;
}
}
- CGM.getOpenMPRuntime().emitTaskCall(
- *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
- CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
- FirstprivateCopies, FirstprivateInits, Dependences);
+
+ OMPTaskDataTy Data;
+ // Check if we should emit tied or untied task.
+ Data.Tied = !S.getSingleClause<OMPUntiedClause>();
+ auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStmt(CS->getCapturedStmt());
+ };
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
+ IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+ const OMPTaskDataTy &Data) {
+ CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
+ SharedsTy, CapturedStruct, IfCond,
+ Data);
+ };
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
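
A sketch exercising the clauses EmitOMPTaskBasedDirective decodes above: 'final' and 'priority' land in OMPTaskDataTy, 'depend' populates Data.Dependences, and 'untied' clears Data.Tied (illustrative names):

    // task.cpp
    int v = 0;
    void produce(int depth) {
      #pragma omp task untied final(depth > 8) priority(3) depend(out: v)
      v = depth;
    }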
void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -2009,10 +2625,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
void CodeGenFunction::EmitOMPTaskgroupDirective(
const OMPTaskgroupDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
- auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
};
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}
@@ -2026,9 +2643,130 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
}(), S.getLocStart());
}
+void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
+ // Emit the loop iteration variable.
+ auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+ auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
+ EmitVarDecl(*IVDecl);
+
+ // Emit the iteration count variable.
+ // If it is not a variable, Sema decided to calculate the iteration count on
+ // each iteration (e.g., it is foldable into a constant).
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+ // Emit calculation of the iterations count.
+ EmitIgnoredExpr(S.getCalcLastIteration());
+ }
+
+ auto &RT = CGM.getOpenMPRuntime();
+
+ // Check pre-condition.
+ {
+ OMPLoopScope PreInitScope(*this, S);
+ // Skip the entire loop if we don't meet the precondition.
+ // If the condition constant folds and can be elided, avoid emitting the
+ // whole loop.
+ bool CondConstant;
+ llvm::BasicBlock *ContBlock = nullptr;
+ if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+ if (!CondConstant)
+ return;
+ } else {
+ auto *ThenBlock = createBasicBlock("omp.precond.then");
+ ContBlock = createBasicBlock("omp.precond.end");
+ emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
+ getProfileCount(&S));
+ EmitBlock(ThenBlock);
+ incrementProfileCounter(&S);
+ }
+
+ // Emit 'then' code.
+ {
+ // Emit helper vars inits.
+ LValue LB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+ LValue ST =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
+ LValue IL =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
+
+ OMPPrivateScope LoopScope(*this);
+ EmitOMPPrivateLoopCounters(S, LoopScope);
+ (void)LoopScope.Privatize();
+
+ // Detect the distribute schedule kind and chunk.
+ llvm::Value *Chunk = nullptr;
+ OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
+ if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
+ ScheduleKind = C->getDistScheduleKind();
+ if (const auto *Ch = C->getChunkSize()) {
+ Chunk = EmitScalarExpr(Ch);
+ Chunk = EmitScalarConversion(Chunk, Ch->getType(),
+ S.getIterationVariable()->getType(),
+ S.getLocStart());
+ }
+ }
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+
+ // OpenMP [2.10.8, distribute Construct, Description]
+ // If dist_schedule is specified, kind must be static. If specified,
+ // iterations are divided into chunks of size chunk_size, chunks are
+ // assigned to the teams of the league in a round-robin fashion in the
+ // order of the team number. When no chunk_size is specified, the
+ // iteration space is divided into chunks that are approximately equal
+ // in size, and at most one chunk is distributed to each team of the
+ // league. The size of the chunks is unspecified in this case.
+ if (RT.isStaticNonchunked(ScheduleKind,
+ /* Chunked */ Chunk != nullptr)) {
+ RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
+ IVSize, IVSigned, /* Ordered = */ false,
+ IL.getAddress(), LB.getAddress(),
+ UB.getAddress(), ST.getAddress());
+ auto LoopExit =
+ getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+ // UB = min(UB, GlobalUB);
+ EmitIgnoredExpr(S.getEnsureUpperBound());
+ // IV = LB;
+ EmitIgnoredExpr(S.getInit());
+ // while (idx <= UB) { BODY; ++idx; }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+ S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
+ EmitBlock(LoopExit.getBlock());
+ // Tell the runtime we are done.
+ RT.emitForStaticFinish(*this, S.getLocStart());
+ } else {
+ // Emit the outer loop, which requests its work chunk [LB..UB] from
+ // runtime and runs the inner loop to process it.
+ EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ IL.getAddress(), Chunk);
+ }
+ }
+
+ // We're now done with the loop, so jump to the continuation block.
+ if (ContBlock) {
+ EmitBranch(ContBlock);
+ EmitBlock(ContBlock, true);
+ }
+ }
+}
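
A sketch of the nesting this loop emitter handles; with 'dist_schedule(static)' and no chunk size it takes the static non-chunked path quoted from the spec above, giving each team one roughly equal chunk:

    // distribute.cpp
    void scale(int n, float *a) {
      #pragma omp target map(tofrom: a[0:n])
      #pragma omp teams
      #pragma omp distribute dist_schedule(static)
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0f;
    }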
+
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
- llvm_unreachable("CodeGen for 'omp distribute' is not supported yet.");
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S);
+ };
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
+ false);
}
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
@@ -2042,11 +2780,14 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
- if (!S.getAssociatedStmt())
+ if (!S.getAssociatedStmt()) {
+ for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
+ CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
return;
- LexicalScope Scope(*this, S.getSourceRange());
+ }
auto *C = S.getSingleClause<OMPSIMDClause>();
- auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
if (C) {
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
@@ -2054,10 +2795,12 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
} else {
+ Action.Enter(CGF);
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}
};
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
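
The clause-only path added above (no associated statement) implements OpenMP 4.5 doacross loops via emitDoacrossOrdered. A sketch of the source shape (illustrative):

    // doacross.cpp -- cross-iteration ordering with 'ordered depend'
    void prefix_sum(int n, int *a) {
      #pragma omp for ordered(1)
      for (int i = 1; i < n; ++i) {
        #pragma omp ordered depend(sink: i - 1) // wait for iteration i-1
        a[i] += a[i - 1];
        #pragma omp ordered depend(source)      // signal this iteration
      }
    }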
}
@@ -2104,8 +2847,9 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
if (LVal.isGlobalReg()) {
CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
} else {
- CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
- : llvm::Monotonic,
+ CGF.EmitAtomicStore(RVal, LVal,
+ IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
+ : llvm::AtomicOrdering::Monotonic,
LVal.isVolatile(), /*IsInit=*/false);
}
}
@@ -2138,10 +2882,11 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
LValue VLValue = CGF.EmitLValue(V);
RValue Res = XLValue.isGlobalReg()
? CGF.EmitLoadOfLValue(XLValue, Loc)
- : CGF.EmitAtomicLoad(XLValue, Loc,
- IsSeqCst ? llvm::SequentiallyConsistent
- : llvm::Monotonic,
- XLValue.isVolatile());
+ : CGF.EmitAtomicLoad(
+ XLValue, Loc,
+ IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
+ : llvm::AtomicOrdering::Monotonic,
+ XLValue.isVolatile());
// OpenMP, 2.12.6, atomic Construct
// Any atomic construct with a seq_cst clause forces the atomically
// performed operation to include an implicit flush operation without a
@@ -2297,7 +3042,8 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
+ auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
+ : llvm::AtomicOrdering::Monotonic;
auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
@@ -2346,7 +3092,8 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
LValue VLValue = CGF.EmitLValue(V);
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
+ auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
+ : llvm::AtomicOrdering::Monotonic;
QualType NewVValType;
if (UE) {
// 'x' is updated with some additional value.
@@ -2472,6 +3219,13 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_nogroup:
case OMPC_num_tasks:
case OMPC_hint:
+ case OMPC_dist_schedule:
+ case OMPC_defaultmap:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -2501,18 +3255,39 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
}
}
- LexicalScope Scope(*this, S.getSourceRange());
- auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) {
+ auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
+ PrePostActionTy &) {
CGF.EmitStopPoint(CS);
EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
S.getV(), S.getExpr(), S.getUpdateExpr(),
S.isXLHSInRHSPart(), S.getLocStart());
};
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
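
The IsSeqCst plumbing above maps a 'seq_cst' clause to llvm::AtomicOrdering::SequentiallyConsistent and everything else to Monotonic. A sketch of both flavors (illustrative):

    // atomic.cpp
    void add(int &x, int &v) {
      #pragma omp atomic // no clause: Monotonic ordering
      x += 1;
      #pragma omp atomic capture seq_cst // SequentiallyConsistent + implicit flush
      { v = x; x += 1; }
    }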
+std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
+CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
+ CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
+ bool IsOffloadEntry) {
+ llvm::Function *OutlinedFn = nullptr;
+ llvm::Constant *OutlinedFnID = nullptr;
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+
+ Action.Enter(CGF);
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ };
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
+ return std::make_pair(OutlinedFn, OutlinedFnID);
+}
+
void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
- LexicalScope Scope(*this, S.getSourceRange());
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
@@ -2558,15 +3333,50 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
ParentName =
CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
- IsOffloadEntry);
-
+ std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
+ CGM, S, ParentName, IsOffloadEntry);
+ OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
CapturedVars);
}
-void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
- llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
+static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) {
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
+ emitParallelOrTeamsOutlinedFunction(S,
+ *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+
+ const OMPTeamsDirective &TD = *cast<OMPTeamsDirective>(&S);
+ const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
+ const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
+ if (NT || TL) {
+ Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
+ Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
+
+ CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
+ S.getLocStart());
+ }
+
+ OMPLexicalScope Scope(CGF, S);
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
+ CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
+ CapturedVars);
+}
+
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+ // Emit teams region as a standalone region.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ };
+ emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
}
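
A sketch of what emitCommonOMPTeamsDirective consumes; 'num_teams' and 'thread_limit' are forwarded to emitNumTeamsClause only when at least one of them is present:

    // teams.cpp
    void league() {
      #pragma omp target
      #pragma omp teams num_teams(4) thread_limit(64)
      {
        // a league of up to 4 teams, each capped at 64 threads
      }
    }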
void CodeGenFunction::EmitOMPCancellationPointDirective(
@@ -2590,37 +3400,261 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
- if (Kind == OMPD_parallel || Kind == OMPD_task)
+ if (Kind == OMPD_parallel || Kind == OMPD_task ||
+ Kind == OMPD_target_parallel)
return ReturnBlock;
assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
- Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for);
- return BreakContinueStack.back().BreakBlock;
+ Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
+ Kind == OMPD_distribute_parallel_for ||
+ Kind == OMPD_target_parallel_for);
+ return OMPCancelStack.getExitBlock();
}
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
- // emit the code inside the construct for now
+ // The region enclosed by 'target data' is implemented just by emitting the
+ // statement.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ };
+
+ // If we don't have target devices, don't bother emitting the data mapping
+ // code.
+ if (CGM.getLangOpts().OMPTargetTriples.empty()) {
+ OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
+
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data,
+ CodeGen);
+ return;
+ }
+
+ // Check if we have any if clause associated with the directive.
+ const Expr *IfCond = nullptr;
+ if (auto *C = S.getSingleClause<OMPIfClause>())
+ IfCond = C->getCondition();
+
+ // Check if we have any device clause associated with the directive.
+ const Expr *Device = nullptr;
+ if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ Device = C->getDevice();
+
+ CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen);
+}
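
A sketch of a 'target data' region with the 'if' and 'device' clauses read above; when no offload triples are configured the enclosed statement is simply inlined, as in the early-return path:

    // target_data.cpp
    void stage(int n, float *p, int dev, bool use_dev) {
      #pragma omp target data map(tofrom: p[0:n]) if(use_dev) device(dev)
      {
        #pragma omp target
        p[0] = 1.0f;
      }
    }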
+
+void CodeGenFunction::EmitOMPTargetEnterDataDirective(
+ const OMPTargetEnterDataDirective &S) {
+ // If we don't have target devices, don't bother emitting the data mapping
+ // code.
+ if (CGM.getLangOpts().OMPTargetTriples.empty())
+ return;
+
+ // Check if we have any if clause associated with the directive.
+ const Expr *IfCond = nullptr;
+ if (auto *C = S.getSingleClause<OMPIfClause>())
+ IfCond = C->getCondition();
+
+ // Check if we have any device clause associated with the directive.
+ const Expr *Device = nullptr;
+ if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ Device = C->getDevice();
+
+ CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
+}
+
+void CodeGenFunction::EmitOMPTargetExitDataDirective(
+ const OMPTargetExitDataDirective &S) {
+ // If we don't have target devices, don't bother emitting the data mapping
+ // code.
+ if (CGM.getLangOpts().OMPTargetTriples.empty())
+ return;
+
+ // Check if we have any if clause associated with the directive.
+ const Expr *IfCond = nullptr;
+ if (auto *C = S.getSingleClause<OMPIfClause>())
+ IfCond = C->getCondition();
+
+ // Check if we have any device clause associated with the directive.
+ const Expr *Device = nullptr;
+ if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ Device = C->getDevice();
+
+ CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
+}
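
Both stand-alone directives funnel into emitTargetDataStandAloneCall; a sketch of the paired usage that keeps a mapping alive across kernels (illustrative):

    // enter_exit_data.cpp
    void pipeline(int n, float *p) {
      #pragma omp target enter data map(to: p[0:n])
      // ... launch target regions that reuse the mapping ...
      #pragma omp target exit data map(from: p[0:n])
    }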
+
+void CodeGenFunction::EmitOMPTargetParallelDirective(
+ const OMPTargetParallelDirective &S) {
+ // TODO: codegen for target parallel.
+}
+
+void CodeGenFunction::EmitOMPTargetParallelForDirective(
+ const OMPTargetParallelForDirective &S) {
+ // TODO: codegen for target parallel for.
+}
+
+/// Map a helper variable to the address of the matching implicit parameter.
+static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
+ const ImplicitParamDecl *PVD,
+ CodeGenFunction::OMPPrivateScope &Privates) {
+ auto *VDecl = cast<VarDecl>(Helper->getDecl());
+ Privates.addPrivate(
+ VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
+}
+
+void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
+ assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
+ // Emit outlined function for task construct.
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_data,
- [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
+ auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+ const Expr *IfCond = nullptr;
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_taskloop) {
+ IfCond = C->getCondition();
+ break;
+ }
+ }
+
+ OMPTaskDataTy Data;
+ // Check if taskloop must be emitted without taskgroup.
+ Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
+ // TODO: Check if we should emit tied or untied task.
+ Data.Tied = true;
+ // Set scheduling for taskloop.
+ if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
+ // grainsize clause
+ Data.Schedule.setInt(/*IntVal=*/false);
+ Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+ } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
+ // num_tasks clause
+ Data.Schedule.setInt(/*IntVal=*/true);
+ Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+ }
+
+ auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
+ // if (PreCond) {
+ // for (IV in 0..LastIteration) BODY;
+ // <Final counter/linear vars updates>;
+ // }
+ //
+
+ // Emit: if (PreCond) - begin.
+ // If the condition constant folds and can be elided, avoid emitting the
+ // whole loop.
+ bool CondConstant;
+ llvm::BasicBlock *ContBlock = nullptr;
+ OMPLoopScope PreInitScope(CGF, S);
+ if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+ if (!CondConstant)
+ return;
+ } else {
+ auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
+ ContBlock = CGF.createBasicBlock("taskloop.if.end");
+ emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
+ CGF.getProfileCount(&S));
+ CGF.EmitBlock(ThenBlock);
+ CGF.incrementProfileCounter(&S);
+ }
+
+ if (isOpenMPSimdDirective(S.getDirectiveKind()))
+ CGF.EmitOMPSimdInit(S);
+
+ OMPPrivateScope LoopScope(CGF);
+ // Emit helper vars inits.
+ enum { LowerBound = 5, UpperBound, Stride, LastIter };
+ auto *I = CS->getCapturedDecl()->param_begin();
+ auto *LBP = std::next(I, LowerBound);
+ auto *UBP = std::next(I, UpperBound);
+ auto *STP = std::next(I, Stride);
+ auto *LIP = std::next(I, LastIter);
+ mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
+ LoopScope);
+ mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
+ LoopScope);
+ mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
+ mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
+ LoopScope);
+ CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+ bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
+ (void)LoopScope.Privatize();
+ // Emit the loop iteration variable.
+ const Expr *IVExpr = S.getIterationVariable();
+ const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+ CGF.EmitVarDecl(*IVDecl);
+ CGF.EmitIgnoredExpr(S.getInit());
+
+ // Emit the iteration count variable.
+ // If it is not a variable, Sema decided to calculate the iteration count on
+ // each iteration (e.g., it is foldable into a constant).
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+ // Emit calculation of the iterations count.
+ CGF.EmitIgnoredExpr(S.getCalcLastIteration());
+ }
+
+ CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+ S.getInc(),
+ [&S](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, JumpDest());
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
+ // Emit: if (PreCond) - end.
+ if (ContBlock) {
+ CGF.EmitBranch(ContBlock);
+ CGF.EmitBlock(ContBlock, true);
+ }
+ // Emit final copy of the lastprivate variables if IsLastIter != 0.
+ if (HasLastprivateClause) {
+ CGF.EmitOMPLastprivateClauseFinal(
+ S, isOpenMPSimdDirective(S.getDirectiveKind()),
+ CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
+ CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
+ (*LIP)->getType(), S.getLocStart())));
+ }
+ };
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
+ IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+ const OMPTaskDataTy &Data) {
+ auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
+ OMPLoopScope PreInitScope(CGF, S);
+ CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
+ OutlinedFn, SharedsTy,
+ CapturedStruct, IfCond, Data);
+ };
+ CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
+ CodeGen);
+ };
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}
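
A sketch of the scheduling clauses decoded into Data.Schedule above: 'grainsize' stores the flag as false, 'num_tasks' as true (the two are mutually exclusive), and 'nogroup' sets Data.Nogroup:

    // taskloop.cpp
    void bump(int n, int *a) {
      #pragma omp taskloop grainsize(64) nogroup
      for (int i = 0; i < n; ++i)
        a[i] += 1;
    }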
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
- // emit the code inside the construct for now
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_taskloop,
- [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
+ EmitOMPTaskLoopBasedDirective(S);
}
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
const OMPTaskLoopSimdDirective &S) {
- // emit the code inside the construct for now
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_taskloop_simd,
- [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); });
+ EmitOMPTaskLoopBasedDirective(S);
}
+// Generate the instructions for '#pragma omp target update' directive.
+void CodeGenFunction::EmitOMPTargetUpdateDirective(
+ const OMPTargetUpdateDirective &S) {
+ // If we don't have target devices, don't bother emitting the data mapping
+ // code.
+ if (CGM.getLangOpts().OMPTargetTriples.empty())
+ return;
+
+ // Check if we have any if clause associated with the directive.
+ const Expr *IfCond = nullptr;
+ if (auto *C = S.getSingleClause<OMPIfClause>())
+ IfCond = C->getCondition();
+
+ // Check if we have any device clause associated with the directive.
+ const Expr *Device = nullptr;
+ if (auto *C = S.getSingleClause<OMPDeviceClause>())
+ Device = C->getDevice();
+
+ CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
+}
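
A sketch of the stand-alone 'target update' directive with the 'if' and 'device' clauses read above (illustrative names):

    // target_update.cpp
    void refresh(int n, float *p, int dev, bool cond) {
      #pragma omp target update to(p[0:n]) if(cond) device(dev)
    }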
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp
index 4fb76710d2ad..5b90ee603307 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp
@@ -44,7 +44,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
const CXXRecordDecl *RD) {
VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/true);
- llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int64Ty = CGM.Int64Ty;
+ llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int32Ty = CGM.Int32Ty;
llvm::ArrayType *ArrayType =
llvm::ArrayType::get(Int8PtrTy, Builder.getVTTComponents().size());
@@ -75,8 +75,8 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
}
llvm::Value *Idxs[] = {
- llvm::ConstantInt::get(Int64Ty, 0),
- llvm::ConstantInt::get(Int64Ty, AddressPoint)
+ llvm::ConstantInt::get(Int32Ty, 0),
+ llvm::ConstantInt::get(Int32Ty, AddressPoint)
};
llvm::Constant *Init = llvm::ConstantExpr::getInBoundsGetElementPtr(
@@ -121,7 +121,7 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
llvm::GlobalVariable *GV =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType,
llvm::GlobalValue::ExternalLinkage);
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
return GV;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
index a40aab29be07..957055033890 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp
@@ -156,9 +156,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
// Clone to thunk.
llvm::ValueToValueMapTy VMap;
- llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap,
- /*ModuleLevelChanges=*/false);
- CGM.getModule().getFunctionList().push_back(NewFn);
+ llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap);
Fn->replaceAllUsesWith(NewFn);
NewFn->takeName(Fn);
Fn->eraseFromParent();
@@ -286,15 +284,14 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs);
// Add the rest of the arguments.
- for (const ParmVarDecl *PD : MD->params())
+ for (const ParmVarDecl *PD : MD->parameters())
EmitDelegateCallArg(CallArgs, PD, PD->getLocStart());
const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
#ifndef NDEBUG
- const CGFunctionInfo &CallFnInfo =
- CGM.getTypes().arrangeCXXMethodCall(CallArgs, FPT,
- RequiredArgs::forPrototypePlus(FPT, 1));
+ const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall(
+ CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD));
assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() &&
CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() &&
CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention());
@@ -607,6 +604,8 @@ llvm::Constant *CodeGenVTables::CreateVTableInitializer(
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
StringRef PureCallName = CGM.getCXXABI().GetPureVirtualCallName();
PureVirtualFn = CGM.CreateRuntimeFunction(Ty, PureCallName);
+ if (auto *F = dyn_cast<llvm::Function>(PureVirtualFn))
+ F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
PureVirtualFn = llvm::ConstantExpr::getBitCast(PureVirtualFn,
CGM.Int8PtrTy);
}
@@ -618,6 +617,8 @@ llvm::Constant *CodeGenVTables::CreateVTableInitializer(
StringRef DeletedCallName =
CGM.getCXXABI().GetDeletedVirtualCallName();
DeletedVirtualFn = CGM.CreateRuntimeFunction(Ty, DeletedCallName);
+ if (auto *F = dyn_cast<llvm::Function>(DeletedVirtualFn))
+ F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
DeletedVirtualFn = llvm::ConstantExpr::getBitCast(DeletedVirtualFn,
CGM.Int8PtrTy);
}
@@ -696,7 +697,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
CGM.setGlobalVisibility(VTable, RD);
// V-tables are always unnamed_addr.
- VTable->setUnnamedAddr(true);
+ VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor(
CGM.getContext().getTagDeclType(Base.getBase()));
@@ -708,7 +709,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
VTLayout->getNumVTableThunks(), RTTI);
VTable->setInitializer(Init);
- CGM.EmitVTableBitSetEntries(VTable, *VTLayout.get());
+ CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get());
return VTable;
}
@@ -719,7 +720,7 @@ static bool shouldEmitAvailableExternallyVTable(const CodeGenModule &CGM,
CGM.getCXXABI().canSpeculativelyEmitVTable(RD);
}
-/// Compute the required linkage of the v-table for the given class.
+/// Compute the required linkage of the vtable for the given class.
///
/// Note that we only call this at the end of the translation unit.
llvm::GlobalVariable::LinkageTypes
@@ -793,6 +794,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
return DiscardableODRLinkage;
case TSK_ExplicitInstantiationDeclaration:
+ // Explicit instantiations in MSVC do not provide vtables, so we must emit
+ // our own.
+ if (getTarget().getCXXABI().isMicrosoft())
+ return DiscardableODRLinkage;
return shouldEmitAvailableExternallyVTable(*this, RD)
? llvm::GlobalVariable::AvailableExternallyLinkage
: llvm::GlobalVariable::ExternalLinkage;
@@ -804,7 +809,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
llvm_unreachable("Invalid TemplateSpecializationKind!");
}
-/// This is a callback from Sema to tell us that that a particular v-table is
+/// This is a callback from Sema to tell us that a particular vtable is
/// required to be emitted in this translation unit.
///
/// This is only called for vtables that _must_ be emitted (mainly due to key
@@ -832,38 +837,43 @@ CodeGenVTables::GenerateClassData(const CXXRecordDecl *RD) {
/// the translation unit.
///
/// The only semantic restriction here is that the object file should
-/// not contain a v-table definition when that v-table is defined
+/// not contain a vtable definition when that vtable is defined
/// strongly elsewhere. Otherwise, we'd just like to avoid emitting
-/// v-tables when unnecessary.
+/// vtables when unnecessary.
bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) {
assert(RD->isDynamicClass() && "Non-dynamic classes have no VTable.");
+ // We always synthesize vtables if they are needed in the MS ABI. MSVC doesn't
+ // emit them even if there is an explicit template instantiation.
+ if (CGM.getTarget().getCXXABI().isMicrosoft())
+ return false;
+
// If we have an explicit instantiation declaration (and not a
- // definition), the v-table is defined elsewhere.
+ // definition), the vtable is defined elsewhere.
TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind();
if (TSK == TSK_ExplicitInstantiationDeclaration)
return true;
// Otherwise, if the class is an instantiated template, the
- // v-table must be defined here.
+ // vtable must be defined here.
if (TSK == TSK_ImplicitInstantiation ||
TSK == TSK_ExplicitInstantiationDefinition)
return false;
// Otherwise, if the class doesn't have a key function (possibly
- // anymore), the v-table must be defined here.
+ // anymore), the vtable must be defined here.
const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD);
if (!keyFunction)
return false;
// Otherwise, if we don't have a definition of the key function, the
- // v-table must be defined somewhere else.
+ // vtable must be defined somewhere else.
return !keyFunction->hasBody();
}
/// Given that we're currently at the end of the translation unit, and
-/// we've emitted a reference to the v-table for this class, should
-/// we define that v-table?
+/// we've emitted a reference to the vtable for this class, should
+/// we define that vtable?
static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM,
const CXXRecordDecl *RD) {
// If vtable is internal then it has to be done.
@@ -875,7 +885,7 @@ static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM,
}
/// Given that at some point we emitted a reference to one or more
-/// v-tables, and that we are now at the end of the translation unit,
+/// vtables, and that we are now at the end of the translation unit,
/// decide whether we should emit them.
void CodeGenModule::EmitDeferredVTables() {
#ifndef NDEBUG
@@ -889,25 +899,47 @@ void CodeGenModule::EmitDeferredVTables() {
VTables.GenerateClassData(RD);
assert(savedSize == DeferredVTables.size() &&
- "deferred extra v-tables during v-table emission?");
+ "deferred extra vtables during vtable emission?");
DeferredVTables.clear();
}
-bool CodeGenModule::IsCFIBlacklistedRecord(const CXXRecordDecl *RD) {
- if (RD->hasAttr<UuidAttr>() &&
- getContext().getSanitizerBlacklist().isBlacklistedType("attr:uuid"))
+bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
+ LinkageInfo LV = RD->getLinkageAndVisibility();
+ if (!isExternallyVisible(LV.getLinkage()))
return true;
- return getContext().getSanitizerBlacklist().isBlacklistedType(
- RD->getQualifiedNameAsString());
+ if (RD->hasAttr<LTOVisibilityPublicAttr>() || RD->hasAttr<UuidAttr>())
+ return false;
+
+ if (getTriple().isOSBinFormatCOFF()) {
+ if (RD->hasAttr<DLLExportAttr>() || RD->hasAttr<DLLImportAttr>())
+ return false;
+ } else {
+ if (LV.getVisibility() != HiddenVisibility)
+ return false;
+ }
+
+ if (getCodeGenOpts().LTOVisibilityPublicStd) {
+ const DeclContext *DC = RD;
+ while (1) {
+ auto *D = cast<Decl>(DC);
+ DC = DC->getParent();
+ if (isa<TranslationUnitDecl>(DC->getRedeclContext())) {
+ if (auto *ND = dyn_cast<NamespaceDecl>(D))
+ if (const IdentifierInfo *II = ND->getIdentifier())
+ if (II->isStr("std") || II->isStr("stdext"))
+ return false;
+ break;
+ }
+ }
+ }
+
+ return true;
}
-void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
- const VTableLayout &VTLayout) {
- if (!LangOpts.Sanitize.has(SanitizerKind::CFIVCall) &&
- !LangOpts.Sanitize.has(SanitizerKind::CFINVCall) &&
- !LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) &&
- !LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast))
+void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
+ const VTableLayout &VTLayout) {
+ if (!getCodeGenOpts().PrepareForLTO)
return;
CharUnits PointerWidth =
@@ -916,12 +948,8 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
typedef std::pair<const CXXRecordDecl *, unsigned> BSEntry;
std::vector<BSEntry> BitsetEntries;
// Create a bit set entry for each address point.
- for (auto &&AP : VTLayout.getAddressPoints()) {
- if (IsCFIBlacklistedRecord(AP.first.getBase()))
- continue;
-
+ for (auto &&AP : VTLayout.getAddressPoints())
BitsetEntries.push_back(std::make_pair(AP.first.getBase(), AP.second));
- }
// Sort the bit set entries for determinism.
std::sort(BitsetEntries.begin(), BitsetEntries.end(),
@@ -949,10 +977,7 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
return E1.second < E2.second;
});
- llvm::NamedMDNode *BitsetsMD =
- getModule().getOrInsertNamedMetadata("llvm.bitsets");
for (auto BitsetEntry : BitsetEntries)
- CreateVTableBitSetEntry(BitsetsMD, VTable,
- PointerWidth * BitsetEntry.second,
- BitsetEntry.first);
+ AddVTableTypeMetadata(VTable, PointerWidth * BitsetEntry.second,
+ BitsetEntry.first);
}
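
For reference, a class whose vtable receives the !type annotations emitted above when compiling with -flto (e.g. for CFI); the IR in the comments is a hand-written sketch, not compiler output:

    // cfi_vtable.cpp
    struct A { virtual void f(); };
    struct B : A { void f() override; };
    // Sketch of the resulting IR: one !type entry per address point, with the
    // byte offset of that address point into the vtable global:
    //   @_ZTV1B = ... , !type !0, !type !1
    //   !0 = !{i64 16, !"_ZTS1A"}
    //   !1 = !{i64 16, !"_ZTS1B"}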
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h
index 3ccc4cda89f9..53a376df6457 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h
@@ -445,7 +445,7 @@ class AggValueSlot {
// Qualifiers
Qualifiers Quals;
- unsigned short Alignment;
+ unsigned Alignment;
/// DestructedFlag - This is set to true if some external code is
/// responsible for setting up a destructor for the slot. Otherwise
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
index 643c996e2ec9..166f44f816f3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp
@@ -26,45 +26,41 @@
using namespace clang;
using namespace CodeGen;
-CodeGenABITypes::CodeGenABITypes(ASTContext &C, llvm::Module &M,
- CoverageSourceInfo *CoverageInfo)
- : CGO(new CodeGenOptions), HSO(new HeaderSearchOptions),
- PPO(new PreprocessorOptions),
- CGM(new CodeGen::CodeGenModule(C, *HSO, *PPO, *CGO, M, C.getDiagnostics(),
- CoverageInfo)) {}
-
-// Explicitly out-of-line because ~CodeGenModule() is private but
-// CodeGenABITypes.h is part of clang's API.
-CodeGenABITypes::~CodeGenABITypes() = default;
-
const CGFunctionInfo &
-CodeGenABITypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
- QualType receiverType) {
- return CGM->getTypes().arrangeObjCMessageSendSignature(MD, receiverType);
+CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM,
+ const ObjCMethodDecl *MD,
+ QualType receiverType) {
+ return CGM.getTypes().arrangeObjCMessageSendSignature(MD, receiverType);
}
const CGFunctionInfo &
-CodeGenABITypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty,
- const FunctionDecl *FD) {
- return CGM->getTypes().arrangeFreeFunctionType(Ty, FD);
+CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM,
+ CanQual<FunctionProtoType> Ty,
+ const FunctionDecl *FD) {
+ return CGM.getTypes().arrangeFreeFunctionType(Ty, FD);
}
const CGFunctionInfo &
-CodeGenABITypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty) {
- return CGM->getTypes().arrangeFreeFunctionType(Ty);
+CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM,
+ CanQual<FunctionNoProtoType> Ty) {
+ return CGM.getTypes().arrangeFreeFunctionType(Ty);
}
const CGFunctionInfo &
-CodeGenABITypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
- const FunctionProtoType *FTP,
- const CXXMethodDecl *MD) {
- return CGM->getTypes().arrangeCXXMethodType(RD, FTP, MD);
+CodeGen::arrangeCXXMethodType(CodeGenModule &CGM,
+ const CXXRecordDecl *RD,
+ const FunctionProtoType *FTP,
+ const CXXMethodDecl *MD) {
+ return CGM.getTypes().arrangeCXXMethodType(RD, FTP, MD);
}
-const CGFunctionInfo &CodeGenABITypes::arrangeFreeFunctionCall(
- CanQualType returnType, ArrayRef<CanQualType> argTypes,
- FunctionType::ExtInfo info, RequiredArgs args) {
- return CGM->getTypes().arrangeLLVMFunctionInfo(
+const CGFunctionInfo &
+CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM,
+ CanQualType returnType,
+ ArrayRef<CanQualType> argTypes,
+ FunctionType::ExtInfo info,
+ RequiredArgs args) {
+ return CGM.getTypes().arrangeLLVMFunctionInfo(
returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes,
- info, args);
+ info, {}, args);
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
index 0a670ab19aa5..49738a20f493 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp
@@ -46,14 +46,13 @@ namespace clang {
const CodeGenOptions &CodeGenOpts;
const TargetOptions &TargetOpts;
const LangOptions &LangOpts;
- raw_pwrite_stream *AsmOutStream;
+ std::unique_ptr<raw_pwrite_stream> AsmOutStream;
ASTContext *Context;
Timer LLVMIRGeneration;
std::unique_ptr<CodeGenerator> Gen;
- std::unique_ptr<llvm::Module> TheModule;
SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4>
LinkModules;
@@ -69,11 +68,12 @@ namespace clang {
const TargetOptions &TargetOpts, const LangOptions &LangOpts,
bool TimePasses, const std::string &InFile,
const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules,
- raw_pwrite_stream *OS, LLVMContext &C,
+ std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts),
- TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(OS),
- Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"),
+ TargetOpts(TargetOpts), LangOpts(LangOpts),
+ AsmOutStream(std::move(OS)), Context(nullptr),
+ LLVMIRGeneration("LLVM IR Generation Time"),
Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
CodeGenOpts, C, CoverageInfo)) {
llvm::TimePassesIsEnabled = TimePasses;
@@ -81,7 +81,10 @@ namespace clang {
this->LinkModules.push_back(
std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second)));
}
- std::unique_ptr<llvm::Module> takeModule() { return std::move(TheModule); }
+ llvm::Module *getModule() const { return Gen->GetModule(); }
+ std::unique_ptr<llvm::Module> takeModule() {
+ return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
+ }
void releaseLinkModules() {
for (auto &I : LinkModules)
I.second.release();
@@ -101,8 +104,6 @@ namespace clang {
Gen->Initialize(Ctx);
- TheModule.reset(Gen->GetModule());
-
if (llvm::TimePassesIsEnabled)
LLVMIRGeneration.stopTimer();
}
@@ -123,14 +124,14 @@ namespace clang {
return true;
}
- void HandleInlineMethodDefinition(CXXMethodDecl *D) override {
+ void HandleInlineFunctionDefinition(FunctionDecl *D) override {
PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
Context->getSourceManager(),
- "LLVM IR generation of inline method");
+ "LLVM IR generation of inline function");
if (llvm::TimePassesIsEnabled)
LLVMIRGeneration.startTimer();
- Gen->HandleInlineMethodDefinition(D);
+ Gen->HandleInlineFunctionDefinition(D);
if (llvm::TimePassesIsEnabled)
LLVMIRGeneration.stopTimer();
@@ -149,25 +150,12 @@ namespace clang {
}
// Silently ignore if we weren't initialized for some reason.
- if (!TheModule)
+ if (!getModule())
return;
- // Make sure IR generation is happy with the module. This is released by
- // the module provider.
- llvm::Module *M = Gen->ReleaseModule();
- if (!M) {
- // The module has been released by IR gen on failures, do not double
- // free.
- TheModule.release();
- return;
- }
-
- assert(TheModule.get() == M &&
- "Unexpected module change during IR generation");
-
// Install an inline asm handler so that diagnostics get printed through
// our diagnostics hooks.
- LLVMContext &Ctx = TheModule->getContext();
+ LLVMContext &Ctx = getModule()->getContext();
LLVMContext::InlineAsmDiagHandlerTy OldHandler =
Ctx.getInlineAsmDiagnosticHandler();
void *OldContext = Ctx.getInlineAsmDiagnosticContext();
@@ -182,13 +170,15 @@ namespace clang {
for (auto &I : LinkModules) {
unsigned LinkFlags = I.first;
CurLinkModule = I.second.get();
- if (Linker::linkModules(*M, std::move(I.second), LinkFlags))
+ if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags))
return;
}
+ EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
+
EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
- C.getTargetInfo().getDataLayoutString(),
- TheModule.get(), Action, AsmOutStream);
+ C.getTargetInfo().getDataLayout(),
+ getModule(), Action, std::move(AsmOutStream));
Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext);
@@ -210,21 +200,12 @@ namespace clang {
Gen->CompleteTentativeDefinition(D);
}
- void HandleVTable(CXXRecordDecl *RD) override {
- Gen->HandleVTable(RD);
- }
-
- void HandleLinkerOptionPragma(llvm::StringRef Opts) override {
- Gen->HandleLinkerOptionPragma(Opts);
+ void AssignInheritanceModel(CXXRecordDecl *RD) override {
+ Gen->AssignInheritanceModel(RD);
}
- void HandleDetectMismatch(llvm::StringRef Name,
- llvm::StringRef Value) override {
- Gen->HandleDetectMismatch(Name, Value);
- }
-
- void HandleDependentLibrary(llvm::StringRef Opts) override {
- Gen->HandleDependentLibrary(Opts);
+ void HandleVTable(CXXRecordDecl *RD) override {
+ Gen->HandleVTable(RD);
}
static void InlineAsmDiagHandler(const llvm::SMDiagnostic &SM, void *Context,
@@ -238,6 +219,13 @@ namespace clang {
((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI);
}
+ /// Get the best possible source location to represent a diagnostic that
+ /// may have associated debug info.
+ const FullSourceLoc
+ getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D,
+ bool &BadDebugInfo, StringRef &Filename,
+ unsigned &Line, unsigned &Column) const;
+
void InlineAsmDiagHandler2(const llvm::SMDiagnostic &,
SourceLocation LocCookie);
@@ -250,6 +238,8 @@ namespace clang {
/// \return True if the diagnostic has been successfully reported, false
/// otherwise.
bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
+ /// \brief Specialized handler for unsupported backend feature diagnostic.
+ void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
/// \brief Specialized handlers for optimization remarks.
/// Note that these handlers only accept remarks and they always handle
/// them.
@@ -426,25 +416,21 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) {
return false;
if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) {
+ // FIXME: Shouldn't need to truncate to uint32_t
Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()),
diag::warn_fe_frame_larger_than)
- << D.getStackSize() << Decl::castToDeclContext(ND);
+ << static_cast<uint32_t>(D.getStackSize()) << Decl::castToDeclContext(ND);
return true;
}
return false;
}
-void BackendConsumer::EmitOptimizationMessage(
- const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) {
- // We only support warnings and remarks.
- assert(D.getSeverity() == llvm::DS_Remark ||
- D.getSeverity() == llvm::DS_Warning);
-
+const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
+ const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename,
+ unsigned &Line, unsigned &Column) const {
SourceManager &SourceMgr = Context->getSourceManager();
FileManager &FileMgr = SourceMgr.getFileManager();
- StringRef Filename;
- unsigned Line, Column;
SourceLocation DILoc;
if (D.isLocationAvailable()) {
@@ -455,6 +441,7 @@ void BackendConsumer::EmitOptimizationMessage(
// source manager, so pass 1 if Column is not set.
DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
}
+ BadDebugInfo = DILoc.isInvalid();
}
// If a location isn't available, try to approximate it using the associated
@@ -463,18 +450,63 @@ void BackendConsumer::EmitOptimizationMessage(
FullSourceLoc Loc(DILoc, SourceMgr);
if (Loc.isInvalid())
if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName()))
- Loc = FD->getASTContext().getFullLoc(FD->getBodyRBrace());
+ Loc = FD->getASTContext().getFullLoc(FD->getLocation());
+
+ if (DILoc.isInvalid() && D.isLocationAvailable())
+ // If we were not able to translate the file:line:col information
+ // back to a SourceLocation, at least emit a note stating that
+ // we could not translate this location. This can happen in the
+ // case of #line directives.
+ Diags.Report(Loc, diag::note_fe_backend_invalid_loc)
+ << Filename << Line << Column;
+
+ return Loc;
+}
+
+void BackendConsumer::UnsupportedDiagHandler(
+ const llvm::DiagnosticInfoUnsupported &D) {
+ // We only support errors.
+ assert(D.getSeverity() == llvm::DS_Error);
+
+ StringRef Filename;
+ unsigned Line, Column;
+ bool BadDebugInfo = false;
+ FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename,
+ Line, Column);
+
+ Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str();
+
+ if (BadDebugInfo)
+ // If we were not able to translate the file:line:col information
+ // back to a SourceLocation, at least emit a note stating that
+ // we could not translate this location. This can happen in the
+ // case of #line directives.
+ Diags.Report(Loc, diag::note_fe_backend_invalid_loc)
+ << Filename << Line << Column;
+}
+
+void BackendConsumer::EmitOptimizationMessage(
+ const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) {
+ // We only support warnings and remarks.
+ assert(D.getSeverity() == llvm::DS_Remark ||
+ D.getSeverity() == llvm::DS_Warning);
+
+ StringRef Filename;
+ unsigned Line, Column;
+ bool BadDebugInfo = false;
+ FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename,
+ Line, Column);
Diags.Report(Loc, DiagID)
<< AddFlagValue(D.getPassName() ? D.getPassName() : "")
<< D.getMsg().str();
- if (DILoc.isInvalid() && D.isLocationAvailable())
+ if (BadDebugInfo)
// If we were not able to translate the file:line:col information
// back to a SourceLocation, at least emit a note stating that
// we could not translate this location. This can happen in the
// case of #line directives.
- Diags.Report(Loc, diag::note_fe_backend_optimization_remark_invalid_loc)
+ Diags.Report(Loc, diag::note_fe_backend_invalid_loc)
<< Filename << Line << Column;
}
@@ -504,7 +536,7 @@ void BackendConsumer::OptimizationRemarkHandler(
// llvm::DiagnosticInfo::AlwaysPrint or if the -Rpass-analysis flag has a
// regular expression that matches the pass name in \p D.
- if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint ||
+ if (D.shouldAlwaysPrint() ||
(CodeGenOpts.OptimizationRemarkAnalysisPattern &&
CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
EmitOptimizationMessage(
@@ -517,7 +549,7 @@ void BackendConsumer::OptimizationRemarkHandler(
// llvm::DiagnosticInfo::AlwaysPrint or if the -Rpass-analysis flag has a
// regular expression that matches the pass name in \p D.
- if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint ||
+ if (D.shouldAlwaysPrint() ||
(CodeGenOpts.OptimizationRemarkAnalysisPattern &&
CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
EmitOptimizationMessage(
@@ -530,7 +562,7 @@ void BackendConsumer::OptimizationRemarkHandler(
// llvm::DiagnosticInfo::AlwaysPrint or if the -Rpass-analysis flag has a
// regular expression that matches the pass name in \p D.
- if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint ||
+ if (D.shouldAlwaysPrint() ||
(CodeGenOpts.OptimizationRemarkAnalysisPattern &&
CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName())))
EmitOptimizationMessage(
@@ -599,6 +631,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
// handler.
OptimizationFailureHandler(cast<DiagnosticInfoOptimizationFailure>(DI));
return;
+ case llvm::DK_Unsupported:
+ UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI));
+ return;
default:
// Plugin IDs are not bound to any value as they are set dynamically.
ComputeDiagRemarkID(Severity, backend_plugin, DiagID);
@@ -657,7 +692,7 @@ llvm::LLVMContext *CodeGenAction::takeLLVMContext() {
return VMContext;
}
-static raw_pwrite_stream *
+static std::unique_ptr<raw_pwrite_stream>
GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) {
switch (Action) {
case Backend_EmitAssembly:
@@ -680,7 +715,7 @@ GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) {
std::unique_ptr<ASTConsumer>
CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
BackendAction BA = static_cast<BackendAction>(Act);
- raw_pwrite_stream *OS = GetOutputStream(CI, InFile, BA);
+ std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, InFile, BA);
if (BA != Backend_EmitNothing && !OS)
return nullptr;
@@ -720,7 +755,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules,
- OS, *VMContext, CoverageInfo));
+ std::move(OS), *VMContext, CoverageInfo));
BEConsumer = Result.get();
return std::move(Result);
}
@@ -729,6 +764,22 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM,
void *Context,
unsigned LocCookie) {
SM.print(nullptr, llvm::errs());
+
+ auto Diags = static_cast<DiagnosticsEngine *>(Context);
+ unsigned DiagID;
+ switch (SM.getKind()) {
+ case llvm::SourceMgr::DK_Error:
+ DiagID = diag::err_fe_inline_asm;
+ break;
+ case llvm::SourceMgr::DK_Warning:
+ DiagID = diag::warn_fe_inline_asm;
+ break;
+ case llvm::SourceMgr::DK_Note:
+ DiagID = diag::note_fe_inline_asm;
+ break;
+ }
+
+ Diags->Report(DiagID).AddString("cannot compile inline asm");
}
void CodeGenAction::ExecuteAction() {
@@ -736,7 +787,8 @@ void CodeGenAction::ExecuteAction() {
if (getCurrentFileKind() == IK_LLVM_IR) {
BackendAction BA = static_cast<BackendAction>(Act);
CompilerInstance &CI = getCompilerInstance();
- raw_pwrite_stream *OS = GetOutputStream(CI, getCurrentFile(), BA);
+ std::unique_ptr<raw_pwrite_stream> OS =
+ GetOutputStream(CI, getCurrentFile(), BA);
if (BA != Backend_EmitNothing && !OS)
return;
@@ -747,6 +799,11 @@ void CodeGenAction::ExecuteAction() {
if (Invalid)
return;
+ // For ThinLTO backend invocations, ensure that the context
+ // merges types based on ODR identifiers.
+ if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty())
+ VMContext->enableDebugTypeODRUniquing();
+
llvm::SMDiagnostic Err;
TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext);
if (!TheModule) {
@@ -779,11 +836,16 @@ void CodeGenAction::ExecuteAction() {
TheModule->setTargetTriple(TargetOpts.Triple);
}
+ EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(),
+ MainFile->getMemBufferRef());
+
LLVMContext &Ctx = TheModule->getContext();
- Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler);
+ Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler,
+ &CI.getDiagnostics());
+
EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
- CI.getLangOpts(), CI.getTarget().getDataLayoutString(),
- TheModule.get(), BA, OS);
+ CI.getLangOpts(), CI.getTarget().getDataLayout(),
+ TheModule.get(), BA, std::move(OS));
return;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index e38ff0a39da3..11e4ad9ecefa 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -25,6 +25,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtObjC.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
@@ -397,10 +398,17 @@ bool CodeGenFunction::ShouldInstrumentFunction() {
return true;
}
+/// ShouldXRayInstrumentFunction - Return true if the current function should
+/// be instrumented with XRay nop sleds.
+bool CodeGenFunction::ShouldXRayInstrumentFunction() const {
+ return CGM.getCodeGenOpts().XRayInstrumentFunctions;
+}
+
/// EmitFunctionInstrumentation - Emit LLVM code to call the specified
/// instrumentation function with the current function and the call site, if
/// function instrumentation is enabled.
void CodeGenFunction::EmitFunctionInstrumentation(const char *Fn) {
+ auto NL = ApplyDebugLocation::CreateArtificial(*this);
// void __cyg_profile_func_{enter,exit} (void *this_fn, void *call_site);
llvm::PointerType *PointerTy = Int8PtrTy;
llvm::Type *ProfileFuncArgs[] = { PointerTy, PointerTy };
@@ -429,12 +437,28 @@ void CodeGenFunction::EmitMCountInstrumentation() {
EmitNounwindRuntimeCall(MCountFn);
}
+// Returns the address space id that should be emitted in the
+// kernel_arg_addr_space metadata. This is always fixed to the ids given in
+// the SPIR 2.0 specification so that, for example, a clGetKernelArgInfo()
+// implementation can tell the OpenCL address spaces apart even on targets
+// that have no unique mapping to them (basically all single-AS CPUs).
+static unsigned ArgInfoAddressSpace(unsigned LangAS) {
+ switch (LangAS) {
+ case LangAS::opencl_global: return 1;
+ case LangAS::opencl_constant: return 2;
+ case LangAS::opencl_local: return 3;
+ case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs.
+ default:
+ return 0; // Assume private.
+ }
+}
+
// OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument
// information in the program executable. The argument information stored
// includes the argument name, its type, the address and access qualifiers used.
static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
CodeGenModule &CGM, llvm::LLVMContext &Context,
- SmallVector<llvm::Metadata *, 5> &kernelMDArgs,
CGBuilderTy &Builder, ASTContext &ASTCtx) {
// Create MDNodes that represent the kernel arg metadata.
// Each MDNode is a list in the form of "key", N number of values which is
@@ -444,28 +468,21 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
// MDNode for the kernel argument address space qualifiers.
SmallVector<llvm::Metadata *, 8> addressQuals;
- addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space"));
// MDNode for the kernel argument access qualifiers (images only).
SmallVector<llvm::Metadata *, 8> accessQuals;
- accessQuals.push_back(llvm::MDString::get(Context, "kernel_arg_access_qual"));
// MDNode for the kernel argument type names.
SmallVector<llvm::Metadata *, 8> argTypeNames;
- argTypeNames.push_back(llvm::MDString::get(Context, "kernel_arg_type"));
// MDNode for the kernel argument base type names.
SmallVector<llvm::Metadata *, 8> argBaseTypeNames;
- argBaseTypeNames.push_back(
- llvm::MDString::get(Context, "kernel_arg_base_type"));
// MDNode for the kernel argument type qualifiers.
SmallVector<llvm::Metadata *, 8> argTypeQuals;
- argTypeQuals.push_back(llvm::MDString::get(Context, "kernel_arg_type_qual"));
// MDNode for the kernel argument names.
SmallVector<llvm::Metadata *, 8> argNames;
- argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name"));
for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
const ParmVarDecl *parm = FD->getParamDecl(i);
@@ -477,7 +494,7 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
// Get address qualifier.
addressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(
- ASTCtx.getTargetAddressSpace(pointeeTy.getAddressSpace()))));
+ ArgInfoAddressSpace(pointeeTy.getAddressSpace()))));
// Get argument type name.
std::string typeName =
@@ -514,8 +531,7 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
uint32_t AddrSpc = 0;
bool isPipe = ty->isPipeType();
if (ty->isImageType() || isPipe)
- AddrSpc =
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+ AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global);
addressQuals.push_back(
llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc)));
@@ -523,7 +539,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
// Get argument type name.
std::string typeName;
if (isPipe)
- typeName = cast<PipeType>(ty)->getElementType().getAsString(Policy);
+ typeName = ty.getCanonicalType()->getAs<PipeType>()->getElementType()
+ .getAsString(Policy);
else
typeName = ty.getUnqualifiedType().getAsString(Policy);
@@ -536,8 +553,9 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
std::string baseTypeName;
if (isPipe)
- baseTypeName =
- cast<PipeType>(ty)->getElementType().getCanonicalType().getAsString(Policy);
+ baseTypeName = ty.getCanonicalType()->getAs<PipeType>()
+ ->getElementType().getCanonicalType()
+ .getAsString(Policy);
else
baseTypeName =
ty.getUnqualifiedType().getCanonicalType().getAsString(Policy);
@@ -561,15 +579,14 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals));
// Get image and pipe access qualifier:
- // FIXME: now image and pipe share the same access qualifier maybe we can
- // refine it to OpenCL access qualifier and also handle write_read
if (ty->isImageType()|| ty->isPipeType()) {
- const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>();
+ const OpenCLAccessAttr *A = parm->getAttr<OpenCLAccessAttr>();
if (A && A->isWriteOnly())
accessQuals.push_back(llvm::MDString::get(Context, "write_only"));
+ else if (A && A->isReadWrite())
+ accessQuals.push_back(llvm::MDString::get(Context, "read_write"));
else
accessQuals.push_back(llvm::MDString::get(Context, "read_only"));
- // FIXME: what about read_write?
} else
accessQuals.push_back(llvm::MDString::get(Context, "none"));
@@ -577,13 +594,19 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
argNames.push_back(llvm::MDString::get(Context, parm->getName()));
}
- kernelMDArgs.push_back(llvm::MDNode::get(Context, addressQuals));
- kernelMDArgs.push_back(llvm::MDNode::get(Context, accessQuals));
- kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeNames));
- kernelMDArgs.push_back(llvm::MDNode::get(Context, argBaseTypeNames));
- kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeQuals));
+ Fn->setMetadata("kernel_arg_addr_space",
+ llvm::MDNode::get(Context, addressQuals));
+ Fn->setMetadata("kernel_arg_access_qual",
+ llvm::MDNode::get(Context, accessQuals));
+ Fn->setMetadata("kernel_arg_type",
+ llvm::MDNode::get(Context, argTypeNames));
+ Fn->setMetadata("kernel_arg_base_type",
+ llvm::MDNode::get(Context, argBaseTypeNames));
+ Fn->setMetadata("kernel_arg_type_qual",
+ llvm::MDNode::get(Context, argTypeQuals));
if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
- kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames));
+ Fn->setMetadata("kernel_arg_name",
+ llvm::MDNode::get(Context, argNames));
}
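
With this hunk the argument qualifiers become named metadata on each kernel function instead of operands of a single module-level !opencl.kernels node. A hedged sketch of the consumer side; argAddrSpace is a hypothetical helper, but the getMetadata/setMetadata pairing is the plain LLVM GlobalObject API:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"

// Reads back the node stored by Fn->setMetadata("kernel_arg_addr_space", ...)
// above; returns 0 (private) if the function carries no such metadata.
static unsigned argAddrSpace(const llvm::Function *F, unsigned ArgNo) {
  if (llvm::MDNode *MD = F->getMetadata("kernel_arg_addr_space"))
    if (auto *CI = llvm::mdconst::dyn_extract<llvm::ConstantInt>(
            MD->getOperand(ArgNo)))
      return static_cast<unsigned>(CI->getZExtValue());
  return 0;
}
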
void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
@@ -594,11 +617,7 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::LLVMContext &Context = getLLVMContext();
- SmallVector<llvm::Metadata *, 5> kernelMDArgs;
- kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(Fn));
-
- GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs, Builder,
- getContext());
+ GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext());
if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
QualType hintQTy = A->getTypeHint();
@@ -607,37 +626,29 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
hintQTy->isSignedIntegerType() ||
(hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
llvm::Metadata *attrMDArgs[] = {
- llvm::MDString::get(Context, "vec_type_hint"),
llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
CGM.getTypes().ConvertType(A->getTypeHint()))),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
llvm::IntegerType::get(Context, 32),
llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
- kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
+ Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs));
}
if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
llvm::Metadata *attrMDArgs[] = {
- llvm::MDString::get(Context, "work_group_size_hint"),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
- kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
+ Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs));
}
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
llvm::Metadata *attrMDArgs[] = {
- llvm::MDString::get(Context, "reqd_work_group_size"),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
- kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
+ Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
}
-
- llvm::MDNode *kernelMDNode = llvm::MDNode::get(Context, kernelMDArgs);
- llvm::NamedMDNode *OpenCLKernelMetadata =
- CGM.getModule().getOrInsertNamedMetadata("opencl.kernels");
- OpenCLKernelMetadata->addOperand(kernelMDNode);
}
/// Determine whether the function F ends with a return stmt.
@@ -670,6 +681,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
DidCallStackSave = false;
CurCodeDecl = D;
+ if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D))
+ if (FD->usesSEHTry())
+ CurSEHParent = FD;
CurFuncDecl = (D ? D->getNonClosureContext() : nullptr);
FnRetTy = RetTy;
CurFn = Fn;
@@ -695,20 +709,46 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (SanOpts.has(SanitizerKind::SafeStack))
Fn->addFnAttr(llvm::Attribute::SafeStack);
+ // Apply xray attributes to the function (as a string, for now)
+ if (D && ShouldXRayInstrumentFunction()) {
+ if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
+ if (XRayAttr->alwaysXRayInstrument())
+ Fn->addFnAttr("function-instrument", "xray-always");
+ if (XRayAttr->neverXRayInstrument())
+ Fn->addFnAttr("function-instrument", "xray-never");
+ } else {
+ Fn->addFnAttr(
+ "xray-instruction-threshold",
+ llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
+ }
+ }
+
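
A hedged source-level illustration of what the branch above keys on; the attribute spellings follow the XRay support patches (treat them as assumptions here), and compiling with -fxray-instrument makes ShouldXRayInstrumentFunction() return true:

[[clang::xray_always_instrument]] void always_sled() {}  // "function-instrument"="xray-always"
[[clang::xray_never_instrument]] void never_sled() {}    // "function-instrument"="xray-never"
void maybe_sled() {}  // unattributed: gets "xray-instruction-threshold"

Both string attributes land on the llvm::Function and are consumed later by the XRay instrumentation pass.
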
// Pass inline keyword to optimizer if it appears explicitly on any
// declaration. Also, in the case of -fno-inline attach NoInline
- // attribute to all function that are not marked AlwaysInline.
+ // attribute to all functions that are not marked AlwaysInline, or
+ // to all functions that are not marked inline or implicitly inline
+ // in the case of -finline-hint-functions.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
- if (!CGM.getCodeGenOpts().NoInline) {
+ const CodeGenOptions& CodeGenOpts = CGM.getCodeGenOpts();
+ if (!CodeGenOpts.NoInline) {
for (auto RI : FD->redecls())
if (RI->isInlineSpecified()) {
Fn->addFnAttr(llvm::Attribute::InlineHint);
break;
}
+ if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining &&
+ !FD->isInlined() && !Fn->hasFnAttribute(llvm::Attribute::InlineHint))
+ Fn->addFnAttr(llvm::Attribute::NoInline);
} else if (!FD->hasAttr<AlwaysInlineAttr>())
Fn->addFnAttr(llvm::Attribute::NoInline);
+ if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
+ CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn);
}
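
A hedged illustration of the -finline-hint-functions branch above (OnlyHintInlining): only functions that are inline, explicitly or implicitly, stay eligible; everything else is pinned with NoInline:

inline int hinted(int x) { return x + 1; }  // keeps InlineHint, may be inlined
int plain(int x) { return x + 1; }          // receives llvm::Attribute::NoInline
struct W {
  int member() { return 0; }  // defined in-class: implicitly inline, not pinned
};
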
+ // Add no-jump-tables value.
+ Fn->addFnAttr("no-jump-tables",
+ llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables));
+
if (getLangOpts().OpenCL) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
@@ -745,9 +785,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// later. Don't create this with the builder, because we don't want it
// folded.
llvm::Value *Undef = llvm::UndefValue::get(Int32Ty);
- AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "", EntryBB);
- if (Builder.isNamePreserving())
- AllocaInsertPt->setName("allocapt");
+ AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "allocapt", EntryBB);
ReturnBlock = getJumpDestInCurrentScope("return");
@@ -755,15 +793,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// Emit subprogram debug descriptor.
if (CGDebugInfo *DI = getDebugInfo()) {
+ // Reconstruct the type from the argument list so that implicit parameters,
+ // such as 'this' and 'vtt', show up in the debug info. Preserve the calling
+ // convention.
+ CallingConv CC = CallingConv::CC_C;
+ if (auto *FD = dyn_cast_or_null<FunctionDecl>(D))
+ if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>())
+ CC = SrcFnTy->getCallConv();
SmallVector<QualType, 16> ArgTypes;
- for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
- i != e; ++i) {
- ArgTypes.push_back((*i)->getType());
- }
-
- QualType FnType =
- getContext().getFunctionType(RetTy, ArgTypes,
- FunctionProtoType::ExtProtoInfo());
+ for (const VarDecl *VD : Args)
+ ArgTypes.push_back(VD->getType());
+ QualType FnType = getContext().getFunctionType(
+ RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder);
}
@@ -823,10 +864,22 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
MD->getParent()->getCaptureFields(LambdaCaptureFields,
LambdaThisCaptureField);
if (LambdaThisCaptureField) {
- // If this lambda captures this, load it.
- LValue ThisLValue = EmitLValueForLambdaField(LambdaThisCaptureField);
- CXXThisValue = EmitLoadOfLValue(ThisLValue,
- SourceLocation()).getScalarVal();
+ // If the lambda captures the object referred to by '*this' - either by
+ // value or by reference, make sure CXXThisValue points to the correct
+ // object.
+
+ // Get the lvalue for the field (which is a copy of the enclosing object
+ // or contains the address of the enclosing object).
+ LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField);
+ if (!LambdaThisCaptureField->getType()->isPointerType()) {
+ // If the enclosing object was captured by value, just use its address.
+ CXXThisValue = ThisFieldLValue.getAddress().getPointer();
+ } else {
+ // Load the lvalue pointed to by the field, since '*this' was captured
+ // by reference.
+ CXXThisValue =
+ EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal();
+ }
}
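
A hedged C++17 illustration of the two capture shapes distinguished above:

struct S {
  int x = 42;
  // [*this] copies the enclosing object into the closure: the capture field
  // has class type, so CXXThisValue is simply the field's own address.
  auto byCopy() { return [*this] { return x; }; }
  // [this] stores a pointer in the closure: the field must be loaded first.
  auto byRef() { return [this] { return x; }; }
};
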
for (auto *FD : MD->getParent()->fields()) {
if (FD->hasCapturedVLAType()) {
@@ -883,7 +936,7 @@ void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
const Stmt *S) {
llvm::BasicBlock *SkipCountBB = nullptr;
- if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) {
+ if (HaveInsertPoint() && CGM.getCodeGenOpts().hasProfileClangInstr()) {
// When instrumenting for profiling, the fallthrough to certain
// statements needs to skip over the instrumentation code so that we
// get an accurate count.
@@ -904,7 +957,7 @@ void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
static void TryMarkNoThrow(llvm::Function *F) {
// LLVM treats 'nounwind' on a function as part of the type, so we
// can't do this on functions whose definitions can be replaced (interposed).
- if (F->mayBeOverridden()) return;
+ if (F->isInterposable()) return;
for (llvm::BasicBlock &BB : *F)
for (llvm::Instruction &I : BB)
@@ -914,18 +967,11 @@ static void TryMarkNoThrow(llvm::Function *F) {
F->setDoesNotThrow();
}
-void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
- const CGFunctionInfo &FnInfo) {
+QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
+ FunctionArgList &Args) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
-
- // Check if we should generate debug info for this function.
- if (FD->hasAttr<NoDebugAttr>())
- DebugInfo = nullptr; // disable debug info indefinitely for this function
-
- FunctionArgList Args;
QualType ResTy = FD->getReturnType();
- CurGD = GD;
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
if (MD && MD->isInstance()) {
if (CGM.getCXXABI().HasThisReturn(GD))
@@ -935,22 +981,48 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
CGM.getCXXABI().buildThisParam(*this, Args);
}
- for (auto *Param : FD->params()) {
- Args.push_back(Param);
- if (!Param->hasAttr<PassObjectSizeAttr>())
- continue;
-
- IdentifierInfo *NoID = nullptr;
- auto *Implicit = ImplicitParamDecl::Create(
- getContext(), Param->getDeclContext(), Param->getLocation(), NoID,
- getContext().getSizeType());
- SizeArguments[Param] = Implicit;
- Args.push_back(Implicit);
+ // The base version of an inheriting constructor whose constructed base is a
+ // virtual base is not passed any arguments (because it doesn't actually call
+ // the inherited constructor).
+ bool PassedParams = true;
+ if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD))
+ if (auto Inherited = CD->getInheritedConstructor())
+ PassedParams =
+ getTypes().inheritingCtorHasParams(Inherited, GD.getCtorType());
+
+ if (PassedParams) {
+ for (auto *Param : FD->parameters()) {
+ Args.push_back(Param);
+ if (!Param->hasAttr<PassObjectSizeAttr>())
+ continue;
+
+ IdentifierInfo *NoID = nullptr;
+ auto *Implicit = ImplicitParamDecl::Create(
+ getContext(), Param->getDeclContext(), Param->getLocation(), NoID,
+ getContext().getSizeType());
+ SizeArguments[Param] = Implicit;
+ Args.push_back(Implicit);
+ }
}
if (MD && (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)))
CGM.getCXXABI().addImplicitStructorParams(*this, ResTy, Args);
+ return ResTy;
+}
+
+void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
+ const CGFunctionInfo &FnInfo) {
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+ CurGD = GD;
+
+ FunctionArgList Args;
+ QualType ResTy = BuildFunctionArgList(GD, Args);
+
+ // Check if we should generate debug info for this function.
+ if (FD->hasAttr<NoDebugAttr>())
+ DebugInfo = nullptr; // disable debug info indefinitely for this function
+
SourceRange BodyRange;
if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange();
CurEHLocation = BodyRange.getEnd();
@@ -1088,14 +1160,37 @@ bool CodeGenFunction::containsBreak(const Stmt *S) {
return false;
}
+bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) {
+ if (!S) return false;
+
+ // Some statement kinds add a scope and thus never add a decl to the current
+ // scope. Note, this list is longer than the list of statements that might
+ // have an unscoped decl nested within them, but this way is conservatively
+ // correct even if more statement kinds are added.
+ if (isa<IfStmt>(S) || isa<SwitchStmt>(S) || isa<WhileStmt>(S) ||
+ isa<DoStmt>(S) || isa<ForStmt>(S) || isa<CompoundStmt>(S) ||
+ isa<CXXForRangeStmt>(S) || isa<CXXTryStmt>(S) ||
+ isa<ObjCForCollectionStmt>(S) || isa<ObjCAtTryStmt>(S))
+ return false;
+
+ if (isa<DeclStmt>(S))
+ return true;
+
+ for (const Stmt *SubStmt : S->children())
+ if (mightAddDeclToScope(SubStmt))
+ return true;
+
+ return false;
+}
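
A hedged illustration of the distinction drawn above: a (possibly labelled) DeclStmt introduces a name into the current scope, while statements that open their own scope cannot:

void f() {
lbl:                       // a labelled DeclStmt still adds 'n' to this scope,
  int n = 0;               // so mightAddDeclToScope returns true for it
  { int m = 0; (void)m; }  // a CompoundStmt opens its own scope: returns false
  (void)n;
}
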
/// ConstantFoldsToSimpleInteger - If the specified expression does not fold
/// to a constant, or if it does but contains a label, return false. If it
/// constant folds return true and set the boolean result in Result.
bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
- bool &ResultBool) {
+ bool &ResultBool,
+ bool AllowLabels) {
llvm::APSInt ResultInt;
- if (!ConstantFoldsToSimpleInteger(Cond, ResultInt))
+ if (!ConstantFoldsToSimpleInteger(Cond, ResultInt, AllowLabels))
return false;
ResultBool = ResultInt.getBoolValue();
@@ -1105,15 +1200,16 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
/// ConstantFoldsToSimpleInteger - If the specified expression does not fold
/// to a constant, or if it does but contains a label, return false. If it
/// constant folds return true and set the folded value.
-bool CodeGenFunction::
-ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) {
+bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
+ llvm::APSInt &ResultInt,
+ bool AllowLabels) {
// FIXME: Rename and handle conversion of other evaluatable things
// to bool.
llvm::APSInt Int;
if (!Cond->EvaluateAsInt(Int, getContext()))
return false; // Not foldable, not integer or not fully evaluatable.
- if (CodeGenFunction::ContainsLabel(Cond))
+ if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond))
return false; // Contains a label.
ResultInt = Int;
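
A hedged sketch of why labels block folding unless AllowLabels is set: even when the condition folds to a constant, the discarded arm may contain a label that is still a goto target, so the arm cannot simply be deleted:

int g(bool again) {
  if (false) {   // the condition folds, yet the arm must survive,
  retry:
    return 1;    // because 'retry' is reachable through the goto below
  }
  if (again)
    goto retry;  // legal: the jump into the block skips no initialization
  return 0;
}

Callers that only want the folded value, not permission to delete the arm, can now pass AllowLabels = true.
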
@@ -1297,15 +1393,12 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
// create metadata that specifies that the branch is unpredictable.
// Don't bother if not optimizing because that metadata would not be used.
llvm::MDNode *Unpredictable = nullptr;
- if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
- if (const CallExpr *Call = dyn_cast<CallExpr>(Cond)) {
- const Decl *TargetDecl = Call->getCalleeDecl();
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
- if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
- llvm::MDBuilder MDHelper(getLLVMContext());
- Unpredictable = MDHelper.createUnpredictable();
- }
- }
+ auto *Call = dyn_cast<CallExpr>(Cond);
+ if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
+ auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
+ if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
+ llvm::MDBuilder MDHelper(getLLVMContext());
+ Unpredictable = MDHelper.createUnpredictable();
}
}
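
A hedged usage sketch for the path above; at -O1 and higher the builtin causes !unpredictable metadata to be attached to the branch:

extern void slow_path();

void dispatch(long x, long threshold) {
  if (__builtin_unpredictable(x > threshold))  // branch marked unpredictable
    slow_path();
}
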
@@ -1764,7 +1857,7 @@ void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E,
llvm::Constant *Init) {
assert (Init && "Invalid DeclRefExpr initializer!");
if (CGDebugInfo *Dbg = getDebugInfo())
- if (CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo)
+ if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo)
Dbg->EmitGlobalVariable(E->getDecl(), Init);
}
@@ -1860,26 +1953,14 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I,
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I);
}
-template <bool PreserveNames>
-void CGBuilderInserter<PreserveNames>::InsertHelper(
+void CGBuilderInserter::InsertHelper(
llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB,
llvm::BasicBlock::iterator InsertPt) const {
- llvm::IRBuilderDefaultInserter<PreserveNames>::InsertHelper(I, Name, BB,
- InsertPt);
+ llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
if (CGF)
CGF->InsertHelper(I, Name, BB, InsertPt);
}
-#ifdef NDEBUG
-#define PreserveNames false
-#else
-#define PreserveNames true
-#endif
-template void CGBuilderInserter<PreserveNames>::InsertHelper(
- llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB,
- llvm::BasicBlock::iterator InsertPt) const;
-#undef PreserveNames
-
static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures,
CodeGenModule &CGM, const FunctionDecl *FD,
std::string &FirstMissing) {
@@ -1956,3 +2037,12 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
<< FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
}
}
+
+void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
+ if (!CGM.getCodeGenOpts().SanitizeStats)
+ return;
+
+ llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
+ IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
+ CGM.getSanStats().create(IRB, SSK);
+}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
index 4803b13f58d9..fb19a2657c9c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
@@ -36,6 +36,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/SanitizerStats.h"
namespace llvm {
class BasicBlock;
@@ -67,7 +68,6 @@ class ObjCMethodDecl;
class ObjCImplementationDecl;
class ObjCPropertyImplDecl;
class TargetInfo;
-class TargetCodeGenInfo;
class VarDecl;
class ObjCForCollectionStmt;
class ObjCAtTryStmt;
@@ -85,6 +85,9 @@ class BlockByrefHelpers;
class BlockByrefInfo;
class BlockFlags;
class BlockFieldFlags;
+class RegionCodeGenTy;
+class TargetCodeGenInfo;
+struct OMPTaskDataTy;
/// The kind of evaluation to perform on values of a particular
/// type. Basically, is the code in CGExprScalar, CGExprComplex, or
@@ -188,6 +191,8 @@ public:
CXXThisFieldDecl = *Field;
else if (I->capturesVariable())
CaptureFields[I->getCapturedVar()] = *Field;
+ else if (I->capturesVariableByCopy())
+ CaptureFields[I->getCapturedVar()] = *Field;
}
}
@@ -275,6 +280,8 @@ public:
/// potentially set the return value.
bool SawAsmBlock;
+ const FunctionDecl *CurSEHParent = nullptr;
+
/// True if the current function is an outlined SEH helper. This can be a
/// finally block or filter expression.
bool IsOutlinedSEHHelper;
@@ -295,6 +302,19 @@ public:
llvm::Instruction *CurrentFuncletPad = nullptr;
+ class CallLifetimeEnd final : public EHScopeStack::Cleanup {
+ llvm::Value *Addr;
+ llvm::Value *Size;
+
+ public:
+ CallLifetimeEnd(Address addr, llvm::Value *size)
+ : Addr(addr.getPointer()), Size(size) {}
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ CGF.EmitLifetimeEnd(Size, Addr);
+ }
+ };
+
/// Header for data within LifetimeExtendedCleanupStack.
struct LifetimeExtendedCleanupHeader {
/// The size of the following cleanup object.
@@ -637,6 +657,11 @@ public:
ForceCleanup();
}
+ /// Checks if the global variable is captured in the current function.
+ bool isGlobalVarCaptured(const VarDecl *VD) const {
+ return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
+ }
+
private:
/// Copy all the entries in the source map over the corresponding
/// entries in the destination, which must exist.
@@ -940,6 +965,94 @@ private:
};
SmallVector<BreakContinue, 8> BreakContinueStack;
+ /// Handles cancellation exit points in OpenMP-related constructs.
+ class OpenMPCancelExitStack {
+ /// Tracks the cancellation exit point and the join point for cancel-related
+ /// and normal exits.
+ struct CancelExit {
+ CancelExit() = default;
+ CancelExit(OpenMPDirectiveKind Kind, JumpDest ExitBlock,
+ JumpDest ContBlock)
+ : Kind(Kind), ExitBlock(ExitBlock), ContBlock(ContBlock) {}
+ OpenMPDirectiveKind Kind = OMPD_unknown;
+ /// true if the exit block has been emitted already by the special
+ /// emitExit() call, false if the default codegen is used.
+ bool HasBeenEmitted = false;
+ JumpDest ExitBlock;
+ JumpDest ContBlock;
+ };
+
+ SmallVector<CancelExit, 8> Stack;
+
+ public:
+ OpenMPCancelExitStack() : Stack(1) {}
+ ~OpenMPCancelExitStack() = default;
+ /// Fetches the exit block for the current OpenMP construct.
+ JumpDest getExitBlock() const { return Stack.back().ExitBlock; }
+ /// Emits the exit block with the special codegen procedure specific to the
+ /// related OpenMP construct, then emits code for the normal construct cleanup.
+ void emitExit(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
+ const llvm::function_ref<void(CodeGenFunction &)> &CodeGen) {
+ if (Stack.back().Kind == Kind && getExitBlock().isValid()) {
+ assert(CGF.getOMPCancelDestination(Kind).isValid());
+ assert(CGF.HaveInsertPoint());
+ assert(!Stack.back().HasBeenEmitted);
+ auto IP = CGF.Builder.saveAndClearIP();
+ CGF.EmitBlock(Stack.back().ExitBlock.getBlock());
+ CodeGen(CGF);
+ CGF.EmitBranchThroughCleanup(Stack.back().ContBlock);
+ CGF.Builder.restoreIP(IP);
+ Stack.back().HasBeenEmitted = true;
+ }
+ CodeGen(CGF);
+ }
+ /// Enter the cancel supporting \a Kind construct.
+ /// \param Kind OpenMP directive that supports cancel constructs.
+ /// \param HasCancel true if the construct has an inner cancel directive,
+ /// false otherwise.
+ void enter(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, bool HasCancel) {
+ Stack.push_back({Kind,
+ HasCancel ? CGF.getJumpDestInCurrentScope("cancel.exit")
+ : JumpDest(),
+ HasCancel ? CGF.getJumpDestInCurrentScope("cancel.cont")
+ : JumpDest()});
+ }
+ /// Emits the default exit point for the cancel construct (if the special one
+ /// has not been used) plus the join point for cancel/normal exits.
+ void exit(CodeGenFunction &CGF) {
+ if (getExitBlock().isValid()) {
+ assert(CGF.getOMPCancelDestination(Stack.back().Kind).isValid());
+ bool HaveIP = CGF.HaveInsertPoint();
+ if (!Stack.back().HasBeenEmitted) {
+ if (HaveIP)
+ CGF.EmitBranchThroughCleanup(Stack.back().ContBlock);
+ CGF.EmitBlock(Stack.back().ExitBlock.getBlock());
+ CGF.EmitBranchThroughCleanup(Stack.back().ContBlock);
+ }
+ CGF.EmitBlock(Stack.back().ContBlock.getBlock());
+ if (!HaveIP) {
+ CGF.Builder.CreateUnreachable();
+ CGF.Builder.ClearInsertionPoint();
+ }
+ }
+ Stack.pop_back();
+ }
+ };
+ OpenMPCancelExitStack OMPCancelStack;
+
+ /// Controls insertion of cancellation exit blocks in worksharing constructs.
+ class OMPCancelStackRAII {
+ CodeGenFunction &CGF;
+
+ public:
+ OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
+ bool HasCancel)
+ : CGF(CGF) {
+ CGF.OMPCancelStack.enter(CGF, Kind, HasCancel);
+ }
+ ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
+ };
+
CodeGenPGO PGO;
/// Calculate branch weights appropriate for PGO data
@@ -951,7 +1064,7 @@ private:
public:
/// Increment the profiler's counter for the given statement.
void incrementProfileCounter(const Stmt *S) {
- if (CGM.getCodeGenOpts().ProfileInstrGenerate)
+ if (CGM.getCodeGenOpts().hasProfileClangInstr())
PGO.emitCounterIncrement(Builder, S);
PGO.setCurrentStmt(S);
}
@@ -1053,6 +1166,61 @@ public:
CharUnits OldCXXThisAlignment;
};
+ class InlinedInheritingConstructorScope {
+ public:
+ InlinedInheritingConstructorScope(CodeGenFunction &CGF, GlobalDecl GD)
+ : CGF(CGF), OldCurGD(CGF.CurGD), OldCurFuncDecl(CGF.CurFuncDecl),
+ OldCurCodeDecl(CGF.CurCodeDecl),
+ OldCXXABIThisDecl(CGF.CXXABIThisDecl),
+ OldCXXABIThisValue(CGF.CXXABIThisValue),
+ OldCXXThisValue(CGF.CXXThisValue),
+ OldCXXABIThisAlignment(CGF.CXXABIThisAlignment),
+ OldCXXThisAlignment(CGF.CXXThisAlignment),
+ OldReturnValue(CGF.ReturnValue), OldFnRetTy(CGF.FnRetTy),
+ OldCXXInheritedCtorInitExprArgs(
+ std::move(CGF.CXXInheritedCtorInitExprArgs)) {
+ CGF.CurGD = GD;
+ CGF.CurFuncDecl = CGF.CurCodeDecl =
+ cast<CXXConstructorDecl>(GD.getDecl());
+ CGF.CXXABIThisDecl = nullptr;
+ CGF.CXXABIThisValue = nullptr;
+ CGF.CXXThisValue = nullptr;
+ CGF.CXXABIThisAlignment = CharUnits();
+ CGF.CXXThisAlignment = CharUnits();
+ CGF.ReturnValue = Address::invalid();
+ CGF.FnRetTy = QualType();
+ CGF.CXXInheritedCtorInitExprArgs.clear();
+ }
+ ~InlinedInheritingConstructorScope() {
+ CGF.CurGD = OldCurGD;
+ CGF.CurFuncDecl = OldCurFuncDecl;
+ CGF.CurCodeDecl = OldCurCodeDecl;
+ CGF.CXXABIThisDecl = OldCXXABIThisDecl;
+ CGF.CXXABIThisValue = OldCXXABIThisValue;
+ CGF.CXXThisValue = OldCXXThisValue;
+ CGF.CXXABIThisAlignment = OldCXXABIThisAlignment;
+ CGF.CXXThisAlignment = OldCXXThisAlignment;
+ CGF.ReturnValue = OldReturnValue;
+ CGF.FnRetTy = OldFnRetTy;
+ CGF.CXXInheritedCtorInitExprArgs =
+ std::move(OldCXXInheritedCtorInitExprArgs);
+ }
+
+ private:
+ CodeGenFunction &CGF;
+ GlobalDecl OldCurGD;
+ const Decl *OldCurFuncDecl;
+ const Decl *OldCurCodeDecl;
+ ImplicitParamDecl *OldCXXABIThisDecl;
+ llvm::Value *OldCXXABIThisValue;
+ llvm::Value *OldCXXThisValue;
+ CharUnits OldCXXABIThisAlignment;
+ CharUnits OldCXXThisAlignment;
+ Address OldReturnValue;
+ QualType OldFnRetTy;
+ CallArgList OldCXXInheritedCtorInitExprArgs;
+ };
+
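
InlinedInheritingConstructorScope above is a save-everything/restore-everything RAII guard. A minimal standalone sketch of the same pattern, with hypothetical names; note the std::move on entry and exit, mirroring how the scope above moves CXXInheritedCtorInitExprArgs instead of copying it:

#include <string>
#include <utility>

template <typename T> class ScopedSave {
  T &Slot;
  T Saved;
public:
  explicit ScopedSave(T &S) : Slot(S), Saved(std::move(S)) {}
  ~ScopedSave() { Slot = std::move(Saved); }  // caller state restored here
};

void emitInlinedBody(std::string &CurFnName) {
  ScopedSave<std::string> Guard(CurFnName);  // stash the caller's state
  CurFnName = "inlined.ctor";                // fresh state for the body
}
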
private:
/// CXXThisDecl - When generating code for a C++ member function,
/// this will hold the implicit 'this' declaration.
@@ -1066,6 +1234,10 @@ private:
/// this expression.
Address CXXDefaultInitExprThis = Address::invalid();
+ /// The values of function arguments to use when evaluating
+ /// CXXInheritedCtorInitExprs within this context.
+ CallArgList CXXInheritedCtorInitExprArgs;
+
/// CXXStructorImplicitParamDecl - When generating code for a constructor or
/// destructor, this will hold the implicit argument (e.g. VTT).
ImplicitParamDecl *CXXStructorImplicitParamDecl;
@@ -1149,10 +1321,7 @@ public:
return getInvokeDestImpl();
}
- bool currentFunctionUsesSEHTry() const {
- const auto *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
- return FD && FD->usesSEHTry();
- }
+ bool currentFunctionUsesSEHTry() const { return CurSEHParent != nullptr; }
const TargetInfo &getTarget() const { return Target; }
llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
@@ -1292,6 +1461,8 @@ public:
const BlockByrefInfo &getBlockByrefInfo(const VarDecl *var);
+ QualType BuildFunctionArgList(GlobalDecl GD, FunctionArgList &Args);
+
void GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo);
/// \brief Emit code for the start of a function.
@@ -1388,6 +1559,7 @@ public:
CFITCK_NVCall,
CFITCK_DerivedCast,
CFITCK_UnrelatedCast,
+ CFITCK_ICall,
};
/// \brief Derived is the presumed address of an object of type T after a
@@ -1399,14 +1571,29 @@ public:
/// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable.
/// If vptr CFI is enabled, emit a check that VTable is valid.
- void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable,
+ void EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable,
CFITypeCheckKind TCK, SourceLocation Loc);
/// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for
- /// RD using llvm.bitset.test.
+ /// RD using llvm.type.test.
void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable,
CFITypeCheckKind TCK, SourceLocation Loc);
+ /// If whole-program virtual table optimization is enabled, emit an assumption
+ /// that VTable is a member of RD's type identifier. Or, if vptr CFI is
+ /// enabled, emit a check that VTable is a member of RD's type identifier.
+ void EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
+ llvm::Value *VTable, SourceLocation Loc);
+
+ /// Returns whether we should perform a type checked load when loading a
+ /// virtual function for virtual calls to members of RD. This is generally
+ /// true when both vcall CFI and whole-program-vtables are enabled.
+ bool ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD);
+
+ /// Emit a type checked load from the given vtable.
+ llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable,
+ uint64_t VTableByteOffset);
+
/// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given
/// expr can be devirtualized.
bool CanDevirtualizeMemberFunctionCall(const Expr *Base,
@@ -1422,6 +1609,10 @@ public:
/// instrumented with __cyg_profile_func_* calls
bool ShouldInstrumentFunction();
+ /// ShouldXRayInstrumentFunction - Return true if the current function
+ /// should be instrumented with XRay nop sleds.
+ bool ShouldXRayInstrumentFunction() const;
+
/// EmitFunctionInstrumentation - Emit LLVM code to call the specified
/// instrumentation function with the current function and the call site, if
/// function instrumentation is enabled.
@@ -1572,6 +1763,10 @@ public:
AlignmentSource *Source = nullptr);
LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy);
+ Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy,
+ AlignmentSource *Source = nullptr);
+ LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
+
/// CreateTempAlloca - This creates an alloca and inserts it into the entry
/// block. The caller is responsible for setting an appropriate alignment on
/// the alloca.
@@ -1845,10 +2040,32 @@ public:
void EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor,
const FunctionArgList &Args);
+ /// Emit a call to an inheriting constructor (that is, one that invokes a
+ /// constructor inherited from a base class) by inlining its definition. This
+ /// is necessary if the ABI does not support forwarding the arguments to the
+ /// base class constructor (because they're variadic or similar).
+ void EmitInlinedInheritingCXXConstructorCall(const CXXConstructorDecl *Ctor,
+ CXXCtorType CtorType,
+ bool ForVirtualBase,
+ bool Delegating,
+ CallArgList &Args);
+
+ /// Emit a call to a constructor inherited from a base class, passing the
+ /// current constructor's arguments along unmodified (without even making
+ /// a copy).
+ void EmitInheritedCXXConstructorCall(const CXXConstructorDecl *D,
+ bool ForVirtualBase, Address This,
+ bool InheritedFromVBase,
+ const CXXInheritedCtorInitExpr *E);
+
void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating,
Address This, const CXXConstructExpr *E);
+ void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
+ bool ForVirtualBase, bool Delegating,
+ Address This, CallArgList &Args);
+
/// Emit assumption load for all bases. Must be called only on the
/// most-derived class and not under construction of the object.
void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This);
@@ -1861,7 +2078,7 @@ public:
const CXXConstructExpr *E);
void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D,
- const ConstantArrayType *ArrayTy,
+ const ArrayType *ArrayTy,
Address ArrayPtr,
const CXXConstructExpr *E,
bool ZeroInitialization = false);
@@ -2204,6 +2421,8 @@ public:
void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
ArrayRef<const Attr *> Attrs = None);
+ /// Returns the calculated size of the specified type.
+ llvm::Value *getTypeSize(QualType Ty);
LValue InitCapturedStruct(const CapturedStmt &S);
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
@@ -2294,7 +2513,17 @@ public:
/// it is the last iteration of the loop code in associated directive, or to
/// 'i1 false' otherwise. If this item is nullptr, no final check is required.
void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D,
+ bool NoFinals,
llvm::Value *IsLastIterCond = nullptr);
+ /// Emit initial code for linear clauses.
+ void EmitOMPLinearClause(const OMPLoopDirective &D,
+ CodeGenFunction::OMPPrivateScope &PrivateScope);
+ /// Emit final code for linear clauses.
+ /// \param CondGen Optional conditional code for final part of codegen for
+ /// linear clause.
+ void EmitOMPLinearClauseFinal(
+ const OMPLoopDirective &D,
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
/// \brief Emit initial code for reduction variables. Creates reduction copies
/// and initializes them with the values according to OpenMP standard.
///
@@ -2315,6 +2544,14 @@ public:
/// \param D Directive (possibly) with the 'linear' clause.
void EmitOMPLinearClauseInit(const OMPLoopDirective &D);
+ typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/,
+ llvm::Value * /*OutlinedFn*/,
+ const OMPTaskDataTy & /*Data*/)>
+ TaskGenTy;
+ void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
+ const RegionCodeGenTy &BodyGen,
+ const TaskGenTy &TaskGen, OMPTaskDataTy &Data);
+
void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
void EmitOMPForDirective(const OMPForDirective &S);
@@ -2337,14 +2574,36 @@ public:
void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
void EmitOMPTargetDirective(const OMPTargetDirective &S);
void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S);
+ void EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective &S);
+ void EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective &S);
+ void EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective &S);
+ void EmitOMPTargetParallelDirective(const OMPTargetParallelDirective &S);
+ void
+ EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective &S);
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
void
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S);
void EmitOMPCancelDirective(const OMPCancelDirective &S);
+ void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S);
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
-
+ void EmitOMPDistributeLoop(const OMPDistributeDirective &S);
+ void EmitOMPDistributeParallelForDirective(
+ const OMPDistributeParallelForDirective &S);
+ void EmitOMPDistributeParallelForSimdDirective(
+ const OMPDistributeParallelForSimdDirective &S);
+ void EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective &S);
+ void EmitOMPTargetParallelForSimdDirective(
+ const OMPTargetParallelForSimdDirective &S);
+
+ /// Emit outlined function for the target directive.
+ static std::pair<llvm::Function * /*OutlinedFn*/,
+ llvm::Constant * /*OutlinedFnID*/>
+ EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM,
+ const OMPTargetDirective &S,
+ StringRef ParentName,
+ bool IsOffloadEntry);
/// \brief Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
@@ -2362,24 +2621,35 @@ public:
const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen);
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind);
+ /// Emit initial code for loop counters of loop-based directives.
+ void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope);
private:
-
/// Helpers for the OpenMP loop directives.
void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
- void EmitOMPSimdFinal(const OMPLoopDirective &D);
+ void EmitOMPSimdFinal(
+ const OMPLoopDirective &D,
+ const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
/// \brief Emit code for the worksharing loop-based directive.
/// \return true, if this construct has any lastprivate clause, false -
/// otherwise.
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
- void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
+ void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered,
+ const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
+ void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind,
bool IsMonotonic, const OMPLoopDirective &S,
OMPPrivateScope &LoopScope, bool Ordered, Address LB,
Address UB, Address ST, Address IL,
llvm::Value *Chunk);
+ void EmitOMPDistributeOuterLoop(
+ OpenMPDistScheduleClauseKind ScheduleKind,
+ const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
+ Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
/// \brief Emit code for sections directive.
- OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S);
+ void EmitSections(const OMPExecutableDirective &S);
public:
@@ -2430,7 +2700,6 @@ public:
void EmitAtomicInit(Expr *E, LValue lvalue);
bool LValueIsSuitableForInlineAtomic(LValue Src);
- bool typeIsSuitableForInlineAtomic(QualType Ty, bool IsVolatile) const;
RValue EmitAtomicLoad(LValue LV, SourceLocation SL,
AggValueSlot Slot = AggValueSlot::ignored());
@@ -2446,8 +2715,10 @@ public:
std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange(
LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
- llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
- llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+ llvm::AtomicOrdering Success =
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::AtomicOrdering Failure =
+ llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
@@ -2680,11 +2951,10 @@ public:
ReturnValueSlot ReturnValue, llvm::Value *This,
llvm::Value *ImplicitParam,
QualType ImplicitParamTy, const CallExpr *E);
- RValue EmitCXXStructorCall(const CXXMethodDecl *MD, llvm::Value *Callee,
- ReturnValueSlot ReturnValue, llvm::Value *This,
- llvm::Value *ImplicitParam,
- QualType ImplicitParamTy, const CallExpr *E,
- StructorType Type);
+ RValue EmitCXXDestructorCall(const CXXDestructorDecl *DD, llvm::Value *Callee,
+ llvm::Value *This, llvm::Value *ImplicitParam,
+ QualType ImplicitParamTy, const CallExpr *E,
+ StructorType Type);
RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
ReturnValueSlot ReturnValue);
RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE,
@@ -2708,6 +2978,8 @@ public:
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);
+ RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E,
+ ReturnValueSlot ReturnValue);
RValue EmitBuiltinExpr(const FunctionDecl *FD,
unsigned BuiltinID, const CallExpr *E,
@@ -2798,19 +3070,25 @@ public:
llvm::Value *EmitARCAutoreleaseReturnValue(llvm::Value *value);
llvm::Value *EmitARCRetainAutoreleaseReturnValue(llvm::Value *value);
llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value);
+ llvm::Value *EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value);
std::pair<LValue,llvm::Value*>
EmitARCStoreAutoreleasing(const BinaryOperator *e);
std::pair<LValue,llvm::Value*>
EmitARCStoreStrong(const BinaryOperator *e, bool ignored);
+ std::pair<LValue,llvm::Value*>
+ EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored);
llvm::Value *EmitObjCThrowOperand(const Expr *expr);
llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr);
llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr);
llvm::Value *EmitARCExtendBlockObject(const Expr *expr);
+ llvm::Value *EmitARCReclaimReturnedObject(const Expr *e,
+ bool allowUnsafeClaim);
llvm::Value *EmitARCRetainScalarExpr(const Expr *expr);
llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr);
+ llvm::Value *EmitARCUnsafeUnretainedScalarExpr(const Expr *expr);
void EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values);
@@ -2973,17 +3251,23 @@ public:
/// If the statement (recursively) contains a switch or loop with a break
/// inside of it, this is fine.
static bool containsBreak(const Stmt *S);
+
+ /// Determine if the given statement might introduce a declaration into the
+ /// current scope, by being a (possibly-labelled) DeclStmt.
+ static bool mightAddDeclToScope(const Stmt *S);
/// ConstantFoldsToSimpleInteger - If the specified expression does not fold
/// to a constant, or if it does but contains a label, return false. If it
/// constant folds return true and set the boolean result in Result.
- bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result);
+ bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result,
+ bool AllowLabels = false);
/// ConstantFoldsToSimpleInteger - If the specified expression does not fold
/// to a constant, or if it does but contains a label, return false. If it
/// constant folds return true and set the folded value.
- bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result);
-
+ bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result,
+ bool AllowLabels = false);
+
/// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
/// if statement) to the specified blocks. Based on the condition, this might
/// try to simplify the codegen of the conditional based on the branch.
@@ -3013,8 +3297,9 @@ public:
/// \brief Emit a slow path cross-DSO CFI check which calls __cfi_slowpath
  /// if Cond is false.
- void EmitCfiSlowPathCheck(llvm::Value *Cond, llvm::ConstantInt *TypeId,
- llvm::Value *Ptr);
+ void EmitCfiSlowPathCheck(SanitizerMask Kind, llvm::Value *Cond,
+ llvm::ConstantInt *TypeId, llvm::Value *Ptr,
+ ArrayRef<llvm::Constant *> StaticArgs);
/// \brief Create a basic block that will call the trap intrinsic, and emit a
/// conditional branch to it, for the -ftrapv checks.
@@ -3024,6 +3309,9 @@ public:
/// "trap-func-name" if specified.
llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID);
+ /// \brief Emit a cross-DSO CFI failure handling function.
+ void EmitCfiCheckFail();
+
/// \brief Create a check for a function parameter that may potentially be
/// declared as non-null.
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
@@ -3062,7 +3350,7 @@ private:
///
/// \param AI - The first function argument of the expansion.
void ExpandTypeFromArgs(QualType Ty, LValue Dst,
- SmallVectorImpl<llvm::Argument *>::iterator &AI);
+ SmallVectorImpl<llvm::Value *>::iterator &AI);
/// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg
/// Ty, into individual arguments on the provided vector \arg IRCallArgs,
@@ -3189,6 +3477,8 @@ public:
Address EmitPointerWithAlignment(const Expr *Addr,
AlignmentSource *Source = nullptr);
+ void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
+
private:
QualType getVarArgType(const Expr *Arg);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
index 97b166278f81..0161cfb611ca 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp
@@ -20,6 +20,7 @@
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGOpenMPRuntime.h"
+#include "CGOpenMPRuntimeNVPTX.h"
#include "CodeGenFunction.h"
#include "CodeGenPGO.h"
#include "CodeGenTBAA.h"
@@ -86,17 +87,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
: Context(C), LangOpts(C.getLangOpts()), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
- VMContext(M.getContext()), TBAA(nullptr), TheTargetCodeGenInfo(nullptr),
- Types(*this), VTables(*this), ObjCRuntime(nullptr),
- OpenCLRuntime(nullptr), OpenMPRuntime(nullptr), CUDARuntime(nullptr),
- DebugInfo(nullptr), ObjCData(nullptr),
- NoObjCARCExceptionsMetadata(nullptr), PGOReader(nullptr),
- CFConstantStringClassRef(nullptr), ConstantStringClassRef(nullptr),
- NSConstantStringType(nullptr), NSConcreteGlobalBlock(nullptr),
- NSConcreteStackBlock(nullptr), BlockObjectAssign(nullptr),
- BlockObjectDispose(nullptr), BlockDescriptorType(nullptr),
- GenericBlockLiteralType(nullptr), LifetimeStartFn(nullptr),
- LifetimeEndFn(nullptr), SanitizerMD(new SanitizerMetadata(*this)) {
+ VMContext(M.getContext()), Types(*this), VTables(*this),
+ SanitizerMD(new SanitizerMetadata(*this)) {
// Initialize the type cache.
llvm::LLVMContext &LLVMContext = M.getContext();
@@ -132,29 +124,30 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
// Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0.
if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
(!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
- TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(),
- getCXXABI().getMangleContext());
+ TBAA.reset(new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(),
+ getCXXABI().getMangleContext()));
// If debug info or coverage generation is enabled, create the CGDebugInfo
// object.
- if (CodeGenOpts.getDebugInfo() != CodeGenOptions::NoDebugInfo ||
- CodeGenOpts.EmitGcovArcs ||
- CodeGenOpts.EmitGcovNotes)
- DebugInfo = new CGDebugInfo(*this);
+ if (CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo ||
+ CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)
+ DebugInfo.reset(new CGDebugInfo(*this));
Block.GlobalUniqueCount = 0;
if (C.getLangOpts().ObjC1)
- ObjCData = new ObjCEntrypoints();
+ ObjCData.reset(new ObjCEntrypoints());
- if (!CodeGenOpts.InstrProfileInput.empty()) {
- auto ReaderOrErr =
- llvm::IndexedInstrProfReader::create(CodeGenOpts.InstrProfileInput);
- if (std::error_code EC = ReaderOrErr.getError()) {
+ if (CodeGenOpts.hasProfileClangUse()) {
+ auto ReaderOrErr = llvm::IndexedInstrProfReader::create(
+ CodeGenOpts.ProfileInstrumentUsePath);
+ if (auto E = ReaderOrErr.takeError()) {
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"Could not read profile %0: %1");
- getDiags().Report(DiagID) << CodeGenOpts.InstrProfileInput
- << EC.message();
+ llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
+ getDiags().Report(DiagID) << CodeGenOpts.ProfileInstrumentUsePath
+ << EI.message();
+ });
} else
PGOReader = std::move(ReaderOrErr.get());
}
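The reader construction above also migrates from std::error_code to llvm::Error/Expected. A rough sketch of that consumption pattern, assuming the Support headers of this era (ParseNumber and its message are made up):

    #include "llvm/Support/Error.h"

    using namespace llvm;

    // Hypothetical fallible operation returning Expected<T>.
    static Expected<int> ParseNumber(bool Fail) {
      if (Fail)
        return make_error<StringError>("bad input", inconvertibleErrorCode());
      return 42;
    }

    int main() {
      Expected<int> ValOrErr = ParseNumber(true);
      if (Error E = ValOrErr.takeError()) {
        // handleAllErrors marks the error handled and passes each contained
        // ErrorInfoBase to the callback -- the same shape used above to feed
        // EI.message() into a diagnostic.
        handleAllErrors(std::move(E), [](const ErrorInfoBase &EI) {
          (void)EI.message();
        });
        return 1;
      }
      return *ValOrErr == 42 ? 0 : 1;
    }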
@@ -165,16 +158,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
CoverageMapping.reset(new CoverageMappingModuleGen(*this, *CoverageInfo));
}
-CodeGenModule::~CodeGenModule() {
- delete ObjCRuntime;
- delete OpenCLRuntime;
- delete OpenMPRuntime;
- delete CUDARuntime;
- delete TheTargetCodeGenInfo;
- delete TBAA;
- delete DebugInfo;
- delete ObjCData;
-}
+CodeGenModule::~CodeGenModule() {}
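The now-empty destructor is the payoff of the header changes further down: the raw owning pointers and their manual deletes become std::unique_ptr members. A minimal sketch of the refactor, with made-up names:

    #include <memory>

    struct Runtime {};

    // Before: raw owning pointer plus a hand-written delete.
    struct ModuleOld {
      Runtime *ObjC = nullptr;
      void createObjC() { ObjC = new Runtime(); }
      ~ModuleOld() { delete ObjC; }
    };

    // After: unique_ptr owns the object, the destructor body can be empty,
    // and the lazy factories switch from assignment to reset().
    struct ModuleNew {
      std::unique_ptr<Runtime> ObjC;
      void createObjC() { ObjC.reset(new Runtime()); }
    };

    int main() { ModuleNew M; M.createObjC(); return 0; }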
void CodeGenModule::createObjCRuntime() {
// This is just isGNUFamily(), but we want to force implementors of
@@ -183,29 +167,42 @@ void CodeGenModule::createObjCRuntime() {
case ObjCRuntime::GNUstep:
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
- ObjCRuntime = CreateGNUObjCRuntime(*this);
+ ObjCRuntime.reset(CreateGNUObjCRuntime(*this));
return;
case ObjCRuntime::FragileMacOSX:
case ObjCRuntime::MacOSX:
case ObjCRuntime::iOS:
case ObjCRuntime::WatchOS:
- ObjCRuntime = CreateMacObjCRuntime(*this);
+ ObjCRuntime.reset(CreateMacObjCRuntime(*this));
return;
}
llvm_unreachable("bad runtime kind");
}
void CodeGenModule::createOpenCLRuntime() {
- OpenCLRuntime = new CGOpenCLRuntime(*this);
+ OpenCLRuntime.reset(new CGOpenCLRuntime(*this));
}
void CodeGenModule::createOpenMPRuntime() {
- OpenMPRuntime = new CGOpenMPRuntime(*this);
+ // Select a specialized code generation class based on the target, if any.
+  // If one does not exist, use the default implementation.
+ switch (getTarget().getTriple().getArch()) {
+
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ assert(getLangOpts().OpenMPIsDevice &&
+ "OpenMP NVPTX is only prepared to deal with device code.");
+ OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
+ break;
+ default:
+ OpenMPRuntime.reset(new CGOpenMPRuntime(*this));
+ break;
+ }
}
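createOpenMPRuntime now dispatches on the target triple so NVPTX device compilations get a specialized runtime. The same pattern in miniature (placeholder enum and classes):

    #include <memory>

    enum class Arch { Generic, NVPTX };

    struct OMPRuntime { virtual ~OMPRuntime() {} };
    struct OMPRuntimeNVPTX : OMPRuntime {};  // device-specialized subclass

    static std::unique_ptr<OMPRuntime> createRuntime(Arch A) {
      switch (A) {
      case Arch::NVPTX:
        return std::unique_ptr<OMPRuntime>(new OMPRuntimeNVPTX());
      default:
        return std::unique_ptr<OMPRuntime>(new OMPRuntime());
      }
    }

    int main() { return createRuntime(Arch::NVPTX) ? 0 : 1; }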
void CodeGenModule::createCUDARuntime() {
- CUDARuntime = CreateNVCUDARuntime(*this);
+ CUDARuntime.reset(CreateNVCUDARuntime(*this));
}
void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) {
@@ -259,20 +256,21 @@ void CodeGenModule::applyGlobalValReplacements() {
// This is only used in aliases that we created and we know they have a
// linear structure.
-static const llvm::GlobalObject *getAliasedGlobal(const llvm::GlobalAlias &GA) {
- llvm::SmallPtrSet<const llvm::GlobalAlias*, 4> Visited;
- const llvm::Constant *C = &GA;
+static const llvm::GlobalObject *getAliasedGlobal(
+ const llvm::GlobalIndirectSymbol &GIS) {
+ llvm::SmallPtrSet<const llvm::GlobalIndirectSymbol*, 4> Visited;
+ const llvm::Constant *C = &GIS;
for (;;) {
C = C->stripPointerCasts();
if (auto *GO = dyn_cast<llvm::GlobalObject>(C))
return GO;
// stripPointerCasts will not walk over weak aliases.
- auto *GA2 = dyn_cast<llvm::GlobalAlias>(C);
- if (!GA2)
+ auto *GIS2 = dyn_cast<llvm::GlobalIndirectSymbol>(C);
+ if (!GIS2)
return nullptr;
- if (!Visited.insert(GA2).second)
+ if (!Visited.insert(GIS2).second)
return nullptr;
- C = GA2->getAliasee();
+ C = GIS2->getIndirectSymbol();
}
}
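The loop above generalizes from GlobalAlias to GlobalIndirectSymbol so it also covers the new ifuncs; the Visited set is what turns a cyclic chain into a clean nullptr. The guard in isolation, over a toy node type:

    #include <set>

    struct Node { Node *Next = nullptr; };  // stand-in for an indirect symbol

    // Follow the chain; revisiting a node means a cycle, so give up with
    // nullptr -- the same shape as the Visited check above.
    static Node *walkChain(Node *N) {
      std::set<Node *> Visited;
      while (N && N->Next) {
        if (!Visited.insert(N).second)
          return nullptr;
        N = N->Next;
      }
      return N;
    }

    int main() {
      Node A, B;
      A.Next = &B;
      B.Next = &A;  // deliberate cycle
      return walkChain(&A) == nullptr ? 0 : 1;
    }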
@@ -284,20 +282,35 @@ void CodeGenModule::checkAliases() {
DiagnosticsEngine &Diags = getDiags();
for (const GlobalDecl &GD : Aliases) {
const auto *D = cast<ValueDecl>(GD.getDecl());
- const AliasAttr *AA = D->getAttr<AliasAttr>();
+ SourceLocation Location;
+ bool IsIFunc = D->hasAttr<IFuncAttr>();
+ if (const Attr *A = D->getDefiningAttr())
+ Location = A->getLocation();
+ else
+ llvm_unreachable("Not an alias or ifunc?");
StringRef MangledName = getMangledName(GD);
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
- auto *Alias = cast<llvm::GlobalAlias>(Entry);
+ auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry);
const llvm::GlobalValue *GV = getAliasedGlobal(*Alias);
if (!GV) {
Error = true;
- Diags.Report(AA->getLocation(), diag::err_cyclic_alias);
+ Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc;
} else if (GV->isDeclaration()) {
Error = true;
- Diags.Report(AA->getLocation(), diag::err_alias_to_undefined);
+ Diags.Report(Location, diag::err_alias_to_undefined)
+ << IsIFunc << IsIFunc;
+ } else if (IsIFunc) {
+ // Check resolver function type.
+ llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(
+ GV->getType()->getPointerElementType());
+ assert(FTy);
+ if (!FTy->getReturnType()->isPointerTy())
+ Diags.Report(Location, diag::err_ifunc_resolver_return);
+ if (FTy->getNumParams())
+ Diags.Report(Location, diag::err_ifunc_resolver_params);
}
- llvm::Constant *Aliasee = Alias->getAliasee();
+ llvm::Constant *Aliasee = Alias->getIndirectSymbol();
llvm::GlobalValue *AliaseeGV;
if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee))
AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0));
@@ -308,7 +321,7 @@ void CodeGenModule::checkAliases() {
StringRef AliasSection = SA->getName();
if (AliasSection != AliaseeGV->getSection())
Diags.Report(SA->getLocation(), diag::warn_alias_with_section)
- << AliasSection;
+ << AliasSection << IsIFunc << IsIFunc;
}
// We have to handle alias to weak aliases in here. LLVM itself disallows
@@ -316,13 +329,13 @@ void CodeGenModule::checkAliases() {
// compatibility with gcc we implement it by just pointing the alias
// to its aliasee's aliasee. We also warn, since the user is probably
// expecting the link to be weak.
- if (auto GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) {
- if (GA->mayBeOverridden()) {
- Diags.Report(AA->getLocation(), diag::warn_alias_to_weak_alias)
- << GV->getName() << GA->getName();
+ if (auto GA = dyn_cast<llvm::GlobalIndirectSymbol>(AliaseeGV)) {
+ if (GA->isInterposable()) {
+ Diags.Report(Location, diag::warn_alias_to_weak_alias)
+ << GV->getName() << GA->getName() << IsIFunc;
Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- GA->getAliasee(), Alias->getType());
- Alias->setAliasee(Aliasee);
+ GA->getIndirectSymbol(), Alias->getType());
+ Alias->setIndirectSymbol(Aliasee);
}
}
}
@@ -332,7 +345,7 @@ void CodeGenModule::checkAliases() {
for (const GlobalDecl &GD : Aliases) {
StringRef MangledName = getMangledName(GD);
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
- auto *Alias = cast<llvm::GlobalAlias>(Entry);
+ auto *Alias = dyn_cast<llvm::GlobalIndirectSymbol>(Entry);
Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType()));
Alias->eraseFromParent();
}
@@ -380,7 +393,7 @@ void CodeGenModule::Release() {
OpenMPRuntime->emitRegistrationFunction())
AddGlobalCtor(OpenMPRegistrationFunction, 0);
if (PGOReader) {
- getModule().setMaximumFunctionCount(PGOReader->getMaximumFunctionCount());
+ getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext));
if (PGOStats.hasDiagnostics())
PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName);
}
@@ -391,7 +404,11 @@ void CodeGenModule::Release() {
EmitDeferredUnusedCoverageMappings();
if (CoverageMapping)
CoverageMapping->emit();
+ if (CodeGenOpts.SanitizeCfiCrossDso)
+ CodeGenFunction(*this).EmitCfiCheckFail();
emitLLVMUsed();
+ if (SanStats)
+ SanStats->finish();
if (CodeGenOpts.Autolink &&
(Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) {
@@ -452,16 +469,19 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
}
- if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
- llvm::PICLevel::Level PL = llvm::PICLevel::Default;
- switch (PLevel) {
- case 0: break;
- case 1: PL = llvm::PICLevel::Small; break;
- case 2: PL = llvm::PICLevel::Large; break;
- default: llvm_unreachable("Invalid PIC Level");
- }
+ if (LangOpts.CUDAIsDevice && getTarget().getTriple().isNVPTX()) {
+ // Indicate whether __nvvm_reflect should be configured to flush denormal
+ // floating point values to 0. (This corresponds to its "__CUDA_FTZ"
+ // property.)
+ getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
+ LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
+ }
- getModule().setPICLevel(PL);
+ if (uint32_t PLevel = Context.getLangOpts().PICLevel) {
+ assert(PLevel < 3 && "Invalid PIC Level");
+ getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel));
+ if (Context.getLangOpts().PIE)
+ getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel));
}
SimplifyPersonality();
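The PIC hunk above drops the old switch in favor of a direct cast, which is valid only because the enumerator values line up with the -fPIC levels; the PIE level then reuses the same number. A hypothetical mirror of that mapping:

    #include <cassert>

    enum class PICLevel { NotPIC = 0, SmallPIC = 1, BigPIC = 2 };  // assumed layout

    static PICLevel toPICLevel(unsigned PLevel) {
      assert(PLevel < 3 && "Invalid PIC Level");
      // 1:1 cast replaces the switch; safe while the values stay aligned.
      return static_cast<PICLevel>(PLevel);
    }

    int main() { return toPICLevel(2) == PICLevel::BigPIC ? 0 : 1; }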
@@ -485,6 +505,11 @@ void CodeGenModule::UpdateCompletedType(const TagDecl *TD) {
Types.UpdateCompletedType(TD);
}
+void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) {
+ // Make sure that this type is translated.
+ Types.RefreshTypeCacheForClass(RD);
+}
+
llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) {
if (!TBAA)
return nullptr;
@@ -740,6 +765,15 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
: llvm::GlobalValue::LinkOnceODRLinkage;
}
+ if (isa<CXXConstructorDecl>(D) &&
+ cast<CXXConstructorDecl>(D)->isInheritingConstructor() &&
+ Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+ // Our approach to inheriting constructors is fundamentally different from
+ // that used by the MS ABI, so keep our inheriting constructor thunks
+ // internal rather than trying to pick an unambiguous mangling for them.
+ return llvm::GlobalValue::InternalLinkage;
+ }
+
return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false);
}
@@ -762,8 +796,7 @@ void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F)
F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
}
-llvm::ConstantInt *
-CodeGenModule::CreateCfiIdForTypeMetadata(llvm::Metadata *MD) {
+llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
llvm::MDString *MDS = dyn_cast<llvm::MDString>(MD);
if (!MDS) return nullptr;
@@ -882,12 +915,6 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
F->removeFnAttr(llvm::Attribute::InlineHint);
}
- if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D))
- F->setUnnamedAddr(true);
- else if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
- if (MD->isVirtual())
- F->setUnnamedAddr(true);
-
unsigned alignment = D->getMaxAlignment() / Context.getCharWidth();
if (alignment)
F->setAlignment(alignment);
@@ -970,8 +997,8 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
}
}
-void CodeGenModule::CreateFunctionBitSetEntry(const FunctionDecl *FD,
- llvm::Function *F) {
+void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD,
+ llvm::Function *F) {
// Only if we are checking indirect calls.
if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall))
return;
@@ -992,25 +1019,13 @@ void CodeGenModule::CreateFunctionBitSetEntry(const FunctionDecl *FD,
return;
}
- llvm::NamedMDNode *BitsetsMD =
- getModule().getOrInsertNamedMetadata("llvm.bitsets");
-
llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType());
- llvm::Metadata *BitsetOps[] = {
- MD, llvm::ConstantAsMetadata::get(F),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int64Ty, 0))};
- BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps));
+ F->addTypeMetadata(0, MD);
// Emit a hash-based bit set entry for cross-DSO calls.
- if (CodeGenOpts.SanitizeCfiCrossDso) {
- if (auto TypeId = CreateCfiIdForTypeMetadata(MD)) {
- llvm::Metadata *BitsetOps2[] = {
- llvm::ConstantAsMetadata::get(TypeId),
- llvm::ConstantAsMetadata::get(F),
- llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int64Ty, 0))};
- BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps2));
- }
- }
+ if (CodeGenOpts.SanitizeCfiCrossDso)
+ if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD))
+ F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId));
}
void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
@@ -1049,13 +1064,29 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
if (const SectionAttr *SA = FD->getAttr<SectionAttr>())
F->setSection(SA->getName());
- // A replaceable global allocation function does not act like a builtin by
- // default, only if it is invoked by a new-expression or delete-expression.
- if (FD->isReplaceableGlobalAllocationFunction())
+ if (FD->isReplaceableGlobalAllocationFunction()) {
+ // A replaceable global allocation function does not act like a builtin by
+ // default, only if it is invoked by a new-expression or delete-expression.
F->addAttribute(llvm::AttributeSet::FunctionIndex,
llvm::Attribute::NoBuiltin);
- CreateFunctionBitSetEntry(FD, F);
+ // A sane operator new returns a non-aliasing pointer.
+ // FIXME: Also add NonNull attribute to the return value
+ // for the non-nothrow forms?
+ auto Kind = FD->getDeclName().getCXXOverloadedOperator();
+ if (getCodeGenOpts().AssumeSaneOperatorNew &&
+ (Kind == OO_New || Kind == OO_Array_New))
+ F->addAttribute(llvm::AttributeSet::ReturnIndex,
+ llvm::Attribute::NoAlias);
+ }
+
+ if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD))
+ F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ else if (const auto *MD = dyn_cast<CXXMethodDecl>(FD))
+ if (MD->isVirtual())
+ F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ CreateFunctionTypeMetadata(FD, F);
}
void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
@@ -1227,13 +1258,13 @@ void CodeGenModule::EmitDeferred() {
if (!DeferredVTables.empty()) {
EmitDeferredVTables();
- // Emitting a v-table doesn't directly cause more v-tables to
+ // Emitting a vtable doesn't directly cause more vtables to
// become deferred, although it can cause functions to be
- // emitted that then need those v-tables.
+ // emitted that then need those vtables.
assert(DeferredVTables.empty());
}
- // Stop if we're out of both deferred v-tables and deferred declarations.
+ // Stop if we're out of both deferred vtables and deferred declarations.
if (DeferredDeclsToEmit.empty())
return;
@@ -1244,19 +1275,23 @@ void CodeGenModule::EmitDeferred() {
for (DeferredGlobal &G : CurDeclsToEmit) {
GlobalDecl D = G.GD;
- llvm::GlobalValue *GV = G.GV;
G.GV = nullptr;
// We should call GetAddrOfGlobal with IsForDefinition set to true in order
    // to get a GlobalValue with exactly the type we need, not something that
    // might have been created for another decl with the same mangled name but
    // a different type.
- // FIXME: Support for variables is not implemented yet.
- if (isa<FunctionDecl>(D.getDecl()))
- GV = cast<llvm::GlobalValue>(GetAddrOfGlobal(D, /*IsForDefinition=*/true));
- else
- if (!GV)
- GV = GetGlobalValue(getMangledName(D));
+ llvm::GlobalValue *GV = dyn_cast<llvm::GlobalValue>(
+ GetAddrOfGlobal(D, /*IsForDefinition=*/true));
+
+ // In case of different address spaces, we may still get a cast, even with
+    // IsForDefinition equal to true. Query the mangled names table to get the
+    // GlobalValue.
+ if (!GV)
+ GV = GetGlobalValue(getMangledName(D));
+
+ // Make sure GetGlobalValue returned non-null.
+ assert(GV);
// Check to see if we've already emitted this. This is necessary
// for a couple of reasons: first, decls can end up in the
@@ -1264,7 +1299,7 @@ void CodeGenModule::EmitDeferred() {
// up with definitions in unusual ways (e.g. by an extern inline
// function acquiring a strong function redefinition). Just
// ignore these cases.
- if (GV && !GV->isDeclaration())
+ if (!GV->isDeclaration())
continue;
// Otherwise, emit the definition and move on to the next one.
@@ -1304,7 +1339,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) {
new llvm::GlobalVariable(getModule(), s->getType(), true,
llvm::GlobalValue::PrivateLinkage, s, ".str");
gv->setSection(AnnotationSection);
- gv->setUnnamedAddr(true);
+ gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
AStr = gv;
return gv;
}
@@ -1412,6 +1447,12 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// Implicit template instantiations may change linkage if they are later
// explicitly instantiated, so they should not be emitted eagerly.
return false;
+ if (const auto *VD = dyn_cast<VarDecl>(Global))
+ if (Context.getInlineVariableDefinitionKind(VD) ==
+ ASTContext::InlineVariableDefinitionKind::WeakUnknown)
+ // A definition of an inline constexpr static data member may change
+ // linkage later if it's redeclared outside the class.
+ return false;
// If OpenMP is enabled and threadprivates must be generated like TLS, delay
// codegen for global variables, because they may be marked as threadprivate.
if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS &&
@@ -1425,12 +1466,12 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor(
const CXXUuidofExpr* E) {
  // Sema has verified that IIDSource has a __declspec(uuid()), and that it's
  // well-formed.
- StringRef Uuid = E->getUuidAsStringRef(Context);
+ StringRef Uuid = E->getUuidStr();
std::string Name = "_GUID_" + Uuid.lower();
std::replace(Name.begin(), Name.end(), '-', '_');
- // Contains a 32-bit field.
- CharUnits Alignment = CharUnits::fromQuantity(4);
+ // The UUID descriptor should be pointer aligned.
+ CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes);
// Look for an existing global.
if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name))
@@ -1491,6 +1532,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
if (Global->hasAttr<AliasAttr>())
return EmitAliasDefinition(GD);
+  // An ifunc is like an alias whose value is resolved at runtime by a resolver.
+ if (Global->hasAttr<IFuncAttr>())
+ return emitIFuncDefinition(GD);
+
// If this is CUDA, be selective about which declarations we emit.
if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
@@ -1500,18 +1545,32 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
!Global->hasAttr<CUDASharedAttr>())
return;
} else {
- if (!Global->hasAttr<CUDAHostAttr>() && (
- Global->hasAttr<CUDADeviceAttr>() ||
- Global->hasAttr<CUDAConstantAttr>() ||
- Global->hasAttr<CUDASharedAttr>()))
+ // We need to emit host-side 'shadows' for all global
+ // device-side variables because the CUDA runtime needs their
+ // size and host-side address in order to provide access to
+ // their device-side incarnations.
+
+ // So device-only functions are the only things we skip.
+ if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() &&
+ Global->hasAttr<CUDADeviceAttr>())
return;
+
+ assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
+ "Expected Variable or Function");
}
}
- // If this is OpenMP device, check if it is legal to emit this global
- // normally.
- if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
- return;
+ if (LangOpts.OpenMP) {
+ // If this is OpenMP device, check if it is legal to emit this global
+ // normally.
+ if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD))
+ return;
+ if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Global)) {
+ if (MustBeEmitted(Global))
+ EmitOMPDeclareReduction(DRD);
+ return;
+ }
+ }
// Ignore declarations, they will be emitted on their first use.
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
@@ -1533,10 +1592,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
} else {
const auto *VD = cast<VarDecl>(Global);
assert(VD->isFileVarDecl() && "Cannot emit local var decl as global.");
-
- if (VD->isThisDeclarationADefinition() != VarDecl::Definition &&
- !Context.isMSStaticDataMemberInlineDefinition(VD))
+      // We need to emit device-side global CUDA variables even if a
+      // variable does not have a definition -- we still need to define
+      // a host-side shadow for it.
+ bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
+ !VD->hasDefinition() &&
+ (VD->hasAttr<CUDAConstantAttr>() ||
+ VD->hasAttr<CUDADeviceAttr>());
+ if (!MustEmitForCuda &&
+ VD->isThisDeclarationADefinition() != VarDecl::Definition &&
+ !Context.isMSStaticDataMemberInlineDefinition(VD)) {
+ // If this declaration may have caused an inline variable definition to
+ // change linkage, make sure that it's emitted.
+ if (Context.getInlineVariableDefinitionKind(VD) ==
+ ASTContext::InlineVariableDefinitionKind::Strong)
+ GetAddrOfGlobalVar(VD);
return;
+ }
}
// Defer code generation to first use when possible, e.g. if this is an inline
@@ -1690,7 +1762,7 @@ void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) {
return;
if (CGDebugInfo *DI = getModuleDebugInfo())
- if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) {
+ if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) {
const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext()));
DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation());
}
@@ -1730,7 +1802,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
}
if (const auto *VD = dyn_cast<VarDecl>(D))
- return EmitGlobalVarDefinition(VD);
+ return EmitGlobalVarDefinition(VD, !VD->hasDefinition());
llvm_unreachable("Invalid argument to EmitGlobalDefinition()");
}
@@ -1771,8 +1843,8 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName,
// error.
if (IsForDefinition && !Entry->isDeclaration()) {
GlobalDecl OtherGD;
- // Check that GD is not yet in ExplicitDefinitions is required to make
- // sure that we issue an error only once.
+    // Checking that GD is not yet in DiagnosedConflictingDefinitions is
+    // required to make sure that we issue an error only once.
if (lookupRepresentativeDecl(MangledName, OtherGD) &&
(GD.getCanonicalDecl().getDecl() !=
OtherGD.getCanonicalDecl().getDecl()) &&
@@ -1982,10 +2054,15 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) {
///
/// If D is non-null, it specifies a decl that corresponds to this. This is used
/// to set the attributes on the global when it is first created.
+///
+/// If IsForDefinition is true, it is guaranteed that an actual global with
+/// type Ty will be returned, not a conversion of a variable with the same
+/// mangled name but some other type.
llvm::Constant *
CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::PointerType *Ty,
- const VarDecl *D) {
+ const VarDecl *D,
+ bool IsForDefinition) {
// Lookup the entry, lazily creating it if necessary.
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
if (Entry) {
@@ -2001,11 +2078,34 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
if (Entry->getType() == Ty)
return Entry;
+ // If there are two attempts to define the same mangled name, issue an
+ // error.
+ if (IsForDefinition && !Entry->isDeclaration()) {
+ GlobalDecl OtherGD;
+ const VarDecl *OtherD;
+
+    // Checking that D is not yet in DiagnosedConflictingDefinitions is
+    // required to make sure that we issue an error only once.
+ if (D && lookupRepresentativeDecl(MangledName, OtherGD) &&
+ (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) &&
+ (OtherD = dyn_cast<VarDecl>(OtherGD.getDecl())) &&
+ OtherD->hasInit() &&
+ DiagnosedConflictingDefinitions.insert(D).second) {
+ getDiags().Report(D->getLocation(),
+ diag::err_duplicate_mangled_name);
+ getDiags().Report(OtherGD.getDecl()->getLocation(),
+ diag::note_previous_definition);
+ }
+ }
+
// Make sure the result is of the correct type.
if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace())
return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty);
- return llvm::ConstantExpr::getBitCast(Entry, Ty);
+    // (If a global is requested for a definition, we always need to create a
+    // new global, not just return a bitcast.)
+ if (!IsForDefinition)
+ return llvm::ConstantExpr::getBitCast(Entry, Ty);
}
unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
@@ -2014,6 +2114,20 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr,
llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+ // If we already created a global with the same mangled name (but different
+ // type) before, take its name and remove it from its parent.
+ if (Entry) {
+ GV->takeName(Entry);
+
+ if (!Entry->use_empty()) {
+ llvm::Constant *NewPtrForOldDecl =
+ llvm::ConstantExpr::getBitCast(GV, Entry->getType());
+ Entry->replaceAllUsesWith(NewPtrForOldDecl);
+ }
+
+ Entry->eraseFromParent();
+ }
+
// This is the first use or definition of a mangled name. If there is a
// deferred decl with this name, remember that we need to emit it at the end
// of the file.
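When a definition is requested with a type that differs from an earlier declaration, the code above builds a fresh global, steals the old symbol's name, rewrites existing uses through a bitcast, and erases the stale global. A compilable sketch of that dance against the LLVM C++ API of this vintage:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("demo", Ctx);
      auto *Old = new GlobalVariable(M, Type::getInt8Ty(Ctx), false,
                                     GlobalValue::ExternalLinkage, nullptr, "g");
      auto *GV = new GlobalVariable(M, Type::getInt32Ty(Ctx), false,
                                    GlobalValue::ExternalLinkage, nullptr, "");
      GV->takeName(Old);                      // keep the mangled name
      if (!Old->use_empty())                  // retarget existing users
        Old->replaceAllUsesWith(ConstantExpr::getBitCast(GV, Old->getType()));
      Old->eraseFromParent();                 // drop the stale global
      return 0;
    }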
@@ -2086,7 +2200,8 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD,
return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false,
IsForDefinition);
} else
- return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl()));
+ return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl()), /*Ty=*/nullptr,
+ IsForDefinition);
}
llvm::GlobalVariable *
@@ -2134,9 +2249,12 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
/// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the
/// given global variable. If Ty is non-null and if the global doesn't exist,
/// then it will be created with the specified type instead of whatever the
-/// normal requested type would be.
+/// normal requested type would be. If IsForDefinition is true, it is
+/// guaranteed that an actual global with type Ty will be returned, not a
+/// conversion of a variable with the same mangled name but some other type.
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
- llvm::Type *Ty) {
+ llvm::Type *Ty,
+ bool IsForDefinition) {
assert(D->hasGlobalStorage() && "Not a global variable");
QualType ASTTy = D->getType();
if (!Ty)
@@ -2146,7 +2264,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
llvm::PointerType::get(Ty, getContext().getTargetAddressSpace(ASTTy));
StringRef MangledName = getMangledName(D);
- return GetOrCreateLLVMGlobal(MangledName, PTy, D);
+ return GetOrCreateLLVMGlobal(MangledName, PTy, D, IsForDefinition);
}
/// CreateRuntimeVariable - Create a new runtime global variable with the
@@ -2160,15 +2278,20 @@ CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty,
void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) {
assert(!D->getInit() && "Cannot emit definite definitions here!");
- if (!MustBeEmitted(D)) {
- // If we have not seen a reference to this variable yet, place it
- // into the deferred declarations table to be emitted if needed
- // later.
- StringRef MangledName = getMangledName(D);
- if (!GetGlobalValue(MangledName)) {
+ StringRef MangledName = getMangledName(D);
+ llvm::GlobalValue *GV = GetGlobalValue(MangledName);
+
+  // We already have a definition, not a declaration, with the same mangled
+  // name. Emitting the declaration is not required (and would actually
+  // overwrite the emitted definition).
+ if (GV && !GV->isDeclaration())
+ return;
+
+ // If we have not seen a reference to this variable yet, place it into the
+ // deferred declarations table to be emitted if needed later.
+ if (!MustBeEmitted(D) && !GV) {
DeferredDecls[MangledName] = D;
return;
- }
}
// The tentative definition is the only definition.
@@ -2182,7 +2305,7 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const {
unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D,
unsigned AddrSpace) {
- if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
+ if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) {
if (D->hasAttr<CUDAConstantAttr>())
AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
else if (D->hasAttr<CUDASharedAttr>())
@@ -2259,7 +2382,9 @@ void CodeGenModule::maybeSetTrivialComdat(const Decl &D,
GO.setComdat(TheModule.getOrInsertComdat(GO.getName()));
}
-void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
+/// Pass IsTentative as true if you want to create a tentative definition.
+void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
+ bool IsTentative) {
llvm::Constant *Init = nullptr;
QualType ASTTy = D->getType();
CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
@@ -2269,18 +2394,13 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
const VarDecl *InitDecl;
const Expr *InitExpr = D->getAnyInitializer(InitDecl);
- // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
- // of their declaration."
- if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice
- && D->hasAttr<CUDASharedAttr>()) {
- if (InitExpr) {
- const auto *C = dyn_cast<CXXConstructExpr>(InitExpr);
- if (C == nullptr || !C->getConstructor()->hasTrivialBody())
- Error(D->getLocation(),
- "__shared__ variable cannot have an initialization.");
- }
+ // CUDA E.2.4.1 "__shared__ variables cannot have an initialization
+ // as part of their declaration." Sema has already checked for
+ // error cases, so we just need to set Init to UndefValue.
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ D->hasAttr<CUDASharedAttr>())
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
- } else if (!InitExpr) {
+ else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
// implicitly initialized with { 0 }.
//
@@ -2318,7 +2438,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
}
llvm::Type* InitType = Init->getType();
- llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType);
+ llvm::Constant *Entry =
+ GetAddrOfGlobalVar(D, InitType, /*IsForDefinition=*/!IsTentative);
// Strip off a bitcast if we got one back.
if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
@@ -2350,7 +2471,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
Entry->setName(StringRef());
// Make a new global with the correct type, this is now guaranteed to work.
- GV = cast<llvm::GlobalVariable>(GetAddrOfGlobalVar(D, InitType));
+ GV = cast<llvm::GlobalVariable>(
+ GetAddrOfGlobalVar(D, InitType, /*IsForDefinition=*/!IsTentative));
// Replace all uses of the old global with the new global
llvm::Constant *NewPtrForOldDecl =
@@ -2366,6 +2488,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, GV);
+ // Set the llvm linkage type as appropriate.
+ llvm::GlobalValue::LinkageTypes Linkage =
+ getLLVMLinkageVarDefinition(D, GV->isConstant());
+
// CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
// the device. [...]"
// CUDA B.2.2 "The __constant__ qualifier, optionally used together with
@@ -2373,9 +2499,34 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
// Is accessible from all the threads within the grid and from the host
// through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
// / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
- if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
- (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
- GV->setExternallyInitialized(true);
+ if (GV && LangOpts.CUDA) {
+ if (LangOpts.CUDAIsDevice) {
+ if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())
+ GV->setExternallyInitialized(true);
+ } else {
+ // Host-side shadows of external declarations of device-side
+ // global variables become internal definitions. These have to
+ // be internal in order to prevent name conflicts with global
+      // host variables with the same name in different TUs.
+ if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
+ Linkage = llvm::GlobalValue::InternalLinkage;
+
+ // Shadow variables and their properties must be registered
+ // with CUDA runtime.
+ unsigned Flags = 0;
+ if (!D->hasDefinition())
+ Flags |= CGCUDARuntime::ExternDeviceVar;
+ if (D->hasAttr<CUDAConstantAttr>())
+ Flags |= CGCUDARuntime::ConstantDeviceVar;
+ getCUDARuntime().registerDeviceVar(*GV, Flags);
+ } else if (D->hasAttr<CUDASharedAttr>())
+ // __shared__ variables are odd. Shadows do get created, but
+ // they are not registered with the CUDA runtime, so they
+ // can't really be used to access their device-side
+        // counterparts. It's not clear yet whether this is an nvcc bug or a
+        // feature, but we've got to do the same for compatibility.
+ Linkage = llvm::GlobalValue::InternalLinkage;
+ }
}
GV->setInitializer(Init);
@@ -2392,9 +2543,6 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
- // Set the llvm linkage type as appropriate.
- llvm::GlobalValue::LinkageTypes Linkage =
- getLLVMLinkageVarDefinition(D, GV->isConstant());
// On Darwin, if the normal linkage of a C++ thread_local variable is
// LinkOnce or Weak, we keep the normal linkage to prevent multiple
@@ -2439,7 +2587,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
// Emit global variable debug information.
if (CGDebugInfo *DI = getModuleDebugInfo())
- if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo)
+ if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo)
DI->EmitGlobalVariable(GV, D);
}
@@ -2474,7 +2622,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
if (shouldBeInCOMDAT(CGM, *D))
return true;
- // Declarations with a required alignment do not have common linakge in MSVC
+ // Declarations with a required alignment do not have common linkage in MSVC
// mode.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
if (D->hasAttr<AlignedAttr>())
@@ -2535,9 +2683,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
// explicit instantiations can occur in multiple translation units
// and must all be equivalent. However, we are not allowed to
// throw away these explicit instantiations.
- if (Linkage == GVA_StrongODR)
- return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
- : llvm::Function::ExternalLinkage;
+ //
+ // We don't currently support CUDA device code spread out across multiple TUs,
+ // so say that CUDA templates are either external (for kernels) or internal.
+ // This lets llvm perform aggressive inter-procedural optimizations.
+ if (Linkage == GVA_StrongODR) {
+ if (Context.getLangOpts().AppleKext)
+ return llvm::Function::ExternalLinkage;
+ if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
+ return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
+ : llvm::Function::InternalLinkage;
+ return llvm::Function::WeakODRLinkage;
+ }
// C++ doesn't have tentative definitions and thus cannot have common
// linkage.
@@ -2694,6 +2851,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
}
void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
+ auto DK = VD->isThisDeclarationADefinition();
+ if (DK == VarDecl::Definition && VD->hasAttr<DLLImportAttr>())
+ return;
+
TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind();
// If we have a definition, this might be a deferred decl. If the
// instantiation is explicit, make sure we emit it at the end.
@@ -2757,7 +2918,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
StringRef MangledName = getMangledName(GD);
if (AA->getAliasee() == MangledName) {
- Diags.Report(AA->getLocation(), diag::err_cyclic_alias);
+ Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0;
return;
}
@@ -2788,7 +2949,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
if (Entry) {
if (GA->getAliasee() == Entry) {
- Diags.Report(AA->getLocation(), diag::err_cyclic_alias);
+ Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0;
return;
}
@@ -2825,6 +2986,65 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
setAliasAttributes(D, GA);
}
+void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
+ const auto *D = cast<ValueDecl>(GD.getDecl());
+ const IFuncAttr *IFA = D->getAttr<IFuncAttr>();
+ assert(IFA && "Not an ifunc?");
+
+ StringRef MangledName = getMangledName(GD);
+
+ if (IFA->getResolver() == MangledName) {
+ Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1;
+ return;
+ }
+
+ // Report an error if some definition overrides ifunc.
+ llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
+ if (Entry && !Entry->isDeclaration()) {
+ GlobalDecl OtherGD;
+ if (lookupRepresentativeDecl(MangledName, OtherGD) &&
+ DiagnosedConflictingDefinitions.insert(GD).second) {
+ Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name);
+ Diags.Report(OtherGD.getDecl()->getLocation(),
+ diag::note_previous_definition);
+ }
+ return;
+ }
+
+ Aliases.push_back(GD);
+
+ llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType());
+ llvm::Constant *Resolver =
+ GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD,
+ /*ForVTable=*/false);
+ llvm::GlobalIFunc *GIF =
+ llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage,
+ "", Resolver, &getModule());
+ if (Entry) {
+ if (GIF->getResolver() == Entry) {
+ Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1;
+ return;
+ }
+ assert(Entry->isDeclaration());
+
+ // If there is a declaration in the module, then we had an extern followed
+ // by the ifunc, as in:
+ // extern int test();
+ // ...
+ // int test() __attribute__((ifunc("resolver")));
+ //
+ // Remove it and replace uses of it with the ifunc.
+ GIF->takeName(Entry);
+
+ Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GIF,
+ Entry->getType()));
+ Entry->eraseFromParent();
+ } else
+ GIF->setName(MangledName);
+
+ SetCommonAttributes(D, GIF);
+}
+
llvm::Function *CodeGenModule::getIntrinsic(unsigned IID,
ArrayRef<llvm::Type*> Tys) {
return llvm::Intrinsic::getDeclaration(&getModule(), (llvm::Intrinsic::ID)IID,
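For reference, a user-level sketch of the construct emitIFuncDefinition handles (a GNU extension, ELF targets only; names here are illustrative). Per the checks added to checkAliases(), the resolver must take no parameters and return a pointer:

    extern "C" {
    static int add_impl(int a, int b) { return a + b; }

    // Load-time resolver: returns the implementation to bind, e.g. chosen
    // by CPU features.
    static int (*resolve_add())(int, int) { return add_impl; }

    // The ifunc itself; "resolve_add" must name a resolver in this TU.
    int add(int, int) __attribute__((ifunc("resolve_add")));
    }

    int main() { return add(2, 2) == 4 ? 0 : 1; }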
@@ -2889,19 +3109,40 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty);
llvm::Constant *Zeros[] = { Zero, Zero };
llvm::Value *V;
-
+
// If we don't already have it, get __CFConstantStringClassReference.
if (!CFConstantStringClassRef) {
llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy);
Ty = llvm::ArrayType::get(Ty, 0);
- llvm::Constant *GV = CreateRuntimeVariable(Ty,
- "__CFConstantStringClassReference");
+ llvm::Constant *GV =
+ CreateRuntimeVariable(Ty, "__CFConstantStringClassReference");
+
+ if (getTarget().getTriple().isOSBinFormatCOFF()) {
+ IdentifierInfo &II = getContext().Idents.get(GV->getName());
+ TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl();
+ DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+ llvm::GlobalValue *CGV = cast<llvm::GlobalValue>(GV);
+
+ const VarDecl *VD = nullptr;
+ for (const auto &Result : DC->lookup(&II))
+ if ((VD = dyn_cast<VarDecl>(Result)))
+ break;
+
+ if (!VD || !VD->hasAttr<DLLExportAttr>()) {
+ CGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ CGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ } else {
+ CGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ CGV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ }
+ }
+
// Decay array -> ptr
V = llvm::ConstantExpr::getGetElementPtr(Ty, GV, Zeros);
CFConstantStringClassRef = V;
- }
- else
+ } else {
V = CFConstantStringClassRef;
+ }
QualType CFTy = getContext().getCFConstantStringType();
@@ -2914,8 +3155,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
// Flags.
llvm::Type *Ty = getTypes().ConvertType(getContext().UnsignedIntTy);
- Fields[1] = isUTF16 ? llvm::ConstantInt::get(Ty, 0x07d0) :
- llvm::ConstantInt::get(Ty, 0x07C8);
+ Fields[1] = isUTF16 ? llvm::ConstantInt::get(Ty, 0x07d0)
+ : llvm::ConstantInt::get(Ty, 0x07C8);
// String pointer.
llvm::Constant *C = nullptr;
@@ -2933,21 +3174,20 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
auto *GV =
new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true,
llvm::GlobalValue::PrivateLinkage, C, ".str");
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
- // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. Without
- // it LLVM can merge the string with a non unnamed_addr one during LTO. Doing
- // that changes the section it ends in, which surprises ld64.
- if (isUTF16) {
- CharUnits Align = getContext().getTypeAlignInChars(getContext().ShortTy);
- GV->setAlignment(Align.getQuantity());
- GV->setSection("__TEXT,__ustring");
- } else {
- CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
- GV->setAlignment(Align.getQuantity());
- GV->setSection("__TEXT,__cstring,cstring_literals");
- }
+ CharUnits Align = isUTF16
+ ? getContext().getTypeAlignInChars(getContext().ShortTy)
+ : getContext().getTypeAlignInChars(getContext().CharTy);
+ GV->setAlignment(Align.getQuantity());
+
+ // FIXME: We set the section explicitly to avoid a bug in ld64 224.1.
+ // Without it LLVM can merge the string with a non unnamed_addr one during
+ // LTO. Doing that changes the section it ends in, which surprises ld64.
+ if (getTarget().getTriple().isOSBinFormatMachO())
+ GV->setSection(isUTF16 ? "__TEXT,__ustring"
+ : "__TEXT,__cstring,cstring_literals");
// String.
Fields[2] =
@@ -2968,8 +3208,18 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
GV = new llvm::GlobalVariable(getModule(), C->getType(), true,
llvm::GlobalVariable::PrivateLinkage, C,
"_unnamed_cfstring_");
- GV->setSection("__DATA,__cfstring");
GV->setAlignment(Alignment.getQuantity());
+ switch (getTarget().getTriple().getObjectFormat()) {
+ case llvm::Triple::UnknownObjectFormat:
+ llvm_unreachable("unknown file format");
+ case llvm::Triple::COFF:
+ case llvm::Triple::ELF:
+ GV->setSection("cfstring");
+ break;
+ case llvm::Triple::MachO:
+ GV->setSection("__DATA,__cfstring");
+ break;
+ }
Entry.second = GV;
return ConstantAddress(GV, Alignment);
@@ -3062,7 +3312,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) {
auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), isConstant,
Linkage, C, ".str");
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
@@ -3181,7 +3431,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT,
M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName,
nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace);
GV->setAlignment(Alignment.getQuantity());
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (GV->isWeakForLinker()) {
assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals");
GV->setComdat(M.getOrInsertComdat(GV->getName()));
@@ -3528,6 +3778,12 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
case Decl::Namespace:
EmitNamespace(cast<NamespaceDecl>(D));
break;
+ case Decl::CXXRecord:
+ // Emit any static data members, they may be definitions.
+ for (auto *I : cast<CXXRecordDecl>(D)->decls())
+ if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I))
+ EmitTopLevelDecl(I);
+ break;
// No code generation needed.
case Decl::UsingShadow:
case Decl::ClassTemplate:
@@ -3595,7 +3851,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
ObjCRuntime->GenerateClass(OMD);
// Emit global variable debug information.
if (CGDebugInfo *DI = getModuleDebugInfo())
- if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo)
+ if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo)
DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType(
OMD->getClassInterface()), OMD->getLocation());
break;
@@ -3611,6 +3867,31 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
ObjCRuntime->RegisterAlias(cast<ObjCCompatibleAliasDecl>(D));
break;
+ case Decl::PragmaComment: {
+ const auto *PCD = cast<PragmaCommentDecl>(D);
+ switch (PCD->getCommentKind()) {
+ case PCK_Unknown:
+ llvm_unreachable("unexpected pragma comment kind");
+ case PCK_Linker:
+ AppendLinkerOptions(PCD->getArg());
+ break;
+ case PCK_Lib:
+ AddDependentLib(PCD->getArg());
+ break;
+ case PCK_Compiler:
+ case PCK_ExeStr:
+ case PCK_User:
+ break; // We ignore all of these.
+ }
+ break;
+ }
+
+ case Decl::PragmaDetectMismatch: {
+ const auto *PDMD = cast<PragmaDetectMismatchDecl>(D);
+ AddDetectMismatch(PDMD->getName(), PDMD->getValue());
+ break;
+ }
+
case Decl::LinkageSpec:
EmitLinkageSpec(cast<LinkageSpecDecl>(D));
break;
@@ -3653,6 +3934,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
break;
}
+ case Decl::OMPDeclareReduction:
+ EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D));
+ break;
+
default:
// Make sure we handled everything we should, every other kind is a
// non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind
@@ -3775,6 +4060,10 @@ static void EmitGlobalDeclMetadata(CodeGenModule &CGM,
/// to such functions with an unmangled name from inline assembly within the
/// same translation unit.
void CodeGenModule::EmitStaticExternCAliases() {
+ // Don't do anything if we're generating CUDA device code -- the NVPTX
+ // assembly target doesn't support aliases.
+ if (Context.getTargetInfo().getTriple().isNVPTX())
+ return;
for (auto &I : StaticExternCValues) {
IdentifierInfo *Name = I.first;
llvm::GlobalValue *Val = I.second;
@@ -3955,27 +4244,35 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) {
return InternalId;
}
-void CodeGenModule::CreateVTableBitSetEntry(llvm::NamedMDNode *BitsetsMD,
- llvm::GlobalVariable *VTable,
- CharUnits Offset,
- const CXXRecordDecl *RD) {
+/// Returns whether this module needs the "all-vtables" type identifier.
+bool CodeGenModule::NeedAllVtablesTypeId() const {
+  // Returns true if at least one of the vtable-based CFI checkers is
+  // enabled and is not in trapping mode.
+ return ((LangOpts.Sanitize.has(SanitizerKind::CFIVCall) &&
+ !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIVCall)) ||
+ (LangOpts.Sanitize.has(SanitizerKind::CFINVCall) &&
+ !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFINVCall)) ||
+ (LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) &&
+ !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIDerivedCast)) ||
+ (LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast) &&
+ !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIUnrelatedCast)));
+}
+
+void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable,
+ CharUnits Offset,
+ const CXXRecordDecl *RD) {
llvm::Metadata *MD =
CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
- llvm::Metadata *BitsetOps[] = {
- MD, llvm::ConstantAsMetadata::get(VTable),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))};
- BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps));
+ VTable->addTypeMetadata(Offset.getQuantity(), MD);
- if (CodeGenOpts.SanitizeCfiCrossDso) {
- if (auto TypeId = CreateCfiIdForTypeMetadata(MD)) {
- llvm::Metadata *BitsetOps2[] = {
- llvm::ConstantAsMetadata::get(TypeId),
- llvm::ConstantAsMetadata::get(VTable),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))};
- BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps2));
- }
+ if (CodeGenOpts.SanitizeCfiCrossDso)
+ if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD))
+ VTable->addTypeMetadata(Offset.getQuantity(),
+ llvm::ConstantAsMetadata::get(CrossDsoTypeId));
+
+ if (NeedAllVtablesTypeId()) {
+ llvm::Metadata *MD = llvm::MDString::get(getLLVMContext(), "all-vtables");
+ VTable->addTypeMetadata(Offset.getQuantity(), MD);
}
}
@@ -4007,3 +4304,10 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target.getTargetOpts().Features);
}
}
+
+llvm::SanitizerStatReport &CodeGenModule::getSanStats() {
+ if (!SanStats)
+ SanStats = llvm::make_unique<llvm::SanitizerStatReport>(&getModule());
+
+ return *SanStats;
+}
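getSanStats is the usual lazy-init accessor: the unique_ptr doubles as the created-yet flag (llvm::make_unique being LLVM's pre-C++14 make_unique shim). The shape in miniature, with placeholder types:

    #include <memory>

    struct StatReport { void finish() {} };

    struct ModuleLike {
      std::unique_ptr<StatReport> SanStats;

      StatReport &getSanStats() {
        if (!SanStats)                     // create on first use only
          SanStats.reset(new StatReport());
        return *SanStats;
      }
    };

    int main() {
      ModuleLike M;
      M.getSanStats().finish();  // mirrors the SanStats->finish() in Release()
      return 0;
    }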
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
index fdb4d78b3558..94904997d629 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h
@@ -21,6 +21,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/AST/Mangle.h"
#include "clang/Basic/ABI.h"
@@ -33,6 +34,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Transforms/Utils/SanitizerStats.h"
namespace llvm {
class Module;
@@ -47,7 +49,6 @@ class IndexedInstrProfReader;
}
namespace clang {
-class TargetCodeGenInfo;
class ASTContext;
class AtomicType;
class FunctionDecl;
@@ -91,6 +92,7 @@ class CGCUDARuntime;
class BlockFieldFlags;
class FunctionArgList;
class CoverageMappingModuleGen;
+class TargetCodeGenInfo;
struct OrderGlobalInits {
unsigned int priority;
@@ -165,6 +167,9 @@ struct ObjCEntrypoints {
/// id objc_storeWeak(id*, id);
llvm::Constant *objc_storeWeak;
+ /// id objc_unsafeClaimAutoreleasedReturnValue(id);
+ llvm::Constant *objc_unsafeClaimAutoreleasedReturnValue;
+
/// A void(void) inline asm to use to mark that the return value of
  /// a call will be immediately retained.
llvm::InlineAsm *retainAutoreleasedReturnValueMarker;
@@ -268,9 +273,9 @@ private:
std::unique_ptr<CGCXXABI> ABI;
llvm::LLVMContext &VMContext;
- CodeGenTBAA *TBAA;
+ std::unique_ptr<CodeGenTBAA> TBAA;
- mutable const TargetCodeGenInfo *TheTargetCodeGenInfo;
+ mutable std::unique_ptr<TargetCodeGenInfo> TheTargetCodeGenInfo;
// This should not be moved earlier, since its initialization depends on some
// of the previous reference members being already initialized and also checks
@@ -280,15 +285,16 @@ private:
/// Holds information about C++ vtables.
CodeGenVTables VTables;
- CGObjCRuntime* ObjCRuntime;
- CGOpenCLRuntime* OpenCLRuntime;
- CGOpenMPRuntime* OpenMPRuntime;
- CGCUDARuntime* CUDARuntime;
- CGDebugInfo* DebugInfo;
- ObjCEntrypoints *ObjCData;
- llvm::MDNode *NoObjCARCExceptionsMetadata;
+ std::unique_ptr<CGObjCRuntime> ObjCRuntime;
+ std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime;
+ std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime;
+ std::unique_ptr<CGCUDARuntime> CUDARuntime;
+ std::unique_ptr<CGDebugInfo> DebugInfo;
+ std::unique_ptr<ObjCEntrypoints> ObjCData;
+ llvm::MDNode *NoObjCARCExceptionsMetadata = nullptr;
std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader;
InstrProfStats PGOStats;
+ std::unique_ptr<llvm::SanitizerStatReport> SanStats;
// A set of references that have only been seen via a weakref so far. This is
// used to remove the weak of the reference if we ever see a direct reference
@@ -429,8 +435,8 @@ private:
llvm::WeakVH ConstantStringClassRef;
/// \brief The LLVM type corresponding to NSConstantString.
- llvm::StructType *NSConstantStringType;
-
+ llvm::StructType *NSConstantStringType = nullptr;
+
/// \brief The type used to describe the state of a fast enumeration in
/// Objective-C's for..in loop.
QualType ObjCFastEnumerationStateType;
@@ -450,24 +456,24 @@ private:
/// @name Cache for Blocks Runtime Globals
/// @{
- llvm::Constant *NSConcreteGlobalBlock;
- llvm::Constant *NSConcreteStackBlock;
+ llvm::Constant *NSConcreteGlobalBlock = nullptr;
+ llvm::Constant *NSConcreteStackBlock = nullptr;
- llvm::Constant *BlockObjectAssign;
- llvm::Constant *BlockObjectDispose;
+ llvm::Constant *BlockObjectAssign = nullptr;
+ llvm::Constant *BlockObjectDispose = nullptr;
- llvm::Type *BlockDescriptorType;
- llvm::Type *GenericBlockLiteralType;
+ llvm::Type *BlockDescriptorType = nullptr;
+ llvm::Type *GenericBlockLiteralType = nullptr;
struct {
int GlobalUniqueCount;
} Block;
/// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>)
- llvm::Constant *LifetimeStartFn;
+ llvm::Constant *LifetimeStartFn = nullptr;
/// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>)
- llvm::Constant *LifetimeEndFn;
+ llvm::Constant *LifetimeEndFn = nullptr;
GlobalDecl initializedGlobalDecl;
@@ -583,7 +589,7 @@ public:
TypeDescriptorMap[Ty] = C;
}
- CGDebugInfo *getModuleDebugInfo() { return DebugInfo; }
+ CGDebugInfo *getModuleDebugInfo() { return DebugInfo.get(); }
llvm::MDNode *getNoObjCARCExceptionsMetadata() {
if (!NoObjCARCExceptionsMetadata)
@@ -696,11 +702,14 @@ public:
unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace);
/// Return the llvm::Constant for the address of the given global variable.
- /// If Ty is non-null and if the global doesn't exist, then it will be greated
+ /// If Ty is non-null and if the global doesn't exist, then it will be created
/// with the specified type instead of whatever the normal requested type
- /// would be.
+ /// would be. If IsForDefinition is true, it is guaranteed that an actual
+ /// global with type Ty will be returned, rather than a conversion of a
+ /// variable with the same mangled name but some other type.
llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D,
- llvm::Type *Ty = nullptr);
+ llvm::Type *Ty = nullptr,
+ bool IsForDefinition = false);
/// Return the address of the given function. If Ty is non-null, then this
/// function will use the specified type if it has to create it.
@@ -989,6 +998,8 @@ public:
void EmitVTable(CXXRecordDecl *Class);
+ void RefreshTypeCacheForClass(const CXXRecordDecl *Class);
+
/// \brief Appends Opts to the "Linker Options" metadata value.
void AppendLinkerOptions(StringRef Opts);
@@ -1098,34 +1109,42 @@ public:
/// \param D Threadprivate declaration.
void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D);
- /// Returns whether the given record is blacklisted from control flow
- /// integrity checks.
- bool IsCFIBlacklistedRecord(const CXXRecordDecl *RD);
+ /// \brief Emit code for the declare reduction construct.
+ void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
+ CodeGenFunction *CGF = nullptr);
+
+ /// Returns whether the given record has hidden LTO visibility and therefore
+ /// may participate in (single-module) CFI and whole-program vtable
+ /// optimization.
+ bool HasHiddenLTOVisibility(const CXXRecordDecl *RD);
- /// Emit bit set entries for the given vtable using the given layout if
- /// vptr CFI is enabled.
- void EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
- const VTableLayout &VTLayout);
+ /// Emit type metadata for the given vtable using the given layout.
+ void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
+ const VTableLayout &VTLayout);
- /// Generate a cross-DSO type identifier for type.
- llvm::ConstantInt *CreateCfiIdForTypeMetadata(llvm::Metadata *MD);
+ /// Generate a cross-DSO type identifier for MD.
+ llvm::ConstantInt *CreateCrossDsoCfiTypeId(llvm::Metadata *MD);
/// Create a metadata identifier for the given type. This may either be an
/// MDString (for external identifiers) or a distinct unnamed MDNode (for
/// internal identifiers).
llvm::Metadata *CreateMetadataIdentifierForType(QualType T);
- /// Create a bitset entry for the given function and add it to BitsetsMD.
- void CreateFunctionBitSetEntry(const FunctionDecl *FD, llvm::Function *F);
+ /// Create and attach type metadata to the given function.
+ void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F);
- /// Create a bitset entry for the given vtable and add it to BitsetsMD.
- void CreateVTableBitSetEntry(llvm::NamedMDNode *BitsetsMD,
- llvm::GlobalVariable *VTable, CharUnits Offset,
- const CXXRecordDecl *RD);
+ /// Returns whether this module needs the "all-vtables" type identifier.
+ bool NeedAllVtablesTypeId() const;
+
+ /// Create and attach type metadata for the given vtable.
+ void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset,
+ const CXXRecordDecl *RD);
/// \brief Get the declaration of std::terminate for the platform.
llvm::Constant *getTerminateFn();
+ llvm::SanitizerStatReport &getSanStats();
+
private:
llvm::Constant *
GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
@@ -1136,7 +1155,8 @@ private:
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
llvm::PointerType *PTy,
- const VarDecl *D);
+ const VarDecl *D,
+ bool IsForDefinition = false);
void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO);
@@ -1147,8 +1167,9 @@ private:
void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
- void EmitGlobalVarDefinition(const VarDecl *D);
+ void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false);
void EmitAliasDefinition(GlobalDecl GD);
+ void emitIFuncDefinition(GlobalDecl GD);
void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
void EmitObjCIvarInitializations(ObjCImplementationDecl *D);
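
Several declarations above (GetAddrOfGlobalVar, GetOrCreateLLVMGlobal, EmitGlobalVarDefinition) grow a defaulted trailing bool. Because the parameter defaults to false, every existing call site keeps compiling unchanged, and only the definition-emitting paths opt in. A minimal sketch with hypothetical names:

    #include <cassert>
    #include <string>

    struct Global { std::string Name; bool ForDefinition; };

    // Defaulted trailing parameter: old callers are untouched.
    Global getOrCreate(const std::string &Name, bool IsForDefinition = false) {
      return Global{Name, IsForDefinition};
    }

    int main() {
      Global Use = getOrCreate("x");        // pre-existing call site
      Global Def = getOrCreate("x", true);  // new definition path
      assert(!Use.ForDefinition && Def.ForDefinition);
    }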
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp
index 2c0d93b394ac..4eefdd72b7e4 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -18,11 +18,14 @@
#include "clang/AST/StmtVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
+static llvm::cl::opt<bool> EnableValueProfiling(
+ "enable-value-profiling", llvm::cl::ZeroOrMore,
+ llvm::cl::desc("Enable value profiling"), llvm::cl::init(false));
+
using namespace clang;
using namespace CodeGen;
@@ -34,12 +37,14 @@ void CodeGenPGO::setFuncName(StringRef Name,
PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
// If we're generating a profile, create a variable for the name.
- if (CGM.getCodeGenOpts().ProfileInstrGenerate)
+ if (CGM.getCodeGenOpts().hasProfileClangInstr())
FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
}
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
setFuncName(Fn->getName(), Fn->getLinkage());
+ // Create PGOFuncName metadata.
+ llvm::createPGOFuncNameMetadata(*Fn, FuncName);
}
namespace {
@@ -406,7 +411,8 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
RecordStmtCount(S);
Visit(S->getLoopVarStmt());
Visit(S->getRangeStmt());
- Visit(S->getBeginEndStmt());
+ Visit(S->getBeginStmt());
+ Visit(S->getEndStmt());
uint64_t ParentCount = CurrentCount;
BreakContinueStack.push_back(BreakContinue());
@@ -607,7 +613,7 @@ uint64_t PGOHash::finalize() {
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
const Decl *D = GD.getDecl();
- bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
+ bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
if (!InstrumentRegions && !PGOReader)
return;
@@ -653,12 +659,18 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) {
FunctionHash = Walker.Hash.finalize();
}
-void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
+bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
if (SkipCoverageMapping)
- return;
- // Don't map the functions inside the system headers
+ return true;
+
+ // Don't map the functions in system headers.
+ const auto &SM = CGM.getContext().getSourceManager();
auto Loc = D->getBody()->getLocStart();
- if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
+ return SM.isInSystemHeader(Loc);
+}
+
+void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
+ if (skipRegionMappingForDecl(D))
return;
std::string CoverageMapping;
@@ -679,11 +691,7 @@ void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
void
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
llvm::GlobalValue::LinkageTypes Linkage) {
- if (SkipCoverageMapping)
- return;
- // Don't map the functions inside the system headers
- auto Loc = D->getBody()->getLocStart();
- if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
+ if (skipRegionMappingForDecl(D))
return;
std::string CoverageMapping;
@@ -726,7 +734,7 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
}
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
- if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap)
+ if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
return;
if (!Builder.GetInsertBlock())
return;
@@ -740,21 +748,76 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
Builder.getInt32(Counter)});
}
+// This method either inserts a call to the profile run-time during
+// instrumentation or puts profile data into metadata for PGO use.
+void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
+ llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
+
+ if (!EnableValueProfiling)
+ return;
+
+ if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
+ return;
+
+ if (isa<llvm::Constant>(ValuePtr))
+ return;
+
+ bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
+ if (InstrumentValueSites && RegionCounterMap) {
+ auto BuilderInsertPoint = Builder.saveIP();
+ Builder.SetInsertPoint(ValueSite);
+ llvm::Value *Args[5] = {
+ llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
+ Builder.getInt64(FunctionHash),
+ Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
+ Builder.getInt32(ValueKind),
+ Builder.getInt32(NumValueSites[ValueKind]++)
+ };
+ Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
+ Builder.restoreIP(BuilderInsertPoint);
+ return;
+ }
+
+ llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
+ if (PGOReader && haveRegionCounts()) {
+ // We record the three most frequently called functions at each call site.
+ // Profile metadata contains a "VP" string identifying this metadata
+ // as value profiling data, then a uint32_t value for the value profiling
+ // kind, a uint64_t value for the total number of times the call is
+ // executed, followed by the function hash and execution count (uint64_t)
+ // pairs for each function.
+ if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
+ return;
+
+ llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
+ (llvm::InstrProfValueKind)ValueKind,
+ NumValueSites[ValueKind]);
+
+ NumValueSites[ValueKind]++;
+ }
+}
+
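
valueProfile() above briefly redirects the IRBuilder to the value site, emits the instrumentation call, and then resumes at the original position. A sketch of that saveIP()/SetInsertPoint()/restoreIP() pattern against the LLVM C++ API; the helper and its arguments are illustrative, not clang code:

    #include "llvm/IR/IRBuilder.h"

    // Emit an (illustrative) instruction immediately before Site, then
    // restore the builder to wherever it was pointing.
    void emitBefore(llvm::IRBuilder<> &Builder, llvm::Instruction *Site,
                    llvm::Value *LHS, llvm::Value *RHS) {
      llvm::IRBuilderBase::InsertPoint Saved = Builder.saveIP();
      Builder.SetInsertPoint(Site);        // insert right before Site
      Builder.CreateAdd(LHS, RHS, "tmp");
      Builder.restoreIP(Saved);            // back to the original point
    }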
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
bool IsInMainFile) {
CGM.getPGOStats().addVisited(IsInMainFile);
RegionCounts.clear();
- if (std::error_code EC =
- PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
- if (EC == llvm::instrprof_error::unknown_function)
+ llvm::Expected<llvm::InstrProfRecord> RecordExpected =
+ PGOReader->getInstrProfRecord(FuncName, FunctionHash);
+ if (auto E = RecordExpected.takeError()) {
+ auto IPE = llvm::InstrProfError::take(std::move(E));
+ if (IPE == llvm::instrprof_error::unknown_function)
CGM.getPGOStats().addMissing(IsInMainFile);
- else if (EC == llvm::instrprof_error::hash_mismatch)
+ else if (IPE == llvm::instrprof_error::hash_mismatch)
CGM.getPGOStats().addMismatched(IsInMainFile);
- else if (EC == llvm::instrprof_error::malformed)
+ else if (IPE == llvm::instrprof_error::malformed)
// TODO: Consider a more specific warning for this case.
CGM.getPGOStats().addMismatched(IsInMainFile);
- RegionCounts.clear();
+ return;
}
+ ProfRecord =
+ llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
+ RegionCounts = ProfRecord->Counts;
}
/// \brief Calculate what to divide by to scale weights.
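
loadRegionCounts() now consumes llvm::Expected<InstrProfRecord>: the result must be checked before use, and on failure takeError() yields an llvm::Error that must itself be consumed. A minimal sketch of that protocol, with illustrative parse/demo functions:

    #include "llvm/Support/Error.h"
    #include <utility>

    llvm::Expected<int> parse(bool Ok) {
      if (!Ok)
        return llvm::make_error<llvm::StringError>(
            "bad input", llvm::inconvertibleErrorCode());
      return 42;
    }

    int demo(bool Ok) {
      llvm::Expected<int> V = parse(Ok);
      if (llvm::Error E = V.takeError()) {   // moves the error out on failure
        llvm::handleAllErrors(std::move(E),
                              [](const llvm::ErrorInfoBase &EI) {
                                // EI.message() describes the failure
                              });
        return -1;
      }
      return V.get();                        // safe: success was checked
    }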
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h
index 6bf29ecaa7c4..d03f23535bb9 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h
@@ -19,7 +19,9 @@
#include "CodeGenTypes.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <array>
#include <memory>
namespace clang {
@@ -32,10 +34,12 @@ private:
std::string FuncName;
llvm::GlobalVariable *FuncNameVar;
+ std::array<unsigned, llvm::IPVK_Last + 1> NumValueSites;
unsigned NumRegionCounters;
uint64_t FunctionHash;
std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
+ std::unique_ptr<llvm::InstrProfRecord> ProfRecord;
std::vector<uint64_t> RegionCounts;
uint64_t CurrentRegionCount;
/// \brief A flag that is set to true when this function doesn't need
@@ -44,8 +48,8 @@ private:
public:
CodeGenPGO(CodeGenModule &CGM)
- : CGM(CGM), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0),
- SkipCoverageMapping(false) {}
+ : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0),
+ FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {}
/// Whether or not we have PGO region data for the current function. This is
/// false both when we have no data at all and when our data has been
@@ -87,6 +91,9 @@ public:
/// for an unused declaration.
void emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
llvm::GlobalValue::LinkageTypes Linkage);
+ // Insert instrumentation or attach profile metadata at value sites.
+ void valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
+ llvm::Instruction *ValueSite, llvm::Value *ValuePtr);
private:
void setFuncName(llvm::Function *Fn);
void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage);
@@ -96,6 +103,7 @@ private:
llvm::Function *Fn);
void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
bool IsInMainFile);
+ bool skipRegionMappingForDecl(const Decl *D);
void emitCounterRegionMapping(const Decl *D);
public:
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
index c3c925cde2fd..04224e726797 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -44,8 +44,12 @@ llvm::MDNode *CodeGenTBAA::getRoot() {
// if our LLVM IR is linked with LLVM IR from a different front-end
// (or a different version of this front-end), their TBAA trees will
// remain distinct, and the optimizer will treat them conservatively.
- if (!Root)
- Root = MDHelper.createTBAARoot("Simple C/C++ TBAA");
+ if (!Root) {
+ if (Features.CPlusPlus)
+ Root = MDHelper.createTBAARoot("Simple C++ TBAA");
+ else
+ Root = MDHelper.createTBAARoot("Simple C/C++ TBAA");
+ }
return Root;
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h
index 632caddce980..ddb063d9e88a 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h
@@ -15,14 +15,11 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENTBAA_H
#define LLVM_CLANG_LIB_CODEGEN_CODEGENTBAA_H
+#include "clang/AST/Type.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/MDBuilder.h"
-
-namespace llvm {
- class LLVMContext;
- class MDNode;
-}
+#include "llvm/IR/Metadata.h"
namespace clang {
class ASTContext;
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp
index 09d9bf17b3bf..ebe55c70d817 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -272,6 +272,17 @@ void CodeGenTypes::UpdateCompletedType(const TagDecl *TD) {
DI->completeType(RD);
}
+void CodeGenTypes::RefreshTypeCacheForClass(const CXXRecordDecl *RD) {
+ QualType T = Context.getRecordType(RD);
+ T = Context.getCanonicalType(T);
+
+ const Type *Ty = T.getTypePtr();
+ if (RecordsWithOpaqueMemberPointers.count(Ty)) {
+ TypeCache.clear();
+ RecordsWithOpaqueMemberPointers.clear();
+ }
+}
+
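
RefreshTypeCacheForClass() pairs with the member-pointer change further down: conversions that had to fall back to an opaque placeholder are recorded in RecordsWithOpaqueMemberPointers, and once such a class later gains the missing inheritance model the whole cache is conservatively dropped. A generic sketch of the scheme with stand-in types:

    #include <map>
    #include <set>

    struct Key {};  // stand-in for const clang::Type *

    class TypeConverter {
      std::map<const Key *, int> Cache;   // finished conversions
      std::set<const Key *> Opaque;       // conversions that gave up

    public:
      void noteOpaque(const Key *K) { Opaque.insert(K); }
      void refreshFor(const Key *K) {
        if (Opaque.count(K)) {   // this type can now be converted properly
          Cache.clear();         // conservative: re-convert everything
          Opaque.clear();
        }
      }
    };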
static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext,
const llvm::fltSemantics &format,
bool UseNativeHalf = false) {
@@ -438,6 +449,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
+ case BuiltinType::Float128:
ResultType = getTypeForFormat(getLLVMContext(),
Context.getFloatTypeSemantics(T),
/* UseNativeHalf = */ false);
@@ -453,18 +465,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
ResultType = llvm::IntegerType::get(getLLVMContext(), 128);
break;
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
@@ -603,10 +606,13 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
}
case Type::MemberPointer: {
- if (!getCXXABI().isMemberPointerConvertible(cast<MemberPointerType>(Ty)))
- return llvm::StructType::create(getLLVMContext());
- ResultType =
- getCXXABI().ConvertMemberPointerType(cast<MemberPointerType>(Ty));
+ auto *MPTy = cast<MemberPointerType>(Ty);
+ if (!getCXXABI().isMemberPointerConvertible(MPTy)) {
+ RecordsWithOpaqueMemberPointers.insert(MPTy->getClass());
+ ResultType = llvm::StructType::create(getLLVMContext());
+ } else {
+ ResultType = getCXXABI().ConvertMemberPointerType(MPTy);
+ }
break;
}
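
The switch above (and the RTTI switch later in this commit) replaces a hand-maintained list of OpenCL image cases with an expansion of OpenCLImageTypes.def: the .def header invokes a macro that the including file defines, so every per-entry list in the codebase is generated from one authoritative list. A self-contained sketch of the X-macro technique; the color list is hypothetical and inlined rather than kept in a separate .def file:

    #define COLOR_LIST(X) X(Red) X(Green) X(Blue)

    enum Color {
    #define COLOR_ENUM(Name) Name,
      COLOR_LIST(COLOR_ENUM)    // expands to: Red, Green, Blue,
    #undef COLOR_ENUM
    };

    const char *name(Color C) {
      switch (C) {
    #define COLOR_CASE(Name) case Name: return #Name;
      COLOR_LIST(COLOR_CASE)    // one case per entry, never out of sync
    #undef COLOR_CASE
      }
      return "?";
    }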
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h
index a96f23c44894..5796ab8fe5aa 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h
@@ -31,7 +31,6 @@ class StructType;
}
namespace clang {
-class ABIInfo;
class ASTContext;
template <typename> class CanQual;
class CXXConstructorDecl;
@@ -51,6 +50,7 @@ class Type;
typedef CanQual<Type> CanQualType;
namespace CodeGen {
+class ABIInfo;
class CGCXXABI;
class CGRecordLayout;
class CodeGenModule;
@@ -162,6 +162,10 @@ class CodeGenTypes {
/// corresponding llvm::Type.
llvm::DenseMap<const Type *, llvm::Type *> TypeCache;
+ llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers;
+
+ unsigned ClangCallConvToLLVMCallConv(CallingConv CC);
+
public:
CodeGenTypes(CodeGenModule &cgm);
~CodeGenTypes();
@@ -203,6 +207,11 @@ public:
bool isFuncTypeConvertible(const FunctionType *FT);
bool isFuncParamTypeConvertible(QualType Ty);
+ /// Determine if a C++ inheriting constructor should have parameters matching
+ /// those of its inherited constructor.
+ bool inheritingCtorHasParams(const InheritedConstructor &Inherited,
+ CXXCtorType Type);
+
/// GetFunctionTypeForVTable - Get the LLVM function type for use in a vtable,
/// given a CXXMethodDecl. If the method has an incomplete return type,
/// and/or incomplete argument types, this will return the opaque type.
@@ -214,9 +223,9 @@ public:
/// replace the 'opaque' type we previously made for it if applicable.
void UpdateCompletedType(const TagDecl *TD);
- /// getNullaryFunctionInfo - Get the function info for a void()
- /// function with standard CC.
- const CGFunctionInfo &arrangeNullaryFunction();
+ /// \brief Remove stale types from the type cache when an inheritance model
+ /// gets assigned to a class.
+ void RefreshTypeCacheForClass(const CXXRecordDecl *RD);
// The arrangement methods are split into three families:
// - those meant to drive the signature and prologue/epilogue
@@ -239,16 +248,55 @@ public:
// this for compatibility reasons.
const CGFunctionInfo &arrangeGlobalDeclaration(GlobalDecl GD);
+
+ /// Given a function info for a declaration, return the function info
+ /// for a call with the given arguments.
+ ///
+ /// Often this will be able to simply return the declaration info.
+ const CGFunctionInfo &arrangeCall(const CGFunctionInfo &declFI,
+ const CallArgList &args);
+
+ /// Free functions are functions that are compatible with an ordinary
+ /// C function pointer type.
const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD);
+ const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
+ const FunctionType *Ty,
+ bool ChainCall);
+ const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty,
+ const FunctionDecl *FD);
+ const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty);
+
+ /// A nullary function is a freestanding function of type 'void ()'.
+ /// This method works for both calls and declarations.
+ const CGFunctionInfo &arrangeNullaryFunction();
+
+ /// A builtin function is a freestanding function using the default
+ /// C conventions.
+ const CGFunctionInfo &
+ arrangeBuiltinFunctionDeclaration(QualType resultType,
+ const FunctionArgList &args);
const CGFunctionInfo &
- arrangeFreeFunctionDeclaration(QualType ResTy, const FunctionArgList &Args,
- const FunctionType::ExtInfo &Info,
- bool isVariadic);
+ arrangeBuiltinFunctionDeclaration(CanQualType resultType,
+ ArrayRef<CanQualType> argTypes);
+ const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType,
+ const CallArgList &args);
+ /// Objective-C methods are C functions with some implicit parameters.
const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD);
const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
QualType receiverType);
+ const CGFunctionInfo &arrangeUnprototypedObjCMessageSend(
+ QualType returnType,
+ const CallArgList &args);
+
+ /// Block invocation functions are C functions with an implicit parameter.
+ const CGFunctionInfo &arrangeBlockFunctionDeclaration(
+ const FunctionProtoType *type,
+ const FunctionArgList &args);
+ const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args,
+ const FunctionType *type);
+ /// C++ methods have some special rules and also have implicit parameters.
const CGFunctionInfo &arrangeCXXMethodDeclaration(const CXXMethodDecl *MD);
const CGFunctionInfo &arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
StructorType Type);
@@ -256,15 +304,6 @@ public:
const CXXConstructorDecl *D,
CXXCtorType CtorKind,
unsigned ExtraArgs);
- const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
- const FunctionType *Ty,
- bool ChainCall);
- const CGFunctionInfo &arrangeFreeFunctionCall(QualType ResTy,
- const CallArgList &args,
- FunctionType::ExtInfo info,
- RequiredArgs required);
- const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args,
- const FunctionType *type);
const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args,
const FunctionProtoType *type,
@@ -272,9 +311,6 @@ public:
const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD);
const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT);
- const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty,
- const FunctionDecl *FD);
- const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty);
const CGFunctionInfo &arrangeCXXMethodType(const CXXRecordDecl *RD,
const FunctionProtoType *FTP,
const CXXMethodDecl *MD);
@@ -290,6 +326,7 @@ public:
bool chainCall,
ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
+ ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs args);
/// \brief Compute a new LLVM record layout object for the given record.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
index 03e22cd398aa..b011a0f319e3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -15,10 +15,12 @@
#include "CodeGenFunction.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ProfileData/CoverageMapping.h"
-#include "llvm/ProfileData/CoverageMappingReader.h"
-#include "llvm/ProfileData/CoverageMappingWriter.h"
+#include "llvm/ProfileData/Coverage/CoverageMapping.h"
+#include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
+#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/FileSystem.h"
@@ -128,6 +130,16 @@ public:
return strcmp(SM.getBufferName(SM.getSpellingLoc(Loc)), "<built-in>") == 0;
}
+ /// \brief Check whether \c Loc is included or expanded from \c Parent.
+ bool isNestedIn(SourceLocation Loc, FileID Parent) {
+ do {
+ Loc = getIncludeOrExpansionLoc(Loc);
+ if (Loc.isInvalid())
+ return false;
+ } while (!SM.isInFileID(Loc, Parent));
+ return true;
+ }
+
/// \brief Get the start of \c S ignoring macro arguments and builtin macros.
SourceLocation getStart(const Stmt *S) {
SourceLocation Loc = S->getLocStart();
@@ -152,14 +164,17 @@ public:
void gatherFileIDs(SmallVectorImpl<unsigned> &Mapping) {
FileIDMapping.clear();
- SmallVector<FileID, 8> Visited;
+ llvm::SmallSet<FileID, 8> Visited;
SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs;
for (const auto &Region : SourceRegions) {
SourceLocation Loc = Region.getStartLoc();
FileID File = SM.getFileID(Loc);
- if (std::find(Visited.begin(), Visited.end(), File) != Visited.end())
+ if (!Visited.insert(File).second)
+ continue;
+
+ // Do not map FileIDs associated with system headers.
+ if (SM.isInSystemHeader(SM.getSpellingLoc(Loc)))
continue;
- Visited.push_back(File);
unsigned Depth = 0;
for (SourceLocation Parent = getIncludeOrExpansionLoc(Loc);
@@ -191,12 +206,6 @@ public:
return None;
}
- /// \brief Return true if the given clang's file id has a corresponding
- /// coverage file id.
- bool hasExistingCoverageFileID(FileID File) const {
- return FileIDMapping.count(File);
- }
-
/// \brief Gather all the regions that were skipped by the preprocessor
/// using constructs like #if.
void gatherSkippedRegions() {
@@ -246,6 +255,10 @@ public:
SourceLocation LocStart = Region.getStartLoc();
assert(SM.getFileID(LocStart).isValid() && "region in invalid file");
+ // Ignore regions from system headers.
+ if (SM.isInSystemHeader(SM.getSpellingLoc(LocStart)))
+ continue;
+
auto CovFileID = getCoverageFileID(LocStart);
// Ignore regions that don't have a file, such as builtin macros.
if (!CovFileID)
@@ -309,7 +322,27 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
if (!D->hasBody())
return;
auto Body = D->getBody();
- SourceRegions.emplace_back(Counter(), getStart(Body), getEnd(Body));
+ SourceLocation Start = getStart(Body);
+ SourceLocation End = getEnd(Body);
+ if (!SM.isWrittenInSameFile(Start, End)) {
+ // Walk up to find the common ancestor.
+ // Correct the locations accordingly.
+ FileID StartFileID = SM.getFileID(Start);
+ FileID EndFileID = SM.getFileID(End);
+ while (StartFileID != EndFileID && !isNestedIn(End, StartFileID)) {
+ Start = getIncludeOrExpansionLoc(Start);
+ assert(Start.isValid() &&
+ "Declaration start location not nested within a known region");
+ StartFileID = SM.getFileID(Start);
+ }
+ while (StartFileID != EndFileID) {
+ End = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(End));
+ assert(End.isValid() &&
+ "Declaration end location not nested within a known region");
+ EndFileID = SM.getFileID(End);
+ }
+ }
+ SourceRegions.emplace_back(Counter(), Start, End);
}
/// \brief Write the mapping data to the output stream
@@ -318,6 +351,9 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder {
gatherFileIDs(FileIDMapping);
emitSourceRegions();
+ if (MappingRegions.empty())
+ return;
+
CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions);
Writer.write(OS);
}
@@ -356,10 +392,6 @@ struct CounterCoverageMappingBuilder
return addCounters(addCounters(C1, C2), C3);
}
- Counter addCounters(Counter C1, Counter C2, Counter C3, Counter C4) {
- return addCounters(addCounters(C1, C2, C3), C4);
- }
-
/// \brief Return the region counter for the given statement.
///
/// This should only be called on statements that have a dedicated counter.
@@ -433,31 +465,43 @@ struct CounterCoverageMappingBuilder
Visit(S);
Counter ExitCount = getRegion().getCounter();
popRegions(Index);
+
+ // The statement may be spanned by an expansion. Make sure we handle a file
+ // exit out of this expansion before moving to the next statement.
+ if (SM.isBeforeInTranslationUnit(getStart(S), S->getLocStart()))
+ MostRecentLocation = getEnd(S);
+
return ExitCount;
}
+ /// \brief Check whether a region with bounds \c StartLoc and \c EndLoc
+ /// is already added to \c SourceRegions.
+ bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc) {
+ return SourceRegions.rend() !=
+ std::find_if(SourceRegions.rbegin(), SourceRegions.rend(),
+ [&](const SourceMappingRegion &Region) {
+ return Region.getStartLoc() == StartLoc &&
+ Region.getEndLoc() == EndLoc;
+ });
+ }
+
/// \brief Adjust the most recently visited location to \c EndLoc.
///
/// This should be used after visiting any statements in non-source order.
void adjustForOutOfOrderTraversal(SourceLocation EndLoc) {
MostRecentLocation = EndLoc;
- // Avoid adding duplicate regions if we have a completed region on the top
- // of the stack and are adjusting to the end of a virtual file.
+ // The code region for a whole macro is created in handleFileExit() when
+ // it detects an exit from the macro's virtual file. If we visited
+ // statements in non-source order, we might already have such a region
+ // added, for example, if the body of a loop is divided among multiple
+ // macros. Avoid adding duplicate regions in such a case.
if (getRegion().hasEndLoc() &&
- MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation))
+ MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation) &&
+ isRegionAlreadyAdded(getStartOfFileOrMacro(MostRecentLocation),
+ MostRecentLocation))
MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation);
}
- /// \brief Check whether \c Loc is included or expanded from \c Parent.
- bool isNestedIn(SourceLocation Loc, FileID Parent) {
- do {
- Loc = getIncludeOrExpansionLoc(Loc);
- if (Loc.isInvalid())
- return false;
- } while (!SM.isInFileID(Loc, Parent));
- return true;
- }
-
/// \brief Adjust regions and state when \c NewLoc exits a file.
///
/// If moving from our most recently tracked location to \c NewLoc exits any
@@ -563,6 +607,9 @@ struct CounterCoverageMappingBuilder
emitExpansionRegions();
gatherSkippedRegions();
+ if (MappingRegions.empty())
+ return;
+
CoverageMappingWriter Writer(VirtualFileMapping, Builder.getExpressions(),
MappingRegions);
Writer.write(OS);
@@ -579,6 +626,11 @@ struct CounterCoverageMappingBuilder
void VisitDecl(const Decl *D) {
Stmt *Body = D->getBody();
+
+ // Do not propagate region counts into system headers.
+ if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body))))
+ return;
+
propagateCounts(getRegionCounter(Body), Body);
}
@@ -769,7 +821,9 @@ struct CounterCoverageMappingBuilder
BreakContinueStack.back().ContinueCount, BC.ContinueCount);
Counter ExitCount = getRegionCounter(S);
- pushRegion(ExitCount);
+ SourceLocation ExitLoc = getEnd(S);
+ pushRegion(ExitCount, getStart(S), ExitLoc);
+ handleFileExit(ExitLoc);
}
void VisitSwitchCase(const SwitchCase *S) {
@@ -822,7 +876,12 @@ struct CounterCoverageMappingBuilder
void VisitCXXTryStmt(const CXXTryStmt *S) {
extendRegion(S);
- Visit(S->getTryBlock());
+ // Handle macros that generate the "try" but not the rest.
+ extendRegion(S->getTryBlock());
+
+ Counter ParentCount = getRegion().getCounter();
+ propagateCounts(ParentCount, S->getTryBlock());
+
for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
Visit(S->getHandler(I));
@@ -911,7 +970,7 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
void CoverageMappingModuleGen::addFunctionMappingRecord(
llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
- const std::string &CoverageMapping, bool isUsed) {
+ const std::string &CoverageMapping, bool IsUsed) {
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
if (!FunctionRecordTy) {
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType,
@@ -929,10 +988,10 @@ void CoverageMappingModuleGen::addFunctionMappingRecord(
};
FunctionRecords.push_back(llvm::ConstantStruct::get(
FunctionRecordTy, makeArrayRef(FunctionRecordVals)));
- if (!isUsed)
+ if (!IsUsed)
FunctionNames.push_back(
llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)));
- CoverageMappings += CoverageMapping;
+ CoverageMappings.push_back(CoverageMapping);
if (CGM.getCodeGenOpts().DumpCoverageMapping) {
// Dump the coverage mapping data for this function by decoding the
@@ -978,8 +1037,10 @@ void CoverageMappingModuleGen::emit() {
std::string FilenamesAndCoverageMappings;
llvm::raw_string_ostream OS(FilenamesAndCoverageMappings);
CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
- OS << CoverageMappings;
- size_t CoverageMappingSize = CoverageMappings.size();
+ std::string RawCoverageMappings =
+ llvm::join(CoverageMappings.begin(), CoverageMappings.end(), "");
+ OS << RawCoverageMappings;
+ size_t CoverageMappingSize = RawCoverageMappings.size();
size_t FilenamesSize = OS.str().size() - CoverageMappingSize;
// Append extra zeroes if necessary to ensure that the size of the filenames
// and coverage mappings is a multiple of 8.
@@ -1035,7 +1096,7 @@ void CoverageMappingModuleGen::emit() {
// to pass the list of names referenced to codegen.
new llvm::GlobalVariable(CGM.getModule(), NamesArrTy, true,
llvm::GlobalValue::InternalLinkage, NamesArrVal,
- llvm::getCoverageNamesVarName());
+ llvm::getCoverageUnusedNamesVarName());
}
}
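
The module generator now keeps one coverage-mapping string per function and concatenates them once at emission time with llvm::join, rather than appending to a single growing string as records arrive. A sketch of the equivalent concatenation, sizing the output buffer up front:

    #include <string>
    #include <vector>

    std::string joinMappings(const std::vector<std::string> &Mappings) {
      std::string Out;
      size_t Total = 0;
      for (const auto &M : Mappings)
        Total += M.size();
      Out.reserve(Total);         // one allocation for the final buffer
      for (const auto &M : Mappings)
        Out += M;
      return Out;
    }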
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h
index 9ae2bcffe4ca..c202fe899343 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h
@@ -56,7 +56,7 @@ class CoverageMappingModuleGen {
std::vector<llvm::Constant *> FunctionRecords;
std::vector<llvm::Constant *> FunctionNames;
llvm::StructType *FunctionRecordTy;
- std::string CoverageMappings;
+ std::vector<std::string> CoverageMappings;
public:
CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo)
@@ -72,7 +72,7 @@ public:
StringRef FunctionNameValue,
uint64_t FunctionHash,
const std::string &CoverageMapping,
- bool isUsed = true);
+ bool IsUsed = true);
/// \brief Emit the coverage mapping data for a translation unit.
void emit();
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h
index 85cd1543e5bf..4717a667d2d2 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h
@@ -89,7 +89,10 @@ enum CleanupKind : unsigned {
InactiveCleanup = 0x4,
InactiveEHCleanup = EHCleanup | InactiveCleanup,
InactiveNormalCleanup = NormalCleanup | InactiveCleanup,
- InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup
+ InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup,
+
+ LifetimeMarker = 0x8,
+ NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup,
};
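
CleanupKind above is a bit-flag enum: each primitive kind owns one bit and the composite kinds are bitwise ORs, so the new LifetimeMarker bit composes directly with the existing cleanup bits. A minimal sketch of the idiom:

    enum Kind : unsigned {
      Normal   = 0x1,
      EH       = 0x2,
      Inactive = 0x4,
      Marker   = 0x8,                        // newly added bit
      NormalEHMarker = Marker | Normal | EH, // composite kind
    };

    inline bool isMarker(unsigned K) { return (K & Marker) != 0; }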
/// A stack of scopes which respond to exceptions, including cleanups
@@ -341,9 +344,7 @@ public:
/// Determines whether the exception-scopes stack is empty.
bool empty() const { return StartOfData == EndOfBuffer; }
- bool requiresLandingPad() const {
- return InnermostEHScope != stable_end();
- }
+ bool requiresLandingPad() const;
/// Determines whether there are any normal cleanups on the stack.
bool hasNormalCleanups() const {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
index e02c8dc3a86a..6051594fb001 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -154,17 +154,9 @@ public:
Address Ptr, QualType ElementType,
const CXXDestructorDecl *Dtor) override;
- /// Itanium says that an _Unwind_Exception has to be "double-word"
- /// aligned (and thus the end of it is also so-aligned), meaning 16
- /// bytes. Of course, that was written for the actual Itanium,
- /// which is a 64-bit platform. Classically, the ABI doesn't really
- /// specify the alignment on other platforms, but in practice
- /// libUnwind declares the struct with __attribute__((aligned)), so
- /// we assume that alignment here. (It's generally 16 bytes, but
- /// some targets overwrite it.)
CharUnits getAlignmentOfExnObject() {
- auto align = CGM.getContext().getTargetDefaultAlignForAttributeAligned();
- return CGM.getContext().toCharUnitsFromBits(align);
+ unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment();
+ return CGM.getContext().toCharUnitsFromBits(Align);
}
void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
@@ -451,6 +443,7 @@ private:
(isa<CXXDestructorDecl>(GD.getDecl()) &&
GD.getDtorType() != Dtor_Deleting);
}
+ bool canCallMismatchedFunctionType() const override { return false; }
};
}
@@ -1496,7 +1489,8 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
DC->getParent()->isTranslationUnit())
EmitFundamentalRTTIDescriptors();
- CGM.EmitVTableBitSetEntries(VTable, VTLayout);
+ if (!VTable->isDeclarationForLinker())
+ CGM.EmitVTableTypeMetadata(VTable, VTLayout);
}
bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField(
@@ -1528,8 +1522,8 @@ ItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base,
.getVTableLayout(VTableClass)
.getAddressPoint(Base);
llvm::Value *Indices[] = {
- llvm::ConstantInt::get(CGM.Int64Ty, 0),
- llvm::ConstantInt::get(CGM.Int64Ty, AddressPoint)
+ llvm::ConstantInt::get(CGM.Int32Ty, 0),
+ llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint)
};
return llvm::ConstantExpr::getInBoundsGetElementPtr(VTable->getValueType(),
@@ -1568,7 +1562,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
if (VTable)
return VTable;
- // Queue up this v-table for possible deferred emission.
+ // Queue up this vtable for possible deferred emission.
CGM.addDeferredVTable(RD);
SmallString<256> Name;
@@ -1581,7 +1575,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
Name, ArrayType, llvm::GlobalValue::ExternalLinkage);
- VTable->setUnnamedAddr(true);
+ VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
@@ -1601,14 +1595,18 @@ llvm::Value *ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl());
llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent());
- if (CGF.SanOpts.has(SanitizerKind::CFIVCall))
- CGF.EmitVTablePtrCheckForCall(MethodDecl, VTable,
- CodeGenFunction::CFITCK_VCall, Loc);
-
uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
- llvm::Value *VFuncPtr =
- CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
- return CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
+ if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
+ return CGF.EmitVTableTypeCheckedLoad(
+ MethodDecl->getParent(), VTable,
+ VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
+ } else {
+ CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc);
+
+ llvm::Value *VFuncPtr =
+ CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
+ return CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
+ }
}
llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
@@ -1913,10 +1911,18 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
bool shouldPerformInit) {
CGBuilderTy &Builder = CGF.Builder;
- // We only need to use thread-safe statics for local non-TLS variables;
- // global initialization is always single-threaded.
+ // Inline variables that weren't instantiated from variable templates have
+ // partially-ordered initialization within their translation unit.
+ bool NonTemplateInline =
+ D.isInline() &&
+ !isTemplateInstantiation(D.getTemplateSpecializationKind());
+
+ // We only need to use thread-safe statics for local non-TLS variables and
+ // inline variables; other global initialization is always single-threaded
+ // or (through lazy dynamic loading in multiple threads) unsequenced.
bool threadsafe = getContext().getLangOpts().ThreadsafeStatics &&
- D.isLocalVarDecl() && !D.getTLSKind();
+ (D.isLocalVarDecl() || NonTemplateInline) &&
+ !D.getTLSKind();
// If we have a global variable with internal linkage and thread-safe statics
// are disabled, we can just let the guard variable be of type i8.
@@ -1970,7 +1976,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
if (!D.isLocalVarDecl() && C &&
CGM.getTarget().getTriple().isOSBinFormatELF()) {
guard->setComdat(C);
- CGF.CurFn->setComdat(C);
+ // An inline variable's guard function is run from the per-TU
+ // initialization function, not via a dedicated global ctor function, so
+ // we can't put it in a comdat.
+ if (!NonTemplateInline)
+ CGF.CurFn->setComdat(C);
} else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) {
guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName()));
}
@@ -2008,7 +2018,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
//
// In LLVM, we do this by marking the load Acquire.
if (threadsafe)
- LI->setAtomic(llvm::Acquire);
+ LI->setAtomic(llvm::AtomicOrdering::Acquire);
// For ARM, we should only check the first bit, rather than the entire byte:
//
@@ -2178,17 +2188,28 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
getMangleContext().mangleItaniumThreadLocalWrapper(VD, Out);
}
+ // FIXME: If VD is a definition, we should regenerate the function attributes
+ // before returning.
if (llvm::Value *V = CGM.getModule().getNamedValue(WrapperName))
return cast<llvm::Function>(V);
- llvm::Type *RetTy = Val->getType();
- if (VD->getType()->isReferenceType())
- RetTy = RetTy->getPointerElementType();
+ QualType RetQT = VD->getType();
+ if (RetQT->isReferenceType())
+ RetQT = RetQT.getNonReferenceType();
+
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
+ getContext().getPointerType(RetQT), FunctionArgList());
- llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, false);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Wrapper =
llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM),
WrapperName.str(), &CGM.getModule());
+
+ CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper);
+
+ if (VD->hasDefinition())
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
+
// Always resolve references to the wrapper at link time.
if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) &&
!llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) &&
@@ -2227,6 +2248,11 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CodeGenFunction(CGM)
.GenerateCXXGlobalInitFunc(InitFunc, CXXThreadLocalInits,
Address(Guard, GuardAlign));
+ // On Darwin platforms, use CXX_FAST_TLS calling convention.
+ if (CGM.getTarget().getTriple().isOSDarwin()) {
+ InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
+ InitFunc->addFnAttr(llvm::Attribute::NoUnwind);
+ }
}
for (const VarDecl *VD : CXXThreadLocals) {
llvm::GlobalVariable *Var =
@@ -2264,6 +2290,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
Init = llvm::Function::Create(
FnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(),
&CGM.getModule());
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
+ CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init));
}
if (Init)
@@ -2274,8 +2302,11 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
CGBuilderTy Builder(CGM, Entry);
if (InitIsInitFunc) {
- if (Init)
- Builder.CreateCall(Init);
+ if (Init) {
+ llvm::CallInst *CallVal = Builder.CreateCall(Init);
+ if (isThreadWrapperReplaceable(VD, CGM))
+ CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
+ }
} else {
// Don't know whether we have an init function. Call it if it exists.
llvm::Value *Have = Builder.CreateIsNotNull(Init);
@@ -2491,6 +2522,11 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
// long, unsigned long, long long, unsigned long long, float, double,
// long double, char16_t, char32_t, and the IEEE 754r decimal and
// half-precision floating point types.
+ //
+ // GCC also emits RTTI for __int128.
+ // FIXME: We do not emit RTTI information for decimal types here.
+
+ // Types added here must also be added to EmitFundamentalRTTIDescriptors.
switch (Ty->getKind()) {
case BuiltinType::Void:
case BuiltinType::NullPtr:
@@ -2513,29 +2549,23 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
+ case BuiltinType::Float128:
case BuiltinType::Char16:
case BuiltinType::Char32:
case BuiltinType::Int128:
case BuiltinType::UInt128:
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+ return true;
+
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
case BuiltinType::OCLNDRange:
case BuiltinType::OCLReserveID:
- return true;
+ return false;
case BuiltinType::Dependent:
#define BUILTIN_TYPE(Id, SingletonId)
@@ -2864,7 +2894,7 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) {
// We want to operate on the canonical type.
- Ty = CGM.getContext().getCanonicalType(Ty);
+ Ty = Ty.getCanonicalType();
// Check if we've already emitted an RTTI descriptor for this type.
SmallString<256> Name;
@@ -3327,6 +3357,7 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type) {
}
void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() {
+ // Types added here must also be added to TypeInfoIsInStandardLibrary.
QualType FundamentalTypes[] = {
getContext().VoidTy, getContext().NullPtrTy,
getContext().BoolTy, getContext().WCharTy,
@@ -3335,10 +3366,11 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() {
getContext().UnsignedShortTy, getContext().IntTy,
getContext().UnsignedIntTy, getContext().LongTy,
getContext().UnsignedLongTy, getContext().LongLongTy,
- getContext().UnsignedLongLongTy, getContext().HalfTy,
+ getContext().UnsignedLongLongTy, getContext().Int128Ty,
+ getContext().UnsignedInt128Ty, getContext().HalfTy,
getContext().FloatTy, getContext().DoubleTy,
- getContext().LongDoubleTy, getContext().Char16Ty,
- getContext().Char32Ty,
+ getContext().LongDoubleTy, getContext().Float128Ty,
+ getContext().Char16Ty, getContext().Char32Ty
};
for (const QualType &FundamentalType : FundamentalTypes)
EmitFundamentalRTTIDescriptor(FundamentalType);
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 93210d54d4bb..41cd53c2215f 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -254,8 +254,8 @@ public:
CXXDtorType Type, bool ForVirtualBase,
bool Delegating, Address This) override;
- void emitVTableBitSetEntries(VPtrInfo *Info, const CXXRecordDecl *RD,
- llvm::GlobalVariable *VTable);
+ void emitVTableTypeMetadata(VPtrInfo *Info, const CXXRecordDecl *RD,
+ llvm::GlobalVariable *VTable);
void emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) override;
@@ -551,7 +551,7 @@ private:
return llvm::Constant::getAllOnesValue(CGM.IntTy);
}
- CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD);
+ CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) override;
void
GetNullMemberPointerFields(const MemberPointerType *MPT,
@@ -942,7 +942,6 @@ MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value,
llvm::Value *Offset =
GetVirtualBaseClassOffset(CGF, Value, SrcDecl, PolymorphicBase);
llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset);
- Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty);
CharUnits VBaseAlign =
CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase);
return std::make_pair(Address(Ptr, VBaseAlign), Offset);
@@ -976,8 +975,8 @@ llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF,
QualType SrcRecordTy,
Address ThisPtr,
llvm::Type *StdTypeInfoPtrTy) {
- llvm::Value *Offset;
- std::tie(ThisPtr, Offset) = performBaseAdjustment(CGF, ThisPtr, SrcRecordTy);
+ std::tie(ThisPtr, std::ignore) =
+ performBaseAdjustment(CGF, ThisPtr, SrcRecordTy);
auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction();
return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy);
}
@@ -1002,6 +1001,7 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall(
llvm::Value *Offset;
std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy);
llvm::Value *ThisPtr = This.getPointer();
+ Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty);
// PVOID __RTDynamicCast(
// PVOID inptr,
@@ -1025,8 +1025,7 @@ llvm::Value *
MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
QualType SrcRecordTy,
QualType DestTy) {
- llvm::Value *Offset;
- std::tie(Value, Offset) = performBaseAdjustment(CGF, Value, SrcRecordTy);
+ std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy);
// PVOID __RTCastToVoid(
// PVOID inptr)
@@ -1152,16 +1151,14 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers(
llvm::Value *VBaseOffset =
GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first);
- // FIXME: it doesn't look right that we SExt in GetVirtualBaseClassOffset()
- // just to Trunc back immediately.
- VBaseOffset = Builder.CreateTruncOrBitCast(VBaseOffset, CGF.Int32Ty);
uint64_t ConstantVBaseOffset =
Layout.getVBaseClassOffset(I->first).getQuantity();
// vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase).
llvm::Value *VtorDispValue = Builder.CreateSub(
- VBaseOffset, llvm::ConstantInt::get(CGM.Int32Ty, ConstantVBaseOffset),
+ VBaseOffset, llvm::ConstantInt::get(CGM.PtrDiffTy, ConstantVBaseOffset),
"vtordisp.value");
+ VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty);
if (!Int8This)
Int8This = Builder.CreateBitCast(getThisValue(CGF),
@@ -1467,16 +1464,18 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs(
// Add the 'most_derived' argument second if we are variadic or last if not.
const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
- llvm::Value *MostDerivedArg =
- llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
- RValue RV = RValue::get(MostDerivedArg);
- if (MostDerivedArg) {
- if (FPT->isVariadic())
- Args.insert(Args.begin() + 1,
- CallArg(RV, getContext().IntTy, /*needscopy=*/false));
- else
- Args.add(RV, getContext().IntTy);
+ llvm::Value *MostDerivedArg;
+ if (Delegating) {
+ MostDerivedArg = getStructorImplicitParamValue(CGF);
+ } else {
+ MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete);
}
+ RValue RV = RValue::get(MostDerivedArg);
+ if (FPT->isVariadic())
+ Args.insert(Args.begin() + 1,
+ CallArg(RV, getContext().IntTy, /*needscopy=*/false));
+ else
+ Args.add(RV, getContext().IntTy);
return 1; // Added one arg.
}
@@ -1494,24 +1493,18 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
This, false);
}
- CGF.EmitCXXStructorCall(DD, Callee, ReturnValueSlot(), This.getPointer(),
- /*ImplicitParam=*/nullptr,
- /*ImplicitParamTy=*/QualType(), nullptr,
- getFromDtorType(Type));
+ CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(),
+ /*ImplicitParam=*/nullptr,
+ /*ImplicitParamTy=*/QualType(), nullptr,
+ getFromDtorType(Type));
}
-void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info,
- const CXXRecordDecl *RD,
- llvm::GlobalVariable *VTable) {
- if (!getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIVCall) &&
- !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFINVCall) &&
- !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIDerivedCast) &&
- !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIUnrelatedCast))
+void MicrosoftCXXABI::emitVTableTypeMetadata(VPtrInfo *Info,
+ const CXXRecordDecl *RD,
+ llvm::GlobalVariable *VTable) {
+ if (!CGM.getCodeGenOpts().PrepareForLTO)
return;
- llvm::NamedMDNode *BitsetsMD =
- CGM.getModule().getOrInsertNamedMetadata("llvm.bitsets");
-
// The location of the first virtual function pointer in the virtual table,
// aka the "address point" on Itanium. This is at offset 0 if RTTI is
// disabled, or sizeof(void*) if RTTI is enabled.
@@ -1522,15 +1515,13 @@ void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info,
: CharUnits::Zero();
if (Info->PathToBaseWithVPtr.empty()) {
- if (!CGM.IsCFIBlacklistedRecord(RD))
- CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, RD);
+ CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD);
return;
}
// Add a type metadata entry for the least derived base belonging to this vftable.
- if (!CGM.IsCFIBlacklistedRecord(Info->PathToBaseWithVPtr.back()))
- CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint,
- Info->PathToBaseWithVPtr.back());
+ CGM.AddVTableTypeMetadata(VTable, AddressPoint,
+ Info->PathToBaseWithVPtr.back());
// Add a type metadata entry for each derived class that is laid out at the
// same offset as the least derived base.
@@ -1548,13 +1539,12 @@ void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info,
Offset = VBI->second.VBaseOffset;
if (!Offset.isZero())
return;
- if (!CGM.IsCFIBlacklistedRecord(DerivedRD))
- CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, DerivedRD);
+ CGM.AddVTableTypeMetadata(VTable, AddressPoint, DerivedRD);
}
// Finally do the same for the most derived class.
- if (Info->FullOffsetInMDC.isZero() && !CGM.IsCFIBlacklistedRecord(RD))
- CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, RD);
+ if (Info->FullOffsetInMDC.isZero())
+ CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD);
}
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -1567,12 +1557,14 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
if (VTable->hasInitializer())
continue;
- llvm::Constant *RTTI = getContext().getLangOpts().RTTIData
- ? getMSCompleteObjectLocator(RD, Info)
- : nullptr;
-
const VTableLayout &VTLayout =
VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC);
+
+ llvm::Constant *RTTI = nullptr;
+ if (any_of(VTLayout.vtable_components(),
+ [](const VTableComponent &VTC) { return VTC.isRTTIKind(); }))
+ RTTI = getMSCompleteObjectLocator(RD, Info);
+
llvm::Constant *Init = CGVT.CreateVTableInitializer(
RD, VTLayout.vtable_component_begin(),
VTLayout.getNumVTableComponents(), VTLayout.vtable_thunk_begin(),
@@ -1580,7 +1572,7 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
VTable->setInitializer(Init);
- emitVTableBitSetEntries(Info, RD, VTable);
+ emitVTableTypeMetadata(Info, RD, VTable);
}
}
@@ -1642,7 +1634,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
if (DeferredVFTables.insert(RD).second) {
// We haven't processed this record type before.
- // Queue up this v-table for possible deferred emission.
+ // Queue up this vtable for possible deferred emission.
CGM.addDeferredVTable(RD);
#ifndef NDEBUG
@@ -1671,7 +1663,16 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
SmallString<256> VFTableName;
mangleVFTableName(getMangleContext(), RD, VFPtr, VFTableName);
- llvm::GlobalValue::LinkageTypes VFTableLinkage = CGM.getVTableLinkage(RD);
+ // Classes marked __declspec(dllimport) need vftables generated on the
+ // import side in order to support features like constexpr. No other
+ // translation unit relies on the emission of the local vftable;
+ // translation units are expected to generate them as needed.
+ //
+ // Because of this unique behavior, we maintain this logic here instead of
+ // getVTableLinkage.
+ llvm::GlobalValue::LinkageTypes VFTableLinkage =
+ RD->hasAttr<DLLImportAttr>() ? llvm::GlobalValue::LinkOnceODRLinkage
+ : CGM.getVTableLinkage(RD);
bool VFTableComesFromAnotherTU =
llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage) ||
llvm::GlobalValue::isExternalLinkage(VFTableLinkage);
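As a minimal illustration of the scenario the comment describes (a hypothetical example assuming a Microsoft C++ ABI target; it is not meant to link without the DLL that defines S):

// imported.h -- S's vftable is defined by the DLL that provides S.
struct __declspec(dllimport) S {
  virtual int f();
};

// importer.cpp -- taking the vftable address in this TU (e.g. while
// constructing an S) requires a local linkonce_odr copy of the vftable,
// since the importing TU cannot name the DLL's copy directly. No other
// TU will ever reference this local copy.
S s; // construction stores the vftable pointer into the object here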
@@ -1705,7 +1706,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
VTable = new llvm::GlobalVariable(CGM.getModule(), VTableType,
/*isConstant=*/true, VTableLinkage,
/*Initializer=*/nullptr, VTableName);
- VTable->setUnnamedAddr(true);
+ VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
llvm::Comdat *C = nullptr;
if (!VFTableComesFromAnotherTU &&
@@ -1733,7 +1734,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
/*AddressSpace=*/0, VFTableLinkage,
VFTableName.str(), VTableGEP,
&CGM.getModule());
- VFTable->setUnnamedAddr(true);
+ VFTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
} else {
// We don't need a GlobalAlias to be a symbol for the VTable if we won't
// be referencing any RTTI data.
@@ -1744,9 +1745,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
if (C)
VTable->setComdat(C);
- if (RD->hasAttr<DLLImportAttr>())
- VFTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- else if (RD->hasAttr<DLLExportAttr>())
+ if (RD->hasAttr<DLLExportAttr>())
VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
VFTablesMap[ID] = VFTable;
@@ -1813,13 +1812,20 @@ llvm::Value *MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
MicrosoftVTableContext::MethodVFTableLocation ML =
CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD);
- if (CGF.SanOpts.has(SanitizerKind::CFIVCall))
- CGF.EmitVTablePtrCheck(getClassAtVTableLocation(getContext(), GD, ML),
- VTable, CodeGenFunction::CFITCK_VCall, Loc);
- llvm::Value *VFuncPtr =
- Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
- return Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
+ if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) {
+ return CGF.EmitVTableTypeCheckedLoad(
+ getClassAtVTableLocation(getContext(), GD, ML), VTable,
+ ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8);
+ } else {
+ if (CGM.getCodeGenOpts().PrepareForLTO)
+ CGF.EmitTypeMetadataCodeForVCall(
+ getClassAtVTableLocation(getContext(), GD, ML), VTable, Loc);
+
+ llvm::Value *VFuncPtr =
+ Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
+ return Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
+ }
}
llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
@@ -1843,10 +1849,9 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
DtorType == Dtor_Deleting);
This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
- RValue RV = CGF.EmitCXXStructorCall(Dtor, Callee, ReturnValueSlot(),
- This.getPointer(),
- ImplicitParam, Context.IntTy, CE,
- StructorType::Deleting);
+ RValue RV =
+ CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam,
+ Context.IntTy, CE, StructorType::Deleting);
return RV.getScalarVal();
}
@@ -1916,7 +1921,7 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
ThunkFn->addFnAttr("thunk");
// These thunks can be compared, so they are not unnamed.
- ThunkFn->setUnnamedAddr(false);
+ ThunkFn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
// Start codegen.
CodeGenFunction CGF(CGM);
@@ -1973,7 +1978,7 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
"vbtable with this name already exists: mangling bug?");
llvm::GlobalVariable *GV =
CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage);
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
@@ -2030,6 +2035,9 @@ void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT,
llvm::ArrayType::get(CGM.IntTy, Offsets.size());
llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets);
GV->setInitializer(Init);
+
+ if (RD->hasAttr<DLLImportAttr>())
+ GV->setLinkage(llvm::GlobalVariable::AvailableExternallyLinkage);
}
llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF,
@@ -2302,7 +2310,7 @@ struct ResetGuardBit final : EHScopeStack::Cleanup {
CGBuilderTy &Builder = CGF.Builder;
llvm::LoadInst *LI = Builder.CreateLoad(Guard);
llvm::ConstantInt *Mask =
- llvm::ConstantInt::get(CGF.IntTy, ~(1U << GuardNum));
+ llvm::ConstantInt::get(CGF.IntTy, ~(1ULL << GuardNum));
Builder.CreateStore(Builder.CreateAnd(LI, Mask), Guard);
}
};
@@ -2415,7 +2423,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
// }
// Test our bit from the guard variable.
- llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << GuardNum);
+ llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum);
llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr);
llvm::Value *IsInitialized =
Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
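The IR emitted here implements the familiar one-bit-per-static guard word (up to 32 guarded statics share one 32-bit guard in this ABI). A minimal standalone C++ sketch of the same bit arithmetic, including the cleanup that ResetGuardBit performs on exceptional exit; the 1ULL shifts mirror the widened constants above:

#include <cstdint>
#include <cstdio>

static uint32_t Guard = 0; // one bit per guarded static local

static bool needsInit(unsigned GuardNum) {
  return (Guard & uint32_t(1ULL << GuardNum)) == 0;
}
static void markInitialized(unsigned GuardNum) {
  Guard |= uint32_t(1ULL << GuardNum);
}
static void resetGuardBit(unsigned GuardNum) { // on exception during init
  Guard &= uint32_t(~(1ULL << GuardNum));
}

int main() {
  printf("%d\n", needsInit(3)); // 1: not yet initialized
  markInitialized(3);
  printf("%d\n", needsInit(3)); // 0: bit is set
  resetGuardBit(3);
  printf("%d\n", needsInit(3)); // 1: initialization may be retried
}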
@@ -3631,7 +3639,8 @@ MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo *Info) {
}
static QualType decomposeTypeForEH(ASTContext &Context, QualType T,
- bool &IsConst, bool &IsVolatile) {
+ bool &IsConst, bool &IsVolatile,
+ bool &IsUnaligned) {
T = Context.getExceptionObjectType(T);
// C++14 [except.handle]p3:
@@ -3641,10 +3650,12 @@ static QualType decomposeTypeForEH(ASTContext &Context, QualType T,
// - a qualification conversion
IsConst = false;
IsVolatile = false;
+ IsUnaligned = false;
QualType PointeeType = T->getPointeeType();
if (!PointeeType.isNull()) {
IsConst = PointeeType.isConstQualified();
IsVolatile = PointeeType.isVolatileQualified();
+ IsUnaligned = PointeeType.getQualifiers().hasUnaligned();
}
// Member pointer types like "const int A::*" are represented by having RTTI
@@ -3667,8 +3678,9 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
// TypeDescriptors for exceptions never have qualified pointer types;
// qualifiers are stored separately in order to support qualification
// conversions.
- bool IsConst, IsVolatile;
- Type = decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile);
+ bool IsConst, IsVolatile, IsUnaligned;
+ Type =
+ decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile, IsUnaligned);
bool IsReference = CatchHandlerType->isReferenceType();
@@ -3677,6 +3689,8 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
Flags |= 1;
if (IsVolatile)
Flags |= 2;
+ if (IsUnaligned)
+ Flags |= 4;
if (IsReference)
Flags |= 8;
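A small worked example of the resulting flag word (the enumerator names are illustrative; the code above uses the literal bit values 1, 2, 4, and 8):

#include <cstdio>

enum : unsigned {
  HT_IsConst = 1, HT_IsVolatile = 2, HT_IsUnaligned = 4, HT_IsReference = 8
};

int main() {
  // catch (const volatile int *): const + volatile pointee
  unsigned a = HT_IsConst | HT_IsVolatile;      // 3
  // catch (int __unaligned *&): unaligned pointee, caught by reference
  unsigned b = HT_IsUnaligned | HT_IsReference; // 12
  printf("%u %u\n", a, b);                      // prints "3 12"
}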
@@ -3961,7 +3975,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(CTType, Fields), MangledName);
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
GV->setSection(".xdata");
if (GV->isWeakForLinker())
GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
@@ -4079,7 +4093,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
CTA = new llvm::GlobalVariable(
CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(CTAType, Fields), MangledName);
- CTA->setUnnamedAddr(true);
+ CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CTA->setSection(".xdata");
if (CTA->isWeakForLinker())
CTA->setComdat(CGM.getModule().getOrInsertComdat(CTA->getName()));
@@ -4087,8 +4101,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
}
llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
- bool IsConst, IsVolatile;
- T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile);
+ bool IsConst, IsVolatile, IsUnaligned;
+ T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile, IsUnaligned);
// The CatchableTypeArray enumerates the various (CV-unqualified) types that
// the exception object may be caught as.
@@ -4104,8 +4118,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
SmallString<256> MangledName;
{
llvm::raw_svector_ostream Out(MangledName);
- getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, NumEntries,
- Out);
+ getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, IsUnaligned,
+ NumEntries, Out);
}
// Reuse a previously generated ThrowInfo if we have generated an appropriate
@@ -4121,6 +4135,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
Flags |= 1;
if (IsVolatile)
Flags |= 2;
+ if (IsUnaligned)
+ Flags |= 4;
// The cleanup-function (a destructor) must be called when the exception
// object's lifetime ends.
@@ -4146,7 +4162,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T),
llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName));
- GV->setUnnamedAddr(true);
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
GV->setSection(".xdata");
if (GV->isWeakForLinker())
GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp
index 0be5c5592b22..952d1627fa84 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -25,7 +25,9 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <memory>
+
using namespace clang;
+using namespace CodeGen;
namespace {
class CodeGeneratorImpl : public CodeGenerator {
@@ -36,13 +38,21 @@ namespace {
const CodeGenOptions CodeGenOpts; // Intentionally copied in.
unsigned HandlingTopLevelDecls;
+
+ /// Use this when emitting decls to block re-entrant decl emission. It will
+ /// emit all deferred decls on scope exit. Set EmitDeferred to false if decl
+ /// emission must be deferred longer, like at the end of a tag definition.
struct HandlingTopLevelDeclRAII {
CodeGeneratorImpl &Self;
- HandlingTopLevelDeclRAII(CodeGeneratorImpl &Self) : Self(Self) {
+ bool EmitDeferred;
+ HandlingTopLevelDeclRAII(CodeGeneratorImpl &Self,
+ bool EmitDeferred = true)
+ : Self(Self), EmitDeferred(EmitDeferred) {
++Self.HandlingTopLevelDecls;
}
~HandlingTopLevelDeclRAII() {
- if (--Self.HandlingTopLevelDecls == 0)
+ unsigned Level = --Self.HandlingTopLevelDecls;
+ if (Level == 0 && EmitDeferred)
Self.EmitDeferredDecls();
}
};
@@ -57,15 +67,16 @@ namespace {
SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions;
public:
- CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string &ModuleName,
+ CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName,
const HeaderSearchOptions &HSO,
const PreprocessorOptions &PPO, const CodeGenOptions &CGO,
llvm::LLVMContext &C,
CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO),
PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
- CoverageInfo(CoverageInfo),
- M(new llvm::Module(ModuleName, C)) {}
+ CoverageInfo(CoverageInfo), M(new llvm::Module(ModuleName, C)) {
+ C.setDiscardValueNames(CGO.DiscardValueNames);
+ }
~CodeGeneratorImpl() override {
// There should normally not be any leftover inline method definitions.
@@ -73,11 +84,19 @@ namespace {
Diags.hasErrorOccurred());
}
- llvm::Module* GetModule() override {
+ CodeGenModule &CGM() {
+ return *Builder;
+ }
+
+ llvm::Module *GetModule() {
return M.get();
}
- const Decl *GetDeclForMangledName(StringRef MangledName) override {
+ llvm::Module *ReleaseModule() {
+ return M.release();
+ }
+
+ const Decl *GetDeclForMangledName(StringRef MangledName) {
GlobalDecl Result;
if (!Builder->lookupRepresentativeDecl(MangledName, Result))
return nullptr;
@@ -92,19 +111,23 @@ namespace {
return D;
}
- llvm::Module *ReleaseModule() override { return M.release(); }
+ llvm::Constant *GetAddrOfGlobal(GlobalDecl global, bool isForDefinition) {
+ return Builder->GetAddrOfGlobal(global, isForDefinition);
+ }
void Initialize(ASTContext &Context) override {
Ctx = &Context;
M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
- M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
+ M->setDataLayout(Ctx->getTargetInfo().getDataLayout());
Builder.reset(new CodeGen::CodeGenModule(Context, HeaderSearchOpts,
PreprocessorOpts, CodeGenOpts,
*M, Diags, CoverageInfo));
- for (size_t i = 0, e = CodeGenOpts.DependentLibraries.size(); i < e; ++i)
- HandleDependentLibrary(CodeGenOpts.DependentLibraries[i]);
+ for (auto &&Lib : CodeGenOpts.DependentLibraries)
+ Builder->AddDependentLib(Lib);
+ for (auto &&Opt : CodeGenOpts.LinkerOptions)
+ Builder->AppendLinkerOptions(Opt);
}
void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
@@ -140,12 +163,23 @@ namespace {
DeferredInlineMethodDefinitions.clear();
}
- void HandleInlineMethodDefinition(CXXMethodDecl *D) override {
+ void HandleInlineFunctionDefinition(FunctionDecl *D) override {
if (Diags.hasErrorOccurred())
return;
assert(D->doesThisDeclarationHaveABody());
+ // Handle friend functions.
+ if (D->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) {
+ if (Ctx->getTargetInfo().getCXXABI().isMicrosoft()
+ && !D->getLexicalDeclContext()->isDependentContext())
+ Builder->EmitTopLevelDecl(D);
+ return;
+ }
+
+ // Otherwise, must be a method.
+ auto MD = cast<CXXMethodDecl>(D);
+
// We may want to emit this definition. However, that decision might be
// based on computing the linkage, and we have to defer that in case we
// are inside of something that will change the method's final linkage,
@@ -154,13 +188,13 @@ namespace {
// void bar();
// void foo() { bar(); }
// } A;
- DeferredInlineMethodDefinitions.push_back(D);
+ DeferredInlineMethodDefinitions.push_back(MD);
// Provide some coverage mapping even for methods that aren't emitted.
// Don't do this for templated classes though, as they may not be
// instantiable.
- if (!D->getParent()->getDescribedClassTemplate())
- Builder->AddDeferredUnusedCoverageMapping(D);
+ if (!MD->getParent()->getDescribedClassTemplate())
+ Builder->AddDeferredUnusedCoverageMapping(MD);
}
/// HandleTagDeclDefinition - This callback is invoked each time a TagDecl
@@ -171,6 +205,10 @@ namespace {
if (Diags.hasErrorOccurred())
return;
+ // Don't allow re-entrant calls to CodeGen triggered by PCH
+ // deserialization to emit deferred decls.
+ HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false);
+
Builder->UpdateCompletedType(D);
// For MSVC compatibility, treat declarations of static data members with
@@ -185,27 +223,50 @@ namespace {
}
}
}
+ // For OpenMP, emit declare reduction functions if required.
+ if (Ctx->getLangOpts().OpenMP) {
+ for (Decl *Member : D->decls()) {
+ if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Member)) {
+ if (Ctx->DeclMustBeEmitted(DRD))
+ Builder->EmitGlobal(DRD);
+ }
+ }
+ }
}
void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
if (Diags.hasErrorOccurred())
return;
+ // Don't allow re-entrant calls to CodeGen triggered by PCH
+ // deserialization to emit deferred decls.
+ HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false);
+
if (CodeGen::CGDebugInfo *DI = Builder->getModuleDebugInfo())
if (const RecordDecl *RD = dyn_cast<RecordDecl>(D))
DI->completeRequiredType(RD);
}
void HandleTranslationUnit(ASTContext &Ctx) override {
+ // Release the Builder when there is no error.
+ if (!Diags.hasErrorOccurred() && Builder)
+ Builder->Release();
+
+ // If there are errors before or when releasing the Builder, reset
+ // the module to stop here before invoking the backend.
if (Diags.hasErrorOccurred()) {
if (Builder)
Builder->clear();
M.reset();
return;
}
+ }
- if (Builder)
- Builder->Release();
+ void AssignInheritanceModel(CXXRecordDecl *RD) override {
+ if (Diags.hasErrorOccurred())
+ return;
+
+ Builder->RefreshTypeCacheForClass(RD);
}
void CompleteTentativeDefinition(VarDecl *D) override {
@@ -221,26 +282,35 @@ namespace {
Builder->EmitVTable(RD);
}
+ };
+}
- void HandleLinkerOptionPragma(llvm::StringRef Opts) override {
- Builder->AppendLinkerOptions(Opts);
- }
+void CodeGenerator::anchor() { }
- void HandleDetectMismatch(llvm::StringRef Name,
- llvm::StringRef Value) override {
- Builder->AddDetectMismatch(Name, Value);
- }
+CodeGenModule &CodeGenerator::CGM() {
+ return static_cast<CodeGeneratorImpl*>(this)->CGM();
+}
- void HandleDependentLibrary(llvm::StringRef Lib) override {
- Builder->AddDependentLib(Lib);
- }
- };
+llvm::Module *CodeGenerator::GetModule() {
+ return static_cast<CodeGeneratorImpl*>(this)->GetModule();
}
-void CodeGenerator::anchor() { }
+llvm::Module *CodeGenerator::ReleaseModule() {
+ return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule();
+}
+
+const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) {
+ return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name);
+}
+
+llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global,
+ bool isForDefinition) {
+ return static_cast<CodeGeneratorImpl*>(this)
+ ->GetAddrOfGlobal(global, isForDefinition);
+}
CodeGenerator *clang::CreateLLVMCodeGen(
- DiagnosticsEngine &Diags, const std::string &ModuleName,
+ DiagnosticsEngine &Diags, llvm::StringRef ModuleName,
const HeaderSearchOptions &HeaderSearchOpts,
const PreprocessorOptions &PreprocessorOpts, const CodeGenOptions &CGO,
llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo) {
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index f385e53fa01f..de40e4121124 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -19,8 +19,8 @@
#include "clang/CodeGen/BackendUtil.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Serialization/ASTWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitstreamReader.h"
@@ -31,8 +31,10 @@
#include "llvm/IR/Module.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
#include <memory>
+#include <utility>
using namespace clang;
@@ -42,6 +44,7 @@ namespace {
class PCHContainerGenerator : public ASTConsumer {
DiagnosticsEngine &Diags;
const std::string MainFileName;
+ const std::string OutputFileName;
ASTContext *Ctx;
ModuleMap &MMap;
const HeaderSearchOptions &HeaderSearchOpts;
@@ -52,17 +55,15 @@ class PCHContainerGenerator : public ASTConsumer {
std::unique_ptr<llvm::LLVMContext> VMContext;
std::unique_ptr<llvm::Module> M;
std::unique_ptr<CodeGen::CodeGenModule> Builder;
- raw_pwrite_stream *OS;
+ std::unique_ptr<raw_pwrite_stream> OS;
std::shared_ptr<PCHBuffer> Buffer;
/// Visit every type and emit debug info for it.
struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> {
clang::CodeGen::CGDebugInfo &DI;
ASTContext &Ctx;
- bool SkipTagDecls;
- DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx,
- bool SkipTagDecls)
- : DI(DI), Ctx(Ctx), SkipTagDecls(SkipTagDecls) {}
+ DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx)
+ : DI(DI), Ctx(Ctx) {}
/// Determine whether this type can be represented in DWARF.
static bool CanRepresent(const Type *Ty) {
@@ -80,7 +81,8 @@ class PCHContainerGenerator : public ASTConsumer {
// TagDecls may be deferred until after all decls have been merged and we
// know the complete type. Pure forward declarations will be skipped, but
// they don't need to be emitted into the module anyway.
- if (SkipTagDecls && isa<TagDecl>(D))
+ if (auto *TD = dyn_cast<TagDecl>(D))
+ if (!TD->isCompleteDefinition())
return true;
QualType QualTy = Ctx.getTypeDeclType(D);
@@ -103,7 +105,7 @@ class PCHContainerGenerator : public ASTConsumer {
return true;
SmallVector<QualType, 16> ArgTypes;
- for (auto i : D->params())
+ for (auto i : D->parameters())
ArgTypes.push_back(i->getType());
QualType RetTy = D->getReturnType();
QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes,
@@ -122,7 +124,7 @@ class PCHContainerGenerator : public ASTConsumer {
ArgTypes.push_back(D->getSelfType(Ctx, D->getClassInterface(),
selfIsPseudoStrong, selfIsConsumed));
ArgTypes.push_back(Ctx.getObjCSelType());
- for (auto i : D->params())
+ for (auto i : D->parameters())
ArgTypes.push_back(i->getType());
QualType RetTy = D->getReturnType();
QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes,
@@ -136,20 +138,22 @@ class PCHContainerGenerator : public ASTConsumer {
public:
PCHContainerGenerator(CompilerInstance &CI, const std::string &MainFileName,
const std::string &OutputFileName,
- raw_pwrite_stream *OS,
+ std::unique_ptr<raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer)
- : Diags(CI.getDiagnostics()), Ctx(nullptr),
+ : Diags(CI.getDiagnostics()), MainFileName(MainFileName),
+ OutputFileName(OutputFileName), Ctx(nullptr),
MMap(CI.getPreprocessor().getHeaderSearchInfo().getModuleMap()),
HeaderSearchOpts(CI.getHeaderSearchOpts()),
PreprocessorOpts(CI.getPreprocessorOpts()),
- TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()), OS(OS),
- Buffer(Buffer) {
+ TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()),
+ OS(std::move(OS)), Buffer(std::move(Buffer)) {
// The debug info output isn't affected by CodeModel and
// ThreadModel, but the backend expects them to be nonempty.
CodeGenOpts.CodeModel = "default";
CodeGenOpts.ThreadModel = "single";
CodeGenOpts.DebugTypeExtRefs = true;
- CodeGenOpts.setDebugInfo(CodeGenOptions::FullDebugInfo);
+ CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo);
+ CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning());
}
~PCHContainerGenerator() override = default;
@@ -160,10 +164,15 @@ public:
Ctx = &Context;
VMContext.reset(new llvm::LLVMContext());
M.reset(new llvm::Module(MainFileName, *VMContext));
- M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString());
+ M->setDataLayout(Ctx->getTargetInfo().getDataLayout());
Builder.reset(new CodeGen::CodeGenModule(
*Ctx, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags));
- Builder->getModuleDebugInfo()->setModuleMap(MMap);
+
+ // Prepare CGDebugInfo to emit debug info for a clang module.
+ auto *DI = Builder->getModuleDebugInfo();
+ StringRef ModuleName = llvm::sys::path::filename(MainFileName);
+ DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL});
+ DI->setModuleMap(MMap);
}
bool HandleTopLevelDecl(DeclGroupRef D) override {
@@ -173,7 +182,7 @@ public:
// Collect debug info for all decls in this group.
for (auto *I : D)
if (!I->isFromASTFile()) {
- DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, true);
+ DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx);
DTV.TraverseDecl(I);
}
return true;
@@ -190,7 +199,20 @@ public:
if (D->isFromASTFile())
return;
- DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, false);
+ // Anonymous tag decls are deferred until we are building their DeclContext.
+ if (D->getName().empty())
+ return;
+
+ // Defer tag decls until their DeclContext is complete.
+ auto *DeclCtx = D->getDeclContext();
+ while (DeclCtx) {
+ if (auto *D = dyn_cast<TagDecl>(DeclCtx))
+ if (!D->isCompleteDefinition())
+ return;
+ DeclCtx = DeclCtx->getParent();
+ }
+
+ DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx);
DTV.TraverseDecl(D);
Builder->UpdateCompletedType(D);
}
@@ -215,8 +237,12 @@ public:
return;
M->setTargetTriple(Ctx.getTargetInfo().getTriple().getTriple());
- M->setDataLayout(Ctx.getTargetInfo().getDataLayoutString());
- Builder->getModuleDebugInfo()->setDwoId(Buffer->Signature);
+ M->setDataLayout(Ctx.getTargetInfo().getDataLayout());
+
+ // PCH files don't have a signature field in the control block,
+ // but LLVM detects DWO CUs by looking for a non-zero DWO id.
+ uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL;
+ Builder->getModuleDebugInfo()->setDwoId(Signature);
// Finalize the Builder.
if (Builder)
@@ -255,20 +281,18 @@ public:
DEBUG({
// Print the IR for the PCH container to the debug output.
llvm::SmallString<0> Buffer;
- llvm::raw_svector_ostream OS(Buffer);
- clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
- Ctx.getTargetInfo().getDataLayoutString(),
- M.get(), BackendAction::Backend_EmitLL, &OS);
+ clang::EmitBackendOutput(
+ Diags, CodeGenOpts, TargetOpts, LangOpts,
+ Ctx.getTargetInfo().getDataLayout(), M.get(),
+ BackendAction::Backend_EmitLL,
+ llvm::make_unique<llvm::raw_svector_ostream>(Buffer));
llvm::dbgs() << Buffer;
});
// Use the LLVM backend to emit the pch container.
clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
- Ctx.getTargetInfo().getDataLayoutString(),
- M.get(), BackendAction::Backend_EmitObj, OS);
-
- // Make sure the pch container hits disk.
- OS->flush();
+ Ctx.getTargetInfo().getDataLayout(), M.get(),
+ BackendAction::Backend_EmitObj, std::move(OS));
// Free the memory for the temporary buffer.
llvm::SmallVector<char, 0> Empty;
@@ -281,10 +305,11 @@ public:
std::unique_ptr<ASTConsumer>
ObjectFilePCHContainerWriter::CreatePCHContainerGenerator(
CompilerInstance &CI, const std::string &MainFileName,
- const std::string &OutputFileName, llvm::raw_pwrite_stream *OS,
+ const std::string &OutputFileName,
+ std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const {
- return llvm::make_unique<PCHContainerGenerator>(CI, MainFileName,
- OutputFileName, OS, Buffer);
+ return llvm::make_unique<PCHContainerGenerator>(
+ CI, MainFileName, OutputFileName, std::move(OS), Buffer);
}
void ObjectFilePCHContainerReader::ExtractPCH(
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
new file mode 100644
index 000000000000..6c20f8c9d3e9
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -0,0 +1,830 @@
+//===--- SwiftCallingConv.cpp - Lowering for the Swift calling convention -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the abstract lowering for the Swift calling convention.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/CodeGen/SwiftCallingConv.h"
+#include "clang/Basic/TargetInfo.h"
+#include "CodeGenModule.h"
+#include "TargetInfo.h"
+
+using namespace clang;
+using namespace CodeGen;
+using namespace swiftcall;
+
+static const SwiftABIInfo &getSwiftABIInfo(CodeGenModule &CGM) {
+ return cast<SwiftABIInfo>(CGM.getTargetCodeGenInfo().getABIInfo());
+}
+
+static bool isPowerOf2(unsigned n) {
+ return n == (n & -n);
+}
+
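The n == (n & -n) identity works because n & -n isolates the lowest set bit, and for a power of two that bit is n itself. (Zero also satisfies the equation, which is harmless for the nonzero sizes passed here.) Two quick checks:

static_assert((8u  & (0u - 8u))  == 8u, "lowest set bit of 8 is 8");
static_assert((12u & (0u - 12u)) == 4u, "lowest set bit of 12 is 4, so 12 fails");
int main() {}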
+/// Given two types with the same size, try to find a common type.
+static llvm::Type *getCommonType(llvm::Type *first, llvm::Type *second) {
+ assert(first != second);
+
+ // Allow pointers to merge with integers, but prefer the integer type.
+ if (first->isIntegerTy()) {
+ if (second->isPointerTy()) return first;
+ } else if (first->isPointerTy()) {
+ if (second->isIntegerTy()) return second;
+ if (second->isPointerTy()) return first;
+
+ // Allow two vectors to be merged (given that they have the same size).
+ // This assumes that we never have two different vector register sets.
+ } else if (auto firstVecTy = dyn_cast<llvm::VectorType>(first)) {
+ if (auto secondVecTy = dyn_cast<llvm::VectorType>(second)) {
+ if (auto commonTy = getCommonType(firstVecTy->getElementType(),
+ secondVecTy->getElementType())) {
+ return (commonTy == firstVecTy->getElementType() ? first : second);
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) {
+ return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type));
+}
+
+void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) {
+ // Deal with various aggregate types as special cases:
+
+ // Record types.
+ if (auto recType = type->getAs<RecordType>()) {
+ addTypedData(recType->getDecl(), begin);
+
+ // Array types.
+ } else if (type->isArrayType()) {
+ // Incomplete array types (flexible array members?) don't provide
+ // data to lay out, and the other cases shouldn't be possible.
+ auto arrayType = CGM.getContext().getAsConstantArrayType(type);
+ if (!arrayType) return;
+
+ QualType eltType = arrayType->getElementType();
+ auto eltSize = CGM.getContext().getTypeSizeInChars(eltType);
+ for (uint64_t i = 0, e = arrayType->getSize().getZExtValue(); i != e; ++i) {
+ addTypedData(eltType, begin + i * eltSize);
+ }
+
+ // Complex types.
+ } else if (auto complexType = type->getAs<ComplexType>()) {
+ auto eltType = complexType->getElementType();
+ auto eltSize = CGM.getContext().getTypeSizeInChars(eltType);
+ auto eltLLVMType = CGM.getTypes().ConvertType(eltType);
+ addTypedData(eltLLVMType, begin, begin + eltSize);
+ addTypedData(eltLLVMType, begin + eltSize, begin + 2 * eltSize);
+
+ // Member pointer types.
+ } else if (type->getAs<MemberPointerType>()) {
+ // Just add it all as opaque.
+ addOpaqueData(begin, begin + CGM.getContext().getTypeSizeInChars(type));
+
+ // Everything else is scalar and should not convert as an LLVM aggregate.
+ } else {
+ // We intentionally convert as !ForMem because we want to preserve
+ // that a type was an i1.
+ auto llvmType = CGM.getTypes().ConvertType(type);
+ addTypedData(llvmType, begin);
+ }
+}
+
+void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin) {
+ addTypedData(record, begin, CGM.getContext().getASTRecordLayout(record));
+}
+
+void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin,
+ const ASTRecordLayout &layout) {
+ // Unions are a special case.
+ if (record->isUnion()) {
+ for (auto field : record->fields()) {
+ if (field->isBitField()) {
+ addBitFieldData(field, begin, 0);
+ } else {
+ addTypedData(field->getType(), begin);
+ }
+ }
+ return;
+ }
+
+ // Note that correctness does not rely on us adding things in
+ // their actual order of layout; it's just somewhat more efficient
+ // for the builder.
+
+ // With that in mind, add "early" C++ data.
+ auto cxxRecord = dyn_cast<CXXRecordDecl>(record);
+ if (cxxRecord) {
+ // - a v-table pointer, if the class adds its own
+ if (layout.hasOwnVFPtr()) {
+ addTypedData(CGM.Int8PtrTy, begin);
+ }
+
+ // - non-virtual bases
+ for (auto &baseSpecifier : cxxRecord->bases()) {
+ if (baseSpecifier.isVirtual()) continue;
+
+ auto baseRecord = baseSpecifier.getType()->getAsCXXRecordDecl();
+ addTypedData(baseRecord, begin + layout.getBaseClassOffset(baseRecord));
+ }
+
+ // - a vbptr if the class adds its own
+ if (layout.hasOwnVBPtr()) {
+ addTypedData(CGM.Int8PtrTy, begin + layout.getVBPtrOffset());
+ }
+ }
+
+ // Add fields.
+ for (auto field : record->fields()) {
+ auto fieldOffsetInBits = layout.getFieldOffset(field->getFieldIndex());
+ if (field->isBitField()) {
+ addBitFieldData(field, begin, fieldOffsetInBits);
+ } else {
+ addTypedData(field->getType(),
+ begin + CGM.getContext().toCharUnitsFromBits(fieldOffsetInBits));
+ }
+ }
+
+ // Add "late" C++ data:
+ if (cxxRecord) {
+ // - virtual bases
+ for (auto &vbaseSpecifier : cxxRecord->vbases()) {
+ auto baseRecord = vbaseSpecifier.getType()->getAsCXXRecordDecl();
+ addTypedData(baseRecord, begin + layout.getVBaseClassOffset(baseRecord));
+ }
+ }
+}
+
+void SwiftAggLowering::addBitFieldData(const FieldDecl *bitfield,
+ CharUnits recordBegin,
+ uint64_t bitfieldBitBegin) {
+ assert(bitfield->isBitField());
+ auto &ctx = CGM.getContext();
+ auto width = bitfield->getBitWidthValue(ctx);
+
+ // We can ignore zero-width bit-fields.
+ if (width == 0) return;
+
+ // toCharUnitsFromBits rounds down.
+ CharUnits bitfieldByteBegin = ctx.toCharUnitsFromBits(bitfieldBitBegin);
+
+ // Find the offset of the last byte that is partially occupied by the
+ // bit-field; since we otherwise expect exclusive ends, the end is the
+ // next byte.
+ uint64_t bitfieldBitLast = bitfieldBitBegin + width - 1;
+ CharUnits bitfieldByteEnd =
+ ctx.toCharUnitsFromBits(bitfieldBitLast) + CharUnits::One();
+ addOpaqueData(recordBegin + bitfieldByteBegin,
+ recordBegin + bitfieldByteEnd);
+}
+
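A worked instance of the byte-range computation, assuming 8-bit chars: a bit-field occupying bits [13, 19) of its record touches bytes 1 and 2, so the opaque range is [1, 3):

#include <cassert>

int main() {
  unsigned bitBegin = 13, width = 6;        // bits [13, 19)
  unsigned byteBegin = bitBegin / 8;        // toCharUnitsFromBits rounds down: 1
  unsigned bitLast = bitBegin + width - 1;  // 18, the last occupied bit
  unsigned byteEnd = bitLast / 8 + 1;       // exclusive end: 3
  assert(byteBegin == 1 && byteEnd == 3);
}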
+void SwiftAggLowering::addTypedData(llvm::Type *type, CharUnits begin) {
+ assert(type && "didn't provide type for typed data");
+ addTypedData(type, begin, begin + getTypeStoreSize(CGM, type));
+}
+
+void SwiftAggLowering::addTypedData(llvm::Type *type,
+ CharUnits begin, CharUnits end) {
+ assert(type && "didn't provide type for typed data");
+ assert(getTypeStoreSize(CGM, type) == end - begin);
+
+ // Legalize vector types.
+ if (auto vecTy = dyn_cast<llvm::VectorType>(type)) {
+ SmallVector<llvm::Type*, 4> componentTys;
+ legalizeVectorType(CGM, end - begin, vecTy, componentTys);
+ assert(componentTys.size() >= 1);
+
+ // Walk the initial components.
+ for (size_t i = 0, e = componentTys.size(); i != e - 1; ++i) {
+ llvm::Type *componentTy = componentTys[i];
+ auto componentSize = getTypeStoreSize(CGM, componentTy);
+ assert(componentSize < end - begin);
+ addLegalTypedData(componentTy, begin, begin + componentSize);
+ begin += componentSize;
+ }
+
+ return addLegalTypedData(componentTys.back(), begin, end);
+ }
+
+ // Legalize integer types.
+ if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
+ if (!isLegalIntegerType(CGM, intTy))
+ return addOpaqueData(begin, end);
+ }
+
+ // All other types should be legal.
+ return addLegalTypedData(type, begin, end);
+}
+
+void SwiftAggLowering::addLegalTypedData(llvm::Type *type,
+ CharUnits begin, CharUnits end) {
+ // Require the type to be naturally aligned.
+ if (!begin.isZero() && !begin.isMultipleOf(getNaturalAlignment(CGM, type))) {
+
+ // Try splitting vector types.
+ if (auto vecTy = dyn_cast<llvm::VectorType>(type)) {
+ auto split = splitLegalVectorType(CGM, end - begin, vecTy);
+ auto eltTy = split.first;
+ auto numElts = split.second;
+
+ auto eltSize = (end - begin) / numElts;
+ assert(eltSize == getTypeStoreSize(CGM, eltTy));
+ for (size_t i = 0, e = numElts; i != e; ++i) {
+ addLegalTypedData(eltTy, begin, begin + eltSize);
+ begin += eltSize;
+ }
+ assert(begin == end);
+ return;
+ }
+
+ return addOpaqueData(begin, end);
+ }
+
+ addEntry(type, begin, end);
+}
+
+void SwiftAggLowering::addEntry(llvm::Type *type,
+ CharUnits begin, CharUnits end) {
+ assert((!type ||
+ (!isa<llvm::StructType>(type) && !isa<llvm::ArrayType>(type))) &&
+ "cannot add aggregate-typed data");
+ assert(!type || begin.isMultipleOf(getNaturalAlignment(CGM, type)));
+
+ // Fast path: we can just add entries to the end.
+ if (Entries.empty() || Entries.back().End <= begin) {
+ Entries.push_back({begin, end, type});
+ return;
+ }
+
+ // Find the first existing entry that ends after the start of the new data.
+ // TODO: do a binary search if Entries is big enough for it to matter.
+ size_t index = Entries.size() - 1;
+ while (index != 0) {
+ if (Entries[index - 1].End <= begin) break;
+ --index;
+ }
+
+ // The entry ends after the start of the new data.
+ // If the entry starts after the end of the new data, there's no conflict.
+ if (Entries[index].Begin >= end) {
+ // This insertion is potentially O(n), but the way we generally build
+ // these layouts makes that unlikely to matter: we'd need a union of
+ // several very large types.
+ Entries.insert(Entries.begin() + index, {begin, end, type});
+ return;
+ }
+
+ // Otherwise, the ranges overlap. The new range might also overlap
+ // with later ranges.
+restartAfterSplit:
+
+ // Simplest case: an exact overlap.
+ if (Entries[index].Begin == begin && Entries[index].End == end) {
+ // If the types match exactly, great.
+ if (Entries[index].Type == type) return;
+
+ // If either type is opaque, make the entry opaque and return.
+ if (Entries[index].Type == nullptr) {
+ return;
+ } else if (type == nullptr) {
+ Entries[index].Type = nullptr;
+ return;
+ }
+
+ // If they disagree in an ABI-agnostic way, just resolve the conflict
+ // arbitrarily.
+ if (auto entryType = getCommonType(Entries[index].Type, type)) {
+ Entries[index].Type = entryType;
+ return;
+ }
+
+ // Otherwise, make the entry opaque.
+ Entries[index].Type = nullptr;
+ return;
+ }
+
+ // Okay, we have an overlapping conflict of some sort.
+
+ // If we have a vector type, split it.
+ if (auto vecTy = dyn_cast_or_null<llvm::VectorType>(type)) {
+ auto eltTy = vecTy->getElementType();
+ CharUnits eltSize = (end - begin) / vecTy->getNumElements();
+ assert(eltSize == getTypeStoreSize(CGM, eltTy));
+ for (unsigned i = 0, e = vecTy->getNumElements(); i != e; ++i) {
+ addEntry(eltTy, begin, begin + eltSize);
+ begin += eltSize;
+ }
+ assert(begin == end);
+ return;
+ }
+
+ // If the entry is a vector type, split it and try again.
+ if (Entries[index].Type && Entries[index].Type->isVectorTy()) {
+ splitVectorEntry(index);
+ goto restartAfterSplit;
+ }
+
+ // Okay, we have no choice but to make the existing entry opaque.
+
+ Entries[index].Type = nullptr;
+
+ // Stretch the start of the entry to the beginning of the range.
+ if (begin < Entries[index].Begin) {
+ Entries[index].Begin = begin;
+ assert(index == 0 || begin >= Entries[index - 1].End);
+ }
+
+ // Stretch the end of the entry to the end of the range; but if we run
+ // into the start of the next entry, just leave the range there and repeat.
+ while (end > Entries[index].End) {
+ assert(Entries[index].Type == nullptr);
+
+ // If the range doesn't overlap the next entry, we're done.
+ if (index == Entries.size() - 1 || end <= Entries[index + 1].Begin) {
+ Entries[index].End = end;
+ break;
+ }
+
+ // Otherwise, stretch to the start of the next entry.
+ Entries[index].End = Entries[index + 1].Begin;
+
+ // Continue with the next entry.
+ index++;
+
+ // This entry needs to be made opaque if it is not already.
+ if (Entries[index].Type == nullptr)
+ continue;
+
+ // Split vector entries unless we completely subsume them.
+ if (Entries[index].Type->isVectorTy() &&
+ end < Entries[index].End) {
+ splitVectorEntry(index);
+ }
+
+ // Make the entry opaque.
+ Entries[index].Type = nullptr;
+ }
+}
+
+/// Replace the entry of vector type at offset 'index' with a sequence
+/// of its component vectors.
+void SwiftAggLowering::splitVectorEntry(unsigned index) {
+ auto vecTy = cast<llvm::VectorType>(Entries[index].Type);
+ auto split = splitLegalVectorType(CGM, Entries[index].getWidth(), vecTy);
+
+ auto eltTy = split.first;
+ CharUnits eltSize = getTypeStoreSize(CGM, eltTy);
+ auto numElts = split.second;
+ Entries.insert(&Entries[index + 1], numElts - 1, StorageEntry());
+
+ CharUnits begin = Entries[index].Begin;
+ for (unsigned i = 0; i != numElts; ++i) {
+ Entries[index].Type = eltTy;
+ Entries[index].Begin = begin;
+ Entries[index].End = begin + eltSize;
+ begin += eltSize;
+ }
+}
+
+/// Given a power-of-two unit size, return the offset of the aligned unit
+/// of that size which contains the given offset.
+///
+/// In other words, round down to the nearest multiple of the unit size.
+static CharUnits getOffsetAtStartOfUnit(CharUnits offset, CharUnits unitSize) {
+ assert(isPowerOf2(unitSize.getQuantity()));
+ auto unitMask = ~(unitSize.getQuantity() - 1);
+ return CharUnits::fromQuantity(offset.getQuantity() & unitMask);
+}
+
+static bool areBytesInSameUnit(CharUnits first, CharUnits second,
+ CharUnits chunkSize) {
+ return getOffsetAtStartOfUnit(first, chunkSize)
+ == getOffsetAtStartOfUnit(second, chunkSize);
+}
+
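The mask trick rounds an offset down to a unit boundary; a couple of checks of both helpers (a standalone sketch using plain integers in place of CharUnits):

#include <cassert>
#include <cstdint>

static int64_t startOfUnit(int64_t offset, int64_t unit) { // unit: power of 2
  return offset & ~(unit - 1);
}

int main() {
  assert(startOfUnit(13, 8) == 8);                  // round 13 down to 8
  assert(startOfUnit(7, 8) != startOfUnit(8, 8));   // bytes 7 and 8: different chunks
  assert(startOfUnit(13, 8) == startOfUnit(14, 8)); // bytes 13 and 14: same chunk
}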
+void SwiftAggLowering::finish() {
+ if (Entries.empty()) {
+ Finished = true;
+ return;
+ }
+
+ // We logically split the layout down into a series of chunks of this size,
+ // which is generally the size of a pointer.
+ const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM);
+
+ // First pass: if two entries share a chunk, make them both opaque
+ // and stretch one to meet the next.
+ bool hasOpaqueEntries = (Entries[0].Type == nullptr);
+ for (size_t i = 1, e = Entries.size(); i != e; ++i) {
+ if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(),
+ Entries[i].Begin, chunkSize)) {
+ Entries[i - 1].Type = nullptr;
+ Entries[i].Type = nullptr;
+ Entries[i - 1].End = Entries[i].Begin;
+ hasOpaqueEntries = true;
+
+ } else if (Entries[i].Type == nullptr) {
+ hasOpaqueEntries = true;
+ }
+ }
+
+ // The rest of the algorithm leaves non-opaque entries alone, so if we
+ // have no opaque entries, we're done.
+ if (!hasOpaqueEntries) {
+ Finished = true;
+ return;
+ }
+
+ // Okay, move the entries to a temporary and rebuild Entries.
+ auto orig = std::move(Entries);
+ assert(Entries.empty());
+
+ for (size_t i = 0, e = orig.size(); i != e; ++i) {
+ // Just copy over non-opaque entries.
+ if (orig[i].Type != nullptr) {
+ Entries.push_back(orig[i]);
+ continue;
+ }
+
+ // Scan forward to determine the full extent of the next opaque range.
+ // We know from the first pass that only contiguous ranges will overlap
+ // the same aligned chunk.
+ auto begin = orig[i].Begin;
+ auto end = orig[i].End;
+ while (i + 1 != e &&
+ orig[i + 1].Type == nullptr &&
+ end == orig[i + 1].Begin) {
+ end = orig[i + 1].End;
+ i++;
+ }
+
+ // Add an entry per intersected chunk.
+ do {
+ // Find the smallest aligned storage unit in the maximal aligned
+ // storage unit containing 'begin' that contains all the bytes in
+ // the intersection between the range and this chunk.
+ CharUnits localBegin = begin;
+ CharUnits chunkBegin = getOffsetAtStartOfUnit(localBegin, chunkSize);
+ CharUnits chunkEnd = chunkBegin + chunkSize;
+ CharUnits localEnd = std::min(end, chunkEnd);
+
+ // Just do a simple loop over ever-increasing unit sizes.
+ CharUnits unitSize = CharUnits::One();
+ CharUnits unitBegin, unitEnd;
+ for (; ; unitSize *= 2) {
+ assert(unitSize <= chunkSize);
+ unitBegin = getOffsetAtStartOfUnit(localBegin, unitSize);
+ unitEnd = unitBegin + unitSize;
+ if (unitEnd >= localEnd) break;
+ }
+
+ // Add an entry for this unit.
+ auto entryTy =
+ llvm::IntegerType::get(CGM.getLLVMContext(),
+ CGM.getContext().toBits(unitSize));
+ Entries.push_back({unitBegin, unitEnd, entryTy});
+
+ // The next chunk starts where this chunk left off.
+ begin = localEnd;
+ } while (begin != end);
+ }
+
+ // Okay, finally finished.
+ Finished = true;
+}
+
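A worked trace, assuming 8-byte chunks: two i16 entries at byte ranges [0, 2) and [4, 6) share a chunk, so the first pass makes both opaque and stretches the first to end at [0, 4); the second pass then sees one contiguous opaque range [0, 6) and covers it with the smallest aligned unit that contains it, emitting a single i64 entry spanning [0, 8).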
+void SwiftAggLowering::enumerateComponents(EnumerationCallback callback) const {
+ assert(Finished && "haven't yet finished lowering");
+
+ for (auto &entry : Entries) {
+ callback(entry.Begin, entry.Type);
+ }
+}
+
+std::pair<llvm::StructType*, llvm::Type*>
+SwiftAggLowering::getCoerceAndExpandTypes() const {
+ assert(Finished && "haven't yet finished lowering");
+
+ auto &ctx = CGM.getLLVMContext();
+
+ if (Entries.empty()) {
+ auto type = llvm::StructType::get(ctx);
+ return { type, type };
+ }
+
+ SmallVector<llvm::Type*, 8> elts;
+ CharUnits lastEnd = CharUnits::Zero();
+ bool hasPadding = false;
+ bool packed = false;
+ for (auto &entry : Entries) {
+ if (entry.Begin != lastEnd) {
+ auto paddingSize = entry.Begin - lastEnd;
+ assert(!paddingSize.isNegative());
+
+ auto padding = llvm::ArrayType::get(llvm::Type::getInt8Ty(ctx),
+ paddingSize.getQuantity());
+ elts.push_back(padding);
+ hasPadding = true;
+ }
+
+ if (!packed && !entry.Begin.isMultipleOf(
+ CharUnits::fromQuantity(
+ CGM.getDataLayout().getABITypeAlignment(entry.Type))))
+ packed = true;
+
+ elts.push_back(entry.Type);
+ lastEnd = entry.End;
+ }
+
+ // We don't need to adjust 'packed' to deal with possible tail padding
+ // because we never do that kind of access through the coercion type.
+ auto coercionType = llvm::StructType::get(ctx, elts, packed);
+
+ llvm::Type *unpaddedType = coercionType;
+ if (hasPadding) {
+ elts.clear();
+ for (auto &entry : Entries) {
+ elts.push_back(entry.Type);
+ }
+ if (elts.size() == 1) {
+ unpaddedType = elts[0];
+ } else {
+ unpaddedType = llvm::StructType::get(ctx, elts, /*packed*/ false);
+ }
+ } else if (Entries.size() == 1) {
+ unpaddedType = Entries[0].Type;
+ }
+
+ return { coercionType, unpaddedType };
+}
+
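For example, the entries {i32 at [0, 4), i64 at [8, 16)} produce the coercion type { i32, [4 x i8], i64 } (a padding array fills the gap, and the struct is not packed because both entries sit at their ABI alignment) and, since padding was inserted, the unpadded type { i32, i64 } enumerating only the real values.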
+bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const {
+ assert(Finished && "haven't yet finished lowering");
+
+ // Empty types don't need to be passed indirectly.
+ if (Entries.empty()) return false;
+
+ CharUnits totalSize = Entries.back().End;
+
+ // Avoid copying the array of types when there's just a single element.
+ if (Entries.size() == 1) {
+ return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize,
+ Entries.back().Type,
+ asReturnValue);
+ }
+
+ SmallVector<llvm::Type*, 8> componentTys;
+ componentTys.reserve(Entries.size());
+ for (auto &entry : Entries) {
+ componentTys.push_back(entry.Type);
+ }
+ return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize,
+ componentTys,
+ asReturnValue);
+}
+
+CharUnits swiftcall::getMaximumVoluntaryIntegerSize(CodeGenModule &CGM) {
+ // Currently always the size of an ordinary pointer.
+ return CGM.getContext().toCharUnitsFromBits(
+ CGM.getContext().getTargetInfo().getPointerWidth(0));
+}
+
+CharUnits swiftcall::getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type) {
+ // For Swift's purposes, this is always just the store size of the type
+ // rounded up to a power of 2.
+ auto size = (unsigned long long) getTypeStoreSize(CGM, type).getQuantity();
+ if (!isPowerOf2(size)) {
+ size = 1ULL << (llvm::findLastSet(size, llvm::ZB_Undefined) + 1);
+ }
+ assert(size >= CGM.getDataLayout().getABITypeAlignment(type));
+ return CharUnits::fromQuantity(size);
+}
+
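A standalone sketch of the same rounding for nonzero sizes (the real code uses llvm::findLastSet rather than a loop):

#include <cassert>

static unsigned long long roundUpToPow2(unsigned long long n) {
  unsigned long long p = 1;
  while (p < n) p <<= 1;
  return p;
}

int main() {
  assert(roundUpToPow2(6) == 8); // e.g. a 6-byte store size aligns to 8
  assert(roundUpToPow2(8) == 8); // powers of two are left unchanged
}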
+bool swiftcall::isLegalIntegerType(CodeGenModule &CGM,
+ llvm::IntegerType *intTy) {
+ auto size = intTy->getBitWidth();
+ switch (size) {
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ // Just assume that the above are always legal.
+ return true;
+
+ case 128:
+ return CGM.getContext().getTargetInfo().hasInt128Type();
+
+ default:
+ return false;
+ }
+}
+
+bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize,
+ llvm::VectorType *vectorTy) {
+ return isLegalVectorType(CGM, vectorSize, vectorTy->getElementType(),
+ vectorTy->getNumElements());
+}
+
+bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize,
+ llvm::Type *eltTy, unsigned numElts) {
+ assert(numElts > 1 && "illegal vector length");
+ return getSwiftABIInfo(CGM)
+ .isLegalVectorTypeForSwift(vectorSize, eltTy, numElts);
+}
+
+std::pair<llvm::Type*, unsigned>
+swiftcall::splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize,
+ llvm::VectorType *vectorTy) {
+ auto numElts = vectorTy->getNumElements();
+ auto eltTy = vectorTy->getElementType();
+
+ // Try to split the vector type in half.
+ if (numElts >= 4 && isPowerOf2(numElts)) {
+ if (isLegalVectorType(CGM, vectorSize / 2, eltTy, numElts / 2))
+ return {llvm::VectorType::get(eltTy, numElts / 2), 2};
+ }
+
+ return {eltTy, numElts};
+}
+
+void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize,
+ llvm::VectorType *origVectorTy,
+ llvm::SmallVectorImpl<llvm::Type*> &components) {
+ // If it's already a legal vector type, use it.
+ if (isLegalVectorType(CGM, origVectorSize, origVectorTy)) {
+ components.push_back(origVectorTy);
+ return;
+ }
+
+ // Try to split the vector into legal subvectors.
+ auto numElts = origVectorTy->getNumElements();
+ auto eltTy = origVectorTy->getElementType();
+ assert(numElts != 1);
+
+ // The largest size that we're still considering making subvectors of.
+ // Always a power of 2.
+ unsigned logCandidateNumElts = llvm::findLastSet(numElts, llvm::ZB_Undefined);
+ unsigned candidateNumElts = 1U << logCandidateNumElts;
+ assert(candidateNumElts <= numElts && candidateNumElts * 2 > numElts);
+
+ // Minor optimization: don't check the legality of this exact size twice.
+ if (candidateNumElts == numElts) {
+ logCandidateNumElts--;
+ candidateNumElts >>= 1;
+ }
+
+ CharUnits eltSize = (origVectorSize / numElts);
+ CharUnits candidateSize = eltSize * candidateNumElts;
+
+ // The correctness of this algorithm relies on the fact that we never
+ // have a legal non-power-of-2 vector size without having the power of 2
+ // also be legal.
+ while (logCandidateNumElts > 0) {
+ assert(candidateNumElts == 1U << logCandidateNumElts);
+ assert(candidateNumElts <= numElts);
+ assert(candidateSize == eltSize * candidateNumElts);
+
+ // Skip illegal vector sizes.
+ if (!isLegalVectorType(CGM, candidateSize, eltTy, candidateNumElts)) {
+ logCandidateNumElts--;
+ candidateNumElts /= 2;
+ candidateSize /= 2;
+ continue;
+ }
+
+ // Add the right number of vectors of this size.
+ auto numVecs = numElts >> logCandidateNumElts;
+ components.append(numVecs, llvm::VectorType::get(eltTy, candidateNumElts));
+ numElts -= (numVecs << logCandidateNumElts);
+
+ if (numElts == 0) return;
+
+ // It's possible that the number of elements remaining will be legal.
+ // This can happen with e.g. <7 x float> when <3 x float> is legal.
+ // This only needs to be separately checked if it's not a power of 2.
+ if (numElts > 2 && !isPowerOf2(numElts) &&
+ isLegalVectorType(CGM, eltSize * numElts, eltTy, numElts)) {
+ components.push_back(llvm::VectorType::get(eltTy, numElts));
+ return;
+ }
+
+ // Bring candidateNumElts down to something no larger than numElts.
+ do {
+ logCandidateNumElts--;
+ candidateNumElts /= 2;
+ candidateSize /= 2;
+ } while (candidateNumElts > numElts);
+ }
+
+ // Otherwise, just append a bunch of individual elements.
+ components.append(numElts, eltTy);
+}
+
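A worked trace under the default rule that vectors of 9 to 16 bytes are legal: <7 x float> (28 bytes) is not legal, so the loop first emits one <4 x float> (16 bytes; numVecs = 7 >> 2 = 1), leaving 3 elements. Since 3 is not a power of two and <3 x float> (12 bytes) is itself legal, the early check fires and the result is {<4 x float>, <3 x float>} rather than a <2 x float> plus a lone scalar.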
+bool swiftcall::shouldPassCXXRecordIndirectly(CodeGenModule &CGM,
+ const CXXRecordDecl *record) {
+ // Following a recommendation from Richard Smith, pass a C++ type
+ // indirectly only if the destructor is non-trivial or *all* of the
+ // copy/move constructors are deleted or non-trivial.
+
+ if (record->hasNonTrivialDestructor())
+ return true;
+
+ // It would be nice if this were summarized on the CXXRecordDecl.
+ for (auto ctor : record->ctors()) {
+ if (ctor->isCopyOrMoveConstructor() && !ctor->isDeleted() &&
+ ctor->isTrivial()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
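Some illustrative types under this rule (a sketch with declarations only; nothing here is meant to link):

struct A { ~A(); };       // non-trivial destructor         -> indirect
struct B {                // every copy/move ctor deleted   -> indirect
  B(const B &) = delete;
  B(B &&) = delete;
};
struct C { int x; };      // trivial, non-deleted copy ctor -> direct
int main() {}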
+static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering,
+ bool forReturn,
+ CharUnits alignmentForIndirect) {
+ if (lowering.empty()) {
+ return ABIArgInfo::getIgnore();
+ } else if (lowering.shouldPassIndirectly(forReturn)) {
+ return ABIArgInfo::getIndirect(alignmentForIndirect, /*byval*/ false);
+ } else {
+ auto types = lowering.getCoerceAndExpandTypes();
+ return ABIArgInfo::getCoerceAndExpand(types.first, types.second);
+ }
+}
+
+static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type,
+ bool forReturn) {
+ if (auto recordType = dyn_cast<RecordType>(type)) {
+ auto record = recordType->getDecl();
+ auto &layout = CGM.getContext().getASTRecordLayout(record);
+
+ if (auto cxxRecord = dyn_cast<CXXRecordDecl>(record)) {
+ if (shouldPassCXXRecordIndirectly(CGM, cxxRecord))
+ return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false);
+ }
+
+ SwiftAggLowering lowering(CGM);
+ lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout);
+ lowering.finish();
+
+ return classifyExpandedType(lowering, forReturn, layout.getAlignment());
+ }
+
+ // Just assume that all of our target ABIs can support returning at least
+ // two integer or floating-point values.
+ if (isa<ComplexType>(type)) {
+ return (forReturn ? ABIArgInfo::getDirect() : ABIArgInfo::getExpand());
+ }
+
+ // Vector types may need to be legalized.
+ if (isa<VectorType>(type)) {
+ SwiftAggLowering lowering(CGM);
+ lowering.addTypedData(type, CharUnits::Zero());
+ lowering.finish();
+
+ CharUnits alignment = CGM.getContext().getTypeAlignInChars(type);
+ return classifyExpandedType(lowering, forReturn, alignment);
+ }
+
+ // Member pointer types need to be expanded, but it's a simple form of
+ // expansion that 'Direct' can handle. Note that CanBeFlattened should be
+ // true for this to work.
+
+ // 'void' needs to be ignored.
+ if (type->isVoidType()) {
+ return ABIArgInfo::getIgnore();
+ }
+
+ // Everything else can be passed directly.
+ return ABIArgInfo::getDirect();
+}
+
+ABIArgInfo swiftcall::classifyReturnType(CodeGenModule &CGM, CanQualType type) {
+ return classifyType(CGM, type, /*forReturn*/ true);
+}
+
+ABIArgInfo swiftcall::classifyArgumentType(CodeGenModule &CGM,
+ CanQualType type) {
+ return classifyType(CGM, type, /*forReturn*/ false);
+}
+
+void swiftcall::computeABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
+ auto &retInfo = FI.getReturnInfo();
+ retInfo = classifyReturnType(CGM, FI.getReturnType());
+
+ for (unsigned i = 0, e = FI.arg_size(); i != e; ++i) {
+ auto &argInfo = FI.arg_begin()[i];
+ argInfo.info = classifyArgumentType(CGM, argInfo.type);
+ }
+}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp
index 3d1ddef94657..bc03616d25ce 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp
@@ -19,6 +19,7 @@
#include "CodeGenFunction.h"
#include "clang/AST/RecordLayout.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "clang/CodeGen/SwiftCallingConv.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
@@ -68,6 +69,46 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
ABIInfo::~ABIInfo() {}
+/// Does the given lowering require more than the given number of
+/// registers when expanded?
+///
+/// This is intended to be the basis of a reasonable basic implementation
+/// of should{Pass,Return}IndirectlyForSwift.
+///
+/// For most targets, a limit of four total registers is reasonable; this
+/// limits the amount of code required in order to move around the value
+/// in case it wasn't produced immediately prior to the call by the caller
+/// (or wasn't produced in exactly the right registers) or isn't used
+/// immediately within the callee. But some targets may need to further
+/// limit the register count due to an inability to support that many
+/// return registers.
+static bool occupiesMoreThan(CodeGenTypes &cgt,
+ ArrayRef<llvm::Type*> scalarTypes,
+ unsigned maxAllRegisters) {
+ unsigned intCount = 0, fpCount = 0;
+ for (llvm::Type *type : scalarTypes) {
+ if (type->isPointerTy()) {
+ intCount++;
+ } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
+ auto ptrWidth = cgt.getTarget().getPointerWidth(0);
+ intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
+ } else {
+ assert(type->isVectorTy() || type->isFloatingPointTy());
+ fpCount++;
+ }
+ }
+
+ return (intCount + fpCount > maxAllRegisters);
+}
+
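A standalone sketch of the counting rule (the names are illustrative; a 64-bit pointer width is assumed):

#include <cstdio>
#include <vector>

struct Scalar { enum Kind { Ptr, Int, FP } kind; unsigned bits; };

static bool occupiesMoreThan(const std::vector<Scalar> &types,
                             unsigned ptrWidth, unsigned maxRegs) {
  unsigned intCount = 0, fpCount = 0;
  for (const Scalar &s : types) {
    switch (s.kind) {
    case Scalar::Ptr: ++intCount; break;
    case Scalar::Int: intCount += (s.bits + ptrWidth - 1) / ptrWidth; break;
    case Scalar::FP:  ++fpCount;  break; // vectors are counted the same way
    }
  }
  return intCount + fpCount > maxRegs;
}

int main() {
  // {ptr, i128, double}: 1 + 2 integer registers + 1 FP register = 4 total
  std::vector<Scalar> tys = {{Scalar::Ptr, 64}, {Scalar::Int, 128}, {Scalar::FP, 64}};
  printf("%d\n", occupiesMoreThan(tys, 64, 4)); // 0: exactly at the limit
}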
+bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
+ llvm::Type *eltTy,
+ unsigned numElts) const {
+ // The default implementation of this assumes that the target guarantees
+ // 128-bit SIMD support but nothing more.
+ return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
+}
+
static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
CGCXXABI &CXXABI) {
const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
@@ -117,6 +158,8 @@ const TargetInfo &ABIInfo::getTarget() const {
return CGT.getTarget();
}
+bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
+
bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return false;
}
@@ -130,7 +173,7 @@ bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
return false;
}
-void ABIArgInfo::dump() const {
+LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
raw_ostream &OS = llvm::errs();
OS << "(ABIArgInfo Kind=";
switch (TheKind) {
@@ -158,6 +201,10 @@ void ABIArgInfo::dump() const {
case Expand:
OS << "Expand";
break;
+ case CoerceAndExpand:
+ OS << "CoerceAndExpand Type=";
+ getCoerceAndExpandType()->print(OS);
+ break;
}
OS << ")\n";
}
@@ -217,7 +264,7 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
}
// Advance the pointer past the argument, then store that back.
- CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize);
+ CharUnits FullDirectSize = DirectSize.alignTo(SlotSize);
llvm::Value *NextPtr =
CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize,
"argp.next");
@@ -225,7 +272,8 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
// If the argument is smaller than a slot, and this is a big-endian
// target, the argument will be right-adjusted in its slot.
- if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) {
+ if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
+ !DirectTy->isStructTy()) {
Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
}
@@ -324,6 +372,9 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
Opt += Lib;
}
+unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+ return llvm::CallingConv::C;
+}
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
/// isEmptyField - Return true iff the field is "empty", that is, it
@@ -364,7 +415,7 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
- return 0;
+ return false;
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
@@ -456,73 +507,55 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
return Found;
}
-static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
- // Treat complex types as the element type.
- if (const ComplexType *CTy = Ty->getAs<ComplexType>())
- Ty = CTy->getElementType();
-
- // Check for a type which we know has a simple scalar argument-passing
- // convention without any padding. (We're specifically looking for 32
- // and 64-bit integer and integer-equivalents, float, and double.)
- if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
- !Ty->isEnumeralType() && !Ty->isBlockPointerType())
- return false;
-
- uint64_t Size = Context.getTypeSize(Ty);
- return Size == 32 || Size == 64;
-}
-
-/// canExpandIndirectArgument - Test whether an argument type which is to be
-/// passed indirectly (on the stack) would have the equivalent layout if it was
-/// expanded into separate arguments. If so, we prefer to do the latter to avoid
-/// inhibiting optimizations.
-///
-// FIXME: This predicate is missing many cases, currently it just follows
-// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We
-// should probably make this smarter, or better yet make the LLVM backend
-// capable of handling it.
-static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) {
- // We can only expand structure types.
- const RecordType *RT = Ty->getAs<RecordType>();
- if (!RT)
- return false;
-
- // We can only expand (C) structures.
- //
- // FIXME: This needs to be generalized to handle classes as well.
- const RecordDecl *RD = RT->getDecl();
- if (!RD->isStruct())
- return false;
-
- // We try to expand CLike CXXRecordDecl.
- if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
- if (!CXXRD->isCLike())
- return false;
- }
-
- uint64_t Size = 0;
-
- for (const auto *FD : RD->fields()) {
- if (!is32Or64BitBasicType(FD->getType(), Context))
- return false;
+namespace {
+Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
+ const ABIArgInfo &AI) {
+ // This default implementation defers to the LLVM backend's va_arg
+ // instruction. It can handle only arguments passed directly
+ // (typically only primitive types, which the backend knows how to
+ // handle) or aggregates passed indirectly by pointer. (NOTE: if the
+ // "byval" flag has ABI impact in the callee, this implementation
+ // cannot work.)
+
+ // Only a few cases are covered here at the moment -- those needed
+ // by the default ABI.
+ llvm::Value *Val;
+
+ if (AI.isIndirect()) {
+ assert(!AI.getPaddingType() &&
+ "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
+ assert(
+ !AI.getIndirectRealign() &&
+ "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");
+
+ auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
+ CharUnits TyAlignForABI = TyInfo.second;
+
+ llvm::Type *BaseTy =
+ llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
+ llvm::Value *Addr =
+ CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
+ return Address(Addr, TyAlignForABI);
+ } else {
+ assert((AI.isDirect() || AI.isExtend()) &&
+ "Unexpected ArgInfo Kind in generic VAArg emitter!");
- // FIXME: Reject bit-fields wholesale; there are two problems, we don't know
- // how to expand them yet, and the predicate for telling if a bitfield still
- // counts as "basic" is more complicated than what we were doing previously.
- if (FD->isBitField())
- return false;
+ assert(!AI.getInReg() &&
+ "Unexpected InReg seen in arginfo in generic VAArg emitter!");
+ assert(!AI.getPaddingType() &&
+ "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
+ assert(!AI.getDirectOffset() &&
+ "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
+ assert(!AI.getCoerceToType() &&
+ "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");
- Size += Context.getTypeSize(FD->getType());
+ Address Temp = CGF.CreateMemTemp(Ty, "varet");
+ Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty));
+ CGF.Builder.CreateStore(Val, Temp);
+ return Temp;
}
-
- // Make sure there are not any holes in the struct.
- if (Size != Context.getTypeSize(Ty))
- return false;
-
- return true;
}
-namespace {
/// DefaultABIInfo - The default implementation for ABI specific
/// details. This implementation provides information which results in
/// self-consistent and sensible LLVM IR generation, but does not
@@ -542,7 +575,9 @@ public:
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
- QualType Ty) const override;
+ QualType Ty) const override {
+ return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
+ }
};
class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -551,11 +586,6 @@ public:
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
};
-Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
- QualType Ty) const {
- return Address::invalid();
-}
-
ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
@@ -607,13 +637,17 @@ private:
ABIArgInfo classifyArgumentType(QualType Ty) const;
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
- // non-virtual, but computeInfo is virtual, so we overload that.
+ // non-virtual, but computeInfo and EmitVAArg are virtual, so we
+ // override them.
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
+
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
};
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
@@ -665,6 +699,14 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
return DefaultABIInfo::classifyReturnType(RetTy);
}
+Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4),
+ /*AllowHigherAlign=*/ true);
+}
+
//===----------------------------------------------------------------------===//
// le32/PNaCl bitcode ABI Implementation
//
@@ -700,7 +742,13 @@ void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- return Address::invalid();
+ // The PNaCL ABI is a bit odd, in that varargs don't use normal
+ // function classification. Structs get passed directly for varargs
+ // functions, through a rewriting transform in
+ // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
+ // this target to actually support va_arg instructions with an
+ // aggregate type, unlike other targets.
+ return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
}
/// \brief Classify argument of given type \p Ty.
@@ -797,7 +845,7 @@ struct CCState {
};
/// X86_32ABIInfo - The X86-32 ABI information.
-class X86_32ABIInfo : public ABIInfo {
+class X86_32ABIInfo : public SwiftABIInfo {
enum Class {
Integer,
Float
@@ -849,6 +897,8 @@ class X86_32ABIInfo : public ABIInfo {
bool &NeedsPadding) const;
bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
+ bool canExpandIndirectArgument(QualType Ty) const;
+
/// \brief Rewrite the function info so that all memory arguments use
/// inalloca.
void rewriteWithInAlloca(CGFunctionInfo &FI) const;
@@ -866,12 +916,22 @@ public:
X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
- : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
+ : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
IsRetSmallStructInRegABI(RetSmallStructInRegABI),
IsWin32StructABI(Win32StructABI),
IsSoftFloatABI(SoftFloatABI),
IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
DefaultNumRegisterParameters(NumRegisterParameters) {}
+
+ bool shouldPassIndirectlyForSwift(CharUnits totalSize,
+ ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ // LLVM's x86-32 lowering currently only assigns up to three
+ // integer registers and three fp registers. Oddly, it'll use up to
+ // four vector registers for vectors, but those can overlap with the
+ // scalar registers.
+ return occupiesMoreThan(CGT, scalars, /*total*/ 3);
+ }
};
class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -920,6 +980,11 @@ public:
('T' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
+
+ StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
+ return "movl\t%ebp, %ebp"
+ "\t\t## marker for objc_retainAutoreleaseReturnValue";
+ }
};
}
@@ -1054,6 +1119,72 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
return true;
}
+static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
+ // Treat complex types as the element type.
+ if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+ Ty = CTy->getElementType();
+
+ // Check for a type which we know has a simple scalar argument-passing
+ // convention without any padding. (We're specifically looking for 32
+ // and 64-bit integer and integer-equivalents, float, and double.)
+ if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
+ !Ty->isEnumeralType() && !Ty->isBlockPointerType())
+ return false;
+
+ uint64_t Size = Context.getTypeSize(Ty);
+ return Size == 32 || Size == 64;
+}
+
+/// Test whether an argument type which is to be passed indirectly (on the
+/// stack) would have the equivalent layout if it was expanded into separate
+/// arguments. If so, we prefer to do the latter to avoid inhibiting
+/// optimizations.
+bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
+ // We can only expand structure types.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (!RT)
+ return false;
+ const RecordDecl *RD = RT->getDecl();
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+ if (!IsWin32StructABI) {
+ // On non-Windows, we have to conservatively match our old bitcode
+ // prototypes in order to be ABI-compatible at the bitcode level.
+ if (!CXXRD->isCLike())
+ return false;
+ } else {
+ // Don't do this for dynamic classes.
+ if (CXXRD->isDynamicClass())
+ return false;
+ // Don't do this if there are any non-empty bases.
+ for (const CXXBaseSpecifier &Base : CXXRD->bases()) {
+ if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true))
+ return false;
+ }
+ }
+ }
+
+ uint64_t Size = 0;
+
+ for (const auto *FD : RD->fields()) {
+ // Scalar arguments on the stack get 4 byte alignment on x86. If the
+ // argument is smaller than 32-bits, expanding the struct will create
+ // alignment padding.
+ if (!is32Or64BitBasicType(FD->getType(), getContext()))
+ return false;
+
+ // FIXME: Reject bit-fields wholesale; there are two problems: we don't know
+ // how to expand them yet, and the predicate for telling if a bit-field still
+ // counts as "basic" is more complicated than what we were doing previously.
+ if (FD->isBitField())
+ return false;
+
+ Size += getContext().getTypeSize(FD->getType());
+ }
+
+ // We can do this if there was no alignment padding.
+ return Size == getContext().getTypeSize(Ty);
+}
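+
+// For example, struct { int a; double b; } passes the checks above (32 + 64
+// bits with no padding, so Size equals getTypeSize(Ty)) and can be expanded,
+// while struct { char c; int i; } is rejected because char is not a 32- or
+// 64-bit basic type and expanding it would drop the alignment padding.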
+
ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
@@ -1114,6 +1245,10 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
return getIndirectReturnResult(RetTy, State);
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
// Small structures which are register sized are generally returned
// in a register.
if (shouldReturnTypeInRegister(RetTy, getContext())) {
@@ -1266,6 +1401,12 @@ bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
bool &InReg,
bool &NeedsPadding) const {
+ // On Windows, aggregates other than HFAs are never passed in registers, and
+ // they do not consume register slots. Homogeneous floating-point aggregates
+ // (HFAs) have already been dealt with at this point.
+ if (IsWin32StructABI && isAggregateTypeForABI(Ty))
+ return false;
+
NeedsPadding = false;
InReg = !IsMCUABI;
@@ -1339,23 +1480,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
if (isAggregateTypeForABI(Ty)) {
- if (RT) {
- // Structs are always byval on win32, regardless of what they contain.
- if (IsWin32StructABI)
- return getIndirectResult(Ty, true, State);
+ // Structures with flexible arrays are always indirect.
+ // FIXME: This should not be byval!
+ if (RT && RT->getDecl()->hasFlexibleArrayMember())
+ return getIndirectResult(Ty, true, State);
- // Structures with flexible arrays are always indirect.
- if (RT->getDecl()->hasFlexibleArrayMember())
- return getIndirectResult(Ty, true, State);
- }
-
- // Ignore empty structs/unions.
- if (isEmptyRecord(getContext(), Ty, true))
+ // Ignore empty structs/unions on non-Windows.
+ if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
- bool NeedsPadding, InReg;
+ bool NeedsPadding = false;
+ bool InReg;
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
@@ -1373,9 +1510,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
// optimizations.
// Don't do this for the MCU if there are still free integer registers
// (see X86_64 ABI for full explanation).
- if (getContext().getTypeSize(Ty) <= 4*32 &&
- canExpandIndirectArgument(Ty, getContext()) &&
- (!IsMCUABI || State.FreeRegs == 0))
+ if (getContext().getTypeSize(Ty) <= 4 * 32 &&
+ (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
return ABIArgInfo::getExpandWithPadding(
State.CC == llvm::CallingConv::X86_FastCall ||
State.CC == llvm::CallingConv::X86_VectorCall,
@@ -1474,7 +1610,7 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
// Insert padding bytes to respect alignment.
CharUnits FieldEnd = StackOffset;
- StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign);
+ StackOffset = FieldEnd.alignTo(FieldAlign);
if (StackOffset != FieldEnd) {
CharUnits NumBytes = StackOffset - FieldEnd;
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
@@ -1495,10 +1631,14 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
return false;
case ABIArgInfo::Direct:
case ABIArgInfo::Extend:
- case ABIArgInfo::Expand:
if (Info.getInReg())
return false;
return true;
+ case ABIArgInfo::Expand:
+ case ABIArgInfo::CoerceAndExpand:
+ // These are aggregate types which are never passed in registers when
+ // inalloca is involved.
+ return true;
}
llvm_unreachable("invalid enum");
}
@@ -1609,6 +1749,10 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
llvm::AttributeSet::FunctionIndex,
B));
}
+ if (FD->hasAttr<AnyX86InterruptAttr>()) {
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ Fn->setCallingConv(llvm::CallingConv::X86_INTR);
+ }
}
}
@@ -1675,7 +1819,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
}
/// X86_64ABIInfo - The X86_64 ABI information.
-class X86_64ABIInfo : public ABIInfo {
+class X86_64ABIInfo : public SwiftABIInfo {
enum Class {
Integer = 0,
SSE,
@@ -1779,6 +1923,17 @@ class X86_64ABIInfo : public ABIInfo {
return !getTarget().getTriple().isOSDarwin();
}
+ /// GCC classifies <1 x long long> as SSE, but compatibility with older clang
+ /// compilers requires us to classify it as INTEGER.
+ bool classifyIntegerMMXAsSSE() const {
+ const llvm::Triple &Triple = getTarget().getTriple();
+ if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4)
+ return false;
+ if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10)
+ return false;
+ return true;
+ }
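+ // Concretely: this returns false for triples such as x86_64-apple-darwin,
+ // x86_64-scei-ps4, and x86_64-unknown-freebsd10.0 (keep the old INTEGER
+ // classification), and true for, e.g., x86_64-pc-linux-gnu, where
+ // <1 x long long> is passed in an SSE register as GCC does.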
+
X86AVXABILevel AVXLevel;
// Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
// 64-bit hardware.
@@ -1786,7 +1941,7 @@ class X86_64ABIInfo : public ABIInfo {
public:
X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
- ABIInfo(CGT), AVXLevel(AVXLevel),
+ SwiftABIInfo(CGT), AVXLevel(AVXLevel),
Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
}
@@ -1813,6 +1968,12 @@ public:
bool has64BitPointers() const {
return Has64BitPointers;
}
+
+ bool shouldPassIndirectlyForSwift(CharUnits totalSize,
+ ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ return occupiesMoreThan(CGT, scalars, /*total*/ 4);
+ }
};
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
@@ -1914,6 +2075,16 @@ public:
('T' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override {
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<AnyX86InterruptAttr>()) {
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ Fn->setCallingConv(llvm::CallingConv::X86_INTR);
+ }
+ }
+ }
};
class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
@@ -2031,6 +2202,13 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D,
CodeGen::CodeGenModule &CGM) const {
TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<AnyX86InterruptAttr>()) {
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+ Fn->setCallingConv(llvm::CallingConv::X86_INTR);
+ }
+ }
+
addStackProbeSizeTargetAttribute(D, GV, CGM);
}
}
@@ -2203,15 +2381,20 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
if (EB_Lo != EB_Hi)
Hi = Lo;
} else if (Size == 64) {
+ QualType ElementType = VT->getElementType();
+
// gcc passes <1 x double> in memory. :(
- if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double))
+ if (ElementType->isSpecificBuiltinType(BuiltinType::Double))
return;
- // gcc passes <1 x long long> as INTEGER.
- if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::LongLong) ||
- VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULongLong) ||
- VT->getElementType()->isSpecificBuiltinType(BuiltinType::Long) ||
- VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULong))
+ // gcc passes <1 x long long> as SSE but clang used to unconditionally
+ // pass them as integer. For platforms where clang is the de facto
+ // platform compiler, we must continue to use integer.
+ if (!classifyIntegerMMXAsSSE() &&
+ (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::Long) ||
+ ElementType->isSpecificBuiltinType(BuiltinType::ULong)))
Current = Integer;
else
Current = SSE;
@@ -2775,7 +2958,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
// the second element at offset 8. Check for this:
unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
unsigned HiAlign = TD.getABITypeAlignment(Hi);
- unsigned HiStart = llvm::RoundUpToAlignment(LoSize, HiAlign);
+ unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
// To handle this, we have to increase the size of the low part so that the
@@ -3473,13 +3656,15 @@ public:
}
+// TODO: this implementation is now likely redundant with
+// DefaultABIInfo::EmitVAArg.
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
QualType Ty) const {
const unsigned OverflowLimit = 8;
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
// TODO: Implement this. For now ignore.
(void)CTy;
- return Address::invalid();
+ return Address::invalid(); // FIXME?
}
// struct __va_list_tag {
@@ -3578,7 +3763,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
CharUnits Size;
if (!isIndirect) {
auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
- Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign);
+ Size = TypeInfo.first.alignTo(OverflowAreaAlign);
} else {
Size = CGF.getPointerSize();
}
@@ -3663,7 +3848,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
namespace {
/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
-class PPC64_SVR4_ABIInfo : public DefaultABIInfo {
+class PPC64_SVR4_ABIInfo : public ABIInfo {
public:
enum ABIKind {
ELFv1 = 0,
@@ -3674,6 +3859,7 @@ private:
static const unsigned GPRBits = 64;
ABIKind Kind;
bool HasQPX;
+ bool IsSoftFloatABI;
// A vector of float or double will be promoted to <4 x f32> or <4 x f64> and
// will be passed in a QPX register.
@@ -3704,8 +3890,10 @@ private:
}
public:
- PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX)
- : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
+ PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX,
+ bool SoftFloatABI)
+ : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX),
+ IsSoftFloatABI(SoftFloatABI) {}
bool isPromotableTypeForABI(QualType Ty) const;
CharUnits getParamTypeAlignment(QualType Ty) const;
@@ -3753,8 +3941,10 @@ class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
public:
PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
- PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX)
- : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {}
+ PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX,
+ bool SoftFloatABI)
+ : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX,
+ SoftFloatABI)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
@@ -3945,8 +4135,19 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
// agree in both total size and mode (float vs. vector) are
// treated as being equivalent here.
const Type *TyPtr = Ty.getTypePtr();
- if (!Base)
+ if (!Base) {
Base = TyPtr;
+ // If it's a vector with a non-power-of-2 element count, its reported
+ // size has already been rounded up to a power of 2, so explicitly
+ // widen the vector type itself to match.
+ if (const VectorType *VT = Base->getAs<VectorType>()) {
+ QualType EltTy = VT->getElementType();
+ unsigned NumElements =
+ getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
+ Base = getContext()
+ .getVectorType(EltTy, NumElements, VT->getVectorKind())
+ .getTypePtr();
+ }
+ }
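+ // For example, a <3 x float> member reports a padded size of 128 bits, so
+ // the block above widens Base to <4 x float>, keeping the size comparison
+ // below consistent.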
if (Base->isVectorType() != TyPtr->isVectorType() ||
getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
@@ -3961,8 +4162,11 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
- BT->getKind() == BuiltinType::LongDouble)
+ BT->getKind() == BuiltinType::LongDouble) {
+ if (IsSoftFloatABI)
+ return false;
return true;
+ }
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty))
@@ -4029,13 +4233,13 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
// Types up to 8 bytes are passed as integer type (which will be
// properly aligned in the argument save area doubleword).
if (Bits <= GPRBits)
- CoerceTy = llvm::IntegerType::get(getVMContext(),
- llvm::RoundUpToAlignment(Bits, 8));
+ CoerceTy =
+ llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
// Larger types are passed as arrays, with the base type selected
// according to the required alignment in the save area.
else {
uint64_t RegBits = ABIAlign * 8;
- uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits;
+ uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits;
llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
}
@@ -4095,8 +4299,8 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
} else
- CoerceTy = llvm::IntegerType::get(getVMContext(),
- llvm::RoundUpToAlignment(Bits, 8));
+ CoerceTy =
+ llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
return ABIArgInfo::getDirect(CoerceTy);
}
@@ -4220,7 +4424,7 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
namespace {
-class AArch64ABIInfo : public ABIInfo {
+class AArch64ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
AAPCS = 0,
@@ -4231,7 +4435,8 @@ private:
ABIKind Kind;
public:
- AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {}
+ AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind)
+ : SwiftABIInfo(CGT), Kind(Kind) {}
private:
ABIKind getABIKind() const { return Kind; }
@@ -4264,6 +4469,12 @@ private:
return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
: EmitAAPCSVAArg(VAListAddr, Ty, CGF);
}
+
+ bool shouldPassIndirectlyForSwift(CharUnits totalSize,
+ ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ return occupiesMoreThan(CGT, scalars, /*total*/ 4);
+ }
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4289,6 +4500,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
// Handle illegal vector types here.
if (isIllegalVectorType(Ty)) {
uint64_t Size = getContext().getTypeSize(Ty);
+ // Android promotes <2 x i8> to i16, not i32
+ if (isAndroid() && (Size <= 16)) {
+ llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
+ return ABIArgInfo::getDirect(ResType);
+ }
if (Size <= 32) {
llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
@@ -4409,8 +4625,8 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
uint64_t Size = getContext().getTypeSize(VT);
- // NumElements should be power of 2 between 1 and 16.
- if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16)
+ // NumElements should be power of 2.
+ if (!llvm::isPowerOf2_32(NumElements))
return true;
return Size != 64 && (Size != 128 || NumElements == 1);
}
@@ -4489,7 +4705,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
reg_top_index = 1; // field number for __gr_top
reg_top_offset = CharUnits::fromQuantity(8);
- RegSize = llvm::RoundUpToAlignment(RegSize, 8);
+ RegSize = llvm::alignTo(RegSize, 8);
} else {
// 4 is the field number of __vr_offs.
reg_offs_p =
@@ -4659,7 +4875,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
if (IsIndirect)
StackSize = StackSlotSize;
else
- StackSize = TyInfo.first.RoundUpToAlignment(StackSlotSize);
+ StackSize = TyInfo.first.alignTo(StackSlotSize);
llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
llvm::Value *NewStack =
@@ -4699,7 +4915,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
// illegal vector types. Lower VAArg here for these cases and use
// the LLVM va_arg instruction for everything else.
if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
- return Address::invalid();
+ return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
CharUnits SlotSize = CharUnits::fromQuantity(8);
@@ -4733,7 +4949,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
namespace {
-class ARMABIInfo : public ABIInfo {
+class ARMABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
APCS = 0,
@@ -4746,7 +4962,8 @@ private:
ABIKind Kind;
public:
- ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {
+ ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind)
+ : SwiftABIInfo(CGT), Kind(_Kind) {
setCCs();
}
@@ -4757,6 +4974,8 @@ public:
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::MuslEABI:
+ case llvm::Triple::MuslEABIHF:
return true;
default:
return false;
@@ -4767,17 +4986,13 @@ public:
switch (getTarget().getTriple().getEnvironment()) {
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::MuslEABIHF:
return true;
default:
return false;
}
}
- bool isAndroid() const {
- return (getTarget().getTriple().getEnvironment() ==
- llvm::Triple::Android);
- }
-
ABIKind getABIKind() const { return Kind; }
private:
@@ -4797,6 +5012,12 @@ private:
llvm::CallingConv::ID getLLVMDefaultCC() const;
llvm::CallingConv::ID getABIDefaultCC() const;
void setCCs();
+
+ bool shouldPassIndirectlyForSwift(CharUnits totalSize,
+ ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ return occupiesMoreThan(CGT, scalars, /*total*/ 4);
+ }
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4877,6 +5098,16 @@ public:
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
+
+ void getDependentLibraryOption(llvm::StringRef Lib,
+ llvm::SmallString<24> &Opt) const override {
+ Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
+ }
+
+ void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
+ llvm::SmallString<32> &Opt) const override {
+ Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
+ }
};
void WindowsARMTargetCodeGenInfo::setTargetAttributes(
@@ -4906,7 +5137,7 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
/// Return the default calling convention that LLVM will use.
llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const {
// The default calling convention that LLVM will infer.
- if (isEABIHF() || getTarget().getTriple().isWatchOS())
+ if (isEABIHF() || getTarget().getTriple().isWatchABI())
return llvm::CallingConv::ARM_AAPCS_VFP;
else if (isEABI())
return llvm::CallingConv::ARM_AAPCS;
@@ -4988,7 +5219,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
// __fp16 gets passed as if it were an int or float, but with the top 16 bits
// unspecified. This is not done for OpenCL as it handles the half type
// natively, and does not need to interwork with AAPCS code.
- if (Ty->isHalfType() && !getContext().getLangOpts().OpenCL) {
+ if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) {
llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
llvm::Type::getFloatTy(getVMContext()) :
llvm::Type::getInt32Ty(getVMContext());
@@ -5180,7 +5411,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
// __fp16 gets returned as if it were an int or float, but with the top 16
// bits unspecified. This is not done for OpenCL as it handles the half type
// natively, and does not need to interwork with AAPCS code.
- if (RetTy->isHalfType() && !getContext().getLangOpts().OpenCL) {
+ if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) {
llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
llvm::Type::getFloatTy(getVMContext()) :
llvm::Type::getInt32Ty(getVMContext());
@@ -5257,7 +5488,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
} else if (Size <= 128 && getABIKind() == AAPCS16_VFP) {
llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
llvm::Type *CoerceTy =
- llvm::ArrayType::get(Int32Ty, llvm::RoundUpToAlignment(Size, 32) / 32);
+ llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
return ABIArgInfo::getDirect(CoerceTy);
}
@@ -5513,12 +5744,12 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name,
namespace {
-class SystemZABIInfo : public ABIInfo {
+class SystemZABIInfo : public SwiftABIInfo {
bool HasVector;
public:
SystemZABIInfo(CodeGenTypes &CGT, bool HV)
- : ABIInfo(CGT), HasVector(HV) {}
+ : SwiftABIInfo(CGT), HasVector(HV) {}
bool isPromotableIntegerType(QualType Ty) const;
bool isCompoundType(QualType Ty) const;
@@ -5538,6 +5769,12 @@ public:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
+
+ bool shouldPassIndirectlyForSwift(CharUnits totalSize,
+ ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ return occupiesMoreThan(CGT, scalars, /*total*/ 4);
+ }
};
class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -6067,8 +6304,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes),
(uint64_t)StackAlignInBytes);
- unsigned CurrOffset = llvm::RoundUpToAlignment(Offset, Align);
- Offset = CurrOffset + llvm::RoundUpToAlignment(TySize, Align * 8) / 8;
+ unsigned CurrOffset = llvm::alignTo(Offset, Align);
+ Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) {
// Ignore empty aggregates.
@@ -6465,6 +6702,132 @@ Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
}
//===----------------------------------------------------------------------===//
+// Lanai ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class LanaiABIInfo : public DefaultABIInfo {
+public:
+ LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+ bool shouldUseInReg(QualType Ty, CCState &State) const;
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ CCState State(FI.getCallingConvention());
+ // Lanai uses 4 registers to pass arguments unless the function has the
+ // regparm attribute set.
+ if (FI.getHasRegParm()) {
+ State.FreeRegs = FI.getRegParm();
+ } else {
+ State.FreeRegs = 4;
+ }
+
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (auto &I : FI.arguments())
+ I.info = classifyArgumentType(I.type, State);
+ }
+
+ ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+};
+} // end anonymous namespace
+
+bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const {
+ unsigned Size = getContext().getTypeSize(Ty);
+ unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U;
+
+ if (SizeInRegs == 0)
+ return false;
+
+ if (SizeInRegs > State.FreeRegs) {
+ State.FreeRegs = 0;
+ return false;
+ }
+
+ State.FreeRegs -= SizeInRegs;
+
+ return true;
+}
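+
+// For example, with the default State.FreeRegs = 4, a 64-bit argument needs
+// SizeInRegs = 2 and leaves two registers free; a following 128-bit argument
+// (SizeInRegs = 4) no longer fits, zeroes FreeRegs, and is passed on the
+// stack instead.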
+
+ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal,
+ CCState &State) const {
+ if (!ByVal) {
+ if (State.FreeRegs) {
+ --State.FreeRegs; // Non-byval indirects just use one pointer.
+ return getNaturalAlignIndirectInReg(Ty);
+ }
+ return getNaturalAlignIndirect(Ty, false);
+ }
+
+ // Compute the byval alignment.
+ const unsigned MinABIStackAlignInBytes = 4;
+ unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+ return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
+ /*Realign=*/TypeAlign >
+ MinABIStackAlignInBytes);
+}
+
+ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
+ CCState &State) const {
+ // Check with the C++ ABI first.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (RT) {
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect) {
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
+ return getNaturalAlignIndirect(Ty, /*ByRef=*/true);
+ }
+ }
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Structures with flexible arrays are always indirect.
+ if (RT && RT->getDecl()->hasFlexibleArrayMember())
+ return getIndirectResult(Ty, /*ByVal=*/true, State);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ llvm::LLVMContext &LLVMContext = getVMContext();
+ unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
+ if (SizeInRegs <= State.FreeRegs) {
+ llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
+ SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
+ llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
+ State.FreeRegs -= SizeInRegs;
+ return ABIArgInfo::getDirectInReg(Result);
+ } else {
+ State.FreeRegs = 0;
+ }
+ return getIndirectResult(Ty, true, State);
+ }
+
+ // Treat an enum type as its underlying type.
+ if (const auto *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ bool InReg = shouldUseInReg(Ty, State);
+ if (Ty->isPromotableIntegerType()) {
+ if (InReg)
+ return ABIArgInfo::getDirectInReg();
+ return ABIArgInfo::getExtend();
+ }
+ if (InReg)
+ return ABIArgInfo::getDirectInReg();
+ return ABIArgInfo::getDirect();
+}
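+
+// For example, a 12-byte plain struct seen with four free registers is
+// returned above as getDirectInReg({i32, i32, i32}) and consumes three of
+// them; the same struct arriving with only two registers left is passed
+// indirectly (byval) instead.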
+
+namespace {
+class LanaiTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {}
+};
+}
+
+//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//
@@ -6476,6 +6839,7 @@ public:
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
+ unsigned getOpenCLKernelCallingConv() const override;
};
}
@@ -6504,6 +6868,53 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
}
+unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+ return llvm::CallingConv::AMDGPU_KERNEL;
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC v8 ABI Implementation.
+// Based on the SPARC Compliance Definition version 2.4.1.
+//
+// Ensures that complex values are passed in registers.
+//
+namespace {
+class SparcV8ABIInfo : public DefaultABIInfo {
+public:
+ SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+private:
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ void computeInfo(CGFunctionInfo &FI) const override;
+};
+} // end anonymous namespace
+
+ABIArgInfo SparcV8ABIInfo::classifyReturnType(QualType Ty) const {
+ if (Ty->isAnyComplexType())
+ return ABIArgInfo::getDirect();
+
+ return DefaultABIInfo::classifyReturnType(Ty);
+}
+
+void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ for (auto &Arg : FI.arguments())
+ Arg.info = classifyArgumentType(Arg.type);
+}
+
+namespace {
+class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ SparcV8TargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {}
+};
+} // end anonymous namespace
+
//===----------------------------------------------------------------------===//
// SPARC v9 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
@@ -6569,7 +6980,7 @@ private:
return;
// Finish the current 64-bit word.
- uint64_t Aligned = llvm::RoundUpToAlignment(Size, 64);
+ uint64_t Aligned = llvm::alignTo(Size, 64);
if (Aligned > Size && Aligned <= ToSize) {
Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size));
Size = Aligned;
@@ -6686,7 +7097,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
CoerceBuilder CB(getVMContext(), getDataLayout());
CB.addStruct(0, StrTy);
- CB.pad(llvm::RoundUpToAlignment(CB.DL.getTypeSizeInBits(StrTy), 64));
+ CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64));
// Try to use the original type for coercion.
llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType();
@@ -6716,6 +7127,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
CharUnits Stride;
switch (AI.getKind()) {
case ABIArgInfo::Expand:
+ case ABIArgInfo::CoerceAndExpand:
case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
@@ -6728,7 +7140,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
case ABIArgInfo::Direct: {
auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType());
- Stride = CharUnits::fromQuantity(AllocSize).RoundUpToAlignment(SlotSize);
+ Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize);
ArgAddr = Addr;
break;
}
@@ -6924,6 +7336,8 @@ public:
} // End anonymous namespace.
+// TODO: this implementation is likely now redundant with the default
+// EmitVAArg.
Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CGBuilderTy &Builder = CGF.Builder;
@@ -6944,6 +7358,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
CharUnits ArgSize = CharUnits::Zero();
switch (AI.getKind()) {
case ABIArgInfo::Expand:
+ case ABIArgInfo::CoerceAndExpand:
case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Ignore:
@@ -6955,7 +7370,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
Val = Builder.CreateBitCast(AP, ArgPtrTy);
ArgSize = CharUnits::fromQuantity(
getDataLayout().getTypeAllocSize(AI.getCoerceToType()));
- ArgSize = ArgSize.RoundUpToAlignment(SlotSize);
+ ArgSize = ArgSize.alignTo(SlotSize);
break;
case ABIArgInfo::Indirect:
Val = Builder.CreateElementBitCast(AP, ArgPtrTy);
@@ -7086,15 +7501,59 @@ void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
SmallStringEnc Enc;
if (getTypeString(Enc, D, CGM, TSC)) {
llvm::LLVMContext &Ctx = CGM.getModule().getContext();
- llvm::SmallVector<llvm::Metadata *, 2> MDVals;
- MDVals.push_back(llvm::ConstantAsMetadata::get(GV));
- MDVals.push_back(llvm::MDString::get(Ctx, Enc.str()));
+ llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV),
+ llvm::MDString::get(Ctx, Enc.str())};
llvm::NamedMDNode *MD =
CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
}
+//===----------------------------------------------------------------------===//
+// SPIR ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+ void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
+ unsigned getOpenCLKernelCallingConv() const override;
+};
+} // End anonymous namespace.
+
+/// Emit SPIR specific metadata: OpenCL and SPIR version.
+void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const {
+ llvm::LLVMContext &Ctx = CGM.getModule().getContext();
+ llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
+ llvm::Module &M = CGM.getModule();
+ // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
+ // opencl.spir.version named metadata.
+ llvm::Metadata *SPIRVerElts[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 2)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 0))};
+ llvm::NamedMDNode *SPIRVerMD =
+ M.getOrInsertNamedMetadata("opencl.spir.version");
+ SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts));
+ // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
+ // opencl.ocl.version named metadata node.
+ llvm::Metadata *OCLVerElts[] = {
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)),
+ llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))};
+ llvm::NamedMDNode *OCLVerMD =
+ M.getOrInsertNamedMetadata("opencl.ocl.version");
+ OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts));
+}
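+
+// For a module built with, e.g., -cl-std=CL1.2 (OpenCLVersion == 120), the
+// code above emits roughly:
+// !opencl.spir.version = !{!0} ; !0 = !{i32 2, i32 0}
+// !opencl.ocl.version = !{!1} ; !1 = !{i32 1, i32 2}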
+
+unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+ return llvm::CallingConv::SPIR_KERNEL;
+}
+
static bool appendType(SmallStringEnc &Enc, QualType QType,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC);
@@ -7436,29 +7895,35 @@ const llvm::Triple &CodeGenModule::getTriple() const {
}
bool CodeGenModule::supportsCOMDAT() const {
- return !getTriple().isOSBinFormatMachO();
+ return getTriple().supportsCOMDAT();
}
const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
if (TheTargetCodeGenInfo)
return *TheTargetCodeGenInfo;
+ // Helper to set the unique_ptr while still keeping the return value.
+ auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & {
+ this->TheTargetCodeGenInfo.reset(P);
+ return *P;
+ };
+
const llvm::Triple &Triple = getTarget().getTriple();
switch (Triple.getArch()) {
default:
- return *(TheTargetCodeGenInfo = new DefaultTargetCodeGenInfo(Types));
+ return SetCGInfo(new DefaultTargetCodeGenInfo(Types));
case llvm::Triple::le32:
- return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types));
+ return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
case llvm::Triple::mips:
case llvm::Triple::mipsel:
if (Triple.getOS() == llvm::Triple::NaCl)
- return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types));
- return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, true));
+ return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
+ return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true));
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
- return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false));
+ return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be: {
@@ -7466,78 +7931,79 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
if (getTarget().getABI() == "darwinpcs")
Kind = AArch64ABIInfo::DarwinPCS;
- return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind));
+ return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
- return *(TheTargetCodeGenInfo = new WebAssemblyTargetCodeGenInfo(Types));
+ return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types));
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
- case llvm::Triple::thumbeb:
- {
- if (Triple.getOS() == llvm::Triple::Win32) {
- TheTargetCodeGenInfo =
- new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP);
- return *TheTargetCodeGenInfo;
- }
-
- ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS;
- StringRef ABIStr = getTarget().getABI();
- if (ABIStr == "apcs-gnu")
- Kind = ARMABIInfo::APCS;
- else if (ABIStr == "aapcs16")
- Kind = ARMABIInfo::AAPCS16_VFP;
- else if (CodeGenOpts.FloatABI == "hard" ||
- (CodeGenOpts.FloatABI != "soft" &&
- Triple.getEnvironment() == llvm::Triple::GNUEABIHF))
- Kind = ARMABIInfo::AAPCS_VFP;
-
- return *(TheTargetCodeGenInfo = new ARMTargetCodeGenInfo(Types, Kind));
+ case llvm::Triple::thumbeb: {
+ if (Triple.getOS() == llvm::Triple::Win32) {
+ return SetCGInfo(
+ new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP));
}
+ ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS;
+ StringRef ABIStr = getTarget().getABI();
+ if (ABIStr == "apcs-gnu")
+ Kind = ARMABIInfo::APCS;
+ else if (ABIStr == "aapcs16")
+ Kind = ARMABIInfo::AAPCS16_VFP;
+ else if (CodeGenOpts.FloatABI == "hard" ||
+ (CodeGenOpts.FloatABI != "soft" &&
+ (Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
+ Triple.getEnvironment() == llvm::Triple::MuslEABIHF ||
+ Triple.getEnvironment() == llvm::Triple::EABIHF)))
+ Kind = ARMABIInfo::AAPCS_VFP;
+
+ return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind));
+ }
+
case llvm::Triple::ppc:
- return *(TheTargetCodeGenInfo =
- new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft"));
+ return SetCGInfo(
+ new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft"));
case llvm::Triple::ppc64:
if (Triple.isOSBinFormatELF()) {
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
if (getTarget().getABI() == "elfv2")
Kind = PPC64_SVR4_ABIInfo::ELFv2;
bool HasQPX = getTarget().getABI() == "elfv1-qpx";
+ bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
- return *(TheTargetCodeGenInfo =
- new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX));
+ return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX,
+ IsSoftFloat));
} else
- return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types));
+ return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
case llvm::Triple::ppc64le: {
assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx")
Kind = PPC64_SVR4_ABIInfo::ELFv1;
bool HasQPX = getTarget().getABI() == "elfv1-qpx";
+ bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
- return *(TheTargetCodeGenInfo =
- new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX));
+ return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX,
+ IsSoftFloat));
}
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
- return *(TheTargetCodeGenInfo = new NVPTXTargetCodeGenInfo(Types));
+ return SetCGInfo(new NVPTXTargetCodeGenInfo(Types));
case llvm::Triple::msp430:
- return *(TheTargetCodeGenInfo = new MSP430TargetCodeGenInfo(Types));
+ return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
case llvm::Triple::systemz: {
bool HasVector = getTarget().getABI() == "vector";
- return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types,
- HasVector));
+ return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector));
}
case llvm::Triple::tce:
- return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types));
+ return SetCGInfo(new TCETargetCodeGenInfo(Types));
case llvm::Triple::x86: {
bool IsDarwinVectorABI = Triple.isOSDarwin();
@@ -7546,44 +8012,49 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
if (Triple.getOS() == llvm::Triple::Win32) {
- return *(TheTargetCodeGenInfo = new WinX86_32TargetCodeGenInfo(
- Types, IsDarwinVectorABI, RetSmallStructInRegABI,
- IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
+ return SetCGInfo(new WinX86_32TargetCodeGenInfo(
+ Types, IsDarwinVectorABI, RetSmallStructInRegABI,
+ IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
} else {
- return *(TheTargetCodeGenInfo = new X86_32TargetCodeGenInfo(
- Types, IsDarwinVectorABI, RetSmallStructInRegABI,
- IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
- CodeGenOpts.FloatABI == "soft"));
+ return SetCGInfo(new X86_32TargetCodeGenInfo(
+ Types, IsDarwinVectorABI, RetSmallStructInRegABI,
+ IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
+ CodeGenOpts.FloatABI == "soft"));
}
}
case llvm::Triple::x86_64: {
StringRef ABI = getTarget().getABI();
- X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 :
- ABI == "avx" ? X86AVXABILevel::AVX :
- X86AVXABILevel::None);
+ X86AVXABILevel AVXLevel =
+ (ABI == "avx512"
+ ? X86AVXABILevel::AVX512
+ : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None);
switch (Triple.getOS()) {
case llvm::Triple::Win32:
- return *(TheTargetCodeGenInfo =
- new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
+ return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
case llvm::Triple::PS4:
- return *(TheTargetCodeGenInfo =
- new PS4TargetCodeGenInfo(Types, AVXLevel));
+ return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel));
default:
- return *(TheTargetCodeGenInfo =
- new X86_64TargetCodeGenInfo(Types, AVXLevel));
+ return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
}
}
case llvm::Triple::hexagon:
- return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types));
+ return SetCGInfo(new HexagonTargetCodeGenInfo(Types));
+ case llvm::Triple::lanai:
+ return SetCGInfo(new LanaiTargetCodeGenInfo(Types));
case llvm::Triple::r600:
- return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
+ return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
case llvm::Triple::amdgcn:
- return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
+ return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
+ case llvm::Triple::sparc:
+ return SetCGInfo(new SparcV8TargetCodeGenInfo(Types));
case llvm::Triple::sparcv9:
- return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
+ return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
case llvm::Triple::xcore:
- return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types));
+ return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
+ case llvm::Triple::spir:
+ case llvm::Triple::spir64:
+ return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
}
}
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h
index 87b470498623..e46382596af7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h
@@ -29,15 +29,14 @@ class Value;
}
namespace clang {
-class ABIInfo;
class Decl;
namespace CodeGen {
+class ABIInfo;
class CallArgList;
class CodeGenModule;
class CodeGenFunction;
class CGFunctionInfo;
-}
/// TargetCodeGenInfo - This class organizes various target-specific
/// codegeneration issues, like target-specific attributes, builtins and so
@@ -218,7 +217,12 @@ public:
virtual void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
llvm::SmallString<32> &Opt) const {}
+
+ /// Get LLVM calling convention for OpenCL kernel.
+ virtual unsigned getOpenCLKernelCallingConv() const;
};
+
+} // namespace CodeGen
} // namespace clang
#endif // LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H
diff --git a/contrib/llvm/tools/clang/lib/Driver/Action.cpp b/contrib/llvm/tools/clang/lib/Driver/Action.cpp
index e9490e96db8d..29a46794d4b9 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Action.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Action.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Driver/Action.h"
+#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
@@ -21,8 +22,8 @@ const char *Action::getClassName(ActionClass AC) {
switch (AC) {
case InputClass: return "input";
case BindArchClass: return "bind-arch";
- case CudaDeviceClass: return "cuda-device";
- case CudaHostClass: return "cuda-host";
+ case OffloadClass:
+ return "offload";
case PreprocessJobClass: return "preprocessor";
case PrecompileJobClass: return "precompiler";
case AnalyzeJobClass: return "analyzer";
@@ -40,6 +41,82 @@ const char *Action::getClassName(ActionClass AC) {
llvm_unreachable("invalid class");
}
+void Action::propagateDeviceOffloadInfo(OffloadKind OKind, const char *OArch) {
+ // Offload actions set their own kinds on their dependences.
+ if (Kind == OffloadClass)
+ return;
+
+ assert((OffloadingDeviceKind == OKind || OffloadingDeviceKind == OFK_None) &&
+ "Setting device kind to a different device??");
+ assert(!ActiveOffloadKindMask && "Setting a device kind in a host action??");
+ OffloadingDeviceKind = OKind;
+ OffloadingArch = OArch;
+
+ for (auto *A : Inputs)
+ A->propagateDeviceOffloadInfo(OffloadingDeviceKind, OArch);
+}
+
+void Action::propagateHostOffloadInfo(unsigned OKinds, const char *OArch) {
+ // Offload action set its own kinds on their dependences.
+ if (Kind == OffloadClass)
+ return;
+
+ assert(OffloadingDeviceKind == OFK_None &&
+ "Setting a host kind in a device action.");
+ ActiveOffloadKindMask |= OKinds;
+ OffloadingArch = OArch;
+
+ for (auto *A : Inputs)
+ A->propagateHostOffloadInfo(ActiveOffloadKindMask, OArch);
+}
+
+void Action::propagateOffloadInfo(const Action *A) {
+ if (unsigned HK = A->getOffloadingHostActiveKinds())
+ propagateHostOffloadInfo(HK, A->getOffloadingArch());
+ else
+ propagateDeviceOffloadInfo(A->getOffloadingDeviceKind(),
+ A->getOffloadingArch());
+}
+
+std::string Action::getOffloadingKindPrefix() const {
+ switch (OffloadingDeviceKind) {
+ case OFK_None:
+ break;
+ case OFK_Host:
+ llvm_unreachable("Host kind is not an offloading device kind.");
+ break;
+ case OFK_Cuda:
+ return "device-cuda";
+
+ // TODO: Add other programming models here.
+ }
+
+ if (!ActiveOffloadKindMask)
+ return "";
+
+ std::string Res("host");
+ if (ActiveOffloadKindMask & OFK_Cuda)
+ Res += "-cuda";
+
+ // TODO: Add other programming models here.
+
+ return Res;
+}
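+// For illustration, given the logic above: a CUDA device action maps to
+// "device-cuda", a host action compiled alongside CUDA maps to "host-cuda",
+// and an action with no offloading info maps to "".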
+
+std::string
+Action::getOffloadingFileNamePrefix(llvm::StringRef NormalizedTriple) const {
+ // A file prefix is only generated for device actions and consists of the
+ // offload kind and triple.
+ if (!OffloadingDeviceKind)
+ return "";
+
+ std::string Res("-");
+ Res += getOffloadingKindPrefix();
+ Res += "-";
+ Res += NormalizedTriple;
+ return Res;
+}
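+// For illustration: a CUDA device action with NormalizedTriple
+// "nvptx64-nvidia-cuda" yields "-device-cuda-nvptx64-nvidia-cuda", which
+// GetNamedOutputPath splices into device output file names further below.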
+
void InputAction::anchor() {}
InputAction::InputAction(const Arg &_Input, types::ID _Type)
@@ -51,45 +128,138 @@ void BindArchAction::anchor() {}
BindArchAction::BindArchAction(Action *Input, const char *_ArchName)
: Action(BindArchClass, Input), ArchName(_ArchName) {}
-// Converts CUDA GPU architecture, e.g. "sm_21", to its corresponding virtual
-// compute arch, e.g. "compute_20". Returns null if the input arch is null or
-// doesn't match an existing arch.
-static const char* GpuArchToComputeName(const char *ArchName) {
- if (!ArchName)
- return nullptr;
- return llvm::StringSwitch<const char *>(ArchName)
- .Cases("sm_20", "sm_21", "compute_20")
- .Case("sm_30", "compute_30")
- .Case("sm_32", "compute_32")
- .Case("sm_35", "compute_35")
- .Case("sm_37", "compute_37")
- .Case("sm_50", "compute_50")
- .Case("sm_52", "compute_52")
- .Case("sm_53", "compute_53")
- .Default(nullptr);
+void OffloadAction::anchor() {}
+
+OffloadAction::OffloadAction(const HostDependence &HDep)
+ : Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()) {
+ OffloadingArch = HDep.getBoundArch();
+ ActiveOffloadKindMask = HDep.getOffloadKinds();
+ HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(),
+ HDep.getBoundArch());
+}
+
+OffloadAction::OffloadAction(const DeviceDependences &DDeps, types::ID Ty)
+ : Action(OffloadClass, DDeps.getActions(), Ty),
+ DevToolChains(DDeps.getToolChains()) {
+ auto &OKinds = DDeps.getOffloadKinds();
+ auto &BArchs = DDeps.getBoundArchs();
+
+ // If all inputs agree on the same kind, use it also for this action.
+ if (llvm::all_of(OKinds, [&](OffloadKind K) { return K == OKinds.front(); }))
+ OffloadingDeviceKind = OKinds.front();
+
+ // If we have a single dependency, inherit the architecture from it.
+ if (OKinds.size() == 1)
+ OffloadingArch = BArchs.front();
+
+ // Propagate info to the dependencies.
+ for (unsigned i = 0, e = getInputs().size(); i != e; ++i)
+ getInputs()[i]->propagateDeviceOffloadInfo(OKinds[i], BArchs[i]);
+}
+
+OffloadAction::OffloadAction(const HostDependence &HDep,
+ const DeviceDependences &DDeps)
+ : Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()),
+ DevToolChains(DDeps.getToolChains()) {
+ // We use the kinds of the host dependence for this action.
+ OffloadingArch = HDep.getBoundArch();
+ ActiveOffloadKindMask = HDep.getOffloadKinds();
+ HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(),
+ HDep.getBoundArch());
+
+ // Add device inputs and propagate info to the device actions. Do work only if
+ // we have dependencies.
+ for (unsigned i = 0, e = DDeps.getActions().size(); i != e; ++i)
+ if (auto *A = DDeps.getActions()[i]) {
+ getInputs().push_back(A);
+ A->propagateDeviceOffloadInfo(DDeps.getOffloadKinds()[i],
+ DDeps.getBoundArchs()[i]);
+ }
+}
+
+void OffloadAction::doOnHostDependence(const OffloadActionWorkTy &Work) const {
+ if (!HostTC)
+ return;
+ assert(!getInputs().empty() && "No dependencies for offload action??");
+ auto *A = getInputs().front();
+ Work(A, HostTC, A->getOffloadingArch());
}
-void CudaDeviceAction::anchor() {}
+void OffloadAction::doOnEachDeviceDependence(
+ const OffloadActionWorkTy &Work) const {
+ auto I = getInputs().begin();
+ auto E = getInputs().end();
+ if (I == E)
+ return;
+
+ // We expect to have the same number of input dependences and device tool
+ // chains, except if we also have a host dependence. In that case we have one
+ // more dependence than we have device tool chains.
+ assert(getInputs().size() == DevToolChains.size() + (HostTC ? 1 : 0) &&
+ "Sizes of action dependences and toolchains are not consistent!");
+
+ // Skip the host action.
+ if (HostTC)
+ ++I;
+
+ auto TI = DevToolChains.begin();
+ for (; I != E; ++I, ++TI)
+ Work(*I, *TI, (*I)->getOffloadingArch());
+}
-CudaDeviceAction::CudaDeviceAction(Action *Input, const char *ArchName,
- bool AtTopLevel)
- : Action(CudaDeviceClass, Input), GpuArchName(ArchName),
- AtTopLevel(AtTopLevel) {
- assert(IsValidGpuArchName(GpuArchName));
+void OffloadAction::doOnEachDependence(const OffloadActionWorkTy &Work) const {
+ doOnHostDependence(Work);
+ doOnEachDeviceDependence(Work);
}
-const char *CudaDeviceAction::getComputeArchName() const {
- return GpuArchToComputeName(GpuArchName);
+void OffloadAction::doOnEachDependence(bool IsHostDependence,
+ const OffloadActionWorkTy &Work) const {
+ if (IsHostDependence)
+ doOnHostDependence(Work);
+ else
+ doOnEachDeviceDependence(Work);
}
-bool CudaDeviceAction::IsValidGpuArchName(llvm::StringRef ArchName) {
- return GpuArchToComputeName(ArchName.data()) != nullptr;
+bool OffloadAction::hasHostDependence() const { return HostTC != nullptr; }
+
+Action *OffloadAction::getHostDependence() const {
+ assert(hasHostDependence() && "Host dependence does not exist!");
+ assert(!getInputs().empty() && "No dependencies for offload action??");
+ return HostTC ? getInputs().front() : nullptr;
}
-void CudaHostAction::anchor() {}
+bool OffloadAction::hasSingleDeviceDependence(
+ bool DoNotConsiderHostActions) const {
+ if (DoNotConsiderHostActions)
+ return getInputs().size() == (HostTC ? 2 : 1);
+ return !HostTC && getInputs().size() == 1;
+}
-CudaHostAction::CudaHostAction(Action *Input, const ActionList &DeviceActions)
- : Action(CudaHostClass, Input), DeviceActions(DeviceActions) {}
+Action *
+OffloadAction::getSingleDeviceDependence(bool DoNotConsiderHostActions) const {
+ assert(hasSingleDeviceDependence(DoNotConsiderHostActions) &&
+ "Single device dependence does not exist!");
+ // The previous assert ensures the number of entries in getInputs() is
+ // consistent with what we are doing here.
+ return HostTC ? getInputs()[1] : getInputs().front();
+}
+
+void OffloadAction::DeviceDependences::add(Action &A, const ToolChain &TC,
+ const char *BoundArch,
+ OffloadKind OKind) {
+ DeviceActions.push_back(&A);
+ DeviceToolChains.push_back(&TC);
+ DeviceBoundArchs.push_back(BoundArch);
+ DeviceOffloadKinds.push_back(OKind);
+}
+
+OffloadAction::HostDependence::HostDependence(Action &A, const ToolChain &TC,
+ const char *BoundArch,
+ const DeviceDependences &DDeps)
+ : HostAction(A), HostToolChain(TC), HostBoundArch(BoundArch) {
+ for (auto K : DDeps.getOffloadKinds())
+ HostOffloadKinds |= K;
+}
void JobAction::anchor() {}
diff --git a/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp b/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp
index 1c2eecd3ccc5..6a2616f0c2a4 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Compilation.cpp
@@ -24,10 +24,13 @@ using namespace llvm::opt;
Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
InputArgList *_Args, DerivedArgList *_TranslatedArgs)
- : TheDriver(D), DefaultToolChain(_DefaultToolChain),
- CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
+ : TheDriver(D), DefaultToolChain(_DefaultToolChain), ActiveOffloadMask(0u),
Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
- ForDiagnostics(false) {}
+ ForDiagnostics(false) {
+ // The offloading host toolchain is the default tool chain.
+ OrderedOffloadingToolchains.insert(
+ std::make_pair(Action::OFK_Host, &DefaultToolChain));
+}
Compilation::~Compilation() {
delete TranslatedArgs;
@@ -42,6 +45,7 @@ Compilation::~Compilation() {
// Free redirections of stdout/stderr.
if (Redirects) {
+ delete Redirects[0];
delete Redirects[1];
delete Redirects[2];
delete [] Redirects;
@@ -163,39 +167,17 @@ int Compilation::ExecuteCommand(const Command &C,
return ExecutionFailed ? 1 : Res;
}
-typedef SmallVectorImpl< std::pair<int, const Command *> > FailingCommandList;
-
-static bool ActionFailed(const Action *A,
- const FailingCommandList &FailingCommands) {
-
- if (FailingCommands.empty())
- return false;
-
- for (FailingCommandList::const_iterator CI = FailingCommands.begin(),
- CE = FailingCommands.end(); CI != CE; ++CI)
- if (A == &(CI->second->getSource()))
- return true;
-
- for (Action::const_iterator AI = A->begin(), AE = A->end(); AI != AE; ++AI)
- if (ActionFailed(*AI, FailingCommands))
- return true;
-
- return false;
-}
-
-static bool InputsOk(const Command &C,
- const FailingCommandList &FailingCommands) {
- return !ActionFailed(&C.getSource(), FailingCommands);
-}
-
-void Compilation::ExecuteJobs(const JobList &Jobs,
- FailingCommandList &FailingCommands) const {
+void Compilation::ExecuteJobs(
+ const JobList &Jobs,
+ SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) const {
for (const auto &Job : Jobs) {
- if (!InputsOk(Job, FailingCommands))
- continue;
const Command *FailingCommand = nullptr;
- if (int Res = ExecuteCommand(Job, FailingCommand))
+ if (int Res = ExecuteCommand(Job, FailingCommand)) {
FailingCommands.push_back(std::make_pair(Res, FailingCommand));
+ // Bail as soon as one command fails, so we don't output duplicate error
+ // messages if we die on e.g. the same file.
+ return;
+ }
}
}
@@ -232,3 +214,7 @@ void Compilation::initCompilationForDiagnostics() {
StringRef Compilation::getSysRoot() const {
return getDriver().SysRoot;
}
+
+void Compilation::Redirect(const StringRef** Redirects) {
+ this->Redirects = Redirects;
+}
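+// Note derived from the destructor above: the Compilation frees all three
+// entries of the Redirects array (stdin, stdout, stderr) as well as the
+// array itself, so callers of Redirect() hand over ownership.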
diff --git a/contrib/llvm/tools/clang/lib/Driver/CrossWindowsToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/CrossWindowsToolChain.cpp
index 57bf89635987..4ebbc533232f 100644
--- a/contrib/llvm/tools/clang/lib/Driver/CrossWindowsToolChain.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/CrossWindowsToolChain.cpp
@@ -62,6 +62,8 @@ AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::sys::path::append(ResourceDir, "include");
addSystemInclude(DriverArgs, CC1Args, ResourceDir);
}
+ for (const auto &P : DriverArgs.getAllArgValues(options::OPT_isystem_after))
+ addSystemInclude(DriverArgs, CC1Args, P);
addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include");
}
diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
index 1e0a48d52928..02f4a9997711 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp
@@ -23,6 +23,7 @@
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
@@ -41,6 +42,7 @@
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <memory>
+#include <utility>
using namespace clang::driver;
using namespace clang;
@@ -49,9 +51,9 @@ using namespace llvm::opt;
Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
DiagnosticsEngine &Diags,
IntrusiveRefCntPtr<vfs::FileSystem> VFS)
- : Opts(createDriverOptTable()), Diags(Diags), VFS(VFS), Mode(GCCMode),
- SaveTemps(SaveTempsNone), LTOMode(LTOK_None),
- ClangExecutable(ClangExecutable),
+ : Opts(createDriverOptTable()), Diags(Diags), VFS(std::move(VFS)),
+ Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone),
+ LTOMode(LTOK_None), ClangExecutable(ClangExecutable),
SysRoot(DEFAULT_SYSROOT), UseStdLib(true),
DefaultTargetTriple(DefaultTargetTriple),
DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),
@@ -146,7 +148,9 @@ InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
}
for (const Arg *A : Args.filtered(options::OPT_UNKNOWN))
- Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
+ Diags.Report(IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl :
+ diag::err_drv_unknown_argument)
+ << A->getAsString(Args);
return Args;
}
@@ -276,6 +280,10 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
DAL->append(A);
}
+ // Enforce -static if -miamcu is present.
+ if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false))
+ DAL->AddFlagArg(0, Opts->getOption(options::OPT_static));
+
// Add a default value of -mlinker-version=, if one was given and the user
// didn't specify one.
#if defined(HOST_LINK_VERSION)
@@ -294,7 +302,8 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
///
/// This routine provides the logic to compute a target triple from various
/// args passed to the driver and the default triple string.
-static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
+static llvm::Triple computeTargetTriple(const Driver &D,
+ StringRef DefaultTargetTriple,
const ArgList &Args,
StringRef DarwinArchName = "") {
// FIXME: Already done in Compilation *Driver::BuildCompilation
@@ -339,8 +348,9 @@ static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
return Target;
// Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
- if (Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
- options::OPT_m32, options::OPT_m16)) {
+ Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
+ options::OPT_m32, options::OPT_m16);
+ if (A) {
llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;
if (A->getOption().matches(options::OPT_m64)) {
@@ -365,6 +375,25 @@ static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
Target.setArch(AT);
}
+ // Handle -miamcu flag.
+ if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
+ if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
+ D.Diag(diag::err_drv_unsupported_opt_for_target) << "-miamcu"
+ << Target.str();
+
+ if (A && !A->getOption().matches(options::OPT_m32))
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << "-miamcu" << A->getBaseArg().getAsString(Args);
+
+ Target.setArch(llvm::Triple::x86);
+ Target.setArchName("i586");
+ Target.setEnvironment(llvm::Triple::UnknownEnvironment);
+ Target.setEnvironmentName("");
+ Target.setOS(llvm::Triple::ELFIAMCU);
+ Target.setVendor(llvm::Triple::UnknownVendor);
+ Target.setVendorName("intel");
+ }
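+ // For example (illustrative): "clang -m32 -miamcu foo.c" retargets the
+ // triple to i586-intel-elfiamcu, while combining -miamcu with -m64 is
+ // rejected via err_drv_argument_not_allowed_with.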
+
return Target;
}
@@ -394,6 +423,33 @@ void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
}
}
+void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
+ InputList &Inputs) {
+
+ //
+ // CUDA
+ //
+ // We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
+ if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
+ return types::isCuda(I.first);
+ })) {
+ const ToolChain &TC = getToolChain(
+ C.getInputArgs(),
+ llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>()
+ ->getTriple()
+ .isArch64Bit()
+ ? "nvptx64-nvidia-cuda"
+ : "nvptx-nvidia-cuda"));
+ C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
+ }
+
+ //
+ // TODO: Add support for other offloading programming models here.
+ //
+
+ return;
+}
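+// For illustration: a compilation with a foo.cu input on a 64-bit host
+// registers the nvptx64-nvidia-cuda device toolchain under Action::OFK_Cuda;
+// a 32-bit host gets nvptx-nvidia-cuda instead.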
+
Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
llvm::PrettyStackTraceString CrashInfo("Compilation construction");
@@ -479,6 +535,28 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
setLTOMode(Args);
+ // Ignore -fembed-bitcode options with LTO
+ // since the output will be bitcode anyway.
+ if (getLTOMode() == LTOK_None) {
+ if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
+ StringRef Name = A->getValue();
+ unsigned Model = llvm::StringSwitch<unsigned>(Name)
+ .Case("off", EmbedNone)
+ .Case("all", EmbedBitcode)
+ .Case("bitcode", EmbedBitcode)
+ .Case("marker", EmbedMarker)
+ .Default(~0U);
+ if (Model == ~0U) {
+ Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
+ << Name;
+ } else
+ BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
+ }
+ } else {
+ // Claim the bitcode option under LTO so that no warning is issued.
+ Args.ClaimAllArgs(options::OPT_fembed_bitcode_EQ);
+ }
+
std::unique_ptr<llvm::opt::InputArgList> UArgs =
llvm::make_unique<InputArgList>(std::move(Args));
@@ -486,16 +564,12 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
// Owned by the host.
- const ToolChain &TC =
- getToolChain(*UArgs, computeTargetTriple(DefaultTargetTriple, *UArgs));
+ const ToolChain &TC = getToolChain(
+ *UArgs, computeTargetTriple(*this, DefaultTargetTriple, *UArgs));
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
- C->setCudaDeviceToolChain(
- &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
- ? "nvptx64-nvidia-cuda"
- : "nvptx-nvidia-cuda")));
if (!HandleImmediateArgs(*C))
return C;
@@ -503,13 +577,15 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
InputList Inputs;
BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
+ // Populate the tool chains for the offloading devices, if any.
+ CreateOffloadingDeviceToolChains(*C, Inputs);
+
// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
else
- BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs,
- C->getActions());
+ BuildActions(*C, C->getArgs(), Inputs, C->getActions());
if (CCCPrintPhases) {
PrintActions(*C);
@@ -623,7 +699,7 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
if (TC.getTriple().isOSBinFormatMachO())
BuildUniversalActions(C, TC, Inputs);
else
- BuildActions(C, TC, C.getArgs(), Inputs, C.getActions());
+ BuildActions(C, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
@@ -947,18 +1023,34 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
os << "\"" << IA->getInputArg().getValue() << "\"";
} else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
os << '"' << BIA->getArchName() << '"' << ", {"
- << PrintActions1(C, *BIA->begin(), Ids) << "}";
- } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
- os << '"' << CDA->getGpuArchName() << '"' << ", {"
- << PrintActions1(C, *CDA->begin(), Ids) << "}";
+ << PrintActions1(C, *BIA->input_begin(), Ids) << "}";
+ } else if (OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
+ bool IsFirst = true;
+ OA->doOnEachDependence(
+ [&](Action *A, const ToolChain *TC, const char *BoundArch) {
+ // E.g. for two CUDA device dependences whose bound arch is sm_20 and
+ // sm_35 this will generate:
+ // "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device"
+ // (nvptx64-nvidia-cuda:sm_35) {#ID}
+ if (!IsFirst)
+ os << ", ";
+ os << '"';
+ if (TC)
+ os << A->getOffloadingKindPrefix();
+ else
+ os << "host";
+ os << " (";
+ os << TC->getTriple().normalize();
+
+ if (BoundArch)
+ os << ":" << BoundArch;
+ os << ")";
+ os << '"';
+ os << " {" << PrintActions1(C, A, Ids) << "}";
+ IsFirst = false;
+ });
} else {
- const ActionList *AL;
- if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
- os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
- << ", gpu binaries ";
- AL = &CHA->getDeviceActions();
- } else
- AL = &A->getInputs();
+ const ActionList *AL = &A->getInputs();
if (AL->size()) {
const char *Prefix = "{";
@@ -971,10 +1063,24 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
os << "{}";
}
+ // Append offload info for all actions other than the offloading action
+ // itself (e.g. (cuda-device, sm_20) or (cuda-host)).
+ std::string offload_str;
+ llvm::raw_string_ostream offload_os(offload_str);
+ if (!isa<OffloadAction>(A)) {
+ auto S = A->getOffloadingKindPrefix();
+ if (!S.empty()) {
+ offload_os << ", (" << S;
+ if (A->getOffloadingArch())
+ offload_os << ", " << A->getOffloadingArch();
+ offload_os << ")";
+ }
+ }
+
unsigned Id = Ids.size();
Ids[A] = Id;
llvm::errs() << Id << ": " << os.str() << ", "
- << types::getTypeName(A->getType()) << "\n";
+ << types::getTypeName(A->getType()) << offload_os.str() << "\n";
return Id;
}
@@ -994,7 +1100,7 @@ static bool ContainsCompileOrAssembleAction(const Action *A) {
isa<AssembleJobAction>(A))
return true;
- for (const Action *Input : *A)
+ for (const Action *Input : A->inputs())
if (ContainsCompileOrAssembleAction(Input))
return true;
@@ -1033,7 +1139,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
- BuildActions(C, TC, Args, BAInputs, SingleActions);
+ BuildActions(C, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
@@ -1091,7 +1197,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
/// \brief Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
- StringRef Value) {
+ StringRef Value, types::ID Ty) {
if (!D.getCheckInputsExist())
return true;
@@ -1111,9 +1217,18 @@ static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
if (llvm::sys::fs::exists(Twine(Path)))
return true;
- if (D.IsCLMode() && !llvm::sys::path::is_absolute(Twine(Path)) &&
- llvm::sys::Process::FindInEnvPath("LIB", Value))
- return true;
+ if (D.IsCLMode()) {
+ if (!llvm::sys::path::is_absolute(Twine(Path)) &&
+ llvm::sys::Process::FindInEnvPath("LIB", Value))
+ return true;
+
+ if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
+ // Arguments to the /link flag might cause the linker to search for object
+ // and library files in paths we don't know about. Don't error in such
+ // cases.
+ return true;
+ }
+ }
D.Diag(clang::diag::err_drv_no_such_file) << Path;
return false;
@@ -1229,19 +1344,19 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
}
}
- if (DiagnoseInputExistence(*this, Args, Value))
+ if (DiagnoseInputExistence(*this, Args, Value, Ty))
Inputs.push_back(std::make_pair(Ty, A));
} else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
StringRef Value = A->getValue();
- if (DiagnoseInputExistence(*this, Args, Value)) {
+ if (DiagnoseInputExistence(*this, Args, Value, types::TY_C)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_C, InputArg));
}
A->claim();
} else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
StringRef Value = A->getValue();
- if (DiagnoseInputExistence(*this, Args, Value)) {
+ if (DiagnoseInputExistence(*this, Args, Value, types::TY_CXX)) {
Arg *InputArg = MakeInputArg(Args, Opts, A->getValue());
Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
}
@@ -1283,32 +1398,43 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
const Arg *InputArg, Action *HostAction,
ActionList &Actions) {
- Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
- options::OPT_cuda_device_only);
- // Host-only compilation case.
- if (PartialCompilationArg &&
- PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only))
- return C.MakeAction<CudaHostAction>(HostAction, ActionList());
+ Arg *PartialCompilationArg = Args.getLastArg(
+ options::OPT_cuda_host_only, options::OPT_cuda_device_only,
+ options::OPT_cuda_compile_host_device);
+ bool CompileHostOnly =
+ PartialCompilationArg &&
+ PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only);
+ bool CompileDeviceOnly =
+ PartialCompilationArg &&
+ PartialCompilationArg->getOption().matches(options::OPT_cuda_device_only);
+
+ if (CompileHostOnly) {
+ OffloadAction::HostDependence HDep(
+ *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+ /*BoundArch=*/nullptr, Action::OFK_Cuda);
+ return C.MakeAction<OffloadAction>(HDep);
+ }
// Collect all cuda_gpu_arch parameters, removing duplicates.
- SmallVector<const char *, 4> GpuArchList;
- llvm::StringSet<> GpuArchNames;
+ SmallVector<CudaArch, 4> GpuArchList;
+ llvm::SmallSet<CudaArch, 4> GpuArchs;
for (Arg *A : Args) {
if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
continue;
A->claim();
- const auto& Arch = A->getValue();
- if (!CudaDeviceAction::IsValidGpuArchName(Arch))
- C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch;
- else if (GpuArchNames.insert(Arch).second)
+ const auto &ArchStr = A->getValue();
+ CudaArch Arch = StringToCudaArch(ArchStr);
+ if (Arch == CudaArch::UNKNOWN)
+ C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+ else if (GpuArchs.insert(Arch).second)
GpuArchList.push_back(Arch);
}
// Default to sm_20 which is the lowest common denominator for supported GPUs.
// sm_20 code should work correctly, if suboptimally, on all newer GPUs.
if (GpuArchList.empty())
- GpuArchList.push_back("sm_20");
+ GpuArchList.push_back(CudaArch::SM_20);
// Replicate inputs for each GPU architecture.
Driver::InputList CudaDeviceInputs;
@@ -1316,61 +1442,81 @@ static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
// Build actions for all device inputs.
- assert(C.getCudaDeviceToolChain() &&
- "Missing toolchain for device-side compilation.");
ActionList CudaDeviceActions;
- C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args,
- CudaDeviceInputs, CudaDeviceActions);
+ C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions);
assert(GpuArchList.size() == CudaDeviceActions.size() &&
"Failed to create actions for all devices");
// Check whether any of device actions stopped before they could generate PTX.
bool PartialCompilation =
llvm::any_of(CudaDeviceActions, [](const Action *a) {
- return a->getKind() != Action::BackendJobClass;
+ return a->getKind() != Action::AssembleJobClass;
});
+ const ToolChain *CudaTC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+
// Figure out what to do with device actions -- pass them as inputs to the
// host action or run each of them independently.
- bool DeviceOnlyCompilation = PartialCompilationArg != nullptr;
- if (PartialCompilation || DeviceOnlyCompilation) {
+ if (PartialCompilation || CompileDeviceOnly) {
// In case of partial or device-only compilation, results of device actions
// are not consumed by the host action, so device actions have to be added
// to the top-level actions list with AtTopLevel=true and run independently.
// -o is ambiguous if we have more than one top-level action.
if (Args.hasArg(options::OPT_o) &&
- (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
+ (!CompileDeviceOnly || GpuArchList.size() > 1)) {
C.getDriver().Diag(
clang::diag::err_drv_output_argument_with_multiple_files);
return nullptr;
}
- for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
- Actions.push_back(C.MakeAction<CudaDeviceAction>(CudaDeviceActions[I],
- GpuArchList[I],
- /* AtTopLevel */ true));
+ for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+ OffloadAction::DeviceDependences DDep;
+ DDep.add(*CudaDeviceActions[I], *CudaTC, CudaArchToString(GpuArchList[I]),
+ Action::OFK_Cuda);
+ Actions.push_back(
+ C.MakeAction<OffloadAction>(DDep, CudaDeviceActions[I]->getType()));
+ }
// Kill host action in case of device-only compilation.
- if (DeviceOnlyCompilation)
+ if (CompileDeviceOnly)
return nullptr;
return HostAction;
}
- // Outputs of device actions during complete CUDA compilation get created
- // with AtTopLevel=false and become inputs for the host action.
+ // If we're not a partial or device-only compilation, we compile each arch to
+ // ptx and assemble to cubin, then feed the cubin *and* the ptx into a device
+ // "link" action, which uses fatbinary to combine these cubins into one
+ // fatbin. The fatbin is then an input to the host compilation.
ActionList DeviceActions;
- for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
- DeviceActions.push_back(
- C.MakeAction<CudaDeviceAction>(CudaDeviceActions[I], GpuArchList[I],
- /* AtTopLevel */ false));
+ for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+ Action *AssembleAction = CudaDeviceActions[I];
+ assert(AssembleAction->getType() == types::TY_Object);
+ assert(AssembleAction->getInputs().size() == 1);
+
+ Action *BackendAction = AssembleAction->getInputs()[0];
+ assert(BackendAction->getType() == types::TY_PP_Asm);
+
+ for (auto &A : {AssembleAction, BackendAction}) {
+ OffloadAction::DeviceDependences DDep;
+ DDep.add(*A, *CudaTC, CudaArchToString(GpuArchList[I]), Action::OFK_Cuda);
+ DeviceActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType()));
+ }
+ }
+ auto FatbinAction =
+ C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
+
// Return a new host action that incorporates original host action and all
// device actions.
- return C.MakeAction<CudaHostAction>(HostAction, DeviceActions);
+ OffloadAction::HostDependence HDep(
+ *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+ /*BoundArch=*/nullptr, Action::OFK_Cuda);
+ OffloadAction::DeviceDependences DDep;
+ DDep.add(*FatbinAction, *CudaTC, /*BoundArch=*/nullptr, Action::OFK_Cuda);
+ return C.MakeAction<OffloadAction>(HDep, DDep);
}
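+// Illustrative flow for a single --cuda-gpu-arch=sm_35 (example arch): the
+// device side yields a BackendJobAction (PTX) and an AssembleJobAction
+// (cubin); both are wrapped in OffloadActions, linked into one TY_CUDA_FATBIN,
+// and that fatbin becomes the device dependence of the returned host action.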
-void Driver::BuildActions(Compilation &C, const ToolChain &TC,
- DerivedArgList &Args, const InputList &Inputs,
- ActionList &Actions) const {
+void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
+ const InputList &Inputs, ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
@@ -1423,6 +1569,61 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC,
}
}
+ // Diagnose unsupported forms of /Yc /Yu. Ignore /Yc/Yu for now if:
+ // * no filename after it
+ // * both /Yc and /Yu passed but with different filenames
+ // * corresponding file not also passed as /FI
+ Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
+ Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
+ if (YcArg && YcArg->getValue()[0] == '\0') {
+ Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YcArg->getSpelling();
+ Args.eraseArg(options::OPT__SLASH_Yc);
+ YcArg = nullptr;
+ }
+ if (YuArg && YuArg->getValue()[0] == '\0') {
+ Diag(clang::diag::warn_drv_ycyu_no_arg_clang_cl) << YuArg->getSpelling();
+ Args.eraseArg(options::OPT__SLASH_Yu);
+ YuArg = nullptr;
+ }
+ if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) {
+ Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl);
+ Args.eraseArg(options::OPT__SLASH_Yc);
+ Args.eraseArg(options::OPT__SLASH_Yu);
+ YcArg = YuArg = nullptr;
+ }
+ if (YcArg || YuArg) {
+ StringRef Val = YcArg ? YcArg->getValue() : YuArg->getValue();
+ bool FoundMatchingInclude = false;
+ for (const Arg *Inc : Args.filtered(options::OPT_include)) {
+ // FIXME: Do case-insensitive matching and consider / and \ as equal.
+ if (Inc->getValue() == Val)
+ FoundMatchingInclude = true;
+ }
+ if (!FoundMatchingInclude) {
+ Diag(clang::diag::warn_drv_ycyu_no_fi_arg_clang_cl)
+ << (YcArg ? YcArg : YuArg)->getSpelling();
+ Args.eraseArg(options::OPT__SLASH_Yc);
+ Args.eraseArg(options::OPT__SLASH_Yu);
+ YcArg = YuArg = nullptr;
+ }
+ }
+ if (YcArg && Inputs.size() > 1) {
+ Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
+ Args.eraseArg(options::OPT__SLASH_Yc);
+ YcArg = nullptr;
+ }
+ if (Args.hasArg(options::OPT__SLASH_Y_)) {
+ // /Y- disables all pch handling. Rather than check for it everywhere,
+ // just remove clang-cl pch-related flags here.
+ Args.eraseArg(options::OPT__SLASH_Fp);
+ Args.eraseArg(options::OPT__SLASH_Yc);
+ Args.eraseArg(options::OPT__SLASH_Yu);
+ YcArg = YuArg = nullptr;
+ }
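+ // For example (illustrative): "clang-cl /Ycpch.h /FIpch.h foo.cc" keeps
+ // YcArg and triggers the separate precompile pipeline below; /Yc without a
+ // filename or without a matching /FI is dropped with a warning.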
+
+ // Track the host offload kinds used on this compilation.
+ unsigned CompilationActiveOffloadHostKinds = 0u;
+
// Construct the actions to perform.
ActionList LinkerInputs;
@@ -1466,12 +1667,34 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC,
continue;
}
+ if (YcArg) {
+ // Add a separate precompile phase for the compile phase.
+ if (FinalPhase >= phases::Compile) {
+ llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL;
+ types::getCompilationPhases(types::TY_CXXHeader, PCHPL);
+ Arg *PchInputArg = MakeInputArg(Args, Opts, YcArg->getValue());
+
+ // Build the pipeline for the pch file.
+ Action *ClangClPch = C.MakeAction<InputAction>(*PchInputArg, InputType);
+ for (phases::ID Phase : PCHPL)
+ ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
+ assert(ClangClPch);
+ Actions.push_back(ClangClPch);
+ // The driver currently exits after the first failed command. This
+ // code relies on that behavior: if the PCH generation fails, the
+ // main compilation won't run.
+ }
+ }
+
phases::ID CudaInjectionPhase =
(phases::Compile < FinalPhase &&
llvm::find(PL, phases::Compile) != PL.end())
? phases::Compile
: FinalPhase;
+ // Track the host offload kinds used on this input.
+ unsigned InputActiveOffloadHostKinds = 0u;
+
// Build the pipeline for this file.
Action *Current = C.MakeAction<InputAction>(*InputArg, InputType);
for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
@@ -1497,27 +1720,42 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC,
continue;
// Otherwise construct the appropriate action.
- Current = ConstructPhaseAction(C, TC, Args, Phase, Current);
+ Current = ConstructPhaseAction(C, Args, Phase, Current);
if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
Current = buildCudaActions(C, Args, InputArg, Current, Actions);
if (!Current)
break;
+
+ // We produced a CUDA action for this input, so the host has to support
+ // CUDA.
+ InputActiveOffloadHostKinds |= Action::OFK_Cuda;
+ CompilationActiveOffloadHostKinds |= Action::OFK_Cuda;
}
if (Current->getType() == types::TY_Nothing)
break;
}
- // If we ended with something, add to the output list.
- if (Current)
+ // If we ended with something, add to the output list. Also, propagate the
+ // offload information to the top-level host action related with the current
+ // input.
+ if (Current) {
+ if (InputActiveOffloadHostKinds)
+ Current->propagateHostOffloadInfo(InputActiveOffloadHostKinds,
+ /*BoundArch=*/nullptr);
Actions.push_back(Current);
+ }
}
- // Add a link action if necessary.
- if (!LinkerInputs.empty())
+ // Add a link action if necessary and propagate the offload information for
+ // the current compilation.
+ if (!LinkerInputs.empty()) {
Actions.push_back(
C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image));
+ Actions.back()->propagateHostOffloadInfo(CompilationActiveOffloadHostKinds,
+ /*BoundArch=*/nullptr);
+ }
// If we are linking, claim any options which are obviously only used for
// compilation.
@@ -1529,14 +1767,14 @@ void Driver::BuildActions(Compilation &C, const ToolChain &TC,
// Claim ignored clang-cl options.
Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
- // Claim --cuda-host-only arg which may be passed to non-CUDA
- // compilations and should not trigger warnings there.
+ // Claim --cuda-host-only and --cuda-compile-host-device, which may be passed
+ // to non-CUDA compilations and should not trigger warnings there.
Args.ClaimAllArgs(options::OPT_cuda_host_only);
+ Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
-Action *Driver::ConstructPhaseAction(Compilation &C, const ToolChain &TC,
- const ArgList &Args, phases::ID Phase,
- Action *Input) const {
+Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
+ phases::ID Phase, Action *Input) const {
llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
// Build the appropriate action.
switch (Phase) {
@@ -1600,7 +1838,7 @@ Action *Driver::ConstructPhaseAction(Compilation &C, const ToolChain &TC,
return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
}
case phases::Assemble:
- return C.MakeAction<AssembleJobAction>(Input, types::TY_Object);
+ return C.MakeAction<AssembleJobAction>(std::move(Input), types::TY_Object);
}
llvm_unreachable("invalid phase in ConstructPhaseAction");
@@ -1632,6 +1870,8 @@ void Driver::BuildJobs(Compilation &C) const {
if (A->getOption().matches(options::OPT_arch))
ArchNames.insert(A->getValue());
+ // Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
+ std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
for (Action *A : C.getActions()) {
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
@@ -1651,7 +1891,8 @@ void Driver::BuildJobs(Compilation &C) const {
/*BoundArch*/ nullptr,
/*AtTopLevel*/ true,
/*MultipleArchs*/ ArchNames.size() > 1,
- /*LinkingOutput*/ LinkingOutput);
+ /*LinkingOutput*/ LinkingOutput, CachedResults,
+ /*BuildForOffloadDevice*/ false);
}
// If the user passed -Qunused-arguments or there were errors, don't warn
@@ -1663,8 +1904,9 @@ void Driver::BuildJobs(Compilation &C) const {
// Claim -### here.
(void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
- // Claim --driver-mode, it was handled earlier.
+ // Claim --driver-mode, --rsp-quoting, it was handled earlier.
(void)C.getArgs().hasArg(options::OPT_driver_mode);
+ (void)C.getArgs().hasArg(options::OPT_rsp_quoting);
for (Arg *A : C.getArgs()) {
// FIXME: It would be nice to be able to send the argument to the
@@ -1691,74 +1933,123 @@ void Driver::BuildJobs(Compilation &C) const {
continue;
}
- Diag(clang::diag::warn_drv_unused_argument)
- << A->getAsString(C.getArgs());
+ // In clang-cl, don't mention unknown arguments here since they have
+ // already been warned about.
+ if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN))
+ Diag(clang::diag::warn_drv_unused_argument)
+ << A->getAsString(C.getArgs());
}
}
}
-
+/// Collapse an offloading action, looking for a job of the given type. The
+/// input action is changed to the input of the collapsed sequence. If a
+/// collapse actually occurred, return the corresponding offloading action;
+/// otherwise return null.
+template <typename T>
+static OffloadAction *collapseOffloadingAction(Action *&CurAction) {
+ if (!CurAction)
+ return nullptr;
+ if (auto *OA = dyn_cast<OffloadAction>(CurAction)) {
+ if (OA->hasHostDependence())
+ if (auto *HDep = dyn_cast<T>(OA->getHostDependence())) {
+ CurAction = HDep;
+ return OA;
+ }
+ if (OA->hasSingleDeviceDependence())
+ if (auto *DDep = dyn_cast<T>(OA->getSingleDeviceDependence())) {
+ CurAction = DDep;
+ return OA;
+ }
+ }
+ return nullptr;
+}
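+// Typical use (see selectToolForJob below):
+// collapseOffloadingAction<BackendJobAction>(BackendJA) steps BackendJA
+// through a wrapping OffloadAction and returns the wrapper so its device
+// dependences can be rebuilt after the phases are combined.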
// Returns a Tool for a given JobAction. In case the action and its
// predecessors can be combined, updates Inputs with the inputs of the
// first combined action. If any of the collapsed actions are
// OffloadActions, appends them to CollapsedOffloadAction so the
// caller can deal with the extra handling such actions require.
static const Tool *selectToolForJob(Compilation &C, bool SaveTemps,
- const ToolChain *TC, const JobAction *JA,
+ bool EmbedBitcode, const ToolChain *TC,
+ const JobAction *JA,
const ActionList *&Inputs,
- const CudaHostAction *&CollapsedCHA) {
+ ActionList &CollapsedOffloadAction) {
const Tool *ToolForJob = nullptr;
- CollapsedCHA = nullptr;
+ CollapsedOffloadAction.clear();
// See if we should look for a compiler with an integrated assembler. We match
// bottom up, so what we are actually looking for is an assembler job with a
// compiler input.
+ // Look through offload actions between assembler and backend actions.
+ Action *BackendJA = (isa<AssembleJobAction>(JA) && Inputs->size() == 1)
+ ? *Inputs->begin()
+ : nullptr;
+ auto *BackendOA = collapseOffloadingAction<BackendJobAction>(BackendJA);
+
if (TC->useIntegratedAs() && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_via_file_asm) &&
!C.getArgs().hasArg(options::OPT__SLASH_FA) &&
- !C.getArgs().hasArg(options::OPT__SLASH_Fa) &&
- isa<AssembleJobAction>(JA) && Inputs->size() == 1 &&
- isa<BackendJobAction>(*Inputs->begin())) {
- // A BackendJob is always preceded by a CompileJob, and without
- // -save-temps they will always get combined together, so instead of
- // checking the backend tool, check if the tool for the CompileJob
- // has an integrated assembler.
- const ActionList *BackendInputs = &(*Inputs)[0]->getInputs();
- // Compile job may be wrapped in CudaHostAction, extract it if
- // that's the case and update CollapsedCHA if we combine phases.
- CudaHostAction *CHA = dyn_cast<CudaHostAction>(*BackendInputs->begin());
- JobAction *CompileJA =
- cast<CompileJobAction>(CHA ? *CHA->begin() : *BackendInputs->begin());
- assert(CompileJA && "Backend job is not preceeded by compile job.");
- const Tool *Compiler = TC->SelectTool(*CompileJA);
+ !C.getArgs().hasArg(options::OPT__SLASH_Fa) && BackendJA &&
+ isa<BackendJobAction>(BackendJA)) {
+ // A BackendJob is always preceded by a CompileJob, and without -save-temps
+ // or -fembed-bitcode, they will always get combined together, so instead of
+ // checking the backend tool, check if the tool for the CompileJob has an
+ // integrated assembler. For -fembed-bitcode, CompileJob is still used to
+ // look up tools for BackendJob, but they need to match before we can split
+ // them.
+
+ // Look through offload actions between backend and compile actions.
+ Action *CompileJA = *BackendJA->getInputs().begin();
+ auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA);
+
+ assert(CompileJA && isa<CompileJobAction>(CompileJA) &&
+ "Backend job is not preceeded by compile job.");
+ const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA));
if (!Compiler)
return nullptr;
+ // When using -fembed-bitcode, the same tool (clang) is required for both
+ // CompileJA and BackendJA; otherwise, the two stages are combined.
+ if (EmbedBitcode) {
+ JobAction *InputJA = cast<JobAction>(*Inputs->begin());
+ const Tool *BackendTool = TC->SelectTool(*InputJA);
+ if (BackendTool == Compiler)
+ CompileJA = InputJA;
+ }
if (Compiler->hasIntegratedAssembler()) {
Inputs = &CompileJA->getInputs();
ToolForJob = Compiler;
- CollapsedCHA = CHA;
+ // Save the collapsed offload actions because they may still contain
+ // device actions.
+ if (CompileOA)
+ CollapsedOffloadAction.push_back(CompileOA);
+ if (BackendOA)
+ CollapsedOffloadAction.push_back(BackendOA);
}
}
// A backend job should always be combined with the preceding compile job
- // unless OPT_save_temps is enabled and the compiler is capable of emitting
- // LLVM IR as an intermediate output.
+ // unless OPT_save_temps or OPT_fembed_bitcode is enabled and the compiler is
+ // capable of emitting LLVM IR as an intermediate output.
if (isa<BackendJobAction>(JA)) {
// Check if the compiler supports emitting LLVM IR.
assert(Inputs->size() == 1);
- // Compile job may be wrapped in CudaHostAction, extract it if
- // that's the case and update CollapsedCHA if we combine phases.
- CudaHostAction *CHA = dyn_cast<CudaHostAction>(*Inputs->begin());
- JobAction *CompileJA =
- cast<CompileJobAction>(CHA ? *CHA->begin() : *Inputs->begin());
- assert(CompileJA && "Backend job is not preceeded by compile job.");
- const Tool *Compiler = TC->SelectTool(*CompileJA);
+
+ // Look through offload actions between backend and compile actions.
+ Action *CompileJA = *JA->getInputs().begin();
+ auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA);
+
+ assert(CompileJA && isa<CompileJobAction>(CompileJA) &&
+ "Backend job is not preceeded by compile job.");
+ const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA));
if (!Compiler)
return nullptr;
- if (!Compiler->canEmitIR() || !SaveTemps) {
+ if (!Compiler->canEmitIR() ||
+ (!SaveTemps && !EmbedBitcode)) {
Inputs = &CompileJA->getInputs();
ToolForJob = Compiler;
- CollapsedCHA = CHA;
+
+ if (CompileOA)
+ CollapsedOffloadAction.push_back(CompileOA);
}
}
@@ -1769,33 +2060,112 @@ static const Tool *selectToolForJob(Compilation &C, bool SaveTemps,
// See if we should use an integrated preprocessor. We do so when we have
// exactly one input, since this is the only use case we care about
// (irrelevant since we don't support combine yet).
- if (Inputs->size() == 1 && isa<PreprocessJobAction>(*Inputs->begin()) &&
+
+ // Look through offload actions after preprocessing.
+ Action *PreprocessJA = (Inputs->size() == 1) ? *Inputs->begin() : nullptr;
+ auto *PreprocessOA =
+ collapseOffloadingAction<PreprocessJobAction>(PreprocessJA);
+
+ if (PreprocessJA && isa<PreprocessJobAction>(PreprocessJA) &&
!C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
!C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
!C.getArgs().hasArg(options::OPT_rewrite_objc) &&
- ToolForJob->hasIntegratedCPP())
- Inputs = &(*Inputs)[0]->getInputs();
+ ToolForJob->hasIntegratedCPP()) {
+ Inputs = &PreprocessJA->getInputs();
+ if (PreprocessOA)
+ CollapsedOffloadAction.push_back(PreprocessOA);
+ }
return ToolForJob;
}
-InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A,
- const ToolChain *TC, const char *BoundArch,
- bool AtTopLevel, bool MultipleArchs,
- const char *LinkingOutput) const {
+InputInfo Driver::BuildJobsForAction(
+ Compilation &C, const Action *A, const ToolChain *TC, const char *BoundArch,
+ bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
+ std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
+ bool BuildForOffloadDevice) const {
+ // The bound arch is not necessarily represented in the toolchain's triple --
+ // for example, armv7 and armv7s both map to the same triple -- so we need
+ // both in our map.
+ std::string TriplePlusArch = TC->getTriple().normalize();
+ if (BoundArch) {
+ TriplePlusArch += "-";
+ TriplePlusArch += BoundArch;
+ }
+ std::pair<const Action *, std::string> ActionTC = {A, TriplePlusArch};
+ auto CachedResult = CachedResults.find(ActionTC);
+ if (CachedResult != CachedResults.end()) {
+ return CachedResult->second;
+ }
+ InputInfo Result = BuildJobsForActionNoCache(
+ C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
+ CachedResults, BuildForOffloadDevice);
+ CachedResults[ActionTC] = Result;
+ return Result;
+}
+
+InputInfo Driver::BuildJobsForActionNoCache(
+ Compilation &C, const Action *A, const ToolChain *TC, const char *BoundArch,
+ bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
+ std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
+ bool BuildForOffloadDevice) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
- InputInfoList CudaDeviceInputInfos;
- if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
- // Append outputs of device jobs to the input list.
- for (const Action *DA : CHA->getDeviceActions()) {
- CudaDeviceInputInfos.push_back(
- BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
- /*MultipleArchs*/ false, LinkingOutput));
+ InputInfoList OffloadDependencesInputInfo;
+ if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {
+ // The offload action is expected to be used in four different situations.
+ //
+ // a) Set a toolchain/architecture/kind for a host action:
+ // Host Action 1 -> OffloadAction -> Host Action 2
+ //
+ // b) Set a toolchain/architecture/kind for a device action;
+ // Device Action 1 -> OffloadAction -> Device Action 2
+ //
+ // c) Specify device dependences to a host action;
+ // Device Action 1 _
+ // \
+ // Host Action 1 ---> OffloadAction -> Host Action 2
+ //
+ // d) Specify a host dependence to a device action.
+ // Host Action 1 _
+ // \
+ // Device Action 1 ---> OffloadAction -> Device Action 2
+ //
+ // For a) and b), we just return the job generated for the dependence. For
+ // c) and d) we override the current action with the host/device dependence
+ // if the current toolchain is host/device and set the offload dependences
+ // info with the jobs obtained from the device/host dependence(s).
+
+ // If there is a single device option, just generate the job for it.
+ if (OA->hasSingleDeviceDependence()) {
+ InputInfo DevA;
+ OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
+ const char *DepBoundArch) {
+ DevA =
+ BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,
+ /*MultipleArchs*/ !!DepBoundArch, LinkingOutput,
+ CachedResults, /*BuildForOffloadDevice=*/true);
+ });
+ return DevA;
}
- // Override current action with a real host compile action and continue
- // processing it.
- A = *CHA->begin();
+
+ // If 'Action 2' is host, we generate jobs for the device dependences and
+ // override the current action with the host dependence. Otherwise, we
+ // generate the host dependences and override the action with the device
+ // dependence. The dependences therefore can't be top-level actions.
+ OA->doOnEachDependence(
+ /*IsHostDependence=*/BuildForOffloadDevice,
+ [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
+ OffloadDependencesInputInfo.push_back(BuildJobsForAction(
+ C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
+ /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
+ /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() !=
+ Action::OFK_None));
+ });
+
+ A = BuildForOffloadDevice
+ ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
+ : OA->getHostDependence();
}
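+ // For illustration: a full CUDA compile hits case c) here. The fatbin link
+ // job is emitted into OffloadDependencesInputInfo, and A becomes the host
+ // compile action, so the fatbin output is appended to the host inputs.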
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
@@ -1815,43 +2185,41 @@ InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A,
const char *ArchName = BAA->getArchName();
if (ArchName)
- TC = &getToolChain(
- C.getArgs(),
- computeTargetTriple(DefaultTargetTriple, C.getArgs(), ArchName));
+ TC = &getToolChain(C.getArgs(),
+ computeTargetTriple(*this, DefaultTargetTriple,
+ C.getArgs(), ArchName));
else
TC = &C.getDefaultToolChain();
- return BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel,
- MultipleArchs, LinkingOutput);
+ return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,
+ MultipleArchs, LinkingOutput, CachedResults,
+ BuildForOffloadDevice);
}
- if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
- // Initial processing of CudaDeviceAction carries host params.
- // Call BuildJobsForAction() again, now with correct device parameters.
- assert(CDA->getGpuArchName() && "No GPU name in device action.");
- return BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(),
- CDA->getGpuArchName(), CDA->isAtTopLevel(),
- /*MultipleArchs*/ true, LinkingOutput);
- }
const ActionList *Inputs = &A->getInputs();
const JobAction *JA = cast<JobAction>(A);
- const CudaHostAction *CollapsedCHA = nullptr;
+ ActionList CollapsedOffloadActions;
+
const Tool *T =
- selectToolForJob(C, isSaveTempsEnabled(), TC, JA, Inputs, CollapsedCHA);
+ selectToolForJob(C, isSaveTempsEnabled(), embedBitcodeEnabled(), TC, JA,
+ Inputs, CollapsedOffloadActions);
if (!T)
return InputInfo();
- // If we've collapsed action list that contained CudaHostAction we
- // need to build jobs for device-side inputs it may have held.
- if (CollapsedCHA) {
- for (const Action *DA : CollapsedCHA->getDeviceActions()) {
- CudaDeviceInputInfos.push_back(
- BuildJobsForAction(C, DA, TC, "", AtTopLevel,
- /*MultipleArchs*/ false, LinkingOutput));
- }
- }
+ // If we've collapsed action list that contained OffloadAction we
+ // need to build jobs for host/device-side inputs it may have held.
+ for (const auto *OA : CollapsedOffloadActions)
+ cast<OffloadAction>(OA)->doOnEachDependence(
+ /*IsHostDependence=*/BuildForOffloadDevice,
+ [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
+ OffloadDependencesInputInfo.push_back(BuildJobsForAction(
+ C, DepA, DepTC, DepBoundArch, AtTopLevel,
+ /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
+ /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() !=
+ Action::OFK_None));
+ });
// Only use pipes when there is exactly one input.
InputInfoList InputInfos;
@@ -1861,9 +2229,9 @@ InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A,
// FIXME: Clean this up.
bool SubJobAtTopLevel =
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
- InputInfos.push_back(BuildJobsForAction(C, Input, TC, BoundArch,
- SubJobAtTopLevel, MultipleArchs,
- LinkingOutput));
+ InputInfos.push_back(BuildJobsForAction(
+ C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
+ CachedResults, BuildForOffloadDevice));
}
// Always use the first input as the base input.
@@ -1874,9 +2242,10 @@ InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A,
if (JA->getType() == types::TY_dSYM)
BaseInput = InputInfos[0].getFilename();
- // Append outputs of cuda device jobs to the input list
- if (CudaDeviceInputInfos.size())
- InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end());
+ // Append outputs of offload device jobs to the input list
+ if (!OffloadDependencesInputInfo.empty())
+ InputInfos.append(OffloadDependencesInputInfo.begin(),
+ OffloadDependencesInputInfo.end());
// Determine the place to write output to, if any.
InputInfo Result;
@@ -1884,7 +2253,8 @@ InputInfo Driver::BuildJobsForAction(Compilation &C, const Action *A,
Result = InputInfo(A, BaseInput);
else
Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
- AtTopLevel, MultipleArchs),
+ AtTopLevel, MultipleArchs,
+ TC->getTriple().normalize()),
BaseInput);
if (CCCPrintBindings && !CCGenDiagnostics) {
@@ -1944,7 +2314,8 @@ static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
const char *BaseInput,
const char *BoundArch, bool AtTopLevel,
- bool MultipleArchs) const {
+ bool MultipleArchs,
+ StringRef NormalizedTriple) const {
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
@@ -2030,11 +2401,15 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
} else if (MultipleArchs && BoundArch) {
SmallString<128> Output(getDefaultImageName());
+ Output += JA.getOffloadingFileNamePrefix(NormalizedTriple);
Output += "-";
Output.append(BoundArch);
NamedOutput = C.getArgs().MakeArgString(Output.c_str());
- } else
+ } else {
NamedOutput = getDefaultImageName();
+ }
+ } else if (JA.getType() == types::TY_PCH && IsCLMode()) {
+ NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName).c_str());
} else {
const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
assert(Suffix && "All types used for output should have a suffix.");
@@ -2043,6 +2418,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
if (!types::appendSuffixForType(JA.getType()))
End = BaseName.rfind('.');
SmallString<128> Suffixed(BaseName.substr(0, End));
+ Suffixed += JA.getOffloadingFileNamePrefix(NormalizedTriple);
if (MultipleArchs && BoundArch) {
Suffixed += "-";
Suffixed.append(BoundArch);
@@ -2088,7 +2464,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
}
// As an annoying special case, PCH generation doesn't strip the pathname.
- if (JA.getType() == types::TY_PCH) {
+ if (JA.getType() == types::TY_PCH && !IsCLMode()) {
llvm::sys::path::remove_filename(BasePath);
if (BasePath.empty())
BasePath = NamedOutput;
@@ -2200,12 +2576,34 @@ std::string Driver::GetTemporaryPath(StringRef Prefix,
return Path.str();
}
+std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
+ SmallString<128> Output;
+ if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) {
+ // FIXME: If anybody needs it, implement this obscure rule:
+ // "If you specify a directory without a file name, the default file name
+ // is VCx0.pch., where x is the major version of Visual C++ in use."
+ Output = FpArg->getValue();
+
+ // "If you do not specify an extension as part of the path name, an
+ // extension of .pch is assumed. "
+ if (!llvm::sys::path::has_extension(Output))
+ Output += ".pch";
+ } else {
+ Output = BaseName;
+ llvm::sys::path::replace_extension(Output, ".pch");
+ }
+ return Output.str();
+}
+
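
For illustration, a minimal std-only sketch of the extension rule GetClPchPath implements above; pchNameForFp is a hypothetical helper, not part of the driver:

    #include <string>

    // Hypothetical helper mirroring the rule above: honor an explicit /Fp
    // value, appending ".pch" only when its last component has no extension.
    static std::string pchNameForFp(std::string Out) {
      std::string::size_type Slash = Out.find_last_of("/\\");
      std::string::size_type Dot = Out.find_last_of('.');
      if (Dot == std::string::npos ||
          (Slash != std::string::npos && Dot < Slash))
        Out += ".pch";
      return Out;
    }
    // pchNameForFp("foo")       -> "foo.pch"
    // pchNameForFp("foo.pch")   -> "foo.pch"
    // pchNameForFp("dir.d/foo") -> "dir.d/foo.pch"
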
const ToolChain &Driver::getToolChain(const ArgList &Args,
const llvm::Triple &Target) const {
ToolChain *&TC = ToolChains[Target.str()];
if (!TC) {
switch (Target.getOS()) {
+ case llvm::Triple::Haiku:
+ TC = new toolchains::Haiku(*this, Target, Args);
+ break;
case llvm::Triple::CloudABI:
TC = new toolchains::CloudABI(*this, Target, Args);
break;
@@ -2235,6 +2633,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = new toolchains::Minix(*this, Target, Args);
break;
case llvm::Triple::Linux:
+ case llvm::Triple::ELFIAMCU:
if (Target.getArch() == llvm::Triple::hexagon)
TC = new toolchains::HexagonToolChain(*this, Target, Args);
else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
@@ -2290,6 +2689,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
case llvm::Triple::hexagon:
TC = new toolchains::HexagonToolChain(*this, Target, Args);
break;
+ case llvm::Triple::lanai:
+ TC = new toolchains::LanaiToolChain(*this, Target, Args);
+ break;
case llvm::Triple::xcore:
TC = new toolchains::XCoreToolChain(*this, Target, Args);
break;
@@ -2314,7 +2716,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
// Say "no" if there is not exactly one input of a type clang understands.
- if (JA.size() != 1 || !types::isAcceptedByClang((*JA.begin())->getType()))
+ if (JA.size() != 1 ||
+ !types::isAcceptedByClang((*JA.input_begin())->getType()))
return false;
// And say "no" if this is not a kind of action clang understands.
@@ -2363,6 +2766,34 @@ bool Driver::GetReleaseVersion(const char *Str, unsigned &Major,
return true;
}
+/// Parse digits from a string \p Str and fill \p Digits with
+/// the parsed numbers. This method assumes that the maximum number of
+/// digits to look for is Digits.size().
+///
+/// \return True if the entire string was parsed and there are
+/// no extra characters remaining at the end.
+bool Driver::GetReleaseVersion(const char *Str,
+ MutableArrayRef<unsigned> Digits) {
+ if (*Str == '\0')
+ return false;
+
+ char *End;
+ unsigned CurDigit = 0;
+ while (CurDigit < Digits.size()) {
+ unsigned Digit = (unsigned)strtol(Str, &End, 10);
+ Digits[CurDigit] = Digit;
+ if (*Str != '\0' && *End == '\0')
+ return true;
+ if (*End != '.' || Str == End)
+ return false;
+ Str = End + 1;
+ CurDigit++;
+ }
+
+ // More digits than requested, bail out...
+ return false;
+}
+
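
A standalone, std-only rendering of the loop above, showing how a version string fills the digit array and why trailing text fails (parseRelease is a hypothetical stand-in for Driver::GetReleaseVersion):

    #include <cstdio>
    #include <cstdlib>

    // Parse up to N dot-separated numeric components; succeed only when the
    // string is fully consumed, exactly as in the loop above.
    static bool parseRelease(const char *Str, unsigned *Digits, unsigned N) {
      if (*Str == '\0')
        return false;
      char *End;
      for (unsigned I = 0; I < N; ++I) {
        Digits[I] = (unsigned)std::strtol(Str, &End, 10);
        if (*Str != '\0' && *End == '\0')
          return true;                   // whole string consumed
        if (*End != '.' || Str == End)
          return false;                  // junk, or an empty component
        Str = End + 1;
      }
      return false;                      // more components than requested
    }

    int main() {
      unsigned D[3] = {0, 0, 0};
      std::printf("%d\n", parseRelease("4.9.2", D, 3));  // 1; D = {4, 9, 2}
      std::printf("%d\n", parseRelease("4.9.x", D, 3));  // 0; 'x' not a digit
    }
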
std::pair<unsigned, unsigned> Driver::getIncludeExcludeOptionFlagMasks() const {
unsigned IncludedFlagsBitmask = 0;
unsigned ExcludedFlagsBitmask = options::NoDriverOption;
diff --git a/contrib/llvm/tools/clang/lib/Driver/Job.cpp b/contrib/llvm/tools/clang/lib/Driver/Job.cpp
index 22904e5398a0..2d99b1f22385 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Job.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Job.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -40,17 +41,16 @@ static int skipArgs(const char *Flag, bool HaveCrashVFS) {
// These flags are all of the form -Flag <Arg> and are treated as two
// arguments. Therefore, we need to skip the flag and the next argument.
bool Res = llvm::StringSwitch<bool>(Flag)
- .Cases("-I", "-MF", "-MT", "-MQ", true)
+ .Cases("-MF", "-MT", "-MQ", "-serialize-diagnostic-file", true)
.Cases("-o", "-coverage-file", "-dependency-file", true)
.Cases("-fdebug-compilation-dir", "-idirafter", true)
.Cases("-include", "-include-pch", "-internal-isystem", true)
.Cases("-internal-externc-isystem", "-iprefix", "-iwithprefix", true)
.Cases("-iwithprefixbefore", "-isystem", "-iquote", true)
- .Cases("-resource-dir", "-serialize-diagnostic-file", true)
.Cases("-dwarf-debug-flags", "-ivfsoverlay", true)
.Cases("-header-include-file", "-diagnostic-log-file", true)
// Some include flags shouldn't be skipped if we have a crash VFS
- .Case("-isysroot", !HaveCrashVFS)
+ .Cases("-isysroot", "-I", "-F", "-resource-dir", !HaveCrashVFS)
.Default(false);
// Match found.
@@ -71,7 +71,8 @@ static int skipArgs(const char *Flag, bool HaveCrashVFS) {
// These flags are treated as a single argument (e.g., -F<Dir>).
StringRef FlagRef(Flag);
- if (FlagRef.startswith("-F") || FlagRef.startswith("-I") ||
+ if ((!HaveCrashVFS &&
+ (FlagRef.startswith("-F") || FlagRef.startswith("-I"))) ||
FlagRef.startswith("-fmodules-cache-path="))
return 1;
@@ -194,6 +195,18 @@ void Command::Print(raw_ostream &OS, const char *Terminator, bool Quote,
printArg(OS, "-ivfsoverlay", Quote);
OS << ' ';
printArg(OS, CrashInfo->VFSPath.str().c_str(), Quote);
+
+ // Insert -fmodules-cache-path and use the relative module directory
+ // <name>.cache/vfs/modules where we already dumped the modules.
+ SmallString<128> RelModCacheDir = llvm::sys::path::parent_path(
+ llvm::sys::path::parent_path(CrashInfo->VFSPath));
+ llvm::sys::path::append(RelModCacheDir, "modules");
+
+ std::string ModCachePath = "-fmodules-cache-path=";
+ ModCachePath.append(RelModCacheDir.c_str());
+
+ OS << ' ';
+ printArg(OS, ModCachePath.c_str(), Quote);
}
if (ResponseFile != nullptr) {
@@ -297,6 +310,29 @@ int FallbackCommand::Execute(const StringRef **Redirects, std::string *ErrMsg,
return SecondaryStatus;
}
+ForceSuccessCommand::ForceSuccessCommand(const Action &Source_,
+ const Tool &Creator_,
+ const char *Executable_,
+ const ArgStringList &Arguments_,
+ ArrayRef<InputInfo> Inputs)
+ : Command(Source_, Creator_, Executable_, Arguments_, Inputs) {}
+
+void ForceSuccessCommand::Print(raw_ostream &OS, const char *Terminator,
+ bool Quote, CrashReportInfo *CrashInfo) const {
+ Command::Print(OS, "", Quote, CrashInfo);
+ OS << " || (exit 0)" << Terminator;
+}
+
+int ForceSuccessCommand::Execute(const StringRef **Redirects,
+ std::string *ErrMsg,
+ bool *ExecutionFailed) const {
+ int Status = Command::Execute(Redirects, ErrMsg, ExecutionFailed);
+ (void)Status;
+ if (ExecutionFailed)
+ *ExecutionFailed = false;
+ return 0;
+}
+
void JobList::Print(raw_ostream &OS, const char *Terminator, bool Quote,
CrashReportInfo *CrashInfo) const {
for (const auto &Job : *this)
diff --git a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp
index 68747155b81c..b8de5ad49182 100644
--- a/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/MSVCToolChain.cpp
@@ -19,6 +19,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
@@ -71,6 +72,11 @@ bool MSVCToolChain::IsUnwindTablesDefault() const {
// Emit unwind tables by default on Win64. All non-x86_32 Windows platforms
// such as ARM and PPC actually require unwind tables, but LLVM doesn't know
// how to generate them yet.
+
+ // Don't emit unwind tables by default for MachO targets.
+ if (getTriple().isOSBinFormatMachO())
+ return false;
+
return getArch() == llvm::Triple::x86_64;
}
@@ -89,23 +95,31 @@ bool MSVCToolChain::isPICDefaultForced() const {
#ifdef USE_WIN32
static bool readFullStringValue(HKEY hkey, const char *valueName,
std::string &value) {
- // FIXME: We should be using the W versions of the registry functions, but
- // doing so requires UTF8 / UTF16 conversions similar to how we handle command
- // line arguments. The UTF8 conversion functions are not exposed publicly
- // from LLVM though, so in order to do this we will probably need to create
- // a registry abstraction in LLVMSupport that is Windows only.
+ std::wstring WideValueName;
+ if (!llvm::ConvertUTF8toWide(valueName, WideValueName))
+ return false;
+
DWORD result = 0;
DWORD valueSize = 0;
DWORD type = 0;
// First just query for the required size.
- result = RegQueryValueEx(hkey, valueName, NULL, &type, NULL, &valueSize);
- if (result != ERROR_SUCCESS || type != REG_SZ)
+ result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, &type, NULL,
+ &valueSize);
+ if (result != ERROR_SUCCESS || type != REG_SZ || !valueSize)
return false;
std::vector<BYTE> buffer(valueSize);
- result = RegQueryValueEx(hkey, valueName, NULL, NULL, &buffer[0], &valueSize);
- if (result == ERROR_SUCCESS)
- value.assign(reinterpret_cast<const char *>(buffer.data()));
- return result;
+ result = RegQueryValueExW(hkey, WideValueName.c_str(), NULL, NULL, &buffer[0],
+ &valueSize);
+ if (result == ERROR_SUCCESS) {
+ std::wstring WideValue(reinterpret_cast<const wchar_t *>(buffer.data()),
+ valueSize / sizeof(wchar_t));
+ // The destination buffer must be empty as an invariant of the conversion
+    // function; however, this function is sometimes called in a loop that
+    // passes in the same buffer. Simply clear it out so we can overwrite it.
+ value.clear();
+ return llvm::convertWideToUTF8(WideValue, value);
+ }
+ return false;
}
#endif
@@ -141,19 +155,20 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName,
nextKey++;
size_t partialKeyLength = keyEnd - keyPath;
char partialKey[256];
- if (partialKeyLength > sizeof(partialKey))
- partialKeyLength = sizeof(partialKey);
+ if (partialKeyLength >= sizeof(partialKey))
+ partialKeyLength = sizeof(partialKey) - 1;
strncpy(partialKey, keyPath, partialKeyLength);
partialKey[partialKeyLength] = '\0';
HKEY hTopKey = NULL;
- lResult = RegOpenKeyEx(hRootKey, partialKey, 0, KEY_READ | KEY_WOW64_32KEY,
- &hTopKey);
+ lResult = RegOpenKeyExA(hRootKey, partialKey, 0, KEY_READ | KEY_WOW64_32KEY,
+ &hTopKey);
if (lResult == ERROR_SUCCESS) {
char keyName[256];
double bestValue = 0.0;
DWORD index, size = sizeof(keyName) - 1;
- for (index = 0; RegEnumKeyEx(hTopKey, index, keyName, &size, NULL,
- NULL, NULL, NULL) == ERROR_SUCCESS; index++) {
+ for (index = 0; RegEnumKeyExA(hTopKey, index, keyName, &size, NULL, NULL,
+ NULL, NULL) == ERROR_SUCCESS;
+ index++) {
const char *sp = keyName;
while (*sp && !isDigit(*sp))
sp++;
@@ -172,8 +187,8 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName,
bestName = keyName;
// Append rest of key.
bestName.append(nextKey);
- lResult = RegOpenKeyEx(hTopKey, bestName.c_str(), 0,
- KEY_READ | KEY_WOW64_32KEY, &hKey);
+ lResult = RegOpenKeyExA(hTopKey, bestName.c_str(), 0,
+ KEY_READ | KEY_WOW64_32KEY, &hKey);
if (lResult == ERROR_SUCCESS) {
lResult = readFullStringValue(hKey, valueName, value);
if (lResult == ERROR_SUCCESS) {
@@ -191,7 +206,7 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName,
}
} else {
lResult =
- RegOpenKeyEx(hRootKey, keyPath, 0, KEY_READ | KEY_WOW64_32KEY, &hKey);
+ RegOpenKeyExA(hRootKey, keyPath, 0, KEY_READ | KEY_WOW64_32KEY, &hKey);
if (lResult == ERROR_SUCCESS) {
lResult = readFullStringValue(hKey, valueName, value);
if (lResult == ERROR_SUCCESS)
@@ -402,7 +417,10 @@ bool MSVCToolChain::getVisualStudioBinariesFolder(const char *clangProgramPath,
SmallString<128> FilePath(PathSegment);
llvm::sys::path::append(FilePath, "cl.exe");
- if (llvm::sys::fs::can_execute(FilePath.c_str()) &&
+ // Checking if cl.exe exists is a small optimization over calling
+ // can_execute, which really only checks for existence but will also do
+ // extra checks for cl.exe.exe. These add up when walking a long path.
+ if (llvm::sys::fs::exists(FilePath.c_str()) &&
!llvm::sys::fs::equivalent(FilePath.c_str(), clangProgramPath)) {
// If we found it on the PATH, use it exactly as is with no
// modifications.
@@ -452,6 +470,45 @@ bool MSVCToolChain::getVisualStudioBinariesFolder(const char *clangProgramPath,
return true;
}
+VersionTuple MSVCToolChain::getMSVCVersionFromExe() const {
+ VersionTuple Version;
+#ifdef USE_WIN32
+ std::string BinPath;
+ if (!getVisualStudioBinariesFolder("", BinPath))
+ return Version;
+ SmallString<128> ClExe(BinPath);
+ llvm::sys::path::append(ClExe, "cl.exe");
+
+ std::wstring ClExeWide;
+ if (!llvm::ConvertUTF8toWide(ClExe.c_str(), ClExeWide))
+ return Version;
+
+ const DWORD VersionSize = ::GetFileVersionInfoSizeW(ClExeWide.c_str(),
+ nullptr);
+ if (VersionSize == 0)
+ return Version;
+
+ SmallVector<uint8_t, 4 * 1024> VersionBlock(VersionSize);
+ if (!::GetFileVersionInfoW(ClExeWide.c_str(), 0, VersionSize,
+ VersionBlock.data()))
+ return Version;
+
+ VS_FIXEDFILEINFO *FileInfo = nullptr;
+ UINT FileInfoSize = 0;
+ if (!::VerQueryValueW(VersionBlock.data(), L"\\",
+ reinterpret_cast<LPVOID *>(&FileInfo), &FileInfoSize) ||
+ FileInfoSize < sizeof(*FileInfo))
+ return Version;
+
+ const unsigned Major = (FileInfo->dwFileVersionMS >> 16) & 0xFFFF;
+ const unsigned Minor = (FileInfo->dwFileVersionMS ) & 0xFFFF;
+ const unsigned Micro = (FileInfo->dwFileVersionLS >> 16) & 0xFFFF;
+
+ Version = VersionTuple(Major, Minor, Micro);
+#endif
+ return Version;
+}
+
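
The bit unpacking above follows the standard VS_FIXEDFILEINFO layout; here is a minimal sketch with illustrative values (no Windows headers required, version numbers are made up for the example):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // dwFileVersionMS packs major.minor in its high/low 16 bits;
      // dwFileVersionLS packs micro.build the same way.
      // E.g. a cl.exe reporting 19.0.24210:
      uint32_t MS = (19u << 16) | 0u;     // major = 19, minor = 0
      uint32_t LS = (24210u << 16) | 0u;  // micro = 24210, build = 0
      unsigned Major = (MS >> 16) & 0xFFFF;
      unsigned Minor = MS & 0xFFFF;
      unsigned Micro = (LS >> 16) & 0xFFFF;
      std::printf("%u.%u.%u\n", Major, Minor, Micro);  // prints 19.0.24210
    }
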
// Get Visual Studio installation directory.
bool MSVCToolChain::getVisualStudioInstallDir(std::string &path) const {
// First check the environment variables that vsvars32.bat sets.
@@ -527,6 +584,10 @@ void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
"include");
}
+ // Add %INCLUDE%-like directories from the -imsvc flag.
+ for (const auto &Path : DriverArgs.getAllArgValues(options::OPT__SLASH_imsvc))
+ addSystemInclude(DriverArgs, CC1Args, Path);
+
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
@@ -609,7 +670,7 @@ MSVCToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
ToolChain::ComputeEffectiveClangTriple(Args, InputType);
llvm::Triple Triple(TripleStr);
VersionTuple MSVT =
- tools::visualstudio::getMSVCVersion(/*D=*/nullptr, Triple, Args,
+ tools::visualstudio::getMSVCVersion(/*D=*/nullptr, *this, Triple, Args,
/*IsWindowsMSVC=*/true);
if (MSVT.empty())
return TripleStr;
@@ -659,7 +720,8 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL,
DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin));
DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2");
}
- if (SupportsForcingFramePointer)
+ if (SupportsForcingFramePointer &&
+ !DAL.hasArgNoClaim(options::OPT_fno_omit_frame_pointer))
DAL.AddFlagArg(A,
Opts.getOption(options::OPT_fomit_frame_pointer));
if (OptChar == '1' || OptChar == '2')
@@ -669,8 +731,20 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL,
}
break;
case 'b':
- if (I + 1 != E && isdigit(OptStr[I + 1]))
+ if (I + 1 != E && isdigit(OptStr[I + 1])) {
+ switch (OptStr[I + 1]) {
+ case '0':
+ DAL.AddFlagArg(A, Opts.getOption(options::OPT_fno_inline));
+ break;
+ case '1':
+ DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_hint_functions));
+ break;
+ case '2':
+ DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_functions));
+ break;
+ }
++I;
+ }
break;
case 'g':
break;
@@ -701,6 +775,12 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL,
else
DAL.AddFlagArg(
A, Opts.getOption(options::OPT_fno_omit_frame_pointer));
+ } else {
+ // Don't warn about /Oy- in 64-bit builds (where
+ // SupportsForcingFramePointer is false). The flag having no effect
+ // there is a compiler-internal optimization, and people shouldn't have
+ // to special-case their build files for 64-bit clang-cl.
+ A->claim();
}
break;
}
@@ -748,7 +828,12 @@ MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
continue;
StringRef OptStr = A->getValue();
for (size_t I = 0, E = OptStr.size(); I != E; ++I) {
- const char &OptChar = *(OptStr.data() + I);
+ char OptChar = OptStr[I];
+ char PrevChar = I > 0 ? OptStr[I - 1] : '0';
+ if (PrevChar == 'b') {
+ // OptChar does not expand; it's an argument to the previous char.
+ continue;
+ }
if (OptChar == '1' || OptChar == '2' || OptChar == 'x' || OptChar == 'd')
ExpandChar = OptStr.data() + I;
}
diff --git a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp
index 2fded1c80da9..30cc3f45c9e0 100644
--- a/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/SanitizerArgs.cpp
@@ -39,6 +39,7 @@ enum : SanitizerMask {
TrappingSupported =
(Undefined & ~Vptr) | UnsignedIntegerOverflow | LocalBounds | CFI,
TrappingDefault = CFI,
+ CFIClasses = CFIVCall | CFINVCall | CFIDerivedCast | CFIUnrelatedCast,
};
enum CoverageFeature {
@@ -49,6 +50,7 @@ enum CoverageFeature {
CoverageTraceBB = 1 << 4,
CoverageTraceCmp = 1 << 5,
Coverage8bitCounters = 1 << 6,
+ CoverageTracePC = 1 << 7,
};
/// Parse a -fsanitize= or -fno-sanitize= argument's values, diagnosing any
@@ -157,11 +159,10 @@ static SanitizerMask parseSanitizeTrapArgs(const Driver &D,
}
bool SanitizerArgs::needsUbsanRt() const {
- return (Sanitizers.Mask & NeedsUbsanRt & ~TrapSanitizers.Mask) &&
- !Sanitizers.has(Address) &&
- !Sanitizers.has(Memory) &&
- !Sanitizers.has(Thread) &&
- !CfiCrossDso;
+ return ((Sanitizers.Mask & NeedsUbsanRt & ~TrapSanitizers.Mask) ||
+ CoverageFeatures) &&
+ !Sanitizers.has(Address) && !Sanitizers.has(Memory) &&
+ !Sanitizers.has(Thread) && !Sanitizers.has(DataFlow) && !CfiCrossDso;
}
bool SanitizerArgs::needsCfiRt() const {
@@ -180,25 +181,8 @@ bool SanitizerArgs::needsUnwindTables() const {
return Sanitizers.Mask & NeedsUnwindTables;
}
-void SanitizerArgs::clear() {
- Sanitizers.clear();
- RecoverableSanitizers.clear();
- TrapSanitizers.clear();
- BlacklistFiles.clear();
- ExtraDeps.clear();
- CoverageFeatures = 0;
- MsanTrackOrigins = 0;
- MsanUseAfterDtor = false;
- NeedPIE = false;
- AsanFieldPadding = 0;
- AsanSharedRuntime = false;
- LinkCXXRuntimes = false;
- CfiCrossDso = false;
-}
-
SanitizerArgs::SanitizerArgs(const ToolChain &TC,
const llvm::opt::ArgList &Args) {
- clear();
SanitizerMask AllRemove = 0; // During the loop below, the accumulated set of
// sanitizers disabled by the current sanitizer
// argument or any argument after it.
@@ -284,6 +268,9 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
}
}
+ // Enable toolchain specific default sanitizers if not explicitly disabled.
+ Kinds |= TC.getDefaultSanitizers() & ~AllRemove;
+
// We disable the vptr sanitizer if it was enabled by group expansion but RTTI
// is disabled.
if ((Kinds & Vptr) &&
@@ -324,7 +311,12 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
std::make_pair(Leak, Memory), std::make_pair(KernelAddress, Address),
std::make_pair(KernelAddress, Leak),
std::make_pair(KernelAddress, Thread),
- std::make_pair(KernelAddress, Memory)};
+ std::make_pair(KernelAddress, Memory),
+ std::make_pair(Efficiency, Address),
+ std::make_pair(Efficiency, Leak),
+ std::make_pair(Efficiency, Thread),
+ std::make_pair(Efficiency, Memory),
+ std::make_pair(Efficiency, KernelAddress)};
for (auto G : IncompatibleGroups) {
SanitizerMask Group = G.first;
if (Kinds & Group) {
@@ -347,11 +339,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
for (const auto *Arg : Args) {
const char *DeprecatedReplacement = nullptr;
if (Arg->getOption().matches(options::OPT_fsanitize_recover)) {
- DeprecatedReplacement = "-fsanitize-recover=undefined,integer";
+ DeprecatedReplacement =
+ "-fsanitize-recover=undefined,integer' or '-fsanitize-recover=all";
RecoverableKinds |= expandSanitizerGroups(LegacyFsanitizeRecoverMask);
Arg->claim();
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_recover)) {
- DeprecatedReplacement = "-fno-sanitize-recover=undefined,integer";
+ DeprecatedReplacement = "-fno-sanitize-recover=undefined,integer' or "
+ "'-fno-sanitize-recover=all";
RecoverableKinds &= ~expandSanitizerGroups(LegacyFsanitizeRecoverMask);
Arg->claim();
} else if (Arg->getOption().matches(options::OPT_fsanitize_recover_EQ)) {
@@ -448,42 +442,59 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
NeedPIE |= CfiCrossDso;
}
+ Stats = Args.hasFlag(options::OPT_fsanitize_stats,
+ options::OPT_fno_sanitize_stats, false);
+
// Parse -f(no-)?sanitize-coverage flags if coverage is supported by the
// enabled sanitizers.
- if (AllAddedKinds & SupportsCoverage) {
- for (const auto *Arg : Args) {
- if (Arg->getOption().matches(options::OPT_fsanitize_coverage)) {
- Arg->claim();
- int LegacySanitizeCoverage;
- if (Arg->getNumValues() == 1 &&
- !StringRef(Arg->getValue(0))
- .getAsInteger(0, LegacySanitizeCoverage) &&
- LegacySanitizeCoverage >= 0 && LegacySanitizeCoverage <= 4) {
- // TODO: Add deprecation notice for this form.
- switch (LegacySanitizeCoverage) {
- case 0:
- CoverageFeatures = 0;
- break;
- case 1:
- CoverageFeatures = CoverageFunc;
- break;
- case 2:
- CoverageFeatures = CoverageBB;
- break;
- case 3:
- CoverageFeatures = CoverageEdge;
- break;
- case 4:
- CoverageFeatures = CoverageEdge | CoverageIndirCall;
- break;
- }
- continue;
+ for (const auto *Arg : Args) {
+ if (Arg->getOption().matches(options::OPT_fsanitize_coverage)) {
+ int LegacySanitizeCoverage;
+ if (Arg->getNumValues() == 1 &&
+ !StringRef(Arg->getValue(0))
+ .getAsInteger(0, LegacySanitizeCoverage) &&
+ LegacySanitizeCoverage >= 0 && LegacySanitizeCoverage <= 4) {
+ switch (LegacySanitizeCoverage) {
+ case 0:
+ CoverageFeatures = 0;
+ Arg->claim();
+ break;
+ case 1:
+ D.Diag(diag::warn_drv_deprecated_arg) << Arg->getAsString(Args)
+ << "-fsanitize-coverage=func";
+ CoverageFeatures = CoverageFunc;
+ break;
+ case 2:
+ D.Diag(diag::warn_drv_deprecated_arg) << Arg->getAsString(Args)
+ << "-fsanitize-coverage=bb";
+ CoverageFeatures = CoverageBB;
+ break;
+ case 3:
+ D.Diag(diag::warn_drv_deprecated_arg) << Arg->getAsString(Args)
+ << "-fsanitize-coverage=edge";
+ CoverageFeatures = CoverageEdge;
+ break;
+ case 4:
+ D.Diag(diag::warn_drv_deprecated_arg)
+ << Arg->getAsString(Args)
+ << "-fsanitize-coverage=edge,indirect-calls";
+ CoverageFeatures = CoverageEdge | CoverageIndirCall;
+ break;
}
- CoverageFeatures |= parseCoverageFeatures(D, Arg);
- } else if (Arg->getOption().matches(options::OPT_fno_sanitize_coverage)) {
+ continue;
+ }
+ CoverageFeatures |= parseCoverageFeatures(D, Arg);
+
+      // Disable coverage and do not claim the flags if at least one enabled
+      // sanitizer does not support it.
+ if (!(AllAddedKinds & ~setGroupBits(SupportsCoverage))) {
Arg->claim();
- CoverageFeatures &= ~parseCoverageFeatures(D, Arg);
+ } else {
+ CoverageFeatures = 0;
}
+ } else if (Arg->getOption().matches(options::OPT_fno_sanitize_coverage)) {
+ Arg->claim();
+ CoverageFeatures &= ~parseCoverageFeatures(D, Arg);
}
}
// Choose at most one coverage type: function, bb, or edge.
@@ -512,6 +523,10 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "-fsanitize-coverage=8bit-counters"
<< "-fsanitize-coverage=(func|bb|edge)";
+ // trace-pc w/o func/bb/edge implies edge.
+ if ((CoverageFeatures & CoverageTracePC) &&
+ !(CoverageFeatures & CoverageTypes))
+ CoverageFeatures |= CoverageEdge;
if (AllAddedKinds & Address) {
AsanSharedRuntime =
@@ -543,6 +558,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
}
}
+ AsanUseAfterScope =
+ Args.hasArg(options::OPT_fsanitize_address_use_after_scope);
+ if (AsanUseAfterScope && !(AllAddedKinds & Address)) {
+ D.Diag(clang::diag::err_drv_argument_only_allowed_with)
+ << "-fsanitize-address-use-after-scope"
+ << "-fsanitize=address";
+ }
+
// Parse -link-cxx-sanitizer flag.
LinkCXXRuntimes =
Args.hasArg(options::OPT_fsanitize_link_cxx_runtime) || D.CCCIsCXX();
@@ -565,9 +588,62 @@ static std::string toString(const clang::SanitizerSet &Sanitizers) {
return Res;
}
+static void addIncludeLinkerOption(const ToolChain &TC,
+ const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs,
+ StringRef SymbolName) {
+ SmallString<64> LinkerOptionFlag;
+ LinkerOptionFlag = "--linker-option=/include:";
+ if (TC.getTriple().getArch() == llvm::Triple::x86) {
+ // Win32 mangles C function names with a '_' prefix.
+ LinkerOptionFlag += '_';
+ }
+ LinkerOptionFlag += SymbolName;
+ CmdArgs.push_back(Args.MakeArgString(LinkerOptionFlag));
+}
+
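
Concretely, the helper above yields the following strings for the stats symbol registered further down; a std-only sketch (includeFlag is hypothetical):

    #include <string>

    // Win32 (x86) C symbols carry a leading underscore; x86-64 ones do not.
    static std::string includeFlag(bool IsX86_32, const std::string &Sym) {
      return "--linker-option=/include:" +
             std::string(IsX86_32 ? "_" : "") + Sym;
    }
    // includeFlag(true,  "__sanitizer_stats_register")
    //   -> "--linker-option=/include:___sanitizer_stats_register"
    // includeFlag(false, "__sanitizer_stats_register")
    //   -> "--linker-option=/include:__sanitizer_stats_register"
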
void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
types::ID InputType) const {
+ // Translate available CoverageFeatures to corresponding clang-cc1 flags.
+ // Do it even if Sanitizers.empty() since some forms of coverage don't require
+ // sanitizers.
+ std::pair<int, const char *> CoverageFlags[] = {
+ std::make_pair(CoverageFunc, "-fsanitize-coverage-type=1"),
+ std::make_pair(CoverageBB, "-fsanitize-coverage-type=2"),
+ std::make_pair(CoverageEdge, "-fsanitize-coverage-type=3"),
+ std::make_pair(CoverageIndirCall, "-fsanitize-coverage-indirect-calls"),
+ std::make_pair(CoverageTraceBB, "-fsanitize-coverage-trace-bb"),
+ std::make_pair(CoverageTraceCmp, "-fsanitize-coverage-trace-cmp"),
+ std::make_pair(Coverage8bitCounters, "-fsanitize-coverage-8bit-counters"),
+ std::make_pair(CoverageTracePC, "-fsanitize-coverage-trace-pc")};
+ for (auto F : CoverageFlags) {
+ if (CoverageFeatures & F.first)
+ CmdArgs.push_back(Args.MakeArgString(F.second));
+ }
+
+ if (TC.getTriple().isOSWindows() && needsUbsanRt()) {
+ // Instruct the code generator to embed linker directives in the object file
+ // that cause the required runtime libraries to be linked.
+ CmdArgs.push_back(Args.MakeArgString(
+ "--dependent-lib=" + TC.getCompilerRT(Args, "ubsan_standalone")));
+ if (types::isCXX(InputType))
+ CmdArgs.push_back(Args.MakeArgString(
+ "--dependent-lib=" + TC.getCompilerRT(Args, "ubsan_standalone_cxx")));
+ }
+ if (TC.getTriple().isOSWindows() && needsStatsRt()) {
+ CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" +
+ TC.getCompilerRT(Args, "stats_client")));
+
+ // The main executable must export the stats runtime.
+ // FIXME: Only exporting from the main executable (e.g. based on whether the
+ // translation unit defines main()) would save a little space, but having
+ // multiple copies of the runtime shouldn't hurt.
+ CmdArgs.push_back(Args.MakeArgString("--dependent-lib=" +
+ TC.getCompilerRT(Args, "stats")));
+ addIncludeLinkerOption(TC, Args, CmdArgs, "__sanitizer_stats_register");
+ }
+
if (Sanitizers.empty())
return;
CmdArgs.push_back(Args.MakeArgString("-fsanitize=" + toString(Sanitizers)));
@@ -601,23 +677,15 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
if (CfiCrossDso)
CmdArgs.push_back(Args.MakeArgString("-fsanitize-cfi-cross-dso"));
+ if (Stats)
+ CmdArgs.push_back(Args.MakeArgString("-fsanitize-stats"));
+
if (AsanFieldPadding)
CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" +
llvm::utostr(AsanFieldPadding)));
- // Translate available CoverageFeatures to corresponding clang-cc1 flags.
- std::pair<int, const char *> CoverageFlags[] = {
- std::make_pair(CoverageFunc, "-fsanitize-coverage-type=1"),
- std::make_pair(CoverageBB, "-fsanitize-coverage-type=2"),
- std::make_pair(CoverageEdge, "-fsanitize-coverage-type=3"),
- std::make_pair(CoverageIndirCall, "-fsanitize-coverage-indirect-calls"),
- std::make_pair(CoverageTraceBB, "-fsanitize-coverage-trace-bb"),
- std::make_pair(CoverageTraceCmp, "-fsanitize-coverage-trace-cmp"),
- std::make_pair(Coverage8bitCounters, "-fsanitize-coverage-8bit-counters")};
- for (auto F : CoverageFlags) {
- if (CoverageFeatures & F.first)
- CmdArgs.push_back(Args.MakeArgString(F.second));
- }
+ if (AsanUseAfterScope)
+ CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-use-after-scope"));
// MSan: Workaround for PR16386.
// ASan: This is mainly to help LSan with cases such as
@@ -627,14 +695,14 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
if (Sanitizers.has(Memory) || Sanitizers.has(Address))
CmdArgs.push_back(Args.MakeArgString("-fno-assume-sane-operator-new"));
- if (TC.getTriple().isOSWindows() && needsUbsanRt()) {
- // Instruct the code generator to embed linker directives in the object file
- // that cause the required runtime libraries to be linked.
- CmdArgs.push_back(Args.MakeArgString(
- "--dependent-lib=" + TC.getCompilerRT(Args, "ubsan_standalone")));
- if (types::isCXX(InputType))
- CmdArgs.push_back(Args.MakeArgString(
- "--dependent-lib=" + TC.getCompilerRT(Args, "ubsan_standalone_cxx")));
+  // Require the -fvisibility= flag on non-Windows when any of the
+  // class-hierarchy CFI checks (vcall, nvcall, derived or unrelated cast)
+  // are enabled.
+ if (Sanitizers.hasOneOf(CFIClasses) && !TC.getTriple().isOSWindows() &&
+ !Args.hasArg(options::OPT_fvisibility_EQ)) {
+ TC.getDriver().Diag(clang::diag::err_drv_argument_only_allowed_with)
+ << lastArgumentForMask(TC.getDriver(), Args,
+ Sanitizers.Mask & CFIClasses)
+ << "-fvisibility=";
}
}
@@ -655,6 +723,10 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
if (A->getOption().matches(options::OPT_fsanitize_EQ) &&
0 == strcmp("all", Value))
Kind = 0;
+ // Similarly, don't accept -fsanitize=efficiency-all.
+ else if (A->getOption().matches(options::OPT_fsanitize_EQ) &&
+ 0 == strcmp("efficiency-all", Value))
+ Kind = 0;
else
Kind = parseSanitizerValue(Value, /*AllowGroups=*/true);
@@ -681,6 +753,7 @@ int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A) {
.Case("trace-bb", CoverageTraceBB)
.Case("trace-cmp", CoverageTraceCmp)
.Case("8bit-counters", Coverage8bitCounters)
+ .Case("trace-pc", CoverageTracePC)
.Default(0);
if (F == 0)
D.Diag(clang::diag::err_drv_unsupported_option_argument)
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
index cbbd485a9b77..e96688cbaf81 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChain.cpp
@@ -9,6 +9,7 @@
#include "Tools.h"
#include "clang/Basic/ObjCRuntime.h"
+#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
@@ -247,8 +248,7 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const {
case Action::InputClass:
case Action::BindArchClass:
- case Action::CudaDeviceClass:
- case Action::CudaHostClass:
+ case Action::OffloadClass:
case Action::LipoJobClass:
case Action::DsymutilJobClass:
case Action::VerifyDebugInfoJobClass:
@@ -341,19 +341,26 @@ std::string ToolChain::GetProgramPath(const char *Name) const {
std::string ToolChain::GetLinkerPath() const {
if (Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ)) {
- StringRef Suffix = A->getValue();
-
- // If we're passed -fuse-ld= with no argument, or with the argument ld,
- // then use whatever the default system linker is.
- if (Suffix.empty() || Suffix == "ld")
- return GetProgramPath("ld");
-
- llvm::SmallString<8> LinkerName("ld.");
- LinkerName.append(Suffix);
-
- std::string LinkerPath(GetProgramPath(LinkerName.c_str()));
- if (llvm::sys::fs::exists(LinkerPath))
- return LinkerPath;
+ StringRef UseLinker = A->getValue();
+
+ if (llvm::sys::path::is_absolute(UseLinker)) {
+ // If we're passed -fuse-ld= with what looks like an absolute path,
+ // don't attempt to second-guess that.
+ if (llvm::sys::fs::exists(UseLinker))
+ return UseLinker;
+ } else {
+ // If we're passed -fuse-ld= with no argument, or with the argument ld,
+ // then use whatever the default system linker is.
+ if (UseLinker.empty() || UseLinker == "ld")
+ return GetProgramPath("ld");
+
+ llvm::SmallString<8> LinkerName("ld.");
+ LinkerName.append(UseLinker);
+
+ std::string LinkerPath(GetProgramPath(LinkerName.c_str()));
+ if (llvm::sys::fs::exists(LinkerPath))
+ return LinkerPath;
+ }
getDriver().Diag(diag::err_drv_invalid_linker_name) << A->getAsString(Args);
return "";
@@ -515,7 +522,6 @@ void ToolChain::addProfileRTLibs(const llvm::opt::ArgList &Args,
if (!needsProfileRT(Args)) return;
CmdArgs.push_back(getCompilerRTArgString(Args, "profile"));
- return;
}
ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType(
@@ -533,18 +539,41 @@ ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType(
return GetDefaultRuntimeLibType();
}
+static bool ParseCXXStdlibType(const StringRef& Name,
+ ToolChain::CXXStdlibType& Type) {
+ if (Name == "libc++")
+ Type = ToolChain::CST_Libcxx;
+ else if (Name == "libstdc++")
+ Type = ToolChain::CST_Libstdcxx;
+ else
+ return false;
+
+ return true;
+}
+
ToolChain::CXXStdlibType ToolChain::GetCXXStdlibType(const ArgList &Args) const{
- if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
+ ToolChain::CXXStdlibType Type;
+ bool HasValidType = false;
+ bool ForcePlatformDefault = false;
+
+ const Arg *A = Args.getLastArg(options::OPT_stdlib_EQ);
+ if (A) {
StringRef Value = A->getValue();
- if (Value == "libc++")
- return ToolChain::CST_Libcxx;
- if (Value == "libstdc++")
- return ToolChain::CST_Libstdcxx;
- getDriver().Diag(diag::err_drv_invalid_stdlib_name)
- << A->getAsString(Args);
+ HasValidType = ParseCXXStdlibType(Value, Type);
+
+ // Only use in tests to override CLANG_DEFAULT_CXX_STDLIB!
+ if (Value == "platform")
+ ForcePlatformDefault = true;
+ else if (!HasValidType)
+ getDriver().Diag(diag::err_drv_invalid_stdlib_name)
+ << A->getAsString(Args);
}
- return ToolChain::CST_Libstdcxx;
+ if (!HasValidType && (ForcePlatformDefault ||
+ !ParseCXXStdlibType(CLANG_DEFAULT_CXX_STDLIB, Type)))
+ Type = GetDefaultCXXStdlibType();
+
+ return Type;
}
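
A std-only sketch of the resolution order above: an explicit and valid -stdlib= wins, "platform" forces the platform default, otherwise the configured CLANG_DEFAULT_CXX_STDLIB is tried before the toolchain default (resolveStdlib and its arguments are hypothetical):

    #include <string>

    enum class Stdlib { Libcxx, Libstdcxx };

    static bool parseStdlib(const std::string &S, Stdlib &Out) {
      if (S == "libc++")    { Out = Stdlib::Libcxx;    return true; }
      if (S == "libstdc++") { Out = Stdlib::Libstdcxx; return true; }
      return false;
    }

    // Flag: the -stdlib= value ("" if absent); Configured: the build-time
    // CLANG_DEFAULT_CXX_STDLIB ("" if unset); Platform: the answer of
    // GetDefaultCXXStdlibType().
    static Stdlib resolveStdlib(const std::string &Flag,
                                const std::string &Configured,
                                Stdlib Platform) {
      Stdlib Type;
      bool ForcePlatform = (Flag == "platform");
      if (!Flag.empty() && !ForcePlatform && parseStdlib(Flag, Type))
        return Type;                              // explicit, valid -stdlib=
      if (!ForcePlatform && parseStdlib(Configured, Type))
        return Type;                              // build-time default
      return Platform;                            // platform default
    }
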
/// \brief Utility function to add a system include directory to CC1 arguments.
@@ -666,3 +695,6 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
+
+void ToolChain::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
+ ArgStringList &CC1Args) const {}
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
index 99c7b8e68c61..1b02f467c141 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ToolChains.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/VirtualFileSystem.h"
@@ -65,6 +66,16 @@ types::ID MachO::LookupTypeForExtension(const char *Ext) const {
bool MachO::HasNativeLLVMSupport() const { return true; }
+ToolChain::CXXStdlibType Darwin::GetDefaultCXXStdlibType() const {
+ // Default to use libc++ on OS X 10.9+ and iOS 7+.
+ if ((isTargetMacOS() && !isMacosxVersionLT(10, 9)) ||
+ (isTargetIOSBased() && !isIPhoneOSVersionLT(7, 0)) ||
+ isTargetWatchOSBased())
+ return ToolChain::CST_Libcxx;
+
+ return ToolChain::CST_Libstdcxx;
+}
+
/// Darwin provides an ARC runtime starting in MacOS X 10.7 and iOS 5.0.
ObjCRuntime Darwin::getDefaultObjCRuntime(bool isNonFragile) const {
if (isTargetWatchOSBased())
@@ -319,64 +330,74 @@ void MachO::AddLinkRuntimeLib(const ArgList &Args, ArgStringList &CmdArgs,
}
}
+StringRef Darwin::getPlatformFamily() const {
+ switch (TargetPlatform) {
+ case DarwinPlatformKind::MacOS:
+ return "MacOSX";
+ case DarwinPlatformKind::IPhoneOS:
+ case DarwinPlatformKind::IPhoneOSSimulator:
+ return "iPhone";
+ case DarwinPlatformKind::TvOS:
+ case DarwinPlatformKind::TvOSSimulator:
+ return "AppleTV";
+ case DarwinPlatformKind::WatchOS:
+ case DarwinPlatformKind::WatchOSSimulator:
+ return "Watch";
+ }
+ llvm_unreachable("Unsupported platform");
+}
+
+StringRef Darwin::getSDKName(StringRef isysroot) {
+ // Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk
+ llvm::sys::path::const_iterator SDKDir;
+ auto BeginSDK = llvm::sys::path::begin(isysroot);
+ auto EndSDK = llvm::sys::path::end(isysroot);
+ for (auto IT = BeginSDK; IT != EndSDK; ++IT) {
+ StringRef SDK = *IT;
+ if (SDK.endswith(".sdk"))
+ return SDK.slice(0, SDK.size() - 4);
+ }
+ return "";
+}
+
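
A std-only sketch of the component scan above (sdkName is a hypothetical stand-in; the real code walks llvm::sys::path components):

    #include <iostream>
    #include <sstream>
    #include <string>

    // Return the first path component ending in ".sdk", minus that suffix.
    static std::string sdkName(const std::string &Isysroot) {
      std::istringstream SS(Isysroot);
      std::string Part;
      while (std::getline(SS, Part, '/'))
        if (Part.size() > 4 &&
            Part.compare(Part.size() - 4, 4, ".sdk") == 0)
          return Part.substr(0, Part.size() - 4);
      return "";
    }

    int main() {
      // Prints "MacOSX10.11".
      std::cout << sdkName("/Xcode/SDKs/MacOSX10.11.sdk/usr") << "\n";
    }
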
+StringRef Darwin::getOSLibraryNameSuffix() const {
+ switch(TargetPlatform) {
+ case DarwinPlatformKind::MacOS:
+ return "osx";
+ case DarwinPlatformKind::IPhoneOS:
+ return "ios";
+ case DarwinPlatformKind::IPhoneOSSimulator:
+ return "iossim";
+ case DarwinPlatformKind::TvOS:
+ return "tvos";
+ case DarwinPlatformKind::TvOSSimulator:
+ return "tvossim";
+ case DarwinPlatformKind::WatchOS:
+ return "watchos";
+ case DarwinPlatformKind::WatchOSSimulator:
+ return "watchossim";
+ }
+ llvm_unreachable("Unsupported platform");
+}
+
void Darwin::addProfileRTLibs(const ArgList &Args,
ArgStringList &CmdArgs) const {
if (!needsProfileRT(Args)) return;
- // TODO: Clean this up once autoconf is gone
- SmallString<128> P(getDriver().ResourceDir);
- llvm::sys::path::append(P, "lib", "darwin");
- const char *Library = "libclang_rt.profile_osx.a";
-
- // Select the appropriate runtime library for the target.
- if (isTargetWatchOS()) {
- Library = "libclang_rt.profile_watchos.a";
- } else if (isTargetWatchOSSimulator()) {
- llvm::sys::path::append(P, "libclang_rt.profile_watchossim.a");
- Library = getVFS().exists(P) ? "libclang_rt.profile_watchossim.a"
- : "libclang_rt.profile_watchos.a";
- } else if (isTargetTvOS()) {
- Library = "libclang_rt.profile_tvos.a";
- } else if (isTargetTvOSSimulator()) {
- llvm::sys::path::append(P, "libclang_rt.profile_tvossim.a");
- Library = getVFS().exists(P) ? "libclang_rt.profile_tvossim.a"
- : "libclang_rt.profile_tvos.a";
- } else if (isTargetIPhoneOS()) {
- Library = "libclang_rt.profile_ios.a";
- } else if (isTargetIOSSimulator()) {
- llvm::sys::path::append(P, "libclang_rt.profile_iossim.a");
- Library = getVFS().exists(P) ? "libclang_rt.profile_iossim.a"
- : "libclang_rt.profile_ios.a";
- } else {
- assert(isTargetMacOS() && "unexpected non MacOS platform");
- }
- AddLinkRuntimeLib(Args, CmdArgs, Library,
+ AddLinkRuntimeLib(Args, CmdArgs, (Twine("libclang_rt.profile_") +
+ getOSLibraryNameSuffix() + ".a").str(),
/*AlwaysLink*/ true);
- return;
}
void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args,
ArgStringList &CmdArgs,
StringRef Sanitizer) const {
- if (!Args.hasArg(options::OPT_dynamiclib) &&
- !Args.hasArg(options::OPT_bundle)) {
- // Sanitizer runtime libraries requires C++.
- AddCXXStdlibLibArgs(Args, CmdArgs);
- }
- // ASan is not supported on watchOS.
- assert(isTargetMacOS() || isTargetIOSSimulator());
- StringRef OS = isTargetMacOS() ? "osx" : "iossim";
AddLinkRuntimeLib(
Args, CmdArgs,
- (Twine("libclang_rt.") + Sanitizer + "_" + OS + "_dynamic.dylib").str(),
+ (Twine("libclang_rt.") + Sanitizer + "_" +
+ getOSLibraryNameSuffix() + "_dynamic.dylib").str(),
/*AlwaysLink*/ true, /*IsEmbedded*/ false,
/*AddRPath*/ true);
-
- if (GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) {
- // Add explicit dependcy on -lc++abi, as -lc++ doesn't re-export
- // all RTTI-related symbols that UBSan uses.
- CmdArgs.push_back("-lc++abi");
- }
}
void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
@@ -413,6 +434,15 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
AddLinkSanitizerLibArgs(Args, CmdArgs, "ubsan");
if (Sanitize.needsTsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan");
+ if (Sanitize.needsStatsRt()) {
+ StringRef OS = isTargetMacOS() ? "osx" : "iossim";
+ AddLinkRuntimeLib(Args, CmdArgs,
+ (Twine("libclang_rt.stats_client_") + OS + ".a").str(),
+ /*AlwaysLink=*/true);
+ AddLinkSanitizerLibArgs(Args, CmdArgs, "stats");
+ }
+ if (Sanitize.needsEsanRt())
+ AddLinkSanitizerLibArgs(Args, CmdArgs, "esan");
// Otherwise link libSystem, then the dynamic runtime library, and finally any
// target specific static runtime library.
@@ -444,21 +474,26 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
else if (isMacosxVersionLT(10, 6))
CmdArgs.push_back("-lgcc_s.10.5");
- // For OS X, we thought we would only need a static runtime library when
- // targeting 10.4, to provide versions of the static functions which were
- // omitted from 10.4.dylib.
+ // Originally for OS X, we thought we would only need a static runtime
+ // library when targeting 10.4, to provide versions of the static functions
+ // which were omitted from 10.4.dylib. This led to the creation of the 10.4
+ // builtins library.
//
// Unfortunately, that turned out to not be true, because Darwin system
// headers can still use eprintf on i386, and it is not exported from
// libSystem. Therefore, we still must provide a runtime library just for
// the tiny tiny handful of projects that *might* use that symbol.
- if (isMacosxVersionLT(10, 5)) {
+ //
+ // Then over time, we figured out it was useful to add more things to the
+ // runtime so we created libclang_rt.osx.a to provide new functions when
+ // deploying to old OS builds, and for a long time we had both eprintf and
+ // osx builtin libraries. Which just seems excessive. So with PR 28855, we
+ // are removing the eprintf library and expecting eprintf to be provided by
+ // the OS X builtins library.
+ if (isMacosxVersionLT(10, 5))
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.10.4.a");
- } else {
- if (getTriple().getArch() == llvm::Triple::x86)
- AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.eprintf.a");
+ else
AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.osx.a");
- }
}
}
@@ -529,11 +564,8 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
TvOSTarget.empty() && Args.hasArg(options::OPT_isysroot)) {
if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
StringRef isysroot = A->getValue();
- // Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk
- size_t BeginSDK = isysroot.rfind("SDKs/");
- size_t EndSDK = isysroot.rfind(".sdk");
- if (BeginSDK != StringRef::npos && EndSDK != StringRef::npos) {
- StringRef SDK = isysroot.slice(BeginSDK + 5, EndSDK);
+ StringRef SDK = getSDKName(isysroot);
+ if (SDK.size() > 0) {
// Slice the version number out.
// Version number is between the first and the last number.
size_t StartVer = SDK.find_first_of("0123456789");
@@ -656,13 +688,13 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
assert(iOSVersion && "Unknown target platform!");
if (!Driver::GetReleaseVersion(iOSVersion->getValue(), Major, Minor, Micro,
HadExtra) ||
- HadExtra || Major >= 10 || Minor >= 100 || Micro >= 100)
+ HadExtra || Major >= 100 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< iOSVersion->getAsString(Args);
} else if (Platform == TvOS) {
if (!Driver::GetReleaseVersion(TvOSVersion->getValue(), Major, Minor,
Micro, HadExtra) || HadExtra ||
- Major >= 10 || Minor >= 100 || Micro >= 100)
+ Major >= 100 || Minor >= 100 || Micro >= 100)
getDriver().Diag(diag::err_drv_invalid_version_number)
<< TvOSVersion->getAsString(Args);
} else if (Platform == WatchOS) {
@@ -686,6 +718,17 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
Platform = WatchOSSimulator;
setTarget(Platform, Major, Minor, Micro);
+
+ if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
+ StringRef SDK = getSDKName(A->getValue());
+ if (SDK.size() > 0) {
+ size_t StartVer = SDK.find_first_of("0123456789");
+ StringRef SDKName = SDK.slice(0, StartVer);
+ if (!SDKName.startswith(getPlatformFamily()))
+ getDriver().Diag(diag::warn_incompatible_sysroot)
+ << SDKName << getPlatformFamily();
+ }
+ }
}
void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args,
@@ -735,7 +778,6 @@ void DarwinClang::AddCXXStdlibLibArgs(const ArgList &Args,
void DarwinClang::AddCCKextLibArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
-
// For Darwin platforms, use the compiler-rt-based support library
// instead of the gcc-provided one (which is also incidentally
// only present in the gcc lib dir, which makes it hard to find).
@@ -1025,11 +1067,8 @@ DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args,
}
}
- // Default to use libc++ on OS X 10.9+ and iOS 7+.
- if (((isTargetMacOS() && !isMacosxVersionLT(10, 9)) ||
- (isTargetIOSBased() && !isIPhoneOSVersionLT(7, 0)) ||
- isTargetWatchOSBased()) &&
- !Args.getLastArg(options::OPT_stdlib_EQ))
+ if (!Args.getLastArg(options::OPT_stdlib_EQ) &&
+ GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_stdlib_EQ),
"libc++");
@@ -1068,7 +1107,15 @@ bool Darwin::UseSjLjExceptions(const ArgList &Args) const {
return false;
// Only watchOS uses the new DWARF/Compact unwinding method.
- return !isTargetWatchOS();
+ llvm::Triple Triple(ComputeLLVMTriple(Args));
+ return !Triple.isWatchABI();
+}
+
+bool Darwin::SupportsEmbeddedBitcode() const {
+ assert(TargetInitialized && "Target not initialized!");
+ if (isTargetIPhoneOS() && isIPhoneOSVersionLT(6, 0))
+ return false;
+ return true;
}
bool MachO::isPICDefault() const { return true; }
@@ -1212,14 +1259,18 @@ void Darwin::CheckObjCARC() const {
}
SanitizerMask Darwin::getSupportedSanitizers() const {
+ const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
SanitizerMask Res = ToolChain::getSupportedSanitizers();
- if (isTargetMacOS() || isTargetIOSSimulator())
- Res |= SanitizerKind::Address;
+ Res |= SanitizerKind::Address;
if (isTargetMacOS()) {
if (!isMacosxVersionLT(10, 9))
Res |= SanitizerKind::Vptr;
Res |= SanitizerKind::SafeStack;
- Res |= SanitizerKind::Thread;
+ if (IsX86_64)
+ Res |= SanitizerKind::Thread;
+ } else if (isTargetIOSSimulator() || isTargetTvOSSimulator()) {
+ if (IsX86_64)
+ Res |= SanitizerKind::Thread;
}
return Res;
}
@@ -1241,6 +1292,8 @@ Generic_GCC::GCCVersion Linux::GCCVersion::Parse(StringRef VersionText) {
if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
return BadVersion;
GoodVersion.MajorStr = First.first.str();
+ if (First.second.empty())
+ return GoodVersion;
if (Second.first.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
return BadVersion;
GoodVersion.MinorStr = Second.first.str();
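
With the early return added above, bare major versions parse cleanly; a std-only sketch of the accepted shapes (parseGccVersion is hypothetical, and the patch/suffix handling of the real parser is elided):

    #include <string>

    struct Version { int Major = -1, Minor = -1; };

    static bool isNumber(const std::string &S) {
      return !S.empty() &&
             S.find_first_not_of("0123456789") == std::string::npos;
    }

    // Accepts "5", "4.9", "4.9.2", "4.9.x": major and, if present, minor
    // must be numeric; anything after the second dot is a patch suffix.
    static bool parseGccVersion(const std::string &Text, Version &V) {
      std::string::size_type Dot1 = Text.find('.');
      std::string First = Text.substr(0, Dot1);
      if (!isNumber(First))
        return false;
      V.Major = std::stoi(First);
      if (Dot1 == std::string::npos)
        return true;  // bare major, e.g. GCC 5
      std::string Rest = Text.substr(Dot1 + 1);
      std::string Second = Rest.substr(0, Rest.find('.'));
      if (!isNumber(Second))
        return false;
      V.Minor = std::stoi(Second);
      return true;
    }
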
@@ -1248,6 +1301,7 @@ Generic_GCC::GCCVersion Linux::GCCVersion::Parse(StringRef VersionText) {
// First look for a number prefix and parse that if present. Otherwise just
// stash the entire patch string in the suffix, and leave the number
// unspecified. This covers versions strings such as:
+ // 5 (handled above)
// 4.4
// 4.4.0
// 4.4.x
@@ -1353,9 +1407,17 @@ void Generic_GCC::GCCInstallationDetector::init(
// Then look for gcc installed alongside clang.
Prefixes.push_back(D.InstalledDir + "/..");
- // And finally in /usr.
- if (D.SysRoot.empty())
+ // Then look for distribution supplied gcc installations.
+ if (D.SysRoot.empty()) {
+ // Look for RHEL devtoolsets.
+ Prefixes.push_back("/opt/rh/devtoolset-4/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-3/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-2/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-1.1/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-1.0/root/usr");
+ // And finally in /usr.
Prefixes.push_back("/usr");
+ }
}
// Loop over the various components which exist and select the best GCC
@@ -1553,9 +1615,13 @@ bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
break;
case llvm::Triple::x86:
LibDirs.append(begin(X86LibDirs), end(X86LibDirs));
- TripleAliases.append(begin(X86Triples), end(X86Triples));
- BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
- BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
+ // MCU toolchain is 32 bit only and its triple alias is TargetTriple
+ // itself, which will be appended below.
+ if (!TargetTriple.isOSIAMCU()) {
+ TripleAliases.append(begin(X86Triples), end(X86Triples));
+ BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
+ BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
+ }
break;
case llvm::Triple::mips:
LibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs));
@@ -1631,9 +1697,33 @@ bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
BiarchTripleAliases.push_back(BiarchTriple.str());
}
+// Parses the contents of version.txt in a CUDA installation. It should
+// contain one line of the form, e.g., "CUDA Version 7.5.2".
+static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
+ if (!V.startswith("CUDA Version "))
+ return CudaVersion::UNKNOWN;
+ V = V.substr(strlen("CUDA Version "));
+ int Major = -1, Minor = -1;
+ auto First = V.split('.');
+ auto Second = First.second.split('.');
+  if (First.first.getAsInteger(10, Major) ||
+      Second.first.getAsInteger(10, Minor))
+ return CudaVersion::UNKNOWN;
+
+ if (Major == 7 && Minor == 0) {
+ // This doesn't appear to ever happen -- version.txt doesn't exist in the
+ // CUDA 7 installs I've seen. But no harm in checking.
+ return CudaVersion::CUDA_70;
+ }
+ if (Major == 7 && Minor == 5)
+ return CudaVersion::CUDA_75;
+ if (Major == 8 && Minor == 0)
+ return CudaVersion::CUDA_80;
+ return CudaVersion::UNKNOWN;
+}
+
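
Since StringRef::getAsInteger returns true on failure, the version parse above reduces to this std-only sketch (hypothetical enum; sscanf stands in for the StringRef splitting):

    #include <cstdio>
    #include <string>

    enum class CudaVersion { UNKNOWN, CUDA_70, CUDA_75, CUDA_80 };

    // Strip the "CUDA Version " prefix, read Major.Minor, map known pairs.
    static CudaVersion parseCudaVersion(const std::string &V) {
      int Major = -1, Minor = -1;
      if (std::sscanf(V.c_str(), "CUDA Version %d.%d", &Major, &Minor) != 2)
        return CudaVersion::UNKNOWN;
      if (Major == 7 && Minor == 0) return CudaVersion::CUDA_70;
      if (Major == 7 && Minor == 5) return CudaVersion::CUDA_75;
      if (Major == 8 && Minor == 0) return CudaVersion::CUDA_80;
      return CudaVersion::UNKNOWN;
    }
    // parseCudaVersion("CUDA Version 7.5.2") -> CUDA_75
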
// \brief Try common CUDA installation paths looking for the files we need
// for CUDA compilation.
-
void Generic_GCC::CudaInstallationDetector::init(
const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args) {
SmallVector<std::string, 4> CudaPathCandidates;
@@ -1643,6 +1733,8 @@ void Generic_GCC::CudaInstallationDetector::init(
Args.getLastArgValue(options::OPT_cuda_path_EQ));
else {
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
+ // FIXME: Uncomment this once we can compile the cuda 8 headers.
+ // CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-8.0");
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.5");
CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-7.0");
}
@@ -1651,19 +1743,19 @@ void Generic_GCC::CudaInstallationDetector::init(
if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
continue;
- CudaInstallPath = CudaPath;
- CudaIncludePath = CudaInstallPath + "/include";
- CudaLibDevicePath = CudaInstallPath + "/nvvm/libdevice";
- CudaLibPath =
- CudaInstallPath + (TargetTriple.isArch64Bit() ? "/lib64" : "/lib");
+ InstallPath = CudaPath;
+ BinPath = CudaPath + "/bin";
+ IncludePath = InstallPath + "/include";
+ LibDevicePath = InstallPath + "/nvvm/libdevice";
+ LibPath = InstallPath + (TargetTriple.isArch64Bit() ? "/lib64" : "/lib");
- if (!(D.getVFS().exists(CudaIncludePath) &&
- D.getVFS().exists(CudaLibPath) &&
- D.getVFS().exists(CudaLibDevicePath)))
+ auto &FS = D.getVFS();
+ if (!(FS.exists(IncludePath) && FS.exists(BinPath) && FS.exists(LibPath) &&
+ FS.exists(LibDevicePath)))
continue;
std::error_code EC;
- for (llvm::sys::fs::directory_iterator LI(CudaLibDevicePath, EC), LE;
+ for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef FilePath = LI->path();
StringRef FileName = llvm::sys::path::filename(FilePath);
@@ -1673,41 +1765,74 @@ void Generic_GCC::CudaInstallationDetector::init(
continue;
StringRef GpuArch = FileName.slice(
LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
- CudaLibDeviceMap[GpuArch] = FilePath.str();
+ LibDeviceMap[GpuArch] = FilePath.str();
      // Insert map entries for specific devices with this compute capability.
if (GpuArch == "compute_20") {
- CudaLibDeviceMap["sm_20"] = FilePath;
- CudaLibDeviceMap["sm_21"] = FilePath;
+ LibDeviceMap["sm_20"] = FilePath;
+ LibDeviceMap["sm_21"] = FilePath;
} else if (GpuArch == "compute_30") {
- CudaLibDeviceMap["sm_30"] = FilePath;
- CudaLibDeviceMap["sm_32"] = FilePath;
+ LibDeviceMap["sm_30"] = FilePath;
+ LibDeviceMap["sm_32"] = FilePath;
} else if (GpuArch == "compute_35") {
- CudaLibDeviceMap["sm_35"] = FilePath;
- CudaLibDeviceMap["sm_37"] = FilePath;
+ LibDeviceMap["sm_35"] = FilePath;
+ LibDeviceMap["sm_37"] = FilePath;
+ } else if (GpuArch == "compute_50") {
+ LibDeviceMap["sm_50"] = FilePath;
+ LibDeviceMap["sm_52"] = FilePath;
+ LibDeviceMap["sm_53"] = FilePath;
+ LibDeviceMap["sm_60"] = FilePath;
+ LibDeviceMap["sm_61"] = FilePath;
+ LibDeviceMap["sm_62"] = FilePath;
}
}
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
+ FS.getBufferForFile(InstallPath + "/version.txt");
+ if (!VersionFile) {
+ // CUDA 7.0 doesn't have a version.txt, so guess that's our version if
+ // version.txt isn't present.
+ Version = CudaVersion::CUDA_70;
+ } else {
+ Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
+ }
+
IsValid = true;
break;
}
}
+void Generic_GCC::CudaInstallationDetector::CheckCudaVersionSupportsArch(
+ CudaArch Arch) const {
+ if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
+ ArchsWithVersionTooLowErrors.count(Arch) > 0)
+ return;
+
+ auto RequiredVersion = MinVersionForCudaArch(Arch);
+ if (Version < RequiredVersion) {
+ ArchsWithVersionTooLowErrors.insert(Arch);
+ D.Diag(diag::err_drv_cuda_version_too_low)
+ << InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
+ << CudaVersionToString(RequiredVersion);
+ }
+}
+
void Generic_GCC::CudaInstallationDetector::print(raw_ostream &OS) const {
if (isValid())
- OS << "Found CUDA installation: " << CudaInstallPath << "\n";
+ OS << "Found CUDA installation: " << InstallPath << ", version "
+ << CudaVersionToString(Version) << "\n";
}
namespace {
// Filter to remove Multilibs that don't exist as a suffix to Path
class FilterNonExistent {
- StringRef Base;
+ StringRef Base, File;
vfs::FileSystem &VFS;
public:
- FilterNonExistent(StringRef Base, vfs::FileSystem &VFS)
- : Base(Base), VFS(VFS) {}
+ FilterNonExistent(StringRef Base, StringRef File, vfs::FileSystem &VFS)
+ : Base(Base), File(File), VFS(VFS) {}
bool operator()(const Multilib &M) {
- return !VFS.exists(Base + M.gccSuffix() + "/crtbegin.o");
+ return !VFS.exists(Base + M.gccSuffix() + File);
}
};
} // end anonymous namespace
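
The widened filter above lets callers probe for a per-target file instead of the hardcoded crtbegin.o; a std-only sketch of the same pattern (all names hypothetical):

    #include <algorithm>
    #include <functional>
    #include <string>
    #include <vector>

    // Drop candidate multilib suffixes whose directory lacks the probe file.
    static std::vector<std::string>
    filterExisting(std::vector<std::string> Suffixes, const std::string &Base,
                   const std::string &File,
                   const std::function<bool(const std::string &)> &Exists) {
      Suffixes.erase(std::remove_if(Suffixes.begin(), Suffixes.end(),
                                    [&](const std::string &S) {
                                      return !Exists(Base + S + File);
                                    }),
                     Suffixes.end());
      return Suffixes;
    }
    // e.g. filterExisting({"", "/mips16", "/el"}, GccPath, "/crtbegin.o", fn)
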
@@ -1720,6 +1845,10 @@ static void addMultilibFlag(bool Enabled, const char *const Flag,
Flags.push_back(std::string("-") + Flag);
}
+static bool isArmOrThumbArch(llvm::Triple::ArchType Arch) {
+ return Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb;
+}
+
static bool isMipsArch(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::mips || Arch == llvm::Triple::mipsel ||
Arch == llvm::Triple::mips64 || Arch == llvm::Triple::mips64el;
@@ -1765,38 +1894,151 @@ static Multilib makeMultilib(StringRef commonSuffix) {
return Multilib(commonSuffix, commonSuffix, commonSuffix);
}
-static bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
- StringRef Path, const ArgList &Args,
- DetectedMultilibs &Result) {
- // Some MIPS toolchains put libraries and object files compiled
- // using different options in to the sub-directoris which names
- // reflects the flags used for compilation. For example sysroot
- // directory might looks like the following examples:
- //
- // /usr
- // /lib <= crt*.o files compiled with '-mips32'
- // /mips16
- // /usr
- // /lib <= crt*.o files compiled with '-mips16'
- // /el
- // /usr
- // /lib <= crt*.o files compiled with '-mips16 -EL'
- //
- // or
- //
- // /usr
- // /lib <= crt*.o files compiled with '-mips32r2'
- // /mips16
- // /usr
- // /lib <= crt*.o files compiled with '-mips32r2 -mips16'
- // /mips32
- // /usr
- // /lib <= crt*.o files compiled with '-mips32'
-
- FilterNonExistent NonExistent(Path, D.getVFS());
-
- // Check for FSF toolchain multilibs
- MultilibSet FSFMipsMultilibs;
+static bool findMipsCsMultilibs(const Multilib::flags_list &Flags,
+ FilterNonExistent &NonExistent,
+ DetectedMultilibs &Result) {
+ // Check for Code Sourcery toolchain multilibs
+ MultilibSet CSMipsMultilibs;
+ {
+ auto MArchMips16 = makeMultilib("/mips16").flag("+m32").flag("+mips16");
+
+ auto MArchMicroMips =
+ makeMultilib("/micromips").flag("+m32").flag("+mmicromips");
+
+ auto MArchDefault = makeMultilib("").flag("-mips16").flag("-mmicromips");
+
+ auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
+
+ auto SoftFloat = makeMultilib("/soft-float").flag("+msoft-float");
+
+ auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
+
+ auto DefaultFloat =
+ makeMultilib("").flag("-msoft-float").flag("-mnan=2008");
+
+ auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
+
+ auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
+
+ // Note that this one's osSuffix is ""
+ auto MAbi64 = makeMultilib("")
+ .gccSuffix("/64")
+ .includeSuffix("/64")
+ .flag("+mabi=n64")
+ .flag("-mabi=n32")
+ .flag("-m32");
+
+ CSMipsMultilibs =
+ MultilibSet()
+ .Either(MArchMips16, MArchMicroMips, MArchDefault)
+ .Maybe(UCLibc)
+ .Either(SoftFloat, Nan2008, DefaultFloat)
+ .FilterOut("/micromips/nan2008")
+ .FilterOut("/mips16/nan2008")
+ .Either(BigEndian, LittleEndian)
+ .Maybe(MAbi64)
+ .FilterOut("/mips16.*/64")
+ .FilterOut("/micromips.*/64")
+ .FilterOut(NonExistent)
+ .setIncludeDirsCallback([](const Multilib &M) {
+ std::vector<std::string> Dirs({"/include"});
+ if (StringRef(M.includeSuffix()).startswith("/uclibc"))
+ Dirs.push_back(
+ "/../../../../mips-linux-gnu/libc/uclibc/usr/include");
+ else
+ Dirs.push_back("/../../../../mips-linux-gnu/libc/usr/include");
+ return Dirs;
+ });
+ }
+
+ MultilibSet DebianMipsMultilibs;
+ {
+ Multilib MAbiN32 =
+ Multilib().gccSuffix("/n32").includeSuffix("/n32").flag("+mabi=n32");
+
+ Multilib M64 = Multilib()
+ .gccSuffix("/64")
+ .includeSuffix("/64")
+ .flag("+m64")
+ .flag("-m32")
+ .flag("-mabi=n32");
+
+ Multilib M32 = Multilib().flag("-m64").flag("+m32").flag("-mabi=n32");
+
+ DebianMipsMultilibs =
+ MultilibSet().Either(M32, M64, MAbiN32).FilterOut(NonExistent);
+ }
+
+ // Sort the candidates. The toolchain that best matches the directory tree goes
+ // first. Then select the first toolchain whose multilibs match the command line flags.
+ MultilibSet *Candidates[] = {&CSMipsMultilibs, &DebianMipsMultilibs};
+ if (CSMipsMultilibs.size() < DebianMipsMultilibs.size())
+ std::iter_swap(Candidates, Candidates + 1);
+ for (const MultilibSet *Candidate : Candidates) {
+ if (Candidate->select(Flags, Result.SelectedMultilib)) {
+ if (Candidate == &DebianMipsMultilibs)
+ Result.BiarchSibling = Multilib();
+ Result.Multilibs = *Candidate;
+ return true;
+ }
+ }
+ return false;
+}
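For illustration, a minimal sketch of the selection rule that the Either/Maybe/FilterOut
chains above rely on: addMultilibFlag records exactly one of "+X" or "-X" per
property, and a candidate multilib matches when every flag it declares appears
verbatim in that list. The type and helper below are hypothetical stand-ins for
the real MultilibSet API, not part of this change.

#include <algorithm>
#include <string>
#include <vector>

struct CandidateSketch {
  std::string Suffix;             // e.g. "/mips16/el"
  std::vector<std::string> Flags; // e.g. {"+m32", "+mips16", "+EL"}
};

static const CandidateSketch *
selectSketch(const std::vector<CandidateSketch> &Set,
             const std::vector<std::string> &CompileFlags) {
  for (const CandidateSketch &C : Set) {
    bool Matches = std::all_of(
        C.Flags.begin(), C.Flags.end(), [&](const std::string &F) {
          return std::find(CompileFlags.begin(), CompileFlags.end(), F) !=
                 CompileFlags.end();
        });
    if (Matches)
      return &C; // first matching candidate wins
  }
  return nullptr; // no multilib is compatible with the requested flags
}

Under this model, compile flags built with m32, mips16, and EL enabled would
select a "/mips16/el" candidate from the Code Sourcery set above.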
+
+static bool findMipsAndroidMultilibs(const Multilib::flags_list &Flags,
+ FilterNonExistent &NonExistent,
+ DetectedMultilibs &Result) {
+
+ MultilibSet AndroidMipsMultilibs =
+ MultilibSet()
+ .Maybe(Multilib("/mips-r2").flag("+march=mips32r2"))
+ .Maybe(Multilib("/mips-r6").flag("+march=mips32r6"))
+ .FilterOut(NonExistent);
+
+ if (AndroidMipsMultilibs.select(Flags, Result.SelectedMultilib)) {
+ Result.Multilibs = AndroidMipsMultilibs;
+ return true;
+ }
+ return false;
+}
+
+static bool findMipsMuslMultilibs(const Multilib::flags_list &Flags,
+ FilterNonExistent &NonExistent,
+ DetectedMultilibs &Result) {
+ // Musl toolchain multilibs
+ MultilibSet MuslMipsMultilibs;
+ {
+ auto MArchMipsR2 = makeMultilib("")
+ .osSuffix("/mips-r2-hard-musl")
+ .flag("+EB")
+ .flag("-EL")
+ .flag("+march=mips32r2");
+
+ auto MArchMipselR2 = makeMultilib("/mipsel-r2-hard-musl")
+ .flag("-EB")
+ .flag("+EL")
+ .flag("+march=mips32r2");
+
+ MuslMipsMultilibs = MultilibSet().Either(MArchMipsR2, MArchMipselR2);
+
+ // Specify the callback that computes the include directories.
+ MuslMipsMultilibs.setIncludeDirsCallback([](const Multilib &M) {
+ return std::vector<std::string>(
+ {"/../sysroot" + M.osSuffix() + "/usr/include"});
+ });
+ }
+ if (MuslMipsMultilibs.select(Flags, Result.SelectedMultilib)) {
+ Result.Multilibs = MuslMipsMultilibs;
+ return true;
+ }
+ return false;
+}
+
+static bool findMipsMtiMultilibs(const Multilib::flags_list &Flags,
+ FilterNonExistent &NonExistent,
+ DetectedMultilibs &Result) {
+ // CodeScape MTI toolchain v1.2 and earlier.
+ MultilibSet MtiMipsMultilibsV1;
{
auto MArchMips32 = makeMultilib("/mips32")
.flag("+m32")
@@ -1838,7 +2080,7 @@ static bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
- FSFMipsMultilibs =
+ MtiMipsMultilibsV1 =
MultilibSet()
.Either(MArchMips32, MArchMicroMips, MArchMips64r2, MArchMips64,
MArchDefault)
@@ -1857,128 +2099,118 @@ static bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
.Maybe(Nan2008)
.FilterOut(".*sof/nan2008")
.FilterOut(NonExistent)
- .setIncludeDirsCallback([](StringRef InstallDir,
- StringRef TripleStr, const Multilib &M) {
- std::vector<std::string> Dirs;
- Dirs.push_back((InstallDir + "/include").str());
- std::string SysRootInc =
- InstallDir.str() + "/../../../../sysroot";
+ .setIncludeDirsCallback([](const Multilib &M) {
+ std::vector<std::string> Dirs({"/include"});
if (StringRef(M.includeSuffix()).startswith("/uclibc"))
- Dirs.push_back(SysRootInc + "/uclibc/usr/include");
+ Dirs.push_back("/../../../../sysroot/uclibc/usr/include");
else
- Dirs.push_back(SysRootInc + "/usr/include");
+ Dirs.push_back("/../../../../sysroot/usr/include");
return Dirs;
});
}
- // Check for Musl toolchain multilibs
- MultilibSet MuslMipsMultilibs;
+ // CodeScape MTI toolchain v1.3 and later.
+ MultilibSet MtiMipsMultilibsV2;
{
- auto MArchMipsR2 = makeMultilib("")
- .osSuffix("/mips-r2-hard-musl")
- .flag("+EB")
- .flag("-EL")
- .flag("+march=mips32r2");
-
- auto MArchMipselR2 = makeMultilib("/mipsel-r2-hard-musl")
- .flag("-EB")
- .flag("+EL")
- .flag("+march=mips32r2");
-
- MuslMipsMultilibs = MultilibSet().Either(MArchMipsR2, MArchMipselR2);
-
- // Specify the callback that computes the include directories.
- MuslMipsMultilibs.setIncludeDirsCallback([](
- StringRef InstallDir, StringRef TripleStr, const Multilib &M) {
- std::vector<std::string> Dirs;
- Dirs.push_back(
- (InstallDir + "/../sysroot" + M.osSuffix() + "/usr/include").str());
- return Dirs;
- });
- }
-
- // Check for Code Sourcery toolchain multilibs
- MultilibSet CSMipsMultilibs;
- {
- auto MArchMips16 = makeMultilib("/mips16").flag("+m32").flag("+mips16");
-
- auto MArchMicroMips =
- makeMultilib("/micromips").flag("+m32").flag("+mmicromips");
-
- auto MArchDefault = makeMultilib("").flag("-mips16").flag("-mmicromips");
-
- auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
-
- auto SoftFloat = makeMultilib("/soft-float").flag("+msoft-float");
-
- auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
-
- auto DefaultFloat =
- makeMultilib("").flag("-msoft-float").flag("-mnan=2008");
-
- auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
-
- auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
-
- // Note that this one's osSuffix is ""
- auto MAbi64 = makeMultilib("")
- .gccSuffix("/64")
- .includeSuffix("/64")
- .flag("+mabi=n64")
- .flag("-mabi=n32")
- .flag("-m32");
-
- CSMipsMultilibs =
+ auto BeHard = makeMultilib("/mips-r2-hard")
+ .flag("+EB")
+ .flag("-msoft-float")
+ .flag("-mnan=2008")
+ .flag("-muclibc");
+ auto BeSoft = makeMultilib("/mips-r2-soft")
+ .flag("+EB")
+ .flag("+msoft-float")
+ .flag("-mnan=2008");
+ auto ElHard = makeMultilib("/mipsel-r2-hard")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("-mnan=2008")
+ .flag("-muclibc");
+ auto ElSoft = makeMultilib("/mipsel-r2-soft")
+ .flag("+EL")
+ .flag("+msoft-float")
+ .flag("-mnan=2008")
+ .flag("-mmicromips");
+ auto BeHardNan = makeMultilib("/mips-r2-hard-nan2008")
+ .flag("+EB")
+ .flag("-msoft-float")
+ .flag("+mnan=2008")
+ .flag("-muclibc");
+ auto ElHardNan = makeMultilib("/mipsel-r2-hard-nan2008")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("+mnan=2008")
+ .flag("-muclibc")
+ .flag("-mmicromips");
+ auto BeHardNanUclibc = makeMultilib("/mips-r2-hard-nan2008-uclibc")
+ .flag("+EB")
+ .flag("-msoft-float")
+ .flag("+mnan=2008")
+ .flag("+muclibc");
+ auto ElHardNanUclibc = makeMultilib("/mipsel-r2-hard-nan2008-uclibc")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("+mnan=2008")
+ .flag("+muclibc");
+ auto BeHardUclibc = makeMultilib("/mips-r2-hard-uclibc")
+ .flag("+EB")
+ .flag("-msoft-float")
+ .flag("-mnan=2008")
+ .flag("+muclibc");
+ auto ElHardUclibc = makeMultilib("/mipsel-r2-hard-uclibc")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("-mnan=2008")
+ .flag("+muclibc");
+ auto ElMicroHardNan = makeMultilib("/micromipsel-r2-hard-nan2008")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("+mnan=2008")
+ .flag("+mmicromips");
+ auto ElMicroSoft = makeMultilib("/micromipsel-r2-soft")
+ .flag("+EL")
+ .flag("+msoft-float")
+ .flag("-mnan=2008")
+ .flag("+mmicromips");
+
+ auto O32 =
+ makeMultilib("/lib").osSuffix("").flag("-mabi=n32").flag("-mabi=n64");
+ auto N32 =
+ makeMultilib("/lib32").osSuffix("").flag("+mabi=n32").flag("-mabi=n64");
+ auto N64 =
+ makeMultilib("/lib64").osSuffix("").flag("-mabi=n32").flag("+mabi=n64");
+
+ MtiMipsMultilibsV2 =
MultilibSet()
- .Either(MArchMips16, MArchMicroMips, MArchDefault)
- .Maybe(UCLibc)
- .Either(SoftFloat, Nan2008, DefaultFloat)
- .FilterOut("/micromips/nan2008")
- .FilterOut("/mips16/nan2008")
- .Either(BigEndian, LittleEndian)
- .Maybe(MAbi64)
- .FilterOut("/mips16.*/64")
- .FilterOut("/micromips.*/64")
+ .Either({BeHard, BeSoft, ElHard, ElSoft, BeHardNan, ElHardNan,
+ BeHardNanUclibc, ElHardNanUclibc, BeHardUclibc,
+ ElHardUclibc, ElMicroHardNan, ElMicroSoft})
+ .Either(O32, N32, N64)
.FilterOut(NonExistent)
- .setIncludeDirsCallback([](StringRef InstallDir,
- StringRef TripleStr, const Multilib &M) {
- std::vector<std::string> Dirs;
- Dirs.push_back((InstallDir + "/include").str());
- std::string SysRootInc =
- InstallDir.str() + "/../../../../" + TripleStr.str();
- if (StringRef(M.includeSuffix()).startswith("/uclibc"))
- Dirs.push_back(SysRootInc + "/libc/uclibc/usr/include");
- else
- Dirs.push_back(SysRootInc + "/libc/usr/include");
- return Dirs;
+ .setIncludeDirsCallback([](const Multilib &M) {
+ return std::vector<std::string>({"/../../../../sysroot" +
+ M.includeSuffix() +
+ "/../usr/include"});
+ })
+ .setFilePathsCallback([](const Multilib &M) {
+ return std::vector<std::string>(
+ {"/../../../../mips-mti-linux-gnu/lib" + M.gccSuffix()});
});
}
-
- MultilibSet AndroidMipsMultilibs =
- MultilibSet()
- .Maybe(Multilib("/mips-r2").flag("+march=mips32r2"))
- .Maybe(Multilib("/mips-r6").flag("+march=mips32r6"))
- .FilterOut(NonExistent);
-
- MultilibSet DebianMipsMultilibs;
- {
- Multilib MAbiN32 =
- Multilib().gccSuffix("/n32").includeSuffix("/n32").flag("+mabi=n32");
-
- Multilib M64 = Multilib()
- .gccSuffix("/64")
- .includeSuffix("/64")
- .flag("+m64")
- .flag("-m32")
- .flag("-mabi=n32");
-
- Multilib M32 = Multilib().flag("-m64").flag("+m32").flag("-mabi=n32");
-
- DebianMipsMultilibs =
- MultilibSet().Either(M32, M64, MAbiN32).FilterOut(NonExistent);
+ for (auto Candidate : {&MtiMipsMultilibsV1, &MtiMipsMultilibsV2}) {
+ if (Candidate->select(Flags, Result.SelectedMultilib)) {
+ Result.Multilibs = *Candidate;
+ return true;
+ }
}
+ return false;
+}
- MultilibSet ImgMultilibs;
+static bool findMipsImgMultilibs(const Multilib::flags_list &Flags,
+ FilterNonExistent &NonExistent,
+ DetectedMultilibs &Result) {
+ // CodeScape IMG toolchain v1.2 and earlier.
+ MultilibSet ImgMultilibsV1;
{
auto Mips64r6 = makeMultilib("/mips64r6").flag("+m64").flag("-m32");
@@ -1987,22 +2219,91 @@ static bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
auto MAbi64 =
makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
- ImgMultilibs =
+ ImgMultilibsV1 =
MultilibSet()
.Maybe(Mips64r6)
.Maybe(MAbi64)
.Maybe(LittleEndian)
.FilterOut(NonExistent)
- .setIncludeDirsCallback([](StringRef InstallDir,
- StringRef TripleStr, const Multilib &M) {
- std::vector<std::string> Dirs;
- Dirs.push_back((InstallDir + "/include").str());
- Dirs.push_back(
- (InstallDir + "/../../../../sysroot/usr/include").str());
- return Dirs;
+ .setIncludeDirsCallback([](const Multilib &M) {
+ return std::vector<std::string>(
+ {"/include", "/../../../../sysroot/usr/include"});
});
}
+ // CodeScape IMG toolchain v1.3 and later.
+ MultilibSet ImgMultilibsV2;
+ {
+ auto BeHard = makeMultilib("/mips-r6-hard")
+ .flag("+EB")
+ .flag("-msoft-float")
+ .flag("-mmicromips");
+ auto BeSoft = makeMultilib("/mips-r6-soft")
+ .flag("+EB")
+ .flag("+msoft-float")
+ .flag("-mmicromips");
+ auto ElHard = makeMultilib("/mipsel-r6-hard")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("-mmicromips");
+ auto ElSoft = makeMultilib("/mipsel-r6-soft")
+ .flag("+EL")
+ .flag("+msoft-float")
+ .flag("-mmicromips");
+ auto BeMicroHard = makeMultilib("/micromips-r6-hard")
+ .flag("+EB")
+ .flag("-msoft-float")
+ .flag("+mmicromips");
+ auto BeMicroSoft = makeMultilib("/micromips-r6-soft")
+ .flag("+EB")
+ .flag("+msoft-float")
+ .flag("+mmicromips");
+ auto ElMicroHard = makeMultilib("/micromipsel-r6-hard")
+ .flag("+EL")
+ .flag("-msoft-float")
+ .flag("+mmicromips");
+ auto ElMicroSoft = makeMultilib("/micromipsel-r6-soft")
+ .flag("+EL")
+ .flag("+msoft-float")
+ .flag("+mmicromips");
+
+ auto O32 =
+ makeMultilib("/lib").osSuffix("").flag("-mabi=n32").flag("-mabi=n64");
+ auto N32 =
+ makeMultilib("/lib32").osSuffix("").flag("+mabi=n32").flag("-mabi=n64");
+ auto N64 =
+ makeMultilib("/lib64").osSuffix("").flag("-mabi=n32").flag("+mabi=n64");
+
+ ImgMultilibsV2 =
+ MultilibSet()
+ .Either({BeHard, BeSoft, ElHard, ElSoft, BeMicroHard, BeMicroSoft,
+ ElMicroHard, ElMicroSoft})
+ .Either(O32, N32, N64)
+ .FilterOut(NonExistent)
+ .setIncludeDirsCallback([](const Multilib &M) {
+ return std::vector<std::string>({"/../../../../sysroot" +
+ M.includeSuffix() +
+ "/../usr/include"});
+ })
+ .setFilePathsCallback([](const Multilib &M) {
+ return std::vector<std::string>(
+ {"/../../../../mips-img-linux-gnu/lib" + M.gccSuffix()});
+ });
+ }
+ for (auto Candidate : {&ImgMultilibsV1, &ImgMultilibsV2}) {
+ if (Candidate->select(Flags, Result.SelectedMultilib)) {
+ Result.Multilibs = *Candidate;
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
+ StringRef Path, const ArgList &Args,
+ DetectedMultilibs &Result) {
+ FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
+
StringRef CPUName;
StringRef ABIName;
tools::mips::getMipsCPUAndABI(Args, TargetTriple, CPUName, ABIName);
@@ -2033,67 +2334,82 @@ static bool findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
addMultilibFlag(isMipsEL(TargetArch), "EL", Flags);
addMultilibFlag(!isMipsEL(TargetArch), "EB", Flags);
- if (TargetTriple.isAndroid()) {
- // Select Android toolchain. It's the only choice in that case.
- if (AndroidMipsMultilibs.select(Flags, Result.SelectedMultilib)) {
- Result.Multilibs = AndroidMipsMultilibs;
- return true;
- }
- return false;
- }
+ if (TargetTriple.isAndroid())
+ return findMipsAndroidMultilibs(Flags, NonExistent, Result);
if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
- TargetTriple.getEnvironment() == llvm::Triple::UnknownEnvironment) {
- if (MuslMipsMultilibs.select(Flags, Result.SelectedMultilib)) {
- Result.Multilibs = MuslMipsMultilibs;
- return true;
- }
- return false;
- }
+ TargetTriple.getEnvironment() == llvm::Triple::UnknownEnvironment)
+ return findMipsMuslMultilibs(Flags, NonExistent, Result);
+
+ if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
+ TargetTriple.getOS() == llvm::Triple::Linux &&
+ TargetTriple.getEnvironment() == llvm::Triple::GNU)
+ return findMipsMtiMultilibs(Flags, NonExistent, Result);
if (TargetTriple.getVendor() == llvm::Triple::ImaginationTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
- TargetTriple.getEnvironment() == llvm::Triple::GNU) {
- // Select mips-img-linux-gnu toolchain.
- if (ImgMultilibs.select(Flags, Result.SelectedMultilib)) {
- Result.Multilibs = ImgMultilibs;
- return true;
- }
- return false;
- }
+ TargetTriple.getEnvironment() == llvm::Triple::GNU)
+ return findMipsImgMultilibs(Flags, NonExistent, Result);
- // Sort candidates. Toolchain that best meets the directories goes first.
- // Then select the first toolchains matches command line flags.
- MultilibSet *candidates[] = {&DebianMipsMultilibs, &FSFMipsMultilibs,
- &CSMipsMultilibs};
- std::sort(
- std::begin(candidates), std::end(candidates),
- [](MultilibSet *a, MultilibSet *b) { return a->size() > b->size(); });
- for (const auto &candidate : candidates) {
- if (candidate->select(Flags, Result.SelectedMultilib)) {
- if (candidate == &DebianMipsMultilibs)
- Result.BiarchSibling = Multilib();
- Result.Multilibs = *candidate;
- return true;
- }
- }
+ if (findMipsCsMultilibs(Flags, NonExistent, Result))
+ return true;
- {
- // Fallback to the regular toolchain-tree structure.
- Multilib Default;
- Result.Multilibs.push_back(Default);
- Result.Multilibs.FilterOut(NonExistent);
+ // Fall back to the regular toolchain-tree structure.
+ Multilib Default;
+ Result.Multilibs.push_back(Default);
+ Result.Multilibs.FilterOut(NonExistent);
- if (Result.Multilibs.select(Flags, Result.SelectedMultilib)) {
- Result.BiarchSibling = Multilib();
- return true;
- }
+ if (Result.Multilibs.select(Flags, Result.SelectedMultilib)) {
+ Result.BiarchSibling = Multilib();
+ return true;
}
return false;
}
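To summarize the rewritten dispatch (a hedged reading of the code above, not
upstream documentation):

// Android triple                      -> findMipsAndroidMultilibs
// mti vendor, Linux, no environment   -> findMipsMuslMultilibs
// mti vendor, Linux, GNU environment  -> findMipsMtiMultilibs
// img vendor, Linux, GNU environment  -> findMipsImgMultilibs
// anything else                       -> Code Sourcery / Debian sets, then the
//                                        bare-directory fallback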
+static void findAndroidArmMultilibs(const Driver &D,
+ const llvm::Triple &TargetTriple,
+ StringRef Path, const ArgList &Args,
+ DetectedMultilibs &Result) {
+ // Find multilibs with subdirectories like armv7-a, thumb, armv7-a/thumb.
+ FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
+ Multilib ArmV7Multilib = makeMultilib("/armv7-a")
+ .flag("+armv7")
+ .flag("-thumb");
+ Multilib ThumbMultilib = makeMultilib("/thumb")
+ .flag("-armv7")
+ .flag("+thumb");
+ Multilib ArmV7ThumbMultilib = makeMultilib("/armv7-a/thumb")
+ .flag("+armv7")
+ .flag("+thumb");
+ Multilib DefaultMultilib = makeMultilib("")
+ .flag("-armv7")
+ .flag("-thumb");
+ MultilibSet AndroidArmMultilibs =
+ MultilibSet()
+ .Either(ThumbMultilib, ArmV7Multilib,
+ ArmV7ThumbMultilib, DefaultMultilib)
+ .FilterOut(NonExistent);
+
+ Multilib::flags_list Flags;
+ llvm::StringRef Arch = Args.getLastArgValue(options::OPT_march_EQ);
+ bool IsArmArch = TargetTriple.getArch() == llvm::Triple::arm;
+ bool IsThumbArch = TargetTriple.getArch() == llvm::Triple::thumb;
+ bool IsV7SubArch = TargetTriple.getSubArch() == llvm::Triple::ARMSubArch_v7;
+ bool IsThumbMode = IsThumbArch ||
+ Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, false) ||
+ (IsArmArch && llvm::ARM::parseArchISA(Arch) == llvm::ARM::IK_THUMB);
+ bool IsArmV7Mode = (IsArmArch || IsThumbArch) &&
+ (llvm::ARM::parseArchVersion(Arch) == 7 ||
+ (IsArmArch && Arch == "" && IsV7SubArch));
+ addMultilibFlag(IsArmV7Mode, "armv7", Flags);
+ addMultilibFlag(IsThumbMode, "thumb", Flags);
+
+ if (AndroidArmMultilibs.select(Flags, Result.SelectedMultilib))
+ Result.Multilibs = AndroidArmMultilibs;
+}
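A small sketch of how the two flags computed above map onto the four
subdirectories; the helper is hypothetical and only restates the Either(...)
set, assuming the chosen directory exists on disk.

#include <string>

static std::string androidArmSuffixSketch(bool IsArmV7Mode, bool IsThumbMode) {
  if (IsArmV7Mode && IsThumbMode)
    return "/armv7-a/thumb"; // e.g. -march=armv7-a -mthumb
  if (IsArmV7Mode)
    return "/armv7-a";       // e.g. -march=armv7-a
  if (IsThumbMode)
    return "/thumb";         // e.g. -mthumb
  return "";                 // default ARM libraries
}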
+
static bool findBiarchMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
@@ -2126,7 +2442,9 @@ static bool findBiarchMultilibs(const Driver &D,
.flag("-m64")
.flag("+mx32");
- FilterNonExistent NonExistent(Path, D.getVFS());
+ // GCC toolchain for IAMCU doesn't have crtbegin.o, so look for libgcc.a.
+ FilterNonExistent NonExistent(
+ Path, TargetTriple.isOSIAMCU() ? "/libgcc.a" : "/crtbegin.o", D.getVFS());
// Determine default multilib from: 32, 64, x32
// Also handle cases such as 64 on 32, 32 on 64, etc.
@@ -2285,9 +2603,13 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
DetectedMultilibs Detected;
+ // An Android standalone toolchain may provide multilibs for ARM and Thumb.
// Debian mips multilibs behave more like the rest of the biarch ones,
// so handle them there
- if (isMipsArch(TargetArch)) {
+ if (isArmOrThumbArch(TargetArch) && TargetTriple.isAndroid()) {
+ // It should also work without multilibs in a simplified toolchain.
+ findAndroidArmMultilibs(D, TargetTriple, LI->getName(), Args, Detected);
+ } else if (isMipsArch(TargetArch)) {
if (!findMIPSMultilibs(D, TargetTriple, LI->getName(), Args, Detected))
continue;
} else if (!findBiarchMultilibs(D, TargetTriple, LI->getName(), Args,
@@ -2378,6 +2700,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const {
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::systemz:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
return true;
default:
return false;
@@ -2417,7 +2741,6 @@ bool Generic_GCC::addLibStdCXXIncludePaths(
return true;
}
-
void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
@@ -2473,10 +2796,9 @@ void MipsLLVMToolChain::AddClangSystemIncludeArgs(
const auto &Callback = Multilibs.includeDirsCallback();
if (Callback) {
- const auto IncludePaths =
- Callback(D.getInstalledDir(), getTripleString(), SelectedMultilib);
- for (const auto &Path : IncludePaths)
- addExternCSystemIncludeIfExists(DriverArgs, CC1Args, Path);
+ for (const auto &Path : Callback(SelectedMultilib))
+ addExternCSystemIncludeIfExists(DriverArgs, CC1Args,
+ D.getInstalledDir() + Path);
}
}
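This hunk shows the calling-convention change that runs through this commit:
includeDirsCallback() implementations now return paths relative to the
toolchain's install directory, and each caller prepends its own base. A minimal
sketch of the caller-side pattern, with hypothetical names:

#include <functional>
#include <string>
#include <vector>

using IncludeDirsFnSketch = std::function<std::vector<std::string>()>;

static std::vector<std::string>
resolveIncludeDirsSketch(const std::string &InstallDir,
                         const IncludeDirsFnSketch &Callback) {
  std::vector<std::string> Resolved;
  for (const std::string &Rel : Callback())
    Resolved.push_back(InstallDir + Rel); // e.g. base + "/include"
  return Resolved;
}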
@@ -2521,11 +2843,10 @@ void MipsLLVMToolChain::AddClangCXXStdlibIncludeArgs(
const auto &Callback = Multilibs.includeDirsCallback();
if (Callback) {
- const auto IncludePaths = Callback(getDriver().getInstalledDir(),
- getTripleString(), SelectedMultilib);
- for (const auto &Path : IncludePaths) {
- if (llvm::sys::fs::exists(Path + "/c++/v1")) {
- addSystemInclude(DriverArgs, CC1Args, Path + "/c++/v1");
+ for (std::string Path : Callback(SelectedMultilib)) {
+ Path = getDriver().getInstalledDir() + Path + "/c++/v1";
+ if (llvm::sys::fs::exists(Path)) {
+ addSystemInclude(DriverArgs, CC1Args, Path);
break;
}
}
@@ -2569,14 +2890,9 @@ std::string HexagonToolChain::getHexagonTargetDir(
if (getVFS().exists(InstallRelDir = InstalledDir + "/../target"))
return InstallRelDir;
- std::string PrefixRelDir = std::string(LLVM_PREFIX) + "/target";
- if (getVFS().exists(PrefixRelDir))
- return PrefixRelDir;
-
return InstallRelDir;
}
-
Optional<unsigned> HexagonToolChain::getSmallDataThreshold(
const ArgList &Args) {
StringRef Gn = "";
@@ -2595,7 +2911,6 @@ Optional<unsigned> HexagonToolChain::getSmallDataThreshold(
return None;
}
-
void HexagonToolChain::getHexagonLibraryPaths(const ArgList &Args,
ToolChain::path_list &LibPaths) const {
const Driver &D = getDriver();
@@ -2971,6 +3286,61 @@ Tool *CloudABI::buildLinker() const {
return new tools::cloudabi::Linker(*this);
}
+bool CloudABI::isPIEDefault() const {
+ // Only enable PIE on architectures that support PC-relative
+ // addressing. PC-relative addressing is required, as the process
+ // startup code must be able to relocate itself.
+ switch (getTriple().getArch()) {
+ case llvm::Triple::aarch64:
+ case llvm::Triple::x86_64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+SanitizerMask CloudABI::getSupportedSanitizers() const {
+ SanitizerMask Res = ToolChain::getSupportedSanitizers();
+ Res |= SanitizerKind::SafeStack;
+ return Res;
+}
+
+SanitizerMask CloudABI::getDefaultSanitizers() const {
+ return SanitizerKind::SafeStack;
+}
+
+/// Haiku - Haiku tool chain which can call as(1) and ld(1) directly.
+
+Haiku::Haiku(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
+ : Generic_ELF(D, Triple, Args) {
+
+}
+
+void Haiku::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
+ ArgStringList &CC1Args) const {
+ if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
+ DriverArgs.hasArg(options::OPT_nostdincxx))
+ return;
+
+ switch (GetCXXStdlibType(DriverArgs)) {
+ case ToolChain::CST_Libcxx:
+ addSystemInclude(DriverArgs, CC1Args,
+ getDriver().SysRoot + "/system/develop/headers/c++/v1");
+ break;
+ case ToolChain::CST_Libstdcxx:
+ addSystemInclude(DriverArgs, CC1Args,
+ getDriver().SysRoot + "/system/develop/headers/c++");
+ addSystemInclude(DriverArgs, CC1Args,
+ getDriver().SysRoot + "/system/develop/headers/c++/backward");
+
+ StringRef Triple = getTriple().str();
+ addSystemInclude(DriverArgs, CC1Args,
+ getDriver().SysRoot + "/system/develop/headers/c++/" +
+ Triple);
+ break;
+ }
+}
+
/// OpenBSD - OpenBSD tool chain which can call as(1) and ld(1) directly.
OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple,
@@ -3000,16 +3370,7 @@ Tool *Bitrig::buildAssembler() const {
Tool *Bitrig::buildLinker() const { return new tools::bitrig::Linker(*this); }
-ToolChain::CXXStdlibType Bitrig::GetCXXStdlibType(const ArgList &Args) const {
- if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
- StringRef Value = A->getValue();
- if (Value == "libstdc++")
- return ToolChain::CST_Libstdcxx;
- if (Value == "libc++")
- return ToolChain::CST_Libcxx;
-
- getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
- }
+ToolChain::CXXStdlibType Bitrig::GetDefaultCXXStdlibType() const {
return ToolChain::CST_Libcxx;
}
@@ -3073,16 +3434,7 @@ FreeBSD::FreeBSD(const Driver &D, const llvm::Triple &Triple,
getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
}
-ToolChain::CXXStdlibType FreeBSD::GetCXXStdlibType(const ArgList &Args) const {
- if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
- StringRef Value = A->getValue();
- if (Value == "libstdc++")
- return ToolChain::CST_Libstdcxx;
- if (Value == "libc++")
- return ToolChain::CST_Libcxx;
-
- getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
- }
+ToolChain::CXXStdlibType FreeBSD::GetDefaultCXXStdlibType() const {
if (getTriple().getOSMajorVersion() >= 10)
return ToolChain::CST_Libcxx;
return ToolChain::CST_Libstdcxx;
@@ -3170,7 +3522,6 @@ SanitizerMask FreeBSD::getSupportedSanitizers() const {
NetBSD::NetBSD(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
-
if (getDriver().UseStdLib) {
// When targeting a 32-bit platform, try the special directory used on
// 64-bit hosts, and only fall back to the main library directory if that
@@ -3226,20 +3577,10 @@ Tool *NetBSD::buildAssembler() const {
Tool *NetBSD::buildLinker() const { return new tools::netbsd::Linker(*this); }
-ToolChain::CXXStdlibType NetBSD::GetCXXStdlibType(const ArgList &Args) const {
- if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
- StringRef Value = A->getValue();
- if (Value == "libstdc++")
- return ToolChain::CST_Libstdcxx;
- if (Value == "libc++")
- return ToolChain::CST_Libcxx;
-
- getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
- }
-
+ToolChain::CXXStdlibType NetBSD::GetDefaultCXXStdlibType() const {
unsigned Major, Minor, Micro;
getTriple().getOSVersion(Major, Minor, Micro);
- if (Major >= 7 || (Major == 6 && Minor == 99 && Micro >= 49) || Major == 0) {
+ if (Major >= 7 || Major == 0) {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::arm:
@@ -3381,7 +3722,6 @@ enum Distro {
DebianJessie,
DebianStretch,
Exherbo,
- RHEL4,
RHEL5,
RHEL6,
RHEL7,
@@ -3408,7 +3748,7 @@ enum Distro {
};
static bool IsRedhat(enum Distro Distro) {
- return Distro == Fedora || (Distro >= RHEL4 && Distro <= RHEL7);
+ return Distro == Fedora || (Distro >= RHEL5 && Distro <= RHEL7);
}
static bool IsOpenSUSE(enum Distro Distro) { return Distro == OpenSUSE; }
@@ -3450,7 +3790,8 @@ static Distro DetectDistro(const Driver &D, llvm::Triple::ArchType Arch) {
.Case("wily", UbuntuWily)
.Case("xenial", UbuntuXenial)
.Default(UnknownDistro);
- return Version;
+ if (Version != UnknownDistro)
+ return Version;
}
File = llvm::MemoryBuffer::getFile("/etc/redhat-release");
@@ -3459,15 +3800,14 @@ static Distro DetectDistro(const Driver &D, llvm::Triple::ArchType Arch) {
if (Data.startswith("Fedora release"))
return Fedora;
if (Data.startswith("Red Hat Enterprise Linux") ||
- Data.startswith("CentOS")) {
+ Data.startswith("CentOS") ||
+ Data.startswith("Scientific Linux")) {
if (Data.find("release 7") != StringRef::npos)
return RHEL7;
else if (Data.find("release 6") != StringRef::npos)
return RHEL6;
else if (Data.find("release 5") != StringRef::npos)
return RHEL5;
- else if (Data.find("release 4") != StringRef::npos)
- return RHEL4;
}
return UnknownDistro;
}
@@ -3640,6 +3980,15 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
return Triple.isArch32Bit() ? "lib" : "lib64";
}
+static void addMultilibsFilePaths(const Driver &D, const MultilibSet &Multilibs,
+ const Multilib &Multilib,
+ StringRef InstallPath,
+ ToolChain::path_list &Paths) {
+ if (const auto &PathsCallback = Multilibs.filePathsCallback())
+ for (const auto &Path : PathsCallback(Multilib))
+ addPathIfExists(D, InstallPath + Path, Paths);
+}
+
Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
GCCInstallation.init(Triple, Args);
@@ -3692,13 +4041,12 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
ExtraOpts.push_back("--hash-style=both");
}
- if (IsRedhat(Distro))
+ if (IsRedhat(Distro) && Distro != RHEL5 && Distro != RHEL6)
ExtraOpts.push_back("--no-add-needed");
- if ((IsDebian(Distro) && Distro >= DebianSqueeze) || IsOpenSUSE(Distro) ||
- (IsRedhat(Distro) && Distro != RHEL4 && Distro != RHEL5) ||
- (IsUbuntu(Distro) && Distro >= UbuntuKarmic))
- ExtraOpts.push_back("--build-id");
+#ifdef ENABLE_LINKER_BUILD_ID
+ ExtraOpts.push_back("--build-id");
+#endif
if (IsOpenSUSE(Distro))
ExtraOpts.push_back("--enable-new-dtags");
@@ -3718,6 +4066,11 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
const std::string &LibPath = GCCInstallation.getParentLibPath();
const Multilib &Multilib = GCCInstallation.getMultilib();
+ const MultilibSet &Multilibs = GCCInstallation.getMultilibs();
+
+ // Add toolchain- and multilib-specific file paths.
+ addMultilibsFilePaths(D, Multilibs, Multilib,
+ GCCInstallation.getInstallPath(), Paths);
// Sourcery CodeBench MIPS toolchain holds some libraries under
// a biarch-like suffix of the GCC installation.
@@ -3857,6 +4210,125 @@ std::string Linux::computeSysRoot() const {
return std::string();
}
+std::string Linux::getDynamicLinker(const ArgList &Args) const {
+ const llvm::Triple::ArchType Arch = getArch();
+ const llvm::Triple &Triple = getTriple();
+
+ const enum Distro Distro = DetectDistro(getDriver(), Arch);
+
+ if (Triple.isAndroid())
+ return Triple.isArch64Bit() ? "/system/bin/linker64" : "/system/bin/linker";
+ else if (Triple.isMusl()) {
+ std::string ArchName;
+ switch (Arch) {
+ case llvm::Triple::thumb:
+ ArchName = "arm";
+ break;
+ case llvm::Triple::thumbeb:
+ ArchName = "armeb";
+ break;
+ default:
+ ArchName = Triple.getArchName().str();
+ }
+ if (Triple.getEnvironment() == llvm::Triple::MuslEABIHF)
+ ArchName += "hf";
+
+ return "/lib/ld-musl-" + ArchName + ".so.1";
+ }
+
+ std::string LibDir;
+ std::string Loader;
+
+ switch (Arch) {
+ default:
+ llvm_unreachable("unsupported architecture");
+
+ case llvm::Triple::aarch64:
+ LibDir = "lib";
+ Loader = "ld-linux-aarch64.so.1";
+ break;
+ case llvm::Triple::aarch64_be:
+ LibDir = "lib";
+ Loader = "ld-linux-aarch64_be.so.1";
+ break;
+ case llvm::Triple::arm:
+ case llvm::Triple::thumb:
+ case llvm::Triple::armeb:
+ case llvm::Triple::thumbeb: {
+ const bool HF =
+ Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
+ tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard;
+
+ LibDir = "lib";
+ Loader = HF ? "ld-linux-armhf.so.3" : "ld-linux.so.3";
+ break;
+ }
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el: {
+ bool LE = (Triple.getArch() == llvm::Triple::mipsel) ||
+ (Triple.getArch() == llvm::Triple::mips64el);
+ bool IsNaN2008 = tools::mips::isNaN2008(Args, Triple);
+
+ LibDir = "lib" + tools::mips::getMipsABILibSuffix(Args, Triple);
+
+ if (tools::mips::isUCLibc(Args))
+ Loader = IsNaN2008 ? "ld-uClibc-mipsn8.so.0" : "ld-uClibc.so.0";
+ else if (!Triple.hasEnvironment() &&
+ Triple.getVendor() == llvm::Triple::VendorType::MipsTechnologies)
+ Loader = LE ? "ld-musl-mipsel.so.1" : "ld-musl-mips.so.1";
+ else
+ Loader = IsNaN2008 ? "ld-linux-mipsn8.so.1" : "ld.so.1";
+
+ break;
+ }
+ case llvm::Triple::ppc:
+ LibDir = "lib";
+ Loader = "ld.so.1";
+ break;
+ case llvm::Triple::ppc64:
+ LibDir = "lib64";
+ Loader =
+ (tools::ppc::hasPPCAbiArg(Args, "elfv2")) ? "ld64.so.2" : "ld64.so.1";
+ break;
+ case llvm::Triple::ppc64le:
+ LibDir = "lib64";
+ Loader =
+ (tools::ppc::hasPPCAbiArg(Args, "elfv1")) ? "ld64.so.1" : "ld64.so.2";
+ break;
+ case llvm::Triple::sparc:
+ case llvm::Triple::sparcel:
+ LibDir = "lib";
+ Loader = "ld-linux.so.2";
+ break;
+ case llvm::Triple::sparcv9:
+ LibDir = "lib64";
+ Loader = "ld-linux.so.2";
+ break;
+ case llvm::Triple::systemz:
+ LibDir = "lib";
+ Loader = "ld64.so.1";
+ break;
+ case llvm::Triple::x86:
+ LibDir = "lib";
+ Loader = "ld-linux.so.2";
+ break;
+ case llvm::Triple::x86_64: {
+ bool X32 = Triple.getEnvironment() == llvm::Triple::GNUX32;
+
+ LibDir = X32 ? "libx32" : "lib64";
+ Loader = X32 ? "ld-linux-x32.so.2" : "ld-linux-x86-64.so.2";
+ break;
+ }
+ }
+
+ if (Distro == Exherbo && (Triple.getVendor() == llvm::Triple::UnknownVendor ||
+ Triple.getVendor() == llvm::Triple::PC))
+ return "/usr/" + Triple.str() + "/lib/" + Loader;
+ return "/" + LibDir + "/" + Loader;
+}
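A few illustrative values this routine yields, assuming the conventional
glibc/musl/Android loader locations and no Exherbo special case:

//   x86_64-linux-gnu                     -> /lib64/ld-linux-x86-64.so.2
//   x86_64-linux-gnux32                  -> /libx32/ld-linux-x32.so.2
//   armv7-linux-musleabihf               -> /lib/ld-musl-armhf.so.1
//   aarch64-linux-android                -> /system/bin/linker64
//   mips-linux-gnu, o32 with -mnan=2008  -> /lib/ld-linux-mipsn8.so.1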
+
void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
const Driver &D = getDriver();
@@ -3897,11 +4369,9 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
if (GCCInstallation.isValid()) {
const auto &Callback = Multilibs.includeDirsCallback();
if (Callback) {
- const auto IncludePaths = Callback(GCCInstallation.getInstallPath(),
- GCCInstallation.getTriple().str(),
- GCCInstallation.getMultilib());
- for (const auto &Path : IncludePaths)
- addExternCSystemIncludeIfExists(DriverArgs, CC1Args, Path);
+ for (const auto &Path : Callback(GCCInstallation.getMultilib()))
+ addExternCSystemIncludeIfExists(
+ DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path);
}
}
@@ -4028,7 +4498,6 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include");
}
-
static std::string DetectLibcxxIncludePath(StringRef base) {
std::error_code EC;
int MaxVersion = 0;
@@ -4058,11 +4527,11 @@ void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
if (GetCXXStdlibType(DriverArgs) == ToolChain::CST_Libcxx) {
const std::string LibCXXIncludePathCandidates[] = {
DetectLibcxxIncludePath(getDriver().Dir + "/../include/c++"),
-
- // We also check the system as for a long time this is the only place
- // Clang looked.
- // FIXME: We should really remove this. It doesn't make any sense.
- DetectLibcxxIncludePath(getDriver().SysRoot + "/usr/include/c++")};
+ // If this is a development (non-installed) clang, libc++ will
+ // not be found at ../include/c++, but it is likely to be found at
+ // one of the following two locations:
+ DetectLibcxxIncludePath(getDriver().SysRoot + "/usr/local/include/c++"),
+ DetectLibcxxIncludePath(getDriver().SysRoot + "/usr/include/c++") };
for (const auto &IncludePath : LibCXXIncludePathCandidates) {
if (IncludePath.empty() || !getVFS().exists(IncludePath))
continue;
@@ -4103,6 +4572,7 @@ void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
const std::string LibStdCXXIncludePathCandidates[] = {
// Gentoo is weird and places its headers inside the GCC install,
// so if the first attempt to find the headers fails, try these patterns.
+ InstallDir.str() + "/include/g++-v" + Version.Text,
InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." +
Version.MinorStr,
InstallDir.str() + "/include/g++-v" + Version.MajorStr,
@@ -4127,10 +4597,23 @@ void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs,
if (DriverArgs.hasArg(options::OPT_nocudainc))
return;
- if (CudaInstallation.isValid()) {
- addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath());
- CC1Args.push_back("-include");
- CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
+ if (!CudaInstallation.isValid()) {
+ getDriver().Diag(diag::err_drv_no_cuda_installation);
+ return;
+ }
+
+ addSystemInclude(DriverArgs, CC1Args, CudaInstallation.getIncludePath());
+ CC1Args.push_back("-include");
+ CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
+}
+
+void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
+ ArgStringList &CC1Args) const {
+ if (GCCInstallation.isValid()) {
+ CC1Args.push_back("-isystem");
+ CC1Args.push_back(DriverArgs.MakeArgString(
+ GCCInstallation.getParentLibPath() + "/../" +
+ GCCInstallation.getTriple().str() + "/include"));
}
}
@@ -4158,6 +4641,8 @@ SanitizerMask Linux::getSupportedSanitizers() const {
Res |= SanitizerKind::Thread;
if (IsX86_64 || IsMIPS64 || IsPowerPC64 || IsAArch64)
Res |= SanitizerKind::Memory;
+ if (IsX86_64)
+ Res |= SanitizerKind::Efficiency;
if (IsX86 || IsX86_64) {
Res |= SanitizerKind::Function;
}
@@ -4200,13 +4685,16 @@ Tool *DragonFly::buildLinker() const {
return new tools::dragonfly::Linker(*this);
}
-/// Stub for CUDA toolchain. At the moment we don't have assembler or
-/// linker and need toolchain mainly to propagate device-side options
-/// to CC1.
+/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
+/// which isn't properly a linker but nonetheless performs the step of stitching
+/// together object files from the assembler into a single blob.
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
- : Linux(D, Triple, Args) {}
+ : Linux(D, Triple, Args) {
+ if (CudaInstallation.isValid())
+ getProgramPaths().push_back(CudaInstallation.getBinPath());
+}
void
CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
@@ -4214,6 +4702,14 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
Linux::addClangTargetOptions(DriverArgs, CC1Args);
CC1Args.push_back("-fcuda-is-device");
+ if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+ options::OPT_fno_cuda_flush_denormals_to_zero, false))
+ CC1Args.push_back("-fcuda-flush-denormals-to-zero");
+
+ if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
+ options::OPT_fno_cuda_approx_transcendentals, false))
+ CC1Args.push_back("-fcuda-approx-transcendentals");
+
if (DriverArgs.hasArg(options::OPT_nocudalib))
return;
@@ -4231,6 +4727,18 @@ CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
}
}
+void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
+ ArgStringList &CC1Args) const {
+ // Check our CUDA version if we're going to include the CUDA headers.
+ if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
+ !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
+ StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+ assert(!Arch.empty() && "Must have an explicit GPU arch.");
+ CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
+ }
+ Linux::AddCudaIncludeArgs(DriverArgs, CC1Args);
+}
+
llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
const char *BoundArch) const {
@@ -4240,7 +4748,7 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches BoundArch
- if (A->getValue(0) != StringRef(BoundArch))
+ if (!BoundArch || A->getValue(0) != StringRef(BoundArch))
continue;
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
@@ -4271,10 +4779,21 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
DAL->append(A);
}
- DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
+ if (BoundArch) {
+ DAL->eraseArg(options::OPT_march_EQ);
+ DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
+ }
return DAL;
}
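A hedged illustration of the -Xarch_ filtering and the new BoundArch guard
above (the invocation is hypothetical):

//   clang++ -x cuda foo.cu --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_50 \
//           -Xarch_sm_35 -O3
//
// During the sm_35 device compilation BoundArch == "sm_35", so -O3 is unpacked
// into that job's arguments; for the sm_50 job the -Xarch_ argument is skipped.
// When BoundArch is null (a job not bound to a particular GPU), -Xarch_
// arguments are likewise skipped and -march is left untouched.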
+Tool *CudaToolChain::buildAssembler() const {
+ return new tools::NVPTX::Assembler(*this);
+}
+
+Tool *CudaToolChain::buildLinker() const {
+ return new tools::NVPTX::Linker(*this);
+}
+
/// XCore tool chain
XCoreToolChain::XCoreToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
@@ -4341,7 +4860,7 @@ void XCoreToolChain::AddCXXStdlibLibArgs(const ArgList &Args,
MyriadToolChain::MyriadToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
- : Generic_GCC(D, Triple, Args) {
+ : Generic_ELF(D, Triple, Args) {
// If a target of 'sparc-myriad-elf' is specified to clang, it wants to use
// 'sparc-myriad--elf' (note the unknown OS) as the canonical triple.
// This won't work to find gcc. Instead we give the installation detector an
@@ -4432,6 +4951,11 @@ Tool *MyriadToolChain::buildLinker() const {
WebAssembly::WebAssembly(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args)
: ToolChain(D, Triple, Args) {
+
+ assert(Triple.isArch32Bit() != Triple.isArch64Bit());
+ getFilePaths().push_back(
+ getDriver().SysRoot + "/lib" + (Triple.isArch32Bit() ? "32" : "64"));
+
// Use LLD by default.
DefaultLinker = "lld";
}
@@ -4467,6 +4991,29 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
CC1Args.push_back("-fuse-init-array");
}
+ToolChain::RuntimeLibType WebAssembly::GetDefaultRuntimeLibType() const {
+ return ToolChain::RLT_CompilerRT;
+}
+
+ToolChain::CXXStdlibType WebAssembly::GetCXXStdlibType(const ArgList &Args) const {
+ return ToolChain::CST_Libcxx;
+}
+
+void WebAssembly::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
+ ArgStringList &CC1Args) const {
+ if (!DriverArgs.hasArg(options::OPT_nostdinc))
+ addSystemInclude(DriverArgs, CC1Args, getDriver().SysRoot + "/include");
+}
+
+void WebAssembly::AddClangCXXStdlibIncludeArgs(
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const {
+ if (!DriverArgs.hasArg(options::OPT_nostdlibinc) &&
+ !DriverArgs.hasArg(options::OPT_nostdincxx))
+ addSystemInclude(DriverArgs, CC1Args,
+ getDriver().SysRoot + "/include/c++/v1");
+}
+
Tool *WebAssembly::buildLinker() const {
return new tools::wasm::Linker(*this);
}
@@ -4476,12 +5023,12 @@ PS4CPU::PS4CPU(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
if (Args.hasArg(options::OPT_static))
D.Diag(diag::err_drv_unsupported_opt_for_target) << "-static" << "PS4";
- // Determine where to find the PS4 libraries. We use SCE_PS4_SDK_DIR
+ // Determine where to find the PS4 libraries. We use SCE_ORBIS_SDK_DIR
// if it exists; otherwise use the driver's installation path, which
// should be <SDK_DIR>/host_tools/bin.
SmallString<512> PS4SDKDir;
- if (const char *EnvValue = getenv("SCE_PS4_SDK_DIR")) {
+ if (const char *EnvValue = getenv("SCE_ORBIS_SDK_DIR")) {
if (!llvm::sys::fs::exists(EnvValue))
getDriver().Diag(clang::diag::warn_drv_ps4_sdk_dir) << EnvValue;
PS4SDKDir = EnvValue;
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
index f940e5847e14..369712fa934b 100644
--- a/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
+++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains.h
@@ -11,12 +11,14 @@
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_H
#include "Tools.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/VersionTuple.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Multilib.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/Compiler.h"
#include <set>
#include <vector>
@@ -158,36 +160,52 @@ protected:
GCCInstallationDetector GCCInstallation;
// \brief A class to find a viable CUDA installation
-
class CudaInstallationDetector {
- bool IsValid;
+ private:
const Driver &D;
- std::string CudaInstallPath;
- std::string CudaLibPath;
- std::string CudaLibDevicePath;
- std::string CudaIncludePath;
- llvm::StringMap<std::string> CudaLibDeviceMap;
+ bool IsValid = false;
+ CudaVersion Version = CudaVersion::UNKNOWN;
+ std::string InstallPath;
+ std::string BinPath;
+ std::string LibPath;
+ std::string LibDevicePath;
+ std::string IncludePath;
+ llvm::StringMap<std::string> LibDeviceMap;
+
+ // CUDA architectures for which we have raised an error in
+ // CheckCudaVersionSupportsArch.
+ mutable llvm::SmallSet<CudaArch, 4> ArchsWithVersionTooLowErrors;
public:
- CudaInstallationDetector(const Driver &D) : IsValid(false), D(D) {}
+ CudaInstallationDetector(const Driver &D) : D(D) {}
void init(const llvm::Triple &TargetTriple, const llvm::opt::ArgList &Args);
+ /// \brief Emit an error if Version does not support the given Arch.
+ ///
+ /// If either Version or Arch is unknown, does not emit an error. Emits at
+ /// most one error per Arch.
+ void CheckCudaVersionSupportsArch(CudaArch Arch) const;
+
/// \brief Check whether we detected a valid Cuda install.
bool isValid() const { return IsValid; }
/// \brief Print information about the detected CUDA installation.
void print(raw_ostream &OS) const;
+ /// \brief Get the detected Cuda installation's version.
+ CudaVersion version() const { return Version; }
/// \brief Get the detected Cuda installation path.
- StringRef getInstallPath() const { return CudaInstallPath; }
+ StringRef getInstallPath() const { return InstallPath; }
+ /// \brief Get the detected path to Cuda's bin directory.
+ StringRef getBinPath() const { return BinPath; }
/// \brief Get the detected Cuda Include path.
- StringRef getIncludePath() const { return CudaIncludePath; }
+ StringRef getIncludePath() const { return IncludePath; }
/// \brief Get the detected Cuda library path.
- StringRef getLibPath() const { return CudaLibPath; }
+ StringRef getLibPath() const { return LibPath; }
/// \brief Get the detected Cuda device library path.
- StringRef getLibDevicePath() const { return CudaLibDevicePath; }
+ StringRef getLibDevicePath() const { return LibDevicePath; }
/// \brief Get libdevice file for given architecture
std::string getLibDeviceFile(StringRef Gpu) const {
- return CudaLibDeviceMap.lookup(Gpu);
+ return LibDeviceMap.lookup(Gpu);
}
};
@@ -493,6 +511,10 @@ protected:
return TargetVersion < VersionTuple(V0, V1, V2);
}
+ StringRef getPlatformFamily() const;
+ static StringRef getSDKName(StringRef isysroot);
+ StringRef getOSLibraryNameSuffix() const;
+
public:
/// }
/// @name ToolChain Implementation
@@ -507,6 +529,7 @@ public:
TranslateArgs(const llvm::opt::DerivedArgList &Args,
const char *BoundArch) const override;
+ CXXStdlibType GetDefaultCXXStdlibType() const override;
ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override;
bool hasBlocksRuntime() const override;
@@ -536,6 +559,8 @@ public:
bool UseSjLjExceptions(const llvm::opt::ArgList &Args) const override;
+ bool SupportsEmbeddedBitcode() const override;
+
SanitizerMask getSupportedSanitizers() const override;
};
@@ -609,7 +634,9 @@ public:
void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const override;
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault() const override;
+ SanitizerMask getSupportedSanitizers() const override;
+ SanitizerMask getDefaultSanitizers() const override;
protected:
Tool *buildLinker() const override;
@@ -667,6 +694,18 @@ private:
void findGccLibDir();
};
+class LLVM_LIBRARY_VISIBILITY Haiku : public Generic_ELF {
+public:
+ Haiku(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args);
+
+ bool isPIEDefault() const override { return getTriple().getArch() == llvm::Triple::x86_64; }
+
+ void
+ AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
+};
+
class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {
public:
OpenBSD(const Driver &D, const llvm::Triple &Triple,
@@ -694,7 +733,7 @@ public:
bool IsMathErrnoDefault() const override { return false; }
bool IsObjCNonFragileABIDefault() const override { return true; }
- CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+ CXXStdlibType GetDefaultCXXStdlibType() const override;
void AddClangCXXStdlibIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
@@ -718,7 +757,7 @@ public:
bool IsMathErrnoDefault() const override { return false; }
bool IsObjCNonFragileABIDefault() const override { return true; }
- CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+ CXXStdlibType GetDefaultCXXStdlibType() const override;
void AddClangCXXStdlibIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
@@ -746,7 +785,7 @@ public:
bool IsMathErrnoDefault() const override { return false; }
bool IsObjCNonFragileABIDefault() const override { return true; }
- CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+ CXXStdlibType GetDefaultCXXStdlibType() const override;
void AddClangCXXStdlibIncludeArgs(
const llvm::opt::ArgList &DriverArgs,
@@ -795,12 +834,16 @@ public:
llvm::opt::ArgStringList &CC1Args) const override;
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
+ void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
bool isPIEDefault() const override;
SanitizerMask getSupportedSanitizers() const override;
void addProfileRTLibs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const override;
virtual std::string computeSysRoot() const;
+ virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const;
+
std::vector<std::string> ExtraOpts;
protected:
@@ -818,6 +861,24 @@ public:
const char *BoundArch) const override;
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
+
+ // Never try to use the integrated assembler with CUDA; always fork out to
+ // ptxas.
+ bool useIntegratedAs() const override { return false; }
+
+ void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
+
+ const Generic_GCC::CudaInstallationDetector &cudaInstallation() const {
+ return CudaInstallation;
+ }
+ Generic_GCC::CudaInstallationDetector &cudaInstallation() {
+ return CudaInstallation;
+ }
+
+protected:
+ Tool *buildAssembler() const override; // ptxas
+ Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
};
class LLVM_LIBRARY_VISIBILITY MipsLLVMToolChain : public Linux {
@@ -856,6 +917,14 @@ private:
std::string LibSuffix;
};
+class LLVM_LIBRARY_VISIBILITY LanaiToolChain : public Generic_ELF {
+public:
+ LanaiToolChain(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args)
+ : Generic_ELF(D, Triple, Args) {}
+ bool IsIntegratedAssemblerDefault() const override { return true; }
+};
+
class LLVM_LIBRARY_VISIBILITY HexagonToolChain : public Linux {
protected:
GCCVersion GCCLibAndIncVersion;
@@ -900,6 +969,7 @@ protected:
public:
AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
+ unsigned GetDefaultDwarfVersion() const override { return 2; }
bool IsIntegratedAssemblerDefault() const override { return true; }
};
@@ -987,6 +1057,7 @@ public:
bool getVisualStudioInstallDir(std::string &path) const;
bool getVisualStudioBinariesFolder(const char *clangProgramPath,
std::string &path) const;
+ VersionTuple getMSVCVersionFromExe() const override;
std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
types::ID InputType) const override;
@@ -1064,7 +1135,7 @@ public:
/// MyriadToolChain - A tool chain using either clang or the external compiler
/// installed by the Movidius SDK to perform all subcommands.
-class LLVM_LIBRARY_VISIBILITY MyriadToolChain : public Generic_GCC {
+class LLVM_LIBRARY_VISIBILITY MyriadToolChain : public Generic_ELF {
public:
MyriadToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
@@ -1109,6 +1180,14 @@ private:
bool HasNativeLLVMSupport() const override;
void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
+ RuntimeLibType GetDefaultRuntimeLibType() const override;
+ CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+ void AddClangSystemIncludeArgs(
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
+ void AddClangCXXStdlibIncludeArgs(
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
Tool *buildLinker() const override;
};
diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
index b139cd47688e..1b3229a2f2ee 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp
@@ -96,6 +96,14 @@ static const char *getSparcAsmModeForCPU(StringRef Name,
.Case("niagara2", "-Av8plusb")
.Case("niagara3", "-Av8plusd")
.Case("niagara4", "-Av8plusd")
+ .Case("leon2", "-Av8")
+ .Case("at697e", "-Av8")
+ .Case("at697f", "-Av8")
+ .Case("leon3", "-Av8")
+ .Case("ut699", "-Av8")
+ .Case("gr712rc", "-Av8")
+ .Case("leon4", "-Av8")
+ .Case("gr740", "-Av8")
.Default("-Av8");
}
}
@@ -288,13 +296,47 @@ static bool forwardToGCC(const Option &O) {
!O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput);
}
+/// Add the C++ include args of other offloading toolchains. If this is a host
+/// job, the device toolchains' args are added. If this is a device job, the
+/// host toolchain's args are added.
+static void addExtraOffloadCXXStdlibIncludeArgs(Compilation &C,
+ const JobAction &JA,
+ const ArgList &Args,
+ ArgStringList &CmdArgs) {
+
+ if (JA.isHostOffloading(Action::OFK_Cuda))
+ C.getSingleOffloadToolChain<Action::OFK_Cuda>()
+ ->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
+ else if (JA.isDeviceOffloading(Action::OFK_Cuda))
+ C.getSingleOffloadToolChain<Action::OFK_Host>()
+ ->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
+
+ // TODO: Add support for other programming models here.
+}
+
+/// Add the include args that are specific to each offloading programming model.
+static void addExtraOffloadSpecificIncludeArgs(Compilation &C,
+ const JobAction &JA,
+ const ArgList &Args,
+ ArgStringList &CmdArgs) {
+
+ if (JA.isHostOffloading(Action::OFK_Cuda))
+ C.getSingleOffloadToolChain<Action::OFK_Host>()->AddCudaIncludeArgs(
+ Args, CmdArgs);
+ else if (JA.isDeviceOffloading(Action::OFK_Cuda))
+ C.getSingleOffloadToolChain<Action::OFK_Cuda>()->AddCudaIncludeArgs(
+ Args, CmdArgs);
+
+ // TODO: Add support for other programming models here.
+}
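In short (a hedged reading of the two helpers above): for C++ standard library
headers the opposite side's toolchain is consulted (a host job pulls the device
toolchain's args and vice versa), while the CUDA-specific headers always come
from the toolchain of the side being compiled (host job -> host toolchain,
device job -> CUDA device toolchain).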
+
void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs,
const InputInfo &Output,
- const InputInfoList &Inputs,
- const ToolChain *AuxToolChain) const {
+ const InputInfoList &Inputs) const {
Arg *A;
+ const bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
CheckPreprocessingOptions(D, Args);
@@ -386,9 +428,74 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
// wonky, but we include looking for .gch so we can support seamless
// replacement into a build system already set up to be generating
// .gch files.
+ int YcIndex = -1, YuIndex = -1;
+ {
+ int AI = -1;
+ const Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc);
+ const Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu);
+ for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) {
+ // Walk the whole i_Group and skip non "-include" flags so that the index
+ // here matches the index in the next loop below.
+ ++AI;
+ if (!A->getOption().matches(options::OPT_include))
+ continue;
+ if (YcArg && strcmp(A->getValue(), YcArg->getValue()) == 0)
+ YcIndex = AI;
+ if (YuArg && strcmp(A->getValue(), YuArg->getValue()) == 0)
+ YuIndex = AI;
+ }
+ }
+ if (isa<PrecompileJobAction>(JA) && YcIndex != -1) {
+ Driver::InputList Inputs;
+ D.BuildInputs(getToolChain(), C.getArgs(), Inputs);
+ assert(Inputs.size() == 1 && "Need one input when building pch");
+ CmdArgs.push_back(Args.MakeArgString(Twine("-find-pch-source=") +
+ Inputs[0].second->getValue()));
+ }
+
bool RenderedImplicitInclude = false;
+ int AI = -1;
for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) {
- if (A->getOption().matches(options::OPT_include)) {
+ ++AI;
+
+ if (getToolChain().getDriver().IsCLMode() &&
+ A->getOption().matches(options::OPT_include)) {
+ // In clang-cl mode, /Ycfoo.h means that all code up to a foo.h
+ // include is compiled into foo.h, and everything after goes into
+ // the .obj file. /Yufoo.h means that all includes prior to and including
+ // foo.h are completely skipped and replaced with a use of the pch file
+ // for foo.h. (Each flag can have at most one value, multiple /Yc flags
+ // just mean that the last one wins.) If /Yc and /Yu are both present
+ // and refer to the same file, /Yc wins.
+ // Note that OPT__SLASH_FI gets mapped to OPT_include.
+ // FIXME: The code here assumes that /Yc and /Yu refer to the same file.
+ // cl.exe seems to support both flags with different values, but that
+ // seems strange (which flag does /Fp now refer to?), so don't implement
+ // that until someone needs it.
+ int PchIndex = YcIndex != -1 ? YcIndex : YuIndex;
+ if (PchIndex != -1) {
+ if (isa<PrecompileJobAction>(JA)) {
+ // When building the pch, skip all includes after the pch.
+ assert(YcIndex != -1 && PchIndex == YcIndex);
+ if (AI >= YcIndex)
+ continue;
+ } else {
+ // When using the pch, skip all includes prior to the pch.
+ if (AI < PchIndex) {
+ A->claim();
+ continue;
+ }
+ if (AI == PchIndex) {
+ A->claim();
+ CmdArgs.push_back("-include-pch");
+ CmdArgs.push_back(
+ Args.MakeArgString(D.GetClPchPath(C, A->getValue())));
+ continue;
+ }
+ }
+ }
+ } else if (A->getOption().matches(options::OPT_include)) {
+ // Handling of gcc-style gch precompiled headers.
bool IsFirstImplicitInclude = !RenderedImplicitInclude;
RenderedImplicitInclude = true;
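A hypothetical clang-cl session matching the /Yc and /Yu description above
(file names are illustrative):

//   clang-cl /Ycpch.h /FIpch.h a.cpp   -> everything up to and including pch.h
//                                         is compiled into the PCH; the rest
//                                         of a.cpp goes into a.obj
//   clang-cl /Yupch.h /FIpch.h b.cpp   -> includes before pch.h are skipped,
//                                         and pch.h itself is replaced by
//                                         -include-pch with the PCH file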
@@ -436,6 +543,13 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
<< A->getAsString(Args);
}
}
+ } else if (A->getOption().matches(options::OPT_isystem_after)) {
+ // Handling of paths which must come late. These entries are handled by
+ // the toolchain itself after the resource dir is inserted in the right
+ // search order.
+      // Do not claim the argument here: on toolchains which do not honour
+      // the option, the unclaimed argument triggers an unused-argument
+      // warning instead of being silently ignored.
+ continue;
}
// Not translated, render as usual.
@@ -485,26 +599,27 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
// OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++.
addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH");
- // Optional AuxToolChain indicates that we need to include headers
- // for more than one target. If that's the case, add include paths
- // from AuxToolChain right after include paths of the same kind for
- // the current target.
+ // While adding the include arguments, we also attempt to retrieve the
+  // arguments of related offloading toolchains or arguments that are
+  // specific to an offloading programming model.
// Add C++ include arguments, if needed.
if (types::isCXX(Inputs[0].getType())) {
getToolChain().AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
- if (AuxToolChain)
- AuxToolChain->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
+ addExtraOffloadCXXStdlibIncludeArgs(C, JA, Args, CmdArgs);
}
- // Add system include arguments.
- getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs);
- if (AuxToolChain)
- AuxToolChain->AddClangCXXStdlibIncludeArgs(Args, CmdArgs);
+ // Add system include arguments for all targets but IAMCU.
+ if (!IsIAMCU) {
+ getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs);
+ addExtraOffloadCXXStdlibIncludeArgs(C, JA, Args, CmdArgs);
+ } else {
+ // For IAMCU add special include arguments.
+ getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs);
+ }
- // Add CUDA include arguments, if needed.
- if (types::isCuda(Inputs[0].getType()))
- getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
+ // Add offload include arguments, if needed.
+ addExtraOffloadSpecificIncludeArgs(C, JA, Args, CmdArgs);
}
// FIXME: Move to target hook.
@@ -698,6 +813,7 @@ arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) {
case llvm::Triple::TvOS: {
// Darwin defaults to "softfp" for v6 and v7.
ABI = (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft;
+ ABI = Triple.isWatchABI() ? FloatABI::Hard : ABI;
break;
}
case llvm::Triple::WatchOS:
@@ -724,10 +840,12 @@ arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) {
default:
switch (Triple.getEnvironment()) {
case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::MuslEABIHF:
case llvm::Triple::EABIHF:
ABI = FloatABI::Hard;
break;
case llvm::Triple::GNUEABI:
+ case llvm::Triple::MuslEABI:
case llvm::Triple::EABI:
// EABI is always AAPCS, and if it was not marked 'hard', it's softfp
ABI = FloatABI::SoftFP;
@@ -737,7 +855,12 @@ arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) {
break;
default:
// Assume "soft", but warn the user we are guessing.
- ABI = FloatABI::Soft;
+ if (Triple.isOSBinFormatMachO() &&
+ Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em)
+ ABI = FloatABI::Hard;
+ else
+ ABI = FloatABI::Soft;
+
if (Triple.getOS() != llvm::Triple::UnknownOS ||
!Triple.isOSBinFormatMachO())
D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft";
@@ -879,10 +1002,6 @@ static void getARMTargetFeatures(const ToolChain &TC,
Features.push_back("-crc");
}
- if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8_1a) {
- Features.insert(Features.begin(), "+v8.1a");
- }
-
// Look for the last occurrence of -mlong-calls or -mno-long-calls. If
// neither options are specified, see if we are compiling for kernel/kext and
// decide whether to pass "+long-calls" based on the OS and its version.
@@ -904,6 +1023,10 @@ static void getARMTargetFeatures(const ToolChain &TC,
// No v6M core supports unaligned memory access (v6M ARM ARM A3.2).
if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m)
D.Diag(diag::err_target_unsupported_unaligned) << "v6m";
+ // v8M Baseline follows on from v6M, so doesn't support unaligned memory
+ // access either.
+      else if (Triple.getSubArch() ==
+               llvm::Triple::SubArchType::ARMSubArch_v8m_baseline)
+ D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base";
} else
Features.push_back("+strict-align");
} else {
@@ -954,7 +1077,7 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
} else if (Triple.isOSBinFormatMachO()) {
if (useAAPCSForMachO(Triple)) {
ABIName = "aapcs";
- } else if (Triple.isWatchOS()) {
+ } else if (Triple.isWatchABI()) {
ABIName = "aapcs16";
} else {
ABIName = "apcs-gnu";
@@ -968,6 +1091,8 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
case llvm::Triple::Android:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
+ case llvm::Triple::MuslEABI:
+ case llvm::Triple::MuslEABIHF:
ABIName = "aapcs-linux";
break;
case llvm::Triple::EABIHF:
@@ -1112,8 +1237,10 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
}
// MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android).
- if (Triple.isAndroid())
+ if (Triple.isAndroid()) {
+ DefMips32CPU = "mips32";
DefMips64CPU = "mips64r6";
+ }
// MIPS3 is the default for mips64*-unknown-openbsd.
if (Triple.getOS() == llvm::Triple::OpenBSD)
@@ -1148,6 +1275,30 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
}
}
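+  // For MIPS Technologies / Imagination Technologies vendor triples, derive
+  // a default ABI from the CPU name: the 32-bit CPUs in the table below map
+  // to o32 and the 64-bit ones to n64.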
+ if (ABIName.empty() &&
+ (Triple.getVendor() == llvm::Triple::MipsTechnologies ||
+ Triple.getVendor() == llvm::Triple::ImaginationTechnologies)) {
+ ABIName = llvm::StringSwitch<const char *>(CPUName)
+ .Case("mips1", "o32")
+ .Case("mips2", "o32")
+ .Case("mips3", "n64")
+ .Case("mips4", "n64")
+ .Case("mips5", "n64")
+ .Case("mips32", "o32")
+ .Case("mips32r2", "o32")
+ .Case("mips32r3", "o32")
+ .Case("mips32r5", "o32")
+ .Case("mips32r6", "o32")
+ .Case("mips64", "n64")
+ .Case("mips64r2", "n64")
+ .Case("mips64r3", "n64")
+ .Case("mips64r5", "n64")
+ .Case("mips64r6", "n64")
+ .Case("octeon", "n64")
+ .Case("p5600", "o32")
+ .Default("");
+ }
+
if (ABIName.empty()) {
// Deduce ABI name from the target triple.
if (Triple.getArch() == llvm::Triple::mips ||
@@ -1160,7 +1311,7 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
if (CPUName.empty()) {
// Deduce CPU name from ABI name.
CPUName = llvm::StringSwitch<const char *>(ABIName)
- .Cases("o32", "eabi", DefMips32CPU)
+ .Case("o32", DefMips32CPU)
.Cases("n32", "n64", DefMips64CPU)
.Default("");
}
@@ -1286,8 +1437,9 @@ static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
AddTargetFeature(Args, Features, options::OPT_mmsa, options::OPT_mno_msa,
"msa");
- // Add the last -mfp32/-mfpxx/-mfp64 or if none are given and the ABI is O32
- // pass -mfpxx
+  // Add the last -mfp32/-mfpxx/-mfp64; if none are given and the ABI is O32,
+  // pass -mfpxx; if none are given and fp64a is the default, pass fp64 and
+  // nooddspreg.
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx,
options::OPT_mfp64)) {
if (A->getOption().matches(options::OPT_mfp32))
@@ -1300,6 +1452,9 @@ static void getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
} else if (mips::shouldUseFPXX(Args, Triple, CPUName, ABIName, FloatABI)) {
Features.push_back(Args.MakeArgString("+fpxx"));
Features.push_back(Args.MakeArgString("+nooddspreg"));
+ } else if (mips::isFP64ADefault(Triple, CPUName)) {
+ Features.push_back(Args.MakeArgString("+fp64"));
+ Features.push_back(Args.MakeArgString("+nooddspreg"));
}
AddTargetFeature(Args, Features, options::OPT_mno_odd_spreg,
@@ -1359,6 +1514,19 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString("-mips-ssection-threshold=" + v));
A->claim();
}
+
+ if (Arg *A = Args.getLastArg(options::OPT_mcompact_branches_EQ)) {
+ StringRef Val = StringRef(A->getValue());
+ if (mips::hasCompactBranches(CPUName)) {
+ if (Val == "never" || Val == "always" || Val == "optimal") {
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back(Args.MakeArgString("-mips-compact-branches=" + Val));
+ } else
+ D.Diag(diag::err_drv_unsupported_option_argument)
+ << A->getOption().getName() << Val;
+ } else
+ D.Diag(diag::warn_target_unsupported_compact_branches) << CPUName;
+ }
}
/// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting.
@@ -1408,6 +1576,7 @@ static std::string getPPCTargetCPU(const ArgList &Args) {
.Case("power6x", "pwr6x")
.Case("power7", "pwr7")
.Case("power8", "pwr8")
+ .Case("power9", "pwr9")
.Case("pwr3", "pwr3")
.Case("pwr4", "pwr4")
.Case("pwr5", "pwr5")
@@ -1416,6 +1585,7 @@ static std::string getPPCTargetCPU(const ArgList &Args) {
.Case("pwr6x", "pwr6x")
.Case("pwr7", "pwr7")
.Case("pwr8", "pwr8")
+ .Case("pwr9", "pwr9")
.Case("powerpc", "ppc")
.Case("powerpc64", "ppc64")
.Case("powerpc64le", "ppc64le")
@@ -1431,15 +1601,8 @@ static void getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple,
handleTargetFeaturesGroup(Args, Features, options::OPT_m_ppc_Features_Group);
ppc::FloatABI FloatABI = ppc::getPPCFloatABI(D, Args);
- if (FloatABI == ppc::FloatABI::Soft &&
- !(Triple.getArch() == llvm::Triple::ppc64 ||
- Triple.getArch() == llvm::Triple::ppc64le))
- Features.push_back("+soft-float");
- else if (FloatABI == ppc::FloatABI::Soft &&
- (Triple.getArch() == llvm::Triple::ppc64 ||
- Triple.getArch() == llvm::Triple::ppc64le))
- D.Diag(diag::err_drv_invalid_mfloat_abi)
- << "soft float is not supported for ppc64";
+ if (FloatABI == ppc::FloatABI::Soft)
+ Features.push_back("-hard-float");
// Altivec is a bit weird, allow overriding of the Altivec feature here.
AddTargetFeature(Args, Features, options::OPT_faltivec,
@@ -1554,27 +1717,77 @@ static std::string getR600TargetGPU(const ArgList &Args) {
return "";
}
-void Clang::AddSparcTargetArgs(const ArgList &Args,
- ArgStringList &CmdArgs) const {
- const Driver &D = getToolChain().getDriver();
- std::string Triple = getToolChain().ComputeEffectiveClangTriple(Args);
+static std::string getLanaiTargetCPU(const ArgList &Args) {
+ if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+ return A->getValue();
+ }
+ return "";
+}
- bool SoftFloatABI = false;
+sparc::FloatABI sparc::getSparcFloatABI(const Driver &D,
+ const ArgList &Args) {
+ sparc::FloatABI ABI = sparc::FloatABI::Invalid;
if (Arg *A =
- Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) {
+ Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
+ options::OPT_mfloat_abi_EQ)) {
if (A->getOption().matches(options::OPT_msoft_float))
- SoftFloatABI = true;
+ ABI = sparc::FloatABI::Soft;
+ else if (A->getOption().matches(options::OPT_mhard_float))
+ ABI = sparc::FloatABI::Hard;
+ else {
+ ABI = llvm::StringSwitch<sparc::FloatABI>(A->getValue())
+ .Case("soft", sparc::FloatABI::Soft)
+ .Case("hard", sparc::FloatABI::Hard)
+ .Default(sparc::FloatABI::Invalid);
+ if (ABI == sparc::FloatABI::Invalid &&
+ !StringRef(A->getValue()).empty()) {
+ D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
+ ABI = sparc::FloatABI::Hard;
+ }
+ }
}
+ // If unspecified, choose the default based on the platform.
// Only the hard-float ABI on Sparc is standardized, and it is the
- // default. GCC also supports a nonstandard soft-float ABI mode, and
- // perhaps LLVM should implement that, too. However, since llvm
- // currently does not support Sparc soft-float, at all, display an
- // error if it's requested.
- if (SoftFloatABI) {
- D.Diag(diag::err_drv_unsupported_opt_for_target) << "-msoft-float"
- << Triple;
+  // default. GCC also supports a nonstandard soft-float ABI mode, which is
+  // also implemented in LLVM. However, as it is not standard, the default
+  // remains hard-float.
+ if (ABI == sparc::FloatABI::Invalid) {
+ ABI = sparc::FloatABI::Hard;
}
+
+ return ABI;
+}
+
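+// Summary of the float-ABI plumbing: FloatABI::Soft adds the "+soft-float"
+// target feature here and the "-msoft-float -mfloat-abi soft" cc1 arguments
+// in AddSparcTargetArgs below; FloatABI::Hard only passes "-mfloat-abi hard".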
+static void getSparcTargetFeatures(const Driver &D, const ArgList &Args,
+ std::vector<const char *> &Features) {
+ sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args);
+ if (FloatABI == sparc::FloatABI::Soft)
+ Features.push_back("+soft-float");
+}
+
+void Clang::AddSparcTargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ sparc::FloatABI FloatABI =
+ sparc::getSparcFloatABI(getToolChain().getDriver(), Args);
+
+ if (FloatABI == sparc::FloatABI::Soft) {
+ // Floating point operations and argument passing are soft.
+ CmdArgs.push_back("-msoft-float");
+ CmdArgs.push_back("-mfloat-abi");
+ CmdArgs.push_back("soft");
+ } else {
+ // Floating point operations and argument passing are hard.
+ assert(FloatABI == sparc::FloatABI::Hard && "Invalid float abi!");
+ CmdArgs.push_back("-mfloat-abi");
+ CmdArgs.push_back("hard");
+ }
+}
+
+void Clang::AddSystemZTargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ if (Args.hasFlag(options::OPT_mbackchain, options::OPT_mno_backchain, false))
+ CmdArgs.push_back("-mbackchain");
}
static const char *getSystemZTargetCPU(const ArgList &Args) {
@@ -1771,6 +1984,9 @@ static std::string getCPUName(const ArgList &Args, const llvm::Triple &T,
return "hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
+ case llvm::Triple::lanai:
+ return getLanaiTargetCPU(Args);
+
case llvm::Triple::systemz:
return getSystemZTargetCPU(Args);
@@ -1817,6 +2033,17 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
if (IsThinLTO)
CmdArgs.push_back("-plugin-opt=thinlto");
+
+ // If an explicit debugger tuning argument appeared, pass it along.
+ if (Arg *A = Args.getLastArg(options::OPT_gTune_Group,
+ options::OPT_ggdbN_Group)) {
+ if (A->getOption().matches(options::OPT_glldb))
+ CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb");
+ else if (A->getOption().matches(options::OPT_gsce))
+ CmdArgs.push_back("-plugin-opt=-debugger-tune=sce");
+ else
+ CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb");
+ }
}
/// This is a helper function for validating the optional refinement step
@@ -2045,6 +2272,13 @@ void Clang::AddX86TargetArgs(const ArgList &Args,
<< A->getOption().getName() << Value;
}
}
+
+ // Set flags to support MCU ABI.
+ if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
+ CmdArgs.push_back("-mfloat-abi");
+ CmdArgs.push_back("soft");
+ CmdArgs.push_back("-mstack-alignment=4");
+ }
}
void Clang::AddHexagonTargetArgs(const ArgList &Args,
@@ -2069,6 +2303,29 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args,
CmdArgs.push_back("-machine-sink-split=0");
}
+void Clang::AddLanaiTargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+ StringRef CPUName = A->getValue();
+
+ CmdArgs.push_back("-target-cpu");
+ CmdArgs.push_back(Args.MakeArgString(CPUName));
+ }
+ if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
+ StringRef Value = A->getValue();
+    // Only support mregparm=4 to keep old usage working; report an error for
+    // every other value. Note that getAsInteger() returns true on failure.
+    int Mregparm;
+    if (Value.getAsInteger(10, Mregparm) || Mregparm != 4) {
+      getToolChain().getDriver().Diag(
+          diag::err_drv_unsupported_option_argument)
+          << A->getOption().getName() << Value;
+    }
+ }
+}
+
void Clang::AddWebAssemblyTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
// Default to "hidden" visibility.
@@ -2093,12 +2350,14 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text,
.Case("crypto", "+crypto")
.Case("fp16", "+fullfp16")
.Case("profile", "+spe")
+ .Case("ras", "+ras")
.Case("nofp", "-fp-armv8")
.Case("nosimd", "-neon")
.Case("nocrc", "-crc")
.Case("nocrypto", "-crypto")
.Case("nofp16", "-fullfp16")
.Case("noprofile", "-spe")
+ .Case("noras", "-ras")
.Default(nullptr);
if (result)
Features.push_back(result);
@@ -2116,11 +2375,15 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU,
std::vector<const char *> &Features) {
std::pair<StringRef, StringRef> Split = Mcpu.split("+");
CPU = Split.first;
- if (CPU == "cyclone" || CPU == "cortex-a53" || CPU == "cortex-a57" ||
- CPU == "cortex-a72" || CPU == "cortex-a35" || CPU == "exynos-m1") {
+ if (CPU == "cortex-a53" || CPU == "cortex-a57" ||
+ CPU == "cortex-a72" || CPU == "cortex-a35" || CPU == "exynos-m1" ||
+ CPU == "kryo" || CPU == "cortex-a73" || CPU == "vulcan") {
Features.push_back("+neon");
Features.push_back("+crc");
Features.push_back("+crypto");
+ } else if (CPU == "cyclone") {
+ Features.push_back("+neon");
+ Features.push_back("+crypto");
} else if (CPU == "generic") {
Features.push_back("+neon");
} else {
@@ -2277,6 +2540,23 @@ static void getWebAssemblyTargetFeatures(const ArgList &Args,
handleTargetFeaturesGroup(Args, Features, options::OPT_m_wasm_Features_Group);
}
+static void getAMDGPUTargetFeatures(const Driver &D, const ArgList &Args,
+ std::vector<const char *> &Features) {
+ if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) {
+ StringRef value = dAbi->getValue();
+ if (value == "1.0") {
+ Features.push_back("+amdgpu-debugger-insert-nops");
+ Features.push_back("+amdgpu-debugger-reserve-regs");
+ Features.push_back("+amdgpu-debugger-emit-prologue");
+ } else {
+ D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
+ }
+ }
+
+ handleTargetFeaturesGroup(
+ Args, Features, options::OPT_m_amdgpu_Features_Group);
+}
+
static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
const ArgList &Args, ArgStringList &CmdArgs,
bool ForAS) {
@@ -2321,6 +2601,15 @@ static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
getWebAssemblyTargetFeatures(Args, Features);
+ break;
+ case llvm::Triple::sparc:
+ case llvm::Triple::sparcel:
+ case llvm::Triple::sparcv9:
+ getSparcTargetFeatures(D, Args, Features);
+ break;
+ case llvm::Triple::r600:
+ case llvm::Triple::amdgcn:
+ getAMDGPUTargetFeatures(D, Args, Features);
break;
}
@@ -2401,11 +2690,9 @@ static void addExceptionArgs(const ArgList &Args, types::ID InputType,
}
if (types::isCXX(InputType)) {
- // Disable C++ EH by default on XCore, PS4, and MSVC.
- // FIXME: Remove MSVC from this list once things work.
- bool CXXExceptionsEnabled = Triple.getArch() != llvm::Triple::xcore &&
- !Triple.isPS4CPU() &&
- !Triple.isWindowsMSVCEnvironment();
+ // Disable C++ EH by default on XCore and PS4.
+ bool CXXExceptionsEnabled =
+ Triple.getArch() != llvm::Triple::xcore && !Triple.isPS4CPU();
Arg *ExceptionArg = Args.getLastArg(
options::OPT_fcxx_exceptions, options::OPT_fno_cxx_exceptions,
options::OPT_fexceptions, options::OPT_fno_exceptions);
@@ -2464,8 +2751,8 @@ static bool ContainsCompileAction(const Action *A) {
if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A))
return true;
- for (const auto &Act : *A)
- if (ContainsCompileAction(Act))
+ for (const auto &AI : A->inputs())
+ if (ContainsCompileAction(AI))
return true;
return false;
@@ -2495,16 +2782,16 @@ static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
// Convert an arg of the form "-gN" or "-ggdbN" or one of their aliases
// to the corresponding DebugInfoKind.
-static CodeGenOptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
+static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
assert(A.getOption().matches(options::OPT_gN_Group) &&
"Not a -g option that specifies a debug-info level");
if (A.getOption().matches(options::OPT_g0) ||
A.getOption().matches(options::OPT_ggdb0))
- return CodeGenOptions::NoDebugInfo;
+ return codegenoptions::NoDebugInfo;
if (A.getOption().matches(options::OPT_gline_tables_only) ||
A.getOption().matches(options::OPT_ggdb1))
- return CodeGenOptions::DebugLineTablesOnly;
- return CodeGenOptions::LimitedDebugInfo;
+ return codegenoptions::DebugLineTablesOnly;
+ return codegenoptions::LimitedDebugInfo;
}
// Extract the integer N from a string spelled "-dwarf-N", returning 0
@@ -2520,17 +2807,17 @@ static unsigned DwarfVersionNum(StringRef ArgValue) {
}
static void RenderDebugEnablingArgs(const ArgList &Args, ArgStringList &CmdArgs,
- CodeGenOptions::DebugInfoKind DebugInfoKind,
+ codegenoptions::DebugInfoKind DebugInfoKind,
unsigned DwarfVersion,
llvm::DebuggerKind DebuggerTuning) {
switch (DebugInfoKind) {
- case CodeGenOptions::DebugLineTablesOnly:
+ case codegenoptions::DebugLineTablesOnly:
CmdArgs.push_back("-debug-info-kind=line-tables-only");
break;
- case CodeGenOptions::LimitedDebugInfo:
+ case codegenoptions::LimitedDebugInfo:
CmdArgs.push_back("-debug-info-kind=limited");
break;
- case CodeGenOptions::FullDebugInfo:
+ case codegenoptions::FullDebugInfo:
CmdArgs.push_back("-debug-info-kind=standalone");
break;
default:
@@ -2580,6 +2867,9 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
// When using an integrated assembler, translate -Wa, and -Xassembler
// options.
bool CompressDebugSections = false;
+
+ bool UseRelaxRelocations = ENABLE_X86_RELAX_RELOCATIONS;
+ const char *MipsTargetFeature = nullptr;
for (const Arg *A :
Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
A->claim();
@@ -2618,7 +2908,26 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
CmdArgs.push_back("-soft-float");
continue;
}
- break;
+
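+      // Translate an assembler-level ISA selection (e.g. -Wa,-mips32r2) into
+      // the matching "+mips32r2" target feature; it is forwarded to cc1 with
+      // -target-feature at the end of this function.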
+ MipsTargetFeature = llvm::StringSwitch<const char *>(Value)
+ .Case("-mips1", "+mips1")
+ .Case("-mips2", "+mips2")
+ .Case("-mips3", "+mips3")
+ .Case("-mips4", "+mips4")
+ .Case("-mips5", "+mips5")
+ .Case("-mips32", "+mips32")
+ .Case("-mips32r2", "+mips32r2")
+ .Case("-mips32r3", "+mips32r3")
+ .Case("-mips32r5", "+mips32r5")
+ .Case("-mips32r6", "+mips32r6")
+ .Case("-mips64", "+mips64")
+ .Case("-mips64r2", "+mips64r2")
+ .Case("-mips64r3", "+mips64r3")
+ .Case("-mips64r5", "+mips64r5")
+ .Case("-mips64r6", "+mips64r6")
+ .Default(nullptr);
+ if (MipsTargetFeature)
+ continue;
}
if (Value == "-force_cpusubtype_ALL") {
@@ -2635,6 +2944,12 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
} else if (Value == "-nocompress-debug-sections" ||
Value == "--nocompress-debug-sections") {
CompressDebugSections = false;
+ } else if (Value == "-mrelax-relocations=yes" ||
+ Value == "--mrelax-relocations=yes") {
+ UseRelaxRelocations = true;
+ } else if (Value == "-mrelax-relocations=no" ||
+ Value == "--mrelax-relocations=no") {
+ UseRelaxRelocations = false;
} else if (Value.startswith("-I")) {
CmdArgs.push_back(Value.data());
// We need to consume the next argument if the current arg is a plain
@@ -2647,9 +2962,9 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
if (DwarfVersion == 0) { // Send it onward, and let cc1as complain.
CmdArgs.push_back(Value.data());
} else {
- RenderDebugEnablingArgs(
- Args, CmdArgs, CodeGenOptions::LimitedDebugInfo, DwarfVersion,
- llvm::DebuggerKind::Default);
+ RenderDebugEnablingArgs(Args, CmdArgs,
+ codegenoptions::LimitedDebugInfo,
+ DwarfVersion, llvm::DebuggerKind::Default);
}
} else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") ||
Value.startswith("-mhwdiv") || Value.startswith("-march")) {
@@ -2666,6 +2981,12 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
else
D.Diag(diag::warn_debug_compression_unavailable);
}
+ if (UseRelaxRelocations)
+ CmdArgs.push_back("--mrelax-relocations");
+ if (MipsTargetFeature != nullptr) {
+ CmdArgs.push_back("-target-feature");
+ CmdArgs.push_back(MipsTargetFeature);
+ }
}
// This adds the static libclang_rt.builtins-arch.a directly to the command line
@@ -2749,12 +3070,12 @@ static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC,
static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs, StringRef Sanitizer,
- bool IsShared) {
- // Static runtimes must be forced into executable, so we wrap them in
+ bool IsShared, bool IsWhole) {
+  // Wrap any static runtimes that must be forced into the executable in
// whole-archive.
- if (!IsShared) CmdArgs.push_back("-whole-archive");
+ if (IsWhole) CmdArgs.push_back("-whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Sanitizer, IsShared));
- if (!IsShared) CmdArgs.push_back("-no-whole-archive");
+ if (IsWhole) CmdArgs.push_back("-no-whole-archive");
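+  // e.g. an illustrative whole-archive link of a static sanitizer runtime:
+  //   -whole-archive <resource-dir>/libclang_rt.asan-x86_64.a -no-whole-archive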
}
// Tries to use a file with the list of dynamic symbols that need to be exported
@@ -2787,12 +3108,17 @@ static void
collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
SmallVectorImpl<StringRef> &SharedRuntimes,
SmallVectorImpl<StringRef> &StaticRuntimes,
- SmallVectorImpl<StringRef> &HelperStaticRuntimes) {
+ SmallVectorImpl<StringRef> &NonWholeStaticRuntimes,
+ SmallVectorImpl<StringRef> &HelperStaticRuntimes,
+ SmallVectorImpl<StringRef> &RequiredSymbols) {
const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
// Collect shared runtimes.
if (SanArgs.needsAsanRt() && SanArgs.needsSharedAsanRt()) {
SharedRuntimes.push_back("asan");
}
+ // The stats_client library is also statically linked into DSOs.
+ if (SanArgs.needsStatsRt())
+ StaticRuntimes.push_back("stats_client");
// Collect static runtimes.
if (Args.hasArg(options::OPT_shared) || TC.getTriple().isAndroid()) {
@@ -2831,8 +3157,17 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
StaticRuntimes.push_back("safestack");
if (SanArgs.needsCfiRt())
StaticRuntimes.push_back("cfi");
- if (SanArgs.needsCfiDiagRt())
+ if (SanArgs.needsCfiDiagRt()) {
StaticRuntimes.push_back("cfi_diag");
+ if (SanArgs.linkCXXRuntimes())
+ StaticRuntimes.push_back("ubsan_standalone_cxx");
+ }
+ if (SanArgs.needsStatsRt()) {
+ NonWholeStaticRuntimes.push_back("stats");
+ RequiredSymbols.push_back("__sanitizer_stats_register");
+ }
+ if (SanArgs.needsEsanRt())
+ StaticRuntimes.push_back("esan");
}
// Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
@@ -2840,18 +3175,27 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
static bool addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
SmallVector<StringRef, 4> SharedRuntimes, StaticRuntimes,
- HelperStaticRuntimes;
+ NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols;
collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes,
- HelperStaticRuntimes);
+ NonWholeStaticRuntimes, HelperStaticRuntimes,
+ RequiredSymbols);
for (auto RT : SharedRuntimes)
- addSanitizerRuntime(TC, Args, CmdArgs, RT, true);
+ addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false);
for (auto RT : HelperStaticRuntimes)
- addSanitizerRuntime(TC, Args, CmdArgs, RT, false);
+ addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
bool AddExportDynamic = false;
for (auto RT : StaticRuntimes) {
- addSanitizerRuntime(TC, Args, CmdArgs, RT, false);
+ addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
+ for (auto RT : NonWholeStaticRuntimes) {
+ addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false);
+ AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
+ }
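+  // "-u <symbol>" marks the symbol as undefined, so the linker pulls in the
+  // runtime object defining it even when nothing else references it.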
+ for (auto S : RequiredSymbols) {
+ CmdArgs.push_back("-u");
+ CmdArgs.push_back(Args.MakeArgString(S));
+ }
// If there is a static runtime with no dynamic list, force all the symbols
// to be dynamic to be sure we export sanitizer interface functions.
if (AddExportDynamic)
@@ -2859,6 +3203,33 @@ static bool addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
return !StaticRuntimes.empty();
}
+static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args,
+ ArgStringList &CmdArgs) {
+ if (Args.hasFlag(options::OPT_fxray_instrument,
+ options::OPT_fnoxray_instrument, false)) {
+ CmdArgs.push_back("-whole-archive");
+ CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false));
+ CmdArgs.push_back("-no-whole-archive");
+ return true;
+ }
+ return false;
+}
+
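+// Link the system libraries the XRay runtime depends on (pthread, rt, m,
+// atomic, a C++ standard library and, except on FreeBSD, dl), preceded by
+// --no-as-needed so they are linked even if currently unreferenced.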
+static void linkXRayRuntimeDeps(const ToolChain &TC, const ArgList &Args,
+ ArgStringList &CmdArgs) {
+ CmdArgs.push_back("--no-as-needed");
+ CmdArgs.push_back("-lpthread");
+ CmdArgs.push_back("-lrt");
+ CmdArgs.push_back("-lm");
+ CmdArgs.push_back("-latomic");
+ if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx)
+ CmdArgs.push_back("-lc++");
+ else
+ CmdArgs.push_back("-lstdc++");
+ if (TC.getTriple().getOS() != llvm::Triple::FreeBSD)
+ CmdArgs.push_back("-ldl");
+}
+
static bool areOptimizationsEnabled(const ArgList &Args) {
// Find the last -O arg and see if it is non-zero.
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
@@ -2880,7 +3251,7 @@ static bool shouldUseFramePointerForTarget(const ArgList &Args,
break;
}
- if (Triple.isOSLinux()) {
+ if (Triple.isOSLinux() || Triple.getOS() == llvm::Triple::CloudABI) {
switch (Triple.getArch()) {
// Don't use a frame pointer on linux if optimizing for certain targets.
case llvm::Triple::mips64:
@@ -2900,6 +3271,8 @@ static bool shouldUseFramePointerForTarget(const ArgList &Args,
switch (Triple.getArch()) {
case llvm::Triple::x86:
return !areOptimizationsEnabled(Args);
+ case llvm::Triple::x86_64:
+ return Triple.isOSBinFormatMachO();
case llvm::Triple::arm:
case llvm::Triple::thumb:
// Windows on ARM builds with FPO disabled to aid fast stack walking
@@ -3092,7 +3465,7 @@ static void appendUserToPath(SmallVectorImpl<char> &Result) {
Result.append(UID.begin(), UID.end());
}
-VersionTuple visualstudio::getMSVCVersion(const Driver *D,
+VersionTuple visualstudio::getMSVCVersion(const Driver *D, const ToolChain &TC,
const llvm::Triple &Triple,
const llvm::opt::ArgList &Args,
bool IsWindowsMSVC) {
@@ -3134,7 +3507,14 @@ VersionTuple visualstudio::getMSVCVersion(const Driver *D,
if (Major || Minor || Micro)
return VersionTuple(Major, Minor, Micro);
- return VersionTuple(18);
+ if (IsWindowsMSVC) {
+ VersionTuple MSVT = TC.getMSVCVersionFromExe();
+ if (!MSVT.empty())
+ return MSVT;
+
+ // FIXME: Consider bumping this to 19 (MSVC2015) soon.
+ return VersionTuple(18);
+ }
}
return VersionTuple();
}
@@ -3142,16 +3522,27 @@ VersionTuple visualstudio::getMSVCVersion(const Driver *D,
static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
const InputInfo &Output, const ArgList &Args,
ArgStringList &CmdArgs) {
+
+ auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate,
+ options::OPT_fprofile_generate_EQ,
+ options::OPT_fno_profile_generate);
+ if (PGOGenerateArg &&
+ PGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
+ PGOGenerateArg = nullptr;
+
auto *ProfileGenerateArg = Args.getLastArg(
options::OPT_fprofile_instr_generate,
- options::OPT_fprofile_instr_generate_EQ, options::OPT_fprofile_generate,
- options::OPT_fprofile_generate_EQ,
+ options::OPT_fprofile_instr_generate_EQ,
options::OPT_fno_profile_instr_generate);
if (ProfileGenerateArg &&
ProfileGenerateArg->getOption().matches(
options::OPT_fno_profile_instr_generate))
ProfileGenerateArg = nullptr;
+ if (PGOGenerateArg && ProfileGenerateArg)
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << PGOGenerateArg->getSpelling() << ProfileGenerateArg->getSpelling();
+
auto *ProfileUseArg = Args.getLastArg(
options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ,
options::OPT_fprofile_use, options::OPT_fprofile_use_EQ,
@@ -3160,6 +3551,10 @@ static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use))
ProfileUseArg = nullptr;
+ if (PGOGenerateArg && ProfileUseArg)
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << ProfileUseArg->getSpelling() << PGOGenerateArg->getSpelling();
+
if (ProfileGenerateArg && ProfileUseArg)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< ProfileGenerateArg->getSpelling() << ProfileUseArg->getSpelling();
@@ -3167,20 +3562,27 @@ static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
if (ProfileGenerateArg) {
if (ProfileGenerateArg->getOption().matches(
options::OPT_fprofile_instr_generate_EQ))
- ProfileGenerateArg->render(Args, CmdArgs);
- else if (ProfileGenerateArg->getOption().matches(
- options::OPT_fprofile_generate_EQ)) {
- SmallString<128> Path(ProfileGenerateArg->getValue());
+ CmdArgs.push_back(Args.MakeArgString(Twine("-fprofile-instrument-path=") +
+ ProfileGenerateArg->getValue()));
+    // The default is to use Clang instrumentation.
+ CmdArgs.push_back("-fprofile-instrument=clang");
+ }
+
+ if (PGOGenerateArg) {
+ CmdArgs.push_back("-fprofile-instrument=llvm");
+ if (PGOGenerateArg->getOption().matches(
+ options::OPT_fprofile_generate_EQ)) {
+ SmallString<128> Path(PGOGenerateArg->getValue());
llvm::sys::path::append(Path, "default.profraw");
CmdArgs.push_back(
- Args.MakeArgString(Twine("-fprofile-instr-generate=") + Path));
- } else
- Args.AddAllArgs(CmdArgs, options::OPT_fprofile_instr_generate);
+ Args.MakeArgString(Twine("-fprofile-instrument-path=") + Path));
+ }
}
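+  // Net effect (illustrative): -fprofile-instr-generate[=<file>] selects
+  // -fprofile-instrument=clang, while -fprofile-generate[=<dir>] selects
+  // -fprofile-instrument=llvm with <dir>/default.profraw as the raw profile
+  // output path.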
if (ProfileUseArg) {
if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ))
- ProfileUseArg->render(Args, CmdArgs);
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
else if ((ProfileUseArg->getOption().matches(
options::OPT_fprofile_use_EQ) ||
ProfileUseArg->getOption().matches(
@@ -3190,7 +3592,7 @@ static void addPGOAndCoverageFlags(Compilation &C, const Driver &D,
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(
- Args.MakeArgString(Twine("-fprofile-instr-use=") + Path));
+ Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path));
}
}
@@ -3392,8 +3794,6 @@ ParsePICArgs(const ToolChain &ToolChain, const llvm::Triple &Triple,
static const char *RelocationModelName(llvm::Reloc::Model Model) {
switch (Model) {
- case llvm::Reloc::Default:
- return nullptr;
case llvm::Reloc::Static:
return "static";
case llvm::Reloc::PIC_:
@@ -3432,6 +3832,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
getToolChain().getTriple().isWindowsCygwinEnvironment();
bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
bool IsPS4CPU = getToolChain().getTriple().isPS4CPU();
+ bool IsIAMCU = getToolChain().getTriple().isOSIAMCU();
// Check number of inputs for sanity. We need at least one input.
assert(Inputs.size() >= 1 && "Must have at least one input.");
@@ -3439,9 +3840,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// CUDA compilation may have multiple inputs (source file + results of
// device-side compilations). All other jobs are expected to have exactly one
// input.
- bool IsCuda = types::isCuda(Input.getType());
+ bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs.");
+ // C++ is not supported for IAMCU.
+ if (IsIAMCU && types::isCXX(Input.getType()))
+ D.Diag(diag::err_drv_clang_unsupported) << "C++ for IAMCU";
+
// Invoke ourselves in -cc1 mode.
//
// FIXME: Implement custom jobs for internal actions.
@@ -3451,23 +3856,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
- const ToolChain *AuxToolChain = nullptr;
if (IsCuda) {
- // FIXME: We need a (better) way to pass information about
- // particular compilation pass we're constructing here. For now we
- // can check which toolchain we're using and pick the other one to
- // extract the triple.
- if (&getToolChain() == C.getCudaDeviceToolChain())
- AuxToolChain = C.getCudaHostToolChain();
- else if (&getToolChain() == C.getCudaHostToolChain())
- AuxToolChain = C.getCudaDeviceToolChain();
+ // We have to pass the triple of the host if compiling for a CUDA device and
+ // vice-versa.
+ std::string NormalizedTriple;
+ if (JA.isDeviceOffloading(Action::OFK_Cuda))
+ NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Host>()
+ ->getTriple()
+ .normalize();
else
- llvm_unreachable("Can't figure out CUDA compilation mode.");
- assert(AuxToolChain != nullptr && "No aux toolchain.");
+ NormalizedTriple = C.getSingleOffloadToolChain<Action::OFK_Cuda>()
+ ->getTriple()
+ .normalize();
+
CmdArgs.push_back("-aux-triple");
- CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
- CmdArgs.push_back("-fcuda-target-overloads");
- CmdArgs.push_back("-fcuda-disable-target-call-checks");
+ CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
}
if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
@@ -3566,6 +3969,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
}
+ // Embed-bitcode option.
+ if (C.getDriver().embedBitcodeEnabled() &&
+ (isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) {
+ // Add flags implied by -fembed-bitcode.
+ Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
+    // Disable all LLVM IR-level optimizations.
+ CmdArgs.push_back("-disable-llvm-optzns");
+ }
+ if (C.getDriver().embedBitcodeMarkerOnly())
+ CmdArgs.push_back("-fembed-bitcode=marker");
+
// We normally speed up the clang process a bit by skipping destructors at
// exit, but when we're generating diagnostics we can rely on some of the
// cleanup.
@@ -3575,6 +3989,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Disable the verification pass in -asserts builds.
#ifdef NDEBUG
CmdArgs.push_back("-disable-llvm-verifier");
+ // Discard LLVM value names in -asserts builds.
+ CmdArgs.push_back("-discard-value-names");
#endif
// Set the main file name, so that debug info works even with
@@ -3600,8 +4016,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (!Args.hasArg(options::OPT__analyzer_no_default_checks)) {
CmdArgs.push_back("-analyzer-checker=core");
- if (!IsWindowsMSVC)
- CmdArgs.push_back("-analyzer-checker=unix");
+ if (!IsWindowsMSVC) {
+ CmdArgs.push_back("-analyzer-checker=unix");
+ } else {
+ // Enable "unix" checkers that also work on Windows.
+ CmdArgs.push_back("-analyzer-checker=unix.API");
+ CmdArgs.push_back("-analyzer-checker=unix.Malloc");
+ CmdArgs.push_back("-analyzer-checker=unix.MallocSizeof");
+ CmdArgs.push_back("-analyzer-checker=unix.MismatchedDeallocator");
+ CmdArgs.push_back("-analyzer-checker=unix.cstring.BadSizeArg");
+ CmdArgs.push_back("-analyzer-checker=unix.cstring.NullArg");
+ }
// Disable some unix checkers for PS4.
if (IsPS4CPU) {
@@ -3666,10 +4091,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (PICLevel > 0) {
CmdArgs.push_back("-pic-level");
CmdArgs.push_back(PICLevel == 1 ? "1" : "2");
- if (IsPIE) {
- CmdArgs.push_back("-pie-level");
- CmdArgs.push_back(PICLevel == 1 ? "1" : "2");
- }
+ if (IsPIE)
+ CmdArgs.push_back("-pic-is-pie");
}
if (Arg *A = Args.getLastArg(options::OPT_meabi)) {
@@ -3708,6 +4131,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
A->claim();
}
+ if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables,
+ true))
+ CmdArgs.push_back("-fno-jump-tables");
+
if (Arg *A = Args.getLastArg(options::OPT_mregparm_EQ)) {
CmdArgs.push_back("-mregparm");
CmdArgs.push_back(A->getValue());
@@ -3727,7 +4154,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false))
- CmdArgs.push_back("-mrtd");
+ CmdArgs.push_back("-fdefault-calling-conv=stdcall");
if (shouldUseFramePointer(Args, getToolChain().getTriple()))
CmdArgs.push_back("-mdisable-fp-elim");
@@ -3923,9 +4350,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("Arguments");
}
- // Enable -mconstructor-aliases except on darwin, where we have to
- // work around a linker bug; see <rdar://problem/7651567>.
- if (!getToolChain().getTriple().isOSDarwin())
+  // Enable -mconstructor-aliases except on darwin, where we have to work
+  // around a linker bug (see <rdar://problem/7651567>), and CUDA device
+  // code, where aliases aren't supported.
+ if (!getToolChain().getTriple().isOSDarwin() &&
+ !getToolChain().getTriple().isNVPTX())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
@@ -4018,11 +4447,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
AddSparcTargetArgs(Args, CmdArgs);
break;
+ case llvm::Triple::systemz:
+ AddSystemZTargetArgs(Args, CmdArgs);
+ break;
+
case llvm::Triple::x86:
case llvm::Triple::x86_64:
AddX86TargetArgs(Args, CmdArgs);
break;
+ case llvm::Triple::lanai:
+ AddLanaiTargetArgs(Args, CmdArgs);
+ break;
+
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
@@ -4045,13 +4482,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
unsigned DwarfVersion = 0;
llvm::DebuggerKind DebuggerTuning = getToolChain().getDefaultDebuggerTuning();
// These two are potentially updated by AddClangCLArgs.
- enum CodeGenOptions::DebugInfoKind DebugInfoKind =
- CodeGenOptions::NoDebugInfo;
+ codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
bool EmitCodeView = false;
// Add clang-cl arguments.
+ types::ID InputType = Input.getType();
if (getToolChain().getDriver().IsCLMode())
- AddClangCLArgs(Args, CmdArgs, &DebugInfoKind, &EmitCodeView);
+ AddClangCLArgs(Args, InputType, CmdArgs, &DebugInfoKind, &EmitCodeView);
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
@@ -4064,7 +4501,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Explicitly error on some things we know we don't support and can't just
// ignore.
- types::ID InputType = Input.getType();
if (!Args.hasArg(options::OPT_fallow_unsupported)) {
Arg *Unsupported;
if (types::isCXX(InputType) && getToolChain().getTriple().isOSDarwin() &&
@@ -4101,12 +4537,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// If you say "-gsplit-dwarf -gline-tables-only", -gsplit-dwarf loses.
// But -gsplit-dwarf is not a g_group option, hence we have to check the
// order explicitly. (If -gsplit-dwarf wins, we fix DebugInfoKind later.)
- if (SplitDwarfArg && DebugInfoKind < CodeGenOptions::LimitedDebugInfo &&
+ if (SplitDwarfArg && DebugInfoKind < codegenoptions::LimitedDebugInfo &&
A->getIndex() > SplitDwarfArg->getIndex())
SplitDwarfArg = nullptr;
} else
// For any other 'g' option, use Limited.
- DebugInfoKind = CodeGenOptions::LimitedDebugInfo;
+ DebugInfoKind = codegenoptions::LimitedDebugInfo;
}
// If a debugger tuning argument appeared, remember it.
@@ -4125,13 +4561,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_gdwarf_4, options::OPT_gdwarf_5))
DwarfVersion = DwarfVersionNum(A->getSpelling());
- // Forward -gcodeview.
- // 'EmitCodeView might have been set by CL-compatibility argument parsing.
+ // Forward -gcodeview. EmitCodeView might have been set by CL-compatibility
+ // argument parsing.
if (Args.hasArg(options::OPT_gcodeview) || EmitCodeView) {
// DwarfVersion remains at 0 if no explicit choice was made.
CmdArgs.push_back("-gcodeview");
} else if (DwarfVersion == 0 &&
- DebugInfoKind != CodeGenOptions::NoDebugInfo) {
+ DebugInfoKind != codegenoptions::NoDebugInfo) {
DwarfVersion = getToolChain().GetDefaultDwarfVersion();
}
@@ -4145,7 +4581,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// FIXME: Move backend command line options to the module.
if (Args.hasArg(options::OPT_gmodules)) {
- DebugInfoKind = CodeGenOptions::LimitedDebugInfo;
+ DebugInfoKind = codegenoptions::LimitedDebugInfo;
CmdArgs.push_back("-dwarf-ext-refs");
CmdArgs.push_back("-fmodule-format=obj");
}
@@ -4154,7 +4590,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// splitting and extraction.
// FIXME: Currently only works on Linux.
if (getToolChain().getTriple().isOSLinux() && SplitDwarfArg) {
- DebugInfoKind = CodeGenOptions::LimitedDebugInfo;
+ DebugInfoKind = codegenoptions::LimitedDebugInfo;
CmdArgs.push_back("-backend-option");
CmdArgs.push_back("-split-dwarf=Enable");
}
@@ -4167,8 +4603,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
bool NeedFullDebug = Args.hasFlag(options::OPT_fstandalone_debug,
options::OPT_fno_standalone_debug,
getToolChain().GetDefaultStandaloneDebug());
- if (DebugInfoKind == CodeGenOptions::LimitedDebugInfo && NeedFullDebug)
- DebugInfoKind = CodeGenOptions::FullDebugInfo;
+ if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug)
+ DebugInfoKind = codegenoptions::FullDebugInfo;
RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion,
DebuggerTuning);
@@ -4214,6 +4650,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions);
+ if (Args.hasFlag(options::OPT_fxray_instrument,
+ options::OPT_fnoxray_instrument, false)) {
+ CmdArgs.push_back("-fxray-instrument");
+ if (const Arg *A =
+ Args.getLastArg(options::OPT_fxray_instruction_threshold_,
+ options::OPT_fxray_instruction_threshold_EQ)) {
+ CmdArgs.push_back("-fxray-instruction-threshold");
+ CmdArgs.push_back(A->getValue());
+ }
+ }
+
addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs);
// Add runtime flag for PS4 when PGO or Coverage are enabled.
@@ -4312,8 +4759,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
//
// FIXME: Support -fpreprocessed
if (types::getPreprocessedType(InputType) != types::TY_INVALID)
- AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs,
- AuxToolChain);
+ AddPreprocessingOptions(C, JA, D, Args, CmdArgs, Output, Inputs);
// Don't warn about "clang -c -DPIC -fPIC test.i" because libtool.m4 assumes
// that "The compiler can only warn and ignore the option if not recognized".
@@ -4555,7 +5001,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names);
// Emulated TLS is enabled by default on Android, and can be enabled manually
// with -femulated-tls.
- bool EmulatedTLSDefault = Triple.isAndroid();
+  bool EmulatedTLSDefault =
+      Triple.isAndroid() || Triple.isWindowsCygwinEnvironment();
if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls,
EmulatedTLSDefault))
CmdArgs.push_back("-femulated-tls");
@@ -4567,9 +5013,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
- // Forward flags for OpenMP
+  // Forward flags for OpenMP. We don't do this if the current action is a
+ // device offloading action.
+ //
+ // TODO: Allow OpenMP offload actions when they become available.
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
- options::OPT_fno_openmp, false))
+ options::OPT_fno_openmp, false) &&
+ JA.isDeviceOffloading(Action::OFK_None)) {
switch (getOpenMPRuntime(getToolChain(), Args)) {
case OMPRT_OMP:
case OMPRT_IOMP5:
@@ -4582,6 +5032,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (!Args.hasFlag(options::OPT_fopenmp_use_tls,
options::OPT_fnoopenmp_use_tls, /*Default=*/true))
CmdArgs.push_back("-fnoopenmp-use-tls");
+ Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
@@ -4592,6 +5043,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// semantic analysis, etc.
break;
}
+ }
const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);
@@ -4660,15 +5112,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// -stack-protector=0 is default.
unsigned StackProtectorLevel = 0;
- if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
- Args.ClaimAllArgs(options::OPT_fno_stack_protector);
- Args.ClaimAllArgs(options::OPT_fstack_protector_all);
- Args.ClaimAllArgs(options::OPT_fstack_protector_strong);
- Args.ClaimAllArgs(options::OPT_fstack_protector);
- } else if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
- options::OPT_fstack_protector_all,
- options::OPT_fstack_protector_strong,
- options::OPT_fstack_protector)) {
+ if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
+ options::OPT_fstack_protector_all,
+ options::OPT_fstack_protector_strong,
+ options::OPT_fstack_protector)) {
if (A->getOption().matches(options::OPT_fstack_protector)) {
StackProtectorLevel = std::max<unsigned>(
LangOptions::SSPOn,
@@ -4749,6 +5196,43 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-arm-restrict-it");
}
+ // Forward -cl options to -cc1
+ if (Args.getLastArg(options::OPT_cl_opt_disable)) {
+ CmdArgs.push_back("-cl-opt-disable");
+ }
+ if (Args.getLastArg(options::OPT_cl_strict_aliasing)) {
+ CmdArgs.push_back("-cl-strict-aliasing");
+ }
+ if (Args.getLastArg(options::OPT_cl_single_precision_constant)) {
+ CmdArgs.push_back("-cl-single-precision-constant");
+ }
+ if (Args.getLastArg(options::OPT_cl_finite_math_only)) {
+ CmdArgs.push_back("-cl-finite-math-only");
+ }
+ if (Args.getLastArg(options::OPT_cl_kernel_arg_info)) {
+ CmdArgs.push_back("-cl-kernel-arg-info");
+ }
+ if (Args.getLastArg(options::OPT_cl_unsafe_math_optimizations)) {
+ CmdArgs.push_back("-cl-unsafe-math-optimizations");
+ }
+ if (Args.getLastArg(options::OPT_cl_fast_relaxed_math)) {
+ CmdArgs.push_back("-cl-fast-relaxed-math");
+ }
+ if (Args.getLastArg(options::OPT_cl_mad_enable)) {
+ CmdArgs.push_back("-cl-mad-enable");
+ }
+ if (Args.getLastArg(options::OPT_cl_no_signed_zeros)) {
+ CmdArgs.push_back("-cl-no-signed-zeros");
+ }
+ if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) {
+ std::string CLStdStr = "-cl-std=";
+ CLStdStr += A->getValue();
+ CmdArgs.push_back(Args.MakeArgString(CLStdStr));
+ }
+ if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) {
+ CmdArgs.push_back("-cl-denorms-are-zero");
+ }
+
// Forward -f options with positive and negative forms; we translate
// these by hand.
if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) {
@@ -4841,28 +5325,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (!Args.hasFlag(options::OPT_fimplicit_modules,
options::OPT_fno_implicit_modules)) {
CmdArgs.push_back("-fno-implicit-modules");
- }
-
- // -fmodule-name specifies the module that is currently being built (or
- // used for header checking by -fmodule-maps).
- Args.AddLastArg(CmdArgs, options::OPT_fmodule_name);
-
- // -fmodule-map-file can be used to specify files containing module
- // definitions.
- Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file);
-
- // -fmodule-file can be used to specify files containing precompiled modules.
- if (HaveModules)
- Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file);
- else
- Args.ClaimAllArgs(options::OPT_fmodule_file);
-
- // -fmodule-cache-path specifies where our implicitly-built module files
- // should be written.
- SmallString<128> Path;
- if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path))
- Path = A->getValue();
- if (HaveModules) {
+ } else if (HaveModules) {
+ // -fmodule-cache-path specifies where our implicitly-built module files
+ // should be written.
+ SmallString<128> Path;
+ if (Arg *A = Args.getLastArg(options::OPT_fmodules_cache_path))
+ Path = A->getValue();
if (C.isForDiagnostics()) {
// When generating crash reports, we want to emit the modules along with
// the reproduction sources, so we ignore any provided module path.
@@ -4881,6 +5349,20 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(Path));
}
+ // -fmodule-name specifies the module that is currently being built (or
+ // used for header checking by -fmodule-maps).
+ Args.AddLastArg(CmdArgs, options::OPT_fmodule_name_EQ);
+
+ // -fmodule-map-file can be used to specify files containing module
+ // definitions.
+ Args.AddAllArgs(CmdArgs, options::OPT_fmodule_map_file);
+
+ // -fmodule-file can be used to specify files containing precompiled modules.
+ if (HaveModules)
+ Args.AddAllArgs(CmdArgs, options::OPT_fmodule_file);
+ else
+ Args.ClaimAllArgs(options::OPT_fmodule_file);
+
// When building modules and generating crashdumps, we need to dump a module
// dependency VFS alongside the output.
if (HaveModules && C.isForDiagnostics()) {
@@ -4995,17 +5477,32 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// -fms-compatibility-version=18.00 is default.
VersionTuple MSVT = visualstudio::getMSVCVersion(
- &D, getToolChain().getTriple(), Args, IsWindowsMSVC);
+ &D, getToolChain(), getToolChain().getTriple(), Args, IsWindowsMSVC);
if (!MSVT.empty())
CmdArgs.push_back(
Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString()));
bool IsMSVC2015Compatible = MSVT.getMajor() >= 19;
if (ImplyVCPPCXXVer) {
- if (IsMSVC2015Compatible)
- CmdArgs.push_back("-std=c++14");
- else
- CmdArgs.push_back("-std=c++11");
+ StringRef LanguageStandard;
+ if (const Arg *StdArg = Args.getLastArg(options::OPT__SLASH_std)) {
+ LanguageStandard = llvm::StringSwitch<StringRef>(StdArg->getValue())
+ .Case("c++14", "-std=c++14")
+ .Case("c++latest", "-std=c++1z")
+ .Default("");
+ if (LanguageStandard.empty())
+ D.Diag(clang::diag::warn_drv_unused_argument)
+ << StdArg->getAsString(Args);
+ }
+
+ if (LanguageStandard.empty()) {
+ if (IsMSVC2015Compatible)
+ LanguageStandard = "-std=c++14";
+ else
+ LanguageStandard = "-std=c++11";
+ }
+
+ CmdArgs.push_back(LanguageStandard.data());
}
// -fno-borland-extensions is default.
@@ -5046,8 +5543,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasArg(options::OPT_fno_inline))
CmdArgs.push_back("-fno-inline");
- if (Args.hasArg(options::OPT_fno_inline_functions))
- CmdArgs.push_back("-fno-inline-functions");
+ if (Arg* InlineArg = Args.getLastArg(options::OPT_finline_functions,
+ options::OPT_finline_hint_functions,
+ options::OPT_fno_inline_functions))
+ InlineArg->render(Args, CmdArgs);
ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind);
@@ -5156,7 +5655,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, objcRuntime,
CmdArgs);
- if (getToolChain().UseSjLjExceptions(Args))
+ if (Args.hasArg(options::OPT_fsjlj_exceptions) ||
+ getToolChain().UseSjLjExceptions(Args))
CmdArgs.push_back("-fsjlj-exceptions");
// C++ "sane" operator new.
@@ -5286,43 +5786,27 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fno-diagnostics-show-note-include-stack");
}
- // Color diagnostics are the default, unless the terminal doesn't support
- // them.
- // Support both clang's -f[no-]color-diagnostics and gcc's
- // -f[no-]diagnostics-colors[=never|always|auto].
- enum { Colors_On, Colors_Off, Colors_Auto } ShowColors = Colors_Auto;
- for (const auto &Arg : Args) {
- const Option &O = Arg->getOption();
+ // Color diagnostics are parsed by the driver directly from argv
+ // and later re-parsed to construct this job; claim any possible
+ // color diagnostic here to avoid warn_drv_unused_argument and
+ // diagnose bad OPT_fdiagnostics_color_EQ values.
+ for (Arg *A : Args) {
+ const Option &O = A->getOption();
if (!O.matches(options::OPT_fcolor_diagnostics) &&
!O.matches(options::OPT_fdiagnostics_color) &&
!O.matches(options::OPT_fno_color_diagnostics) &&
!O.matches(options::OPT_fno_diagnostics_color) &&
!O.matches(options::OPT_fdiagnostics_color_EQ))
continue;
-
- Arg->claim();
- if (O.matches(options::OPT_fcolor_diagnostics) ||
- O.matches(options::OPT_fdiagnostics_color)) {
- ShowColors = Colors_On;
- } else if (O.matches(options::OPT_fno_color_diagnostics) ||
- O.matches(options::OPT_fno_diagnostics_color)) {
- ShowColors = Colors_Off;
- } else {
- assert(O.matches(options::OPT_fdiagnostics_color_EQ));
- StringRef value(Arg->getValue());
- if (value == "always")
- ShowColors = Colors_On;
- else if (value == "never")
- ShowColors = Colors_Off;
- else if (value == "auto")
- ShowColors = Colors_Auto;
- else
+ if (O.matches(options::OPT_fdiagnostics_color_EQ)) {
+ StringRef Value(A->getValue());
+ if (Value != "always" && Value != "never" && Value != "auto")
getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
- << ("-fdiagnostics-color=" + value).str();
+ << ("-fdiagnostics-color=" + Value).str();
}
+ A->claim();
}
- if (ShowColors == Colors_On ||
- (ShowColors == Colors_Auto && llvm::sys::Process::StandardErrHasColors()))
+ if (D.getDiags().getDiagnosticOptions().ShowColors)
CmdArgs.push_back("-fcolor-diagnostics");
if (Args.hasArg(options::OPT_fansi_escape_codes))
@@ -5376,6 +5860,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Arg *A = Args.getLastArg(options::OPT_fshow_overloads_EQ))
A->render(Args, CmdArgs);
+ if (Arg *A = Args.getLastArg(
+ options::OPT_fsanitize_undefined_strip_path_components_EQ))
+ A->render(Args, CmdArgs);
+
// -fdollars-in-identifiers default varies depending on platform and
// language; only pass if specified.
if (Arg *A = Args.getLastArg(options::OPT_fdollars_in_identifiers,
@@ -5407,7 +5895,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Default to -fno-builtin-str{cat,cpy} on Darwin for ARM.
//
-// FIXME: This is disabled until clang -cc1 supports -fno-builtin-foo. PR4941.
+// FIXME: Now that PR4941 has been fixed, this can be enabled.
#if 0
if (getToolChain().getTriple().isOSDarwin() &&
(getToolChain().getArch() == llvm::Triple::arm ||
@@ -5478,7 +5966,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// With -save-temps, we want to save the unoptimized bitcode output from the
// CompileJobAction, use -disable-llvm-passes to get pristine IR generated
// by the frontend.
- if (C.getDriver().isSaveTempsEnabled() && isa<CompileJobAction>(JA))
+ // When -fembed-bitcode is enabled, optimized bitcode is emitted because it
+ // has a slightly different breakdown between stages.
+ // FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of
+ // pristine IR generated by the frontend. Ideally, a new compile action should
+ // be added so both IR can be captured.
+ if (C.getDriver().isSaveTempsEnabled() &&
+ !C.getDriver().embedBitcodeEnabled() && isa<CompileJobAction>(JA))
CmdArgs.push_back("-disable-llvm-passes");
if (Output.getType() == types::TY_Dependencies) {
@@ -5540,6 +6034,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(I->getFilename());
}
+ bool WholeProgramVTables =
+ Args.hasFlag(options::OPT_fwhole_program_vtables,
+ options::OPT_fno_whole_program_vtables, false);
+ if (WholeProgramVTables) {
+ if (!D.isUsingLTO())
+ D.Diag(diag::err_drv_argument_only_allowed_with)
+ << "-fwhole-program-vtables"
+ << "-flto";
+ CmdArgs.push_back("-fwhole-program-vtables");
+ }
+
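Note: as the new diagnostic indicates, whole-program vtable optimization needs the whole program visible at link time, so the flag is rejected without LTO. Roughly (the exact text is the generic "argument only allowed with" diagnostic):

    clang -fwhole-program-vtables a.cc        # error: only allowed with -flto
    clang -flto -fwhole-program-vtables a.cc  # ok: cc1 gets -fwhole-program-vtables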
// Finally add the compile command to the compilation.
if (Args.hasArg(options::OPT__SLASH_fallback) &&
Output.getType() == types::TY_Object &&
@@ -5548,6 +6053,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
getCLFallback()->GetCommand(C, JA, Output, Inputs, Args, LinkingOutput);
C.addCommand(llvm::make_unique<FallbackCommand>(
JA, *this, Exec, CmdArgs, Inputs, std::move(CLCommand)));
+ } else if (Args.hasArg(options::OPT__SLASH_fallback) &&
+ isa<PrecompileJobAction>(JA)) {
+ // In /fallback builds, run the main compilation even if the pch generation
+ // fails, so that the main compilation's fallback to cl.exe runs.
+ C.addCommand(llvm::make_unique<ForceSuccessCommand>(JA, *this, Exec,
+ CmdArgs, Inputs));
} else {
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
@@ -5555,7 +6066,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// Handle the debug info splitting at object creation time if we're
// creating an object.
// TODO: Currently only works on linux with newer objcopy.
- if (SplitDwarf && !isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
+ if (SplitDwarf && Output.getType() == types::TY_Object)
SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDwarfOut);
if (Arg *A = Args.getLastArg(options::OPT_pg))
@@ -5711,10 +6222,9 @@ static bool maybeConsumeDash(const std::string &EH, size_t &I) {
namespace {
struct EHFlags {
- EHFlags() : Synch(false), Asynch(false), NoExceptC(false) {}
- bool Synch;
- bool Asynch;
- bool NoExceptC;
+ bool Synch = false;
+ bool Asynch = false;
+ bool NoUnwindC = false;
};
} // end anonymous namespace
@@ -5723,8 +6233,7 @@ struct EHFlags {
/// - s: Cleanup after "synchronous" exceptions, aka C++ exceptions.
/// - a: Cleanup after "asynchronous" exceptions, aka structured exceptions.
/// The 'a' modifier is unimplemented and fundamentally hard in LLVM IR.
-/// - c: Assume that extern "C" functions are implicitly noexcept. This
-/// modifier is an optimization, so we ignore it for now.
+/// - c: Assume that extern "C" functions are implicitly nounwind.
/// The default is /EHs-c-, meaning cleanups are disabled.
static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
EHFlags EH;
@@ -5736,12 +6245,16 @@ static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
switch (EHVal[I]) {
case 'a':
EH.Asynch = maybeConsumeDash(EHVal, I);
+ if (EH.Asynch)
+ EH.Synch = false;
continue;
case 'c':
- EH.NoExceptC = maybeConsumeDash(EHVal, I);
+ EH.NoUnwindC = maybeConsumeDash(EHVal, I);
continue;
case 's':
EH.Synch = maybeConsumeDash(EHVal, I);
+ if (EH.Synch)
+ EH.Asynch = false;
continue;
default:
break;
@@ -5750,12 +6263,21 @@ static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
break;
}
}
+ // The /GX and /GX- flags are only processed if there are no /EH flags.
+ // The default is that /GX is not specified.
+ if (EHArgs.empty() &&
+ Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_,
+ /*default=*/false)) {
+ EH.Synch = true;
+ EH.NoUnwindC = true;
+ }
return EH;
}
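Worked reading of the parser above, including the new mutual exclusion between 's' and 'a' and the /GX fallback: /EHsc yields {Synch, NoUnwindC}, /EHs-c- yields all-false, /EHa clears any earlier Synch, and /GX alone behaves like /EHsc. A self-contained sketch of the same state machine (illustrative only; the real helper is maybeConsumeDash above):

    #include <cstddef>
    #include <string>

    struct Flags { bool Synch = false, Asynch = false, NoUnwindC = false; };

    // A trailing '-' after a flag character turns that flag off.
    static bool consumeDash(const std::string &S, size_t &I) {
      bool HasDash = I + 1 < S.size() && S[I + 1] == '-';
      if (HasDash) ++I;
      return !HasDash;
    }

    static Flags parseEH(const std::string &V) { // V is e.g. "sc" or "s-c-"
      Flags F;
      for (size_t I = 0; I < V.size(); ++I) {
        switch (V[I]) {
        case 'a': F.Asynch = consumeDash(V, I); if (F.Asynch) F.Synch = false; break;
        case 'c': F.NoUnwindC = consumeDash(V, I); break;
        case 's': F.Synch = consumeDash(V, I); if (F.Synch) F.Asynch = false; break;
        default: return F; // stop at the first unrecognized character
        }
      }
      return F;
    }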
-void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs,
- enum CodeGenOptions::DebugInfoKind *DebugInfoKind,
+void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
+ ArgStringList &CmdArgs,
+ codegenoptions::DebugInfoKind *DebugInfoKind,
bool *EmitCodeView) const {
unsigned RTOptionID = options::OPT__SLASH_MT;
@@ -5786,11 +6308,13 @@ void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs,
if (Args.hasArg(options::OPT__SLASH_LDd))
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
+ CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmt";
break;
case options::OPT__SLASH_MTd:
CmdArgs.push_back("-D_DEBUG");
CmdArgs.push_back("-D_MT");
+ CmdArgs.push_back("-flto-visibility-public-std");
FlagForCRT = "--dependent-lib=libcmtd";
break;
default:
@@ -5819,23 +6343,37 @@ void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs,
/*default=*/false))
CmdArgs.push_back("-fno-rtti-data");
- // Emit CodeView if -Z7 is present.
- *EmitCodeView = Args.hasArg(options::OPT__SLASH_Z7);
- bool EmitDwarf = Args.hasArg(options::OPT_gdwarf);
- // If we are emitting CV but not DWARF, don't build information that LLVM
- // can't yet process.
- if (*EmitCodeView && !EmitDwarf)
- *DebugInfoKind = CodeGenOptions::DebugLineTablesOnly;
- if (*EmitCodeView)
+ // This controls whether or not we emit stack-protector instrumentation.
+ // In MSVC, Buffer Security Check (/GS) is on by default.
+ if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_,
+ /*default=*/true)) {
+ CmdArgs.push_back("-stack-protector");
+ CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong)));
+ }
+
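Note: under the MSVC-compatible default, /GS is on, so cc1 receives an explicit stack-protector level; /GS- suppresses it. LangOptions::SSPStrong is rendered via Twine as its numeric enum value (2, assuming the usual SSPOff/SSPOn/SSPStrong ordering):

    clang-cl t.c       # cc1 receives: -stack-protector 2
    clang-cl /GS- t.c  # no -stack-protector flag is passed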
+ // Emit CodeView if -Z7, -Zd, or -gline-tables-only are present.
+ if (Arg *DebugInfoArg =
+ Args.getLastArg(options::OPT__SLASH_Z7, options::OPT__SLASH_Zd,
+ options::OPT_gline_tables_only)) {
+ *EmitCodeView = true;
+ if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7))
+ *DebugInfoKind = codegenoptions::LimitedDebugInfo;
+ else
+ *DebugInfoKind = codegenoptions::DebugLineTablesOnly;
CmdArgs.push_back("-gcodeview");
+ } else {
+ *EmitCodeView = false;
+ }
const Driver &D = getToolChain().getDriver();
EHFlags EH = parseClangCLEHFlags(D, Args);
- // FIXME: Do something with NoExceptC.
if (EH.Synch || EH.Asynch) {
- CmdArgs.push_back("-fcxx-exceptions");
+ if (types::isCXX(InputType))
+ CmdArgs.push_back("-fcxx-exceptions");
CmdArgs.push_back("-fexceptions");
}
+ if (types::isCXX(InputType) && EH.Synch && EH.NoUnwindC)
+ CmdArgs.push_back("-fexternc-nounwind");
// /EP should expand to -E -P.
if (Args.hasArg(options::OPT__SLASH_EP)) {
@@ -5882,6 +6420,15 @@ void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs,
CmdArgs.push_back("-fms-memptr-rep=virtual");
}
+ if (Args.getLastArg(options::OPT__SLASH_Gd))
+ CmdArgs.push_back("-fdefault-calling-conv=cdecl");
+ else if (Args.getLastArg(options::OPT__SLASH_Gr))
+ CmdArgs.push_back("-fdefault-calling-conv=fastcall");
+ else if (Args.getLastArg(options::OPT__SLASH_Gz))
+ CmdArgs.push_back("-fdefault-calling-conv=stdcall");
+ else if (Args.getLastArg(options::OPT__SLASH_Gv))
+ CmdArgs.push_back("-fdefault-calling-conv=vectorcall");
+
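Note: the if/else-if chain tests each option independently rather than taking the last of the four, so as written /Gd takes precedence whenever present, then /Gr, /Gz, /Gv in that order. For example:

    clang-cl /Gv t.c      # cc1 gets -fdefault-calling-conv=vectorcall
    clang-cl /Gv /Gd t.c  # /Gd is tested first, so cdecl wins here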
if (Arg *A = Args.getLastArg(options::OPT_vtordisp_mode_EQ))
A->render(Args, CmdArgs);
@@ -5975,24 +6522,28 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
// Forward -g and handle debug info related flags, assuming we are dealing
// with an actual assembly file.
+ bool WantDebug = false;
+ unsigned DwarfVersion = 0;
+ Args.ClaimAllArgs(options::OPT_g_Group);
+ if (Arg *A = Args.getLastArg(options::OPT_g_Group)) {
+ WantDebug = !A->getOption().matches(options::OPT_g0) &&
+ !A->getOption().matches(options::OPT_ggdb0);
+ if (WantDebug)
+ DwarfVersion = DwarfVersionNum(A->getSpelling());
+ }
+ if (DwarfVersion == 0)
+ DwarfVersion = getToolChain().GetDefaultDwarfVersion();
+
+ codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
+
if (SourceAction->getType() == types::TY_Asm ||
SourceAction->getType() == types::TY_PP_Asm) {
- bool WantDebug = false;
- unsigned DwarfVersion = 0;
- Args.ClaimAllArgs(options::OPT_g_Group);
- if (Arg *A = Args.getLastArg(options::OPT_g_Group)) {
- WantDebug = !A->getOption().matches(options::OPT_g0) &&
- !A->getOption().matches(options::OPT_ggdb0);
- if (WantDebug)
- DwarfVersion = DwarfVersionNum(A->getSpelling());
- }
- if (DwarfVersion == 0)
- DwarfVersion = getToolChain().GetDefaultDwarfVersion();
- RenderDebugEnablingArgs(Args, CmdArgs,
- (WantDebug ? CodeGenOptions::LimitedDebugInfo
- : CodeGenOptions::NoDebugInfo),
- DwarfVersion, llvm::DebuggerKind::Default);
-
+ // You might think that it would be OK to set DebugInfoKind outside of
+ // the guard for source type; however, there is a test which asserts
+ // that some assembler invocation receives no -debug-info-kind,
+ // and it's not clear whether that test is just overly restrictive.
+ DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo
+ : codegenoptions::NoDebugInfo);
// Add the -fdebug-compilation-dir flag if needed.
addDebugCompDirArg(Args, CmdArgs);
@@ -6004,6 +6555,8 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
// And pass along -I options
Args.AddAllArgs(CmdArgs, options::OPT_I);
}
+ RenderDebugEnablingArgs(Args, CmdArgs, DebugInfoKind, DwarfVersion,
+ llvm::DebuggerKind::Default);
// Handle -fPIC et al -- the relocation-model affects the assembler
// for some targets.
@@ -6061,12 +6614,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
// FIXME: Stop lying and consume only the appropriate driver flags
Args.ClaimAllArgs(options::OPT_W_Group);
- // Assemblers that want to know the dwarf version can't assume a value,
- // since the defaulting logic resides in the driver. Put in something
- // reasonable now, in case a subsequent "-Wa,-g" changes it.
- RenderDebugEnablingArgs(Args, CmdArgs, CodeGenOptions::NoDebugInfo,
- getToolChain().GetDefaultDwarfVersion(),
- llvm::DebuggerKind::Default);
CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
getToolChain().getDriver());
@@ -6203,7 +6750,7 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
}
}
- const std::string customGCCName = D.getCCCGenericGCCName();
+ const std::string &customGCCName = D.getCCCGenericGCCName();
const char *GCCName;
if (!customGCCName.empty())
GCCName = customGCCName.c_str();
@@ -6528,6 +7075,7 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
std::string Linker = getToolChain().GetProgramPath(getShortName());
ArgStringList CmdArgs;
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
+ CmdArgs.push_back("-shared");
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Linker),
@@ -6551,7 +7099,10 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
- const char *Linker = Args.MakeArgString(getToolChain().GetLinkerPath());
+
+ const ToolChain &ToolChain = getToolChain();
+ const Driver &D = ToolChain.getDriver();
+ const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
ArgStringList CmdArgs;
CmdArgs.push_back("-flavor");
CmdArgs.push_back("ld");
@@ -6563,9 +7114,48 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (areOptimizationsEnabled(Args))
CmdArgs.push_back("--gc-sections");
- AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
+ if (Args.hasArg(options::OPT_rdynamic))
+ CmdArgs.push_back("-export-dynamic");
+ if (Args.hasArg(options::OPT_s))
+ CmdArgs.push_back("--strip-all");
+ if (Args.hasArg(options::OPT_shared))
+ CmdArgs.push_back("-shared");
+ if (Args.hasArg(options::OPT_static))
+ CmdArgs.push_back("-Bstatic");
+
+ Args.AddAllArgs(CmdArgs, options::OPT_L);
+ ToolChain.AddFilePathLibArgs(Args, CmdArgs);
+
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+ if (Args.hasArg(options::OPT_shared))
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("rcrt1.o")));
+ else if (Args.hasArg(options::OPT_pie))
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("Scrt1.o")));
+ else
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt1.o")));
+
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
+ }
+
+ AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
+
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ if (D.CCCIsCXX())
+ ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
+
+ if (Args.hasArg(options::OPT_pthread))
+ CmdArgs.push_back("-lpthread");
+
+ CmdArgs.push_back("-lc");
+ CmdArgs.push_back("-lcompiler_rt");
+ }
+
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles))
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
+
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
+
C.addCommand(llvm::make_unique<Command>(JA, *this, Linker, CmdArgs, Inputs));
}
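Note: putting the new pieces together, a default C link for wasm (without -nostdlib and friends) is assembled roughly as follows; the crt paths come from the toolchain's file search and are illustrative:

    lld -flavor ld <sysroot>/lib/crt1.o <sysroot>/lib/crti.o main.o \
        -lc -lcompiler_rt <sysroot>/lib/crtn.o -o a.out

--gc-sections is added when optimizations are enabled, -lpthread with -pthread, and the C++ standard library is inserted before -lc when the driver runs in C++ mode.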
@@ -6684,6 +7274,14 @@ mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
.Default(NanLegacy);
}
+bool mips::hasCompactBranches(StringRef &CPU) {
+ // mips32r6 and mips64r6 have compact branches.
+ return llvm::StringSwitch<bool>(CPU)
+ .Case("mips32r6", true)
+ .Case("mips64r6", true)
+ .Default(false);
+}
+
bool mips::hasMipsAbiArg(const ArgList &Args, const char *Value) {
Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
return A && (A->getValue() == StringRef(Value));
@@ -6709,10 +7307,21 @@ bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
return false;
}
+bool mips::isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName) {
+ if (!Triple.isAndroid())
+ return false;
+
+ // Android MIPS32R6 defaults to FP64A.
+ return llvm::StringSwitch<bool>(CPUName)
+ .Case("mips32r6", true)
+ .Default(false);
+}
+
bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
StringRef ABIName, mips::FloatABI FloatABI) {
if (Triple.getVendor() != llvm::Triple::ImaginationTechnologies &&
- Triple.getVendor() != llvm::Triple::MipsTechnologies)
+ Triple.getVendor() != llvm::Triple::MipsTechnologies &&
+ !Triple.isAndroid())
return false;
if (ABIName != "32")
@@ -6842,6 +7451,14 @@ void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// CloudABI only supports static linkage.
CmdArgs.push_back("-Bstatic");
+ CmdArgs.push_back("--no-dynamic-linker");
+
+ // Provide PIE linker flags in case PIE is default for the architecture.
+ if (ToolChain.isPIEDefault()) {
+ CmdArgs.push_back("-pie");
+ CmdArgs.push_back("-zrelro");
+ }
+
CmdArgs.push_back("--eh-frame-hdr");
CmdArgs.push_back("--gc-sections");
@@ -6982,12 +7599,9 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
const Driver &D = getToolChain().getDriver();
const toolchains::MachO &MachOTC = getMachOToolChain();
- unsigned Version[3] = {0, 0, 0};
+ unsigned Version[5] = {0, 0, 0, 0, 0};
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
- bool HadExtra;
- if (!Driver::GetReleaseVersion(A->getValue(), Version[0], Version[1],
- Version[2], HadExtra) ||
- HadExtra)
+ if (!Driver::GetReleaseVersion(A->getValue(), Version))
D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args);
}
@@ -7016,23 +7630,23 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
CmdArgs.push_back("-object_path_lto");
CmdArgs.push_back(TmpPath);
}
+ }
- // Use -lto_library option to specify the libLTO.dylib path. Try to find
- // it in clang installed libraries. If not found, the option is not used
- // and 'ld' will use its default mechanism to search for libLTO.dylib.
- if (Version[0] >= 133) {
- // Search for libLTO in <InstalledDir>/../lib/libLTO.dylib
- StringRef P = llvm::sys::path::parent_path(D.getInstalledDir());
- SmallString<128> LibLTOPath(P);
- llvm::sys::path::append(LibLTOPath, "lib");
- llvm::sys::path::append(LibLTOPath, "libLTO.dylib");
- if (llvm::sys::fs::exists(LibLTOPath)) {
- CmdArgs.push_back("-lto_library");
- CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath));
- } else {
- D.Diag(diag::warn_drv_lto_libpath);
- }
- }
+ // Use -lto_library option to specify the libLTO.dylib path. Try to find
+ // it in clang installed libraries. ld64 will only look at this argument
+ // when it actually uses LTO, so libLTO.dylib only needs to exist at link
+ // time if ld64 decides that it needs to use LTO.
+ // Since this is passed unconditionally, ld64 will never look for libLTO.dylib
+ // next to itself. That's OK, since a libLTO.dylib that doesn't match the
+ // clang version won't work anyway.
+ if (Version[0] >= 133) {
+ // Search for libLTO in <InstalledDir>/../lib/libLTO.dylib
+ StringRef P = llvm::sys::path::parent_path(D.Dir);
+ SmallString<128> LibLTOPath(P);
+ llvm::sys::path::append(LibLTOPath, "lib");
+ llvm::sys::path::append(LibLTOPath, "libLTO.dylib");
+ CmdArgs.push_back("-lto_library");
+ CmdArgs.push_back(C.getArgs().MakeArgString(LibLTOPath));
}
// Derived from the "link" spec.
@@ -7121,6 +7735,15 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
else
CmdArgs.push_back("-no_pie");
}
+ // For -fembed-bitcode, use -bitcode_bundle in the linker command.
+ if (C.getDriver().embedBitcodeEnabled() ||
+ C.getDriver().embedBitcodeMarkerOnly()) {
+ // Check if the toolchain supports bitcode build flow.
+ if (MachOTC.SupportsEmbeddedBitcode())
+ CmdArgs.push_back("-bitcode_bundle");
+ else
+ D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain);
+ }
Args.AddLastArg(CmdArgs, options::OPT_prebind);
Args.AddLastArg(CmdArgs, options::OPT_noprebind);
@@ -8187,6 +8810,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-Bshareable");
} else {
+ Args.AddAllArgs(CmdArgs, options::OPT_pie);
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/libexec/ld.elf_so");
}
@@ -8288,15 +8912,15 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
+ }
+ CmdArgs.push_back(
+ Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
+ if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) {
CmdArgs.push_back(
- Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
- CmdArgs.push_back(
- Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
+ Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
} else {
CmdArgs.push_back(
- Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
- CmdArgs.push_back(
- Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
+ Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
}
}
@@ -8313,7 +8937,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
unsigned Major, Minor, Micro;
getToolChain().getTriple().getOSVersion(Major, Minor, Micro);
bool useLibgcc = true;
- if (Major >= 7 || (Major == 6 && Minor == 99 && Micro >= 49) || Major == 0) {
+ if (Major >= 7 || Major == 0) {
switch (getToolChain().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::arm:
@@ -8362,12 +8986,12 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
- if (!Args.hasArg(options::OPT_shared))
+ if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(
- Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
+ Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
else
CmdArgs.push_back(
- Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
+ Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
}
@@ -8472,12 +9096,12 @@ void gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
// FIXME: remove krait check when GNU tools support krait cpu
- // for now replace it with -march=armv7-a to avoid a lower
+ // for now, replace it with -mcpu=cortex-a15 to avoid a lower
// march from being picked in the absence of a cpu flag.
Arg *A;
if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) &&
StringRef(A->getValue()).lower() == "krait")
- CmdArgs.push_back("-march=armv7-a");
+ CmdArgs.push_back("-mcpu=cortex-a15");
else
Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
@@ -8599,6 +9223,7 @@ static void AddLibgcc(const llvm::Triple &Triple, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
bool isAndroid = Triple.isAndroid();
bool isCygMing = Triple.isOSCygMing();
+ bool IsIAMCU = Triple.isOSIAMCU();
bool StaticLibgcc = Args.hasArg(options::OPT_static_libgcc) ||
Args.hasArg(options::OPT_static);
if (!D.CCCIsCXX())
@@ -8615,7 +9240,7 @@ static void AddLibgcc(const llvm::Triple &Triple, const Driver &D,
CmdArgs.push_back("--no-as-needed");
}
- if (StaticLibgcc && !isAndroid)
+ if (StaticLibgcc && !isAndroid && !IsIAMCU)
CmdArgs.push_back("-lgcc_eh");
else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX())
CmdArgs.push_back("-lgcc");
@@ -8629,72 +9254,6 @@ static void AddLibgcc(const llvm::Triple &Triple, const Driver &D,
CmdArgs.push_back("-ldl");
}
-static std::string getLinuxDynamicLinker(const ArgList &Args,
- const toolchains::Linux &ToolChain) {
- const llvm::Triple::ArchType Arch = ToolChain.getArch();
-
- if (ToolChain.getTriple().isAndroid()) {
- if (ToolChain.getTriple().isArch64Bit())
- return "/system/bin/linker64";
- else
- return "/system/bin/linker";
- } else if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::sparc ||
- Arch == llvm::Triple::sparcel)
- return "/lib/ld-linux.so.2";
- else if (Arch == llvm::Triple::aarch64)
- return "/lib/ld-linux-aarch64.so.1";
- else if (Arch == llvm::Triple::aarch64_be)
- return "/lib/ld-linux-aarch64_be.so.1";
- else if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb) {
- if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF ||
- arm::getARMFloatABI(ToolChain, Args) == arm::FloatABI::Hard)
- return "/lib/ld-linux-armhf.so.3";
- else
- return "/lib/ld-linux.so.3";
- } else if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb) {
- // TODO: check which dynamic linker name.
- if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF ||
- arm::getARMFloatABI(ToolChain, Args) == arm::FloatABI::Hard)
- return "/lib/ld-linux-armhf.so.3";
- else
- return "/lib/ld-linux.so.3";
- } else if (Arch == llvm::Triple::mips || Arch == llvm::Triple::mipsel ||
- Arch == llvm::Triple::mips64 || Arch == llvm::Triple::mips64el) {
- std::string LibDir =
- "/lib" + mips::getMipsABILibSuffix(Args, ToolChain.getTriple());
- StringRef LibName;
- bool IsNaN2008 = mips::isNaN2008(Args, ToolChain.getTriple());
- if (mips::isUCLibc(Args))
- LibName = IsNaN2008 ? "ld-uClibc-mipsn8.so.0" : "ld-uClibc.so.0";
- else if (!ToolChain.getTriple().hasEnvironment()) {
- bool LE = (ToolChain.getTriple().getArch() == llvm::Triple::mipsel) ||
- (ToolChain.getTriple().getArch() == llvm::Triple::mips64el);
- LibName = LE ? "ld-musl-mipsel.so.1" : "ld-musl-mips.so.1";
- } else
- LibName = IsNaN2008 ? "ld-linux-mipsn8.so.1" : "ld.so.1";
-
- return (LibDir + "/" + LibName).str();
- } else if (Arch == llvm::Triple::ppc)
- return "/lib/ld.so.1";
- else if (Arch == llvm::Triple::ppc64) {
- if (ppc::hasPPCAbiArg(Args, "elfv2"))
- return "/lib64/ld64.so.2";
- return "/lib64/ld64.so.1";
- } else if (Arch == llvm::Triple::ppc64le) {
- if (ppc::hasPPCAbiArg(Args, "elfv1"))
- return "/lib64/ld64.so.1";
- return "/lib64/ld64.so.2";
- } else if (Arch == llvm::Triple::systemz)
- return "/lib/ld64.so.1";
- else if (Arch == llvm::Triple::sparcv9)
- return "/lib64/ld-linux.so.2";
- else if (Arch == llvm::Triple::x86_64 &&
- ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUX32)
- return "/libx32/ld-linux-x32.so.2";
- else
- return "/lib64/ld-linux-x86-64.so.2";
-}
-
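Note: the deleted table survives as ToolChain::getDynamicLinker(), used by the linux linker hunk further down. Its shape was a straight arch-to-loader-path mapping, e.g. (a sketch with two representative cases from the deleted code, not exhaustive):

    #include <string>

    // Sketch of the mapping that moved out of Tools.cpp; illustrative only.
    static std::string defaultLinuxLoader(const std::string &Arch) {
      if (Arch == "x86")    return "/lib/ld-linux.so.2";
      if (Arch == "x86_64") return "/lib64/ld-linux-x86-64.so.2";
      return "/lib/ld.so.1"; // placeholder fallback for this sketch
    }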
static void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
// Make use of compiler-rt if --rtlib option is used
@@ -8712,7 +9271,16 @@ static void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
}
break;
case ToolChain::RLT_Libgcc:
- AddLibgcc(TC.getTriple(), D, CmdArgs, Args);
+ // Make sure libgcc is not used under the MSVC environment by default.
+ if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
+ // Issue an error diagnostic if libgcc is explicitly specified
+ // on the command line as the --rtlib option's argument.
+ if (Args.hasArg(options::OPT_rtlib_EQ)) {
+ TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
+ << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
+ }
+ } else
+ AddLibgcc(TC.getTriple(), D, CmdArgs, Args);
break;
}
}
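Note: explicitly requesting libgcc on an MSVC target now fails up front instead of producing an unlinkable command, for example (diagnostic text approximate):

    clang --target=x86_64-pc-windows-msvc --rtlib=libgcc t.c
    # error: unsupported runtime library 'libgcc' for platform 'MSVC'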
@@ -8720,6 +9288,8 @@ static void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
switch (T.getArch()) {
case llvm::Triple::x86:
+ if (T.isOSIAMCU())
+ return "elf_iamcu";
return "elf_i386";
case llvm::Triple::aarch64:
return "aarch64linux";
@@ -8730,7 +9300,7 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
return "armelf_linux_eabi";
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
- return "armebelf_linux_eabi"; /* TODO: check which NAME. */
+ return "armelfb_linux_eabi";
case llvm::Triple::ppc:
return "elf32ppclinux";
case llvm::Triple::ppc64:
@@ -8779,6 +9349,7 @@ void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const llvm::Triple::ArchType Arch = ToolChain.getArch();
const bool isAndroid = ToolChain.getTriple().isAndroid();
+ const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
const bool IsPIE =
!Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) &&
(Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
@@ -8839,20 +9410,23 @@ void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-shared");
}
- if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb ||
- Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb ||
- (!Args.hasArg(options::OPT_static) &&
- !Args.hasArg(options::OPT_shared))) {
- CmdArgs.push_back("-dynamic-linker");
- CmdArgs.push_back(Args.MakeArgString(
- D.DyldPrefix + getLinuxDynamicLinker(Args, ToolChain)));
+ if (!Args.hasArg(options::OPT_static)) {
+ if (Args.hasArg(options::OPT_rdynamic))
+ CmdArgs.push_back("-export-dynamic");
+
+ if (!Args.hasArg(options::OPT_shared)) {
+ const std::string Loader =
+ D.DyldPrefix + ToolChain.getDynamicLinker(Args);
+ CmdArgs.push_back("-dynamic-linker");
+ CmdArgs.push_back(Args.MakeArgString(Loader));
+ }
}
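Note: a default dynamic executable therefore gets -export-dynamic only with -rdynamic, and the interpreter only when neither -static nor -shared is given; on x86_64 glibc the emitted pair is typically:

    -dynamic-linker /lib64/ld-linux-x86-64.so.2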
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
- if (!isAndroid) {
+ if (!isAndroid && !IsIAMCU) {
const char *crt1 = nullptr;
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
@@ -8868,18 +9442,22 @@ void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
}
- const char *crtbegin;
- if (Args.hasArg(options::OPT_static))
- crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
- else if (Args.hasArg(options::OPT_shared))
- crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
- else if (IsPIE)
- crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
- else
- crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";
+ if (IsIAMCU)
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
+ else {
+ const char *crtbegin;
+ if (Args.hasArg(options::OPT_static))
+ crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
+ else if (Args.hasArg(options::OPT_shared))
+ crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
+ else if (IsPIE)
+ crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
+ else
+ crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";
- if (HasCRTBeginEndFiles)
- CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
+ if (HasCRTBeginEndFiles)
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
+ }
// Add crtfastmath.o if available and fast math is enabled.
ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
@@ -8897,6 +9475,7 @@ void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("--no-demangle");
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
+ bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
// The profile runtime also needs access to system libraries.
getToolChain().addProfileRTLibs(Args, CmdArgs);
@@ -8923,6 +9502,9 @@ void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (NeedsSanitizerDeps)
linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
+ if (NeedsXRayDeps)
+ linkXRayRuntimeDeps(ToolChain, Args, CmdArgs);
+
bool WantPthread = Args.hasArg(options::OPT_pthread) ||
Args.hasArg(options::OPT_pthreads);
@@ -8958,15 +9540,29 @@ void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (WantPthread && !isAndroid)
CmdArgs.push_back("-lpthread");
+ if (Args.hasArg(options::OPT_fsplit_stack))
+ CmdArgs.push_back("--wrap=pthread_create");
+
CmdArgs.push_back("-lc");
+ // Add IAMCU specific libs, if needed.
+ if (IsIAMCU)
+ CmdArgs.push_back("-lgloss");
+
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("--end-group");
else
AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
+
+ // Add IAMCU specific libs (outside the group), if needed.
+ if (IsIAMCU) {
+ CmdArgs.push_back("--as-needed");
+ CmdArgs.push_back("-lsoftfp");
+ CmdArgs.push_back("--no-as-needed");
+ }
}
- if (!Args.hasArg(options::OPT_nostartfiles)) {
+ if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) {
const char *crtend;
if (Args.hasArg(options::OPT_shared))
crtend = isAndroid ? "crtend_so.o" : "crtendS.o";
@@ -9457,9 +10053,14 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
WindowsSdkLibPath.c_str()));
}
+ if (!C.getDriver().IsCLMode() && Args.hasArg(options::OPT_L))
+ for (const auto &LibPath : Args.getAllArgValues(options::OPT_L))
+ CmdArgs.push_back(Args.MakeArgString("-libpath:" + LibPath));
+
CmdArgs.push_back("-nologo");
- if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7))
+ if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7,
+ options::OPT__SLASH_Zd))
CmdArgs.push_back("-debug");
bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd,
@@ -9512,6 +10113,12 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
}
+ // Add the compiler-rt library if it was explicitly specified
+ // as an argument to the --rtlib option.
+ if (!Args.hasArg(options::OPT_nostdlib)) {
+ AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args);
+ }
+
// Add filenames, libraries, and other linker inputs.
for (const auto &Input : Inputs) {
if (Input.isFilename()) {
@@ -9620,6 +10227,11 @@ std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
/*default=*/false))
CmdArgs.push_back("/GR-");
+
+ if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
+ /*default=*/false))
+ CmdArgs.push_back("/GS-");
+
if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
options::OPT_fno_function_sections))
CmdArgs.push_back(A->getOption().getID() == options::OPT_ffunction_sections
@@ -9643,6 +10255,8 @@ std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
// Flags that can simply be passed through.
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LD);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_LDd);
+ Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX);
+ Args.AddAllArgs(CmdArgs, options::OPT__SLASH_GX_);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_EH);
Args.AddAllArgs(CmdArgs, options::OPT__SLASH_Zl);
@@ -9651,6 +10265,10 @@ std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
options::OPT__SLASH_MT, options::OPT__SLASH_MTd))
A->render(Args, CmdArgs);
+ // Pass through all unknown arguments so that the fallback command can see
+ // them too.
+ Args.AddAllArgs(CmdArgs, options::OPT_UNKNOWN);
+
// Input filename.
assert(Inputs.size() == 1);
const InputInfo &II = Inputs[0];
@@ -10135,12 +10753,12 @@ void CrossWindows::Linker::ConstructJob(Compilation &C, const JobAction &JA,
} else {
for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
- // Make sure the dynamic runtime thunk is not optimized out at link time
- // to ensure proper SEH handling.
- CmdArgs.push_back(Args.MakeArgString("--undefined"));
- CmdArgs.push_back(Args.MakeArgString(TC.getArch() == llvm::Triple::x86
- ? "___asan_seh_interceptor"
- : "__asan_seh_interceptor"));
+ // Make sure the dynamic runtime thunk is not optimized out at link time
+ // to ensure proper SEH handling.
+ CmdArgs.push_back(Args.MakeArgString("--undefined"));
+ CmdArgs.push_back(Args.MakeArgString(TC.getArch() == llvm::Triple::x86
+ ? "___asan_seh_interceptor"
+ : "__asan_seh_interceptor"));
}
}
@@ -10168,7 +10786,6 @@ void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-S");
CmdArgs.push_back("-fno-exceptions"); // Always do this even if unspecified.
}
- CmdArgs.push_back("-mcpu=myriad2");
CmdArgs.push_back("-DMYRIAD2");
// Append all -I, -iquote, -isystem paths, defines/undefines,
@@ -10178,7 +10795,8 @@ void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_std_EQ, options::OPT_D, options::OPT_U,
options::OPT_f_Group, options::OPT_f_clang_Group,
options::OPT_g_Group, options::OPT_M_Group,
- options::OPT_O_Group, options::OPT_W_Group});
+ options::OPT_O_Group, options::OPT_W_Group,
+ options::OPT_mcpu_EQ});
// If we're producing a dependency file, and assembly is the final action,
// then the name of the target in the dependency file should be the '.o'
@@ -10218,7 +10836,10 @@ void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
assert(Output.getType() == types::TY_Object);
CmdArgs.push_back("-no6thSlotCompression");
- CmdArgs.push_back("-cv:myriad2"); // Chip Version
+ const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ);
+ if (CPUArg)
+ CmdArgs.push_back(
+ Args.MakeArgString("-cv:" + StringRef(CPUArg->getValue())));
CmdArgs.push_back("-noSPrefixing");
CmdArgs.push_back("-a"); // Mystery option.
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
@@ -10332,7 +10953,7 @@ void PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Input.getFilename());
const char *Exec =
- Args.MakeArgString(getToolChain().GetProgramPath("ps4-as"));
+ Args.MakeArgString(getToolChain().GetProgramPath("orbis-as"));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
@@ -10400,7 +11021,7 @@ static void ConstructPS4LinkJob(const Tool &T, Compilation &C,
CmdArgs.push_back("-lpthread");
}
- const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("ps4-ld"));
+ const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));
C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
}
@@ -10573,9 +11194,9 @@ static void ConstructGoldLinkJob(const Tool &T, Compilation &C,
const char *Exec =
#ifdef LLVM_ON_WIN32
- Args.MakeArgString(ToolChain.GetProgramPath("ps4-ld.gold"));
+ Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld.gold"));
#else
- Args.MakeArgString(ToolChain.GetProgramPath("ps4-ld"));
+ Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));
#endif
C.addCommand(llvm::make_unique<Command>(JA, T, Exec, CmdArgs, Inputs));
@@ -10609,3 +11230,120 @@ void PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA,
else
ConstructGoldLinkJob(*this, C, JA, Output, Inputs, Args, LinkingOutput);
}
+
+void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ const auto &TC =
+ static_cast<const toolchains::CudaToolChain &>(getToolChain());
+ assert(TC.getTriple().isNVPTX() && "Wrong platform");
+
+ // Obtain architecture from the action.
+ CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+ assert(gpu_arch != CudaArch::UNKNOWN &&
+ "Device action expected to have an architecture.");
+
+ // Check that our installation's ptxas supports gpu_arch.
+ if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
+ TC.cudaInstallation().CheckCudaVersionSupportsArch(gpu_arch);
+ }
+
+ ArgStringList CmdArgs;
+ CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
+ if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
+ options::OPT_no_cuda_noopt_device_debug, false)) {
+ // ptxas does not accept -g option if optimization is enabled, so
+ // we ignore the compiler's -O* options if we want debug info.
+ CmdArgs.push_back("-g");
+ CmdArgs.push_back("--dont-merge-basicblocks");
+ CmdArgs.push_back("--return-at-end");
+ } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
+ // Map the -O we received to -O{0,1,2,3}.
+ //
+ // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
+ // default, so it may correspond more closely to the spirit of clang -O2.
+
+ // -O3 seems like the least-bad option when -Osomething is specified to
+ // clang but it isn't handled below.
+ StringRef OOpt = "3";
+ if (A->getOption().matches(options::OPT_O4) ||
+ A->getOption().matches(options::OPT_Ofast))
+ OOpt = "3";
+ else if (A->getOption().matches(options::OPT_O0))
+ OOpt = "0";
+ else if (A->getOption().matches(options::OPT_O)) {
+ // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
+ OOpt = llvm::StringSwitch<const char *>(A->getValue())
+ .Case("1", "1")
+ .Case("2", "2")
+ .Case("3", "3")
+ .Case("s", "2")
+ .Case("z", "2")
+ .Default("2");
+ }
+ CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
+ } else {
+ // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
+ // to no optimizations, but ptxas's default is -O3.
+ CmdArgs.push_back("-O0");
+ }
+
+ CmdArgs.push_back("--gpu-name");
+ CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
+ CmdArgs.push_back("--output-file");
+ CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+ for (const auto& II : Inputs)
+ CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
+ CmdArgs.push_back(Args.MakeArgString(A));
+
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+}
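Note: end to end, compiling with -O2 for sm_35 produces a ptxas invocation of roughly this shape (temporary file names are illustrative):

    ptxas -m64 -O2 --gpu-name sm_35 --output-file ax-sm_35.cubin ax-sm_35.s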
+
+// All inputs to this linker must be from CudaDeviceActions, as we need to look
+// at the Inputs' Actions in order to figure out which GPU architecture they
+// correspond to.
+void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ const auto &TC =
+ static_cast<const toolchains::CudaToolChain &>(getToolChain());
+ assert(TC.getTriple().isNVPTX() && "Wrong platform");
+
+ ArgStringList CmdArgs;
+ CmdArgs.push_back("--cuda");
+ CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
+ CmdArgs.push_back(Args.MakeArgString("--create"));
+ CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+
+ for (const auto& II : Inputs) {
+ auto *A = II.getAction();
+ assert(A->getInputs().size() == 1 &&
+ "Device offload action is expected to have a single input");
+ const char *gpu_arch_str = A->getOffloadingArch();
+ assert(gpu_arch_str &&
+ "Device action expected to have associated a GPU architecture!");
+ CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+
+ // We need to pass an Arch of the form "sm_XX" for cubin files and
+ // "compute_XX" for ptx.
+ const char *Arch =
+ (II.getType() == types::TY_PP_Asm)
+ ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
+ : gpu_arch_str;
+ CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
+ Arch + ",file=" + II.getFilename()));
+ }
+
+ for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
+ CmdArgs.push_back(Args.MakeArgString(A));
+
+ const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
+ C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
+}
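Note: the matching fatbinary step then bundles the cubin plus, when PTX is also an input, its "compute_XX" image (file names illustrative):

    fatbinary --cuda -64 --create ax.fatbin \
        --image=profile=sm_35,file=ax-sm_35.cubin \
        --image=profile=compute_35,file=ax-sm_35.s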
diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.h b/contrib/llvm/tools/clang/lib/Driver/Tools.h
index 2b137f4a6d0b..02bdb8e5e2d2 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Tools.h
+++ b/contrib/llvm/tools/clang/lib/Driver/Tools.h
@@ -10,11 +10,11 @@
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLS_H
#define LLVM_CLANG_LIB_DRIVER_TOOLS_H
+#include "clang/Basic/DebugInfoOptions.h"
#include "clang/Basic/VersionTuple.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/Types.h"
#include "clang/Driver/Util.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Compiler.h"
@@ -57,8 +57,7 @@ private:
const Driver &D, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const InputInfo &Output,
- const InputInfoList &Inputs,
- const ToolChain *AuxToolChain) const;
+ const InputInfoList &Inputs) const;
void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
@@ -82,6 +81,8 @@ private:
llvm::opt::ArgStringList &CmdArgs) const;
void AddHexagonTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+ void AddLanaiTargetArgs(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const;
void AddWebAssemblyTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
@@ -91,9 +92,9 @@ private:
llvm::opt::ArgStringList &cmdArgs,
RewriteKind rewrite) const;
- void AddClangCLArgs(const llvm::opt::ArgList &Args,
+ void AddClangCLArgs(const llvm::opt::ArgList &Args, types::ID InputType,
llvm::opt::ArgStringList &CmdArgs,
- enum CodeGenOptions::DebugInfoKind *DebugInfoKind,
+ codegenoptions::DebugInfoKind *DebugInfoKind,
bool *EmitCodeView) const;
visualstudio::Compiler *getCLFallback() const;
@@ -289,6 +290,7 @@ enum class FloatABI {
};
NanEncoding getSupportedNanEncoding(StringRef &CPU);
+bool hasCompactBranches(StringRef &CPU);
void getMipsCPUAndABI(const llvm::opt::ArgList &Args,
const llvm::Triple &Triple, StringRef &CPUName,
StringRef &ABIName);
@@ -297,6 +299,7 @@ std::string getMipsABILibSuffix(const llvm::opt::ArgList &Args,
bool hasMipsAbiArg(const llvm::opt::ArgList &Args, const char *Value);
bool isUCLibc(const llvm::opt::ArgList &Args);
bool isNaN2008(const llvm::opt::ArgList &Args, const llvm::Triple &Triple);
+bool isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName);
bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
StringRef ABIName, mips::FloatABI FloatABI);
bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple,
@@ -680,7 +683,8 @@ public:
/// Visual studio tools.
namespace visualstudio {
-VersionTuple getMSVCVersion(const Driver *D, const llvm::Triple &Triple,
+VersionTuple getMSVCVersion(const Driver *D, const ToolChain &TC,
+ const llvm::Triple &Triple,
const llvm::opt::ArgList &Args, bool IsWindowsMSVC);
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
@@ -773,6 +777,16 @@ enum class FloatABI {
FloatABI getPPCFloatABI(const Driver &D, const llvm::opt::ArgList &Args);
} // end namespace ppc
+namespace sparc {
+enum class FloatABI {
+ Invalid,
+ Soft,
+ Hard,
+};
+
+FloatABI getSparcFloatABI(const Driver &D, const llvm::opt::ArgList &Args);
+} // end namespace sparc
+
namespace XCore {
// For XCore, we do not need to instantiate tools for PreProcess, PreCompile and
// Compile.
@@ -903,6 +917,41 @@ public:
};
} // end namespace PS4cpu
+namespace NVPTX {
+
+// Run ptxas, the NVPTX assembler.
+class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
+ public:
+ Assembler(const ToolChain &TC)
+ : Tool("NVPTX::Assembler", "ptxas", TC, RF_Full, llvm::sys::WEM_UTF8,
+ "--options-file") {}
+
+ bool hasIntegratedCPP() const override { return false; }
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
+// Runs fatbinary, which combines GPU object files ("cubin" files) and/or PTX
+// assembly into a single output file.
+class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+ public:
+ Linker(const ToolChain &TC)
+ : Tool("NVPTX::Linker", "fatbinary", TC, RF_Full, llvm::sys::WEM_UTF8,
+ "--options-file") {}
+
+ bool hasIntegratedCPP() const override { return false; }
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
+} // end namespace NVPTX
+
} // end namespace tools
} // end namespace driver
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Driver/Types.cpp b/contrib/llvm/tools/clang/lib/Driver/Types.cpp
index c29ce9462a07..f8e1e40dc6bf 100644
--- a/contrib/llvm/tools/clang/lib/Driver/Types.cpp
+++ b/contrib/llvm/tools/clang/lib/Driver/Types.cpp
@@ -204,6 +204,7 @@ types::ID types::lookupTypeForExtension(const char *Ext) {
.Case("pcm", TY_ModuleFile)
.Case("pch", TY_PCH)
.Case("gch", TY_PCH)
+ .Case("rs", TY_RenderScript)
.Default(TY_INVALID);
}
@@ -232,8 +233,7 @@ void types::getCompilationPhases(ID Id, llvm::SmallVectorImpl<phases::ID> &P) {
P.push_back(phases::Compile);
P.push_back(phases::Backend);
}
- if (Id != TY_CUDA_DEVICE)
- P.push_back(phases::Assemble);
+ P.push_back(phases::Assemble);
}
}
@@ -242,7 +242,6 @@ void types::getCompilationPhases(ID Id, llvm::SmallVectorImpl<phases::ID> &P) {
}
assert(0 < P.size() && "Not enough phases in list");
assert(P.size() <= phases::MaxNumberOfPhases && "Too many phases in list");
- return;
}
ID types::lookupCXXTypeForCType(ID Id) {
diff --git a/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp
new file mode 100644
index 000000000000..5d4df1941209
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp
@@ -0,0 +1,150 @@
+//===--- AffectedRangeManager.cpp - Format C++ code -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the AffectedRangeManager class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AffectedRangeManager.h"
+
+#include "FormatToken.h"
+#include "TokenAnnotator.h"
+
+namespace clang {
+namespace format {
+
+bool AffectedRangeManager::computeAffectedLines(
+ SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E) {
+ bool SomeLineAffected = false;
+ const AnnotatedLine *PreviousLine = nullptr;
+ while (I != E) {
+ AnnotatedLine *Line = *I;
+ Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
+
+ // If a line is part of a preprocessor directive, it needs to be formatted
+ // if any token within the directive is affected.
+ if (Line->InPPDirective) {
+ FormatToken *Last = Line->Last;
+ SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
+ while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
+ Last = (*PPEnd)->Last;
+ ++PPEnd;
+ }
+
+ if (affectsTokenRange(*Line->First, *Last,
+ /*IncludeLeadingNewlines=*/false)) {
+ SomeLineAffected = true;
+ markAllAsAffected(I, PPEnd);
+ }
+ I = PPEnd;
+ continue;
+ }
+
+ if (nonPPLineAffected(Line, PreviousLine))
+ SomeLineAffected = true;
+
+ PreviousLine = Line;
+ ++I;
+ }
+ return SomeLineAffected;
+}
+
+bool AffectedRangeManager::affectsCharSourceRange(
+ const CharSourceRange &Range) {
+ for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+ E = Ranges.end();
+ I != E; ++I) {
+ if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+ !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+ return true;
+ }
+ return false;
+}
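Note: the two isBeforeInTranslationUnit() tests implement the standard closed-interval overlap check; on plain offsets the same predicate reads (sketch):

    // Two ranges overlap iff neither ends before the other begins.
    static bool overlaps(unsigned ABegin, unsigned AEnd,
                         unsigned BBegin, unsigned BEnd) {
      return !(AEnd < BBegin) && !(BEnd < ABegin);
    }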
+
+bool AffectedRangeManager::affectsTokenRange(const FormatToken &First,
+ const FormatToken &Last,
+ bool IncludeLeadingNewlines) {
+ SourceLocation Start = First.WhitespaceRange.getBegin();
+ if (!IncludeLeadingNewlines)
+ Start = Start.getLocWithOffset(First.LastNewlineOffset);
+ SourceLocation End = Last.getStartOfNonWhitespace();
+ End = End.getLocWithOffset(Last.TokenText.size());
+ CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
+ return affectsCharSourceRange(Range);
+}
+
+bool AffectedRangeManager::affectsLeadingEmptyLines(const FormatToken &Tok) {
+ CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
+ Tok.WhitespaceRange.getBegin(),
+ Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
+ return affectsCharSourceRange(EmptyLineRange);
+}
+
+void AffectedRangeManager::markAllAsAffected(
+ SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E) {
+ while (I != E) {
+ (*I)->Affected = true;
+ markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
+ ++I;
+ }
+}
+
+bool AffectedRangeManager::nonPPLineAffected(
+ AnnotatedLine *Line, const AnnotatedLine *PreviousLine) {
+ bool SomeLineAffected = false;
+ Line->ChildrenAffected =
+ computeAffectedLines(Line->Children.begin(), Line->Children.end());
+ if (Line->ChildrenAffected)
+ SomeLineAffected = true;
+
+ // Stores whether one of the line's tokens is directly affected.
+ bool SomeTokenAffected = false;
+ // Stores whether we need to look at the leading newlines of the next token
+ // in order to determine whether it was affected.
+ bool IncludeLeadingNewlines = false;
+
+ // Stores whether the first child line of any of this line's tokens is
+ // affected.
+ bool SomeFirstChildAffected = false;
+
+ for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
+ // Determine whether 'Tok' was affected.
+ if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
+ SomeTokenAffected = true;
+
+ // Determine whether the first child of 'Tok' was affected.
+ if (!Tok->Children.empty() && Tok->Children.front()->Affected)
+ SomeFirstChildAffected = true;
+
+ IncludeLeadingNewlines = Tok->Children.empty();
+ }
+
+ // Was this line moved, i.e., has it previously been on the same line as an
+ // affected line?
+ bool LineMoved = PreviousLine && PreviousLine->Affected &&
+ Line->First->NewlinesBefore == 0;
+
+ bool IsContinuedComment =
+ Line->First->is(tok::comment) && Line->First->Next == nullptr &&
+ Line->First->NewlinesBefore < 2 && PreviousLine &&
+ PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
+
+ if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
+ IsContinuedComment) {
+ Line->Affected = true;
+ SomeLineAffected = true;
+ }
+ return SomeLineAffected;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h
new file mode 100644
index 000000000000..d8d5ee55acd8
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h
@@ -0,0 +1,67 @@
+//===--- AffectedRangeManager.h - Format C++ code ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief The AffectedRangeManager class manages affected ranges in the code.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
+#define LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
+
+#include "clang/Basic/SourceManager.h"
+
+namespace clang {
+namespace format {
+
+struct FormatToken;
+class AnnotatedLine;
+
+class AffectedRangeManager {
+public:
+ AffectedRangeManager(const SourceManager &SourceMgr,
+ const ArrayRef<CharSourceRange> Ranges)
+ : SourceMgr(SourceMgr), Ranges(Ranges.begin(), Ranges.end()) {}
+
+ // Determines which lines are affected by the SourceRanges given as input.
+ // Returns \c true if at least one line between I and E or one of their
+ // children is affected.
+ bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E);
+
+ // Returns true if 'Range' intersects with one of the input ranges.
+ bool affectsCharSourceRange(const CharSourceRange &Range);
+
+private:
+ // Returns true if the range from 'First' to 'Last' intersects with one of the
+ // input ranges.
+ bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
+ bool IncludeLeadingNewlines);
+
+ // Returns true if one of the input ranges intersects the leading empty lines
+ // before 'Tok'.
+ bool affectsLeadingEmptyLines(const FormatToken &Tok);
+
+ // Marks all lines between I and E as well as all their children as affected.
+ void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E);
+
+ // Determines whether 'Line' is affected by the SourceRanges given as input.
+ // Returns \c true if the line or one of its children is affected.
+ bool nonPPLineAffected(AnnotatedLine *Line,
+ const AnnotatedLine *PreviousLine);
+
+ const SourceManager &SourceMgr;
+ const SmallVector<CharSourceRange, 8> Ranges;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
index b820f53db527..322969e4bb71 100644
--- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
@@ -64,7 +64,7 @@ static bool startsNextParameter(const FormatToken &Current,
ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
- SourceManager &SourceMgr,
+ const SourceManager &SourceMgr,
WhitespaceManager &Whitespaces,
encoding::Encoding Encoding,
bool BinPackInconclusiveFunctions)
@@ -151,6 +151,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
return true;
if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
(Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
+ Style.Language == FormatStyle::LK_Cpp &&
// FIXME: This is a temporary workaround for the case where clang-format
// sets BreakBeforeParameter to avoid bin packing and this creates a
// completely unnecessary line break after a template type that isn't
@@ -249,7 +250,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
// If the return type spans multiple lines, wrap before the function name.
if ((Current.is(TT_FunctionDeclarationName) ||
(Current.is(tok::kw_operator) && !Previous.is(tok::coloncolon))) &&
- State.Stack.back().BreakBeforeParameter)
+ !Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter)
return true;
if (startsSegmentOfBuilderTypeCall(Current) &&
@@ -352,9 +353,20 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
// disallowing any further line breaks if there is no line break after the
// opening parenthesis. Don't break if it doesn't conserve columns.
if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak &&
- Previous.is(tok::l_paren) && State.Column > getNewLineColumn(State) &&
+ Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
+ State.Column > getNewLineColumn(State) &&
(!Previous.Previous ||
- !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while, tok::kw_switch)))
+ !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while,
+ tok::kw_switch)) &&
+ // Don't do this for simple (no expressions) one-argument function calls
+ // as that feels like needlessly wasting whitespace, e.g.:
+ //
+ // caaaaaaaaaaaall(
+ // caaaaaaaaaaaall(
+ // caaaaaaaaaaaall(
+ // caaaaaaaaaaaaaaaaaaaaaaall(aaaaaaaaaaaaaa, aaaaaaaaa))));
+ Current.FakeLParens.size() > 0 &&
+ Current.FakeLParens.back() > prec::Unknown)
State.Stack.back().NoLineBreak = true;
if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
@@ -400,9 +412,9 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
(Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 ||
Previous.NextOperator)) ||
Current.StartsBinaryExpression)) {
- // Always indent relative to the RHS of the expression unless this is a
- // simple assignment without binary expression on the RHS. Also indent
- // relative to unary operators and the colons of constructor initializers.
+ // Indent relative to the RHS of the expression unless this is a simple
+ // assignment without binary expression on the RHS. Also indent relative to
+ // unary operators and the colons of constructor initializers.
State.Stack.back().LastSpace = State.Column;
} else if (Previous.is(TT_InheritanceColon)) {
State.Stack.back().Indent = State.Column;
@@ -464,10 +476,13 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// // code
// }
//
- // is common and should be formatted like a free-standing function.
- if (Style.Language != FormatStyle::LK_JavaScript ||
- Current.NestingLevel != 0 || !PreviousNonComment->is(tok::equal) ||
- !Current.is(Keywords.kw_function))
+ // is common and should be formatted like a free-standing function. The same
+ // goes for wrapping before the lambda return type arrow.
+ if (!Current.is(TT_LambdaArrow) &&
+ (Style.Language != FormatStyle::LK_JavaScript ||
+ Current.NestingLevel != 0 || !PreviousNonComment ||
+ !PreviousNonComment->is(tok::equal) ||
+ !Current.isOneOf(Keywords.kw_async, Keywords.kw_function)))
State.Stack.back().NestedBlockIndent = State.Column;
if (NextNonComment->isMemberAccess()) {
@@ -529,6 +544,12 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
if (!Current.isTrailingComment())
State.Stack.back().LastSpace = State.Column;
+ if (Current.is(tok::lessless))
+ // If we are breaking before a "<<", we always want to indent relative to
+ // RHS. This is necessary only for "<<", as we special-case it and don't
+ // always indent relative to the RHS.
+ State.Stack.back().LastSpace += 3; // 3 -> width of "<< ".
+
State.StartOfLineLevel = Current.NestingLevel;
State.LowestLevelOnLine = Current.NestingLevel;
@@ -703,11 +724,15 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
if (Current.is(TT_ArraySubscriptLSquare) &&
State.Stack.back().StartOfArraySubscripts == 0)
State.Stack.back().StartOfArraySubscripts = State.Column;
- if ((Current.is(tok::question) && Style.BreakBeforeTernaryOperators) ||
- (Current.getPreviousNonComment() && Current.isNot(tok::colon) &&
- Current.getPreviousNonComment()->is(tok::question) &&
- !Style.BreakBeforeTernaryOperators))
+ if (Style.BreakBeforeTernaryOperators && Current.is(tok::question))
State.Stack.back().QuestionColumn = State.Column;
+ if (!Style.BreakBeforeTernaryOperators && Current.isNot(tok::colon)) {
+ const FormatToken *Previous = Current.Previous;
+ while (Previous && Previous->isTrailingComment())
+ Previous = Previous->Previous;
+ if (Previous && Previous->is(tok::question))
+ State.Stack.back().QuestionColumn = State.Column;
+ }
if (!Current.opensScope() && !Current.closesScope())
State.LowestLevelOnLine =
std::min(State.LowestLevelOnLine, Current.NestingLevel);
@@ -835,7 +860,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
// there is a line-break right after the operator.
// Exclude relational operators, as there, it is always more desirable to
// have the LHS 'left' of the RHS.
- if (Previous && Previous->getPrecedence() > prec::Assignment &&
+ if (Previous && Previous->getPrecedence() != prec::Assignment &&
Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
Previous->getPrecedence() != prec::Relational) {
bool BreakBeforeOperator =
@@ -857,7 +882,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
// ParameterToInnerFunction));
if (*I > prec::Unknown)
NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
- if (*I != prec::Conditional && !Current.is(TT_UnaryOperator))
+ if (*I != prec::Conditional && !Current.is(TT_UnaryOperator) &&
+ Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
NewParenState.StartOfFunctionCall = State.Column;
// Always indent conditional expressions. Never indent expression where
@@ -1022,6 +1048,9 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) {
unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
LineState &State) {
+ if (!Current.IsMultiline)
+ return 0;
+
// Break before further function parameters on all levels.
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
@@ -1060,7 +1089,8 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// FIXME: String literal breaking is currently disabled for Java and JS, as
// it requires strings to be merged using "+" which we don't support.
if (Style.Language == FormatStyle::LK_Java ||
- Style.Language == FormatStyle::LK_JavaScript)
+ Style.Language == FormatStyle::LK_JavaScript ||
+ !Style.BreakStringLiterals)
return 0;
// Don't break string literals inside preprocessor directives (except for
@@ -1100,10 +1130,10 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
} else {
return 0;
}
- } else if (Current.is(TT_BlockComment) && Current.isTrailingComment()) {
- if (!Style.ReflowComments ||
+ } else if (Current.is(TT_BlockComment)) {
+ if (!Current.isTrailingComment() || !Style.ReflowComments ||
CommentPragmasRegex.match(Current.TokenText.substr(2)))
- return 0;
+ return addMultilineToken(Current, State);
Token.reset(new BreakableBlockComment(
Current, State.Line->Level, StartColumn, Current.OriginalColumn,
!Current.Previous, State.Line->InPPDirective, Encoding, Style));
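
The hunks above also wire the new BreakStringLiterals option into breakProtrudingToken as an early-out. A minimal sketch of driving it through the public API, assuming the reformat() overload of this import (the range and file name are illustrative):

#include "clang/Format/Format.h"
#include "clang/Tooling/Core/Replacement.h"

using namespace clang;

// Format Code with the LLVM style but leave over-long string literals alone.
tooling::Replacements formatKeepingLongStrings(StringRef Code) {
  format::FormatStyle Style = format::getLLVMStyle();
  Style.BreakStringLiterals = false; // new knob from this patch
  bool IncompleteFormat = false;
  return format::reformat(
      Style, Code,
      {tooling::Range(0, static_cast<unsigned>(Code.size()))}, "<stdin>",
      &IncompleteFormat);
}
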
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
index 9b9154ed3095..21ad653c4fa4 100644
--- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
@@ -38,7 +38,8 @@ public:
/// column \p FirstIndent.
ContinuationIndenter(const FormatStyle &Style,
const AdditionalKeywords &Keywords,
- SourceManager &SourceMgr, WhitespaceManager &Whitespaces,
+ const SourceManager &SourceMgr,
+ WhitespaceManager &Whitespaces,
encoding::Encoding Encoding,
bool BinPackInconclusiveFunctions);
@@ -137,7 +138,7 @@ private:
FormatStyle Style;
const AdditionalKeywords &Keywords;
- SourceManager &SourceMgr;
+ const SourceManager &SourceMgr;
WhitespaceManager &Whitespaces;
encoding::Encoding Encoding;
bool BinPackInconclusiveFunctions;
diff --git a/contrib/llvm/tools/clang/lib/Format/Encoding.h b/contrib/llvm/tools/clang/lib/Format/Encoding.h
index 592d7201a8ac..148f7fd0e91b 100644
--- a/contrib/llvm/tools/clang/lib/Format/Encoding.h
+++ b/contrib/llvm/tools/clang/lib/Format/Encoding.h
@@ -17,6 +17,7 @@
#define LLVM_CLANG_LIB_FORMAT_ENCODING_H
#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Unicode.h"
diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp
index 2689368da513..32d6bb855ad6 100644
--- a/contrib/llvm/tools/clang/lib/Format/Format.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp
@@ -14,7 +14,11 @@
//===----------------------------------------------------------------------===//
#include "clang/Format/Format.h"
+#include "AffectedRangeManager.h"
#include "ContinuationIndenter.h"
+#include "FormatTokenLexer.h"
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineFormatter.h"
#include "UnwrappedLineParser.h"
@@ -22,6 +26,7 @@
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Allocator.h"
@@ -29,6 +34,8 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <memory>
#include <queue>
#include <string>
@@ -68,6 +75,16 @@ template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
IO.enumCase(Value, "Always", FormatStyle::UT_Always);
IO.enumCase(Value, "true", FormatStyle::UT_Always);
IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
+ IO.enumCase(Value, "ForContinuationAndIndentation",
+ FormatStyle::UT_ForContinuationAndIndentation);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
+ static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
+ IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
+ IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
+ IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
}
};
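
Both new enumerations are consumed by the YAML mapping further down; a hedged round-trip sketch using the public parseConfiguration() (the config text is a hypothetical example):

#include "clang/Format/Format.h"
#include <system_error>

using namespace clang;

format::FormatStyle jsStyleFromConfig() {
  format::FormatStyle Style =
      format::getGoogleStyle(format::FormatStyle::LK_JavaScript);
  // Keys handled by the ScalarEnumerationTraits added in this patch.
  std::error_code EC = format::parseConfiguration(
      "JavaScriptQuotes: Single\nUseTab: ForContinuationAndIndentation\n",
      &Style);
  (void)EC; // a real caller should check for parse errors
  return Style;
}
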
@@ -275,6 +292,9 @@ template <> struct MappingTraits<FormatStyle> {
Style.BreakBeforeTernaryOperators);
IO.mapOptional("BreakConstructorInitializersBeforeComma",
Style.BreakConstructorInitializersBeforeComma);
+ IO.mapOptional("BreakAfterJavaFieldAnnotations",
+ Style.BreakAfterJavaFieldAnnotations);
+ IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
IO.mapOptional("ColumnLimit", Style.ColumnLimit);
IO.mapOptional("CommentPragmas", Style.CommentPragmas);
IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
@@ -289,10 +309,13 @@ template <> struct MappingTraits<FormatStyle> {
Style.ExperimentalAutoDetectBinPacking);
IO.mapOptional("ForEachMacros", Style.ForEachMacros);
IO.mapOptional("IncludeCategories", Style.IncludeCategories);
+ IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
IO.mapOptional("IndentWidth", Style.IndentWidth);
IO.mapOptional("IndentWrappedFunctionNames",
Style.IndentWrappedFunctionNames);
+ IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
+ IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
Style.KeepEmptyLinesAtTheStartOfBlocks);
IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
@@ -488,8 +511,9 @@ FormatStyle getLLVMStyle() {
LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
false, false, false, false, false};
- LLVMStyle.BreakConstructorInitializersBeforeComma = false;
LLVMStyle.BreakAfterJavaFieldAnnotations = false;
+ LLVMStyle.BreakConstructorInitializersBeforeComma = false;
+ LLVMStyle.BreakStringLiterals = true;
LLVMStyle.ColumnLimit = 80;
LLVMStyle.CommentPragmas = "^ IWYU pragma:";
LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
@@ -504,9 +528,12 @@ FormatStyle getLLVMStyle() {
LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
{"^(<|\"(gtest|isl|json)/)", 3},
{".*", 1}};
+ LLVMStyle.IncludeIsMainRegex = "$";
LLVMStyle.IndentCaseLabels = false;
LLVMStyle.IndentWrappedFunctionNames = false;
LLVMStyle.IndentWidth = 2;
+ LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
+ LLVMStyle.JavaScriptWrapImports = true;
LLVMStyle.TabWidth = 8;
LLVMStyle.MaxEmptyLinesToKeep = 1;
LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
@@ -518,6 +545,7 @@ FormatStyle getLLVMStyle() {
LLVMStyle.SpacesBeforeTrailingComments = 1;
LLVMStyle.Standard = FormatStyle::LS_Cpp11;
LLVMStyle.UseTab = FormatStyle::UT_Never;
LLVMStyle.ReflowComments = true;
LLVMStyle.SpacesInParentheses = false;
LLVMStyle.SpacesInSquareBrackets = false;
@@ -555,6 +583,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
GoogleStyle.DerivePointerAlignment = true;
GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
+ GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
GoogleStyle.IndentCaseLabels = true;
GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
GoogleStyle.ObjCSpaceAfterProperty = false;
@@ -583,9 +612,12 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
GoogleStyle.BreakBeforeTernaryOperators = false;
- GoogleStyle.CommentPragmas = "@(export|visibility) {";
+ GoogleStyle.CommentPragmas = "@(export|requirecss|return|see|visibility) ";
GoogleStyle.MaxEmptyLinesToKeep = 3;
+ GoogleStyle.NamespaceIndentation = FormatStyle::NI_All;
GoogleStyle.SpacesInContainerLiterals = false;
+ GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
+ GoogleStyle.JavaScriptWrapImports = false;
} else if (Language == FormatStyle::LK_Proto) {
GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
GoogleStyle.SpacesInContainerLiterals = false;
@@ -759,734 +791,35 @@ std::string configurationAsText(const FormatStyle &Style) {
namespace {
-class FormatTokenLexer {
-public:
- FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
- encoding::Encoding Encoding)
- : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
- LessStashed(false), Column(0), TrailingWhitespace(0),
- SourceMgr(SourceMgr), ID(ID), Style(Style),
- IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
- Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
- MacroBlockBeginRegex(Style.MacroBlockBegin),
- MacroBlockEndRegex(Style.MacroBlockEnd) {
- Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
- getFormattingLangOpts(Style)));
- Lex->SetKeepWhitespaceMode(true);
-
- for (const std::string &ForEachMacro : Style.ForEachMacros)
- ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
- std::sort(ForEachMacros.begin(), ForEachMacros.end());
- }
-
- ArrayRef<FormatToken *> lex() {
- assert(Tokens.empty());
- assert(FirstInLineIndex == 0);
- do {
- Tokens.push_back(getNextToken());
- if (Style.Language == FormatStyle::LK_JavaScript)
- tryParseJSRegexLiteral();
- tryMergePreviousTokens();
- if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
- FirstInLineIndex = Tokens.size() - 1;
- } while (Tokens.back()->Tok.isNot(tok::eof));
- return Tokens;
- }
-
- const AdditionalKeywords &getKeywords() { return Keywords; }
-
-private:
- void tryMergePreviousTokens() {
- if (tryMerge_TMacro())
- return;
- if (tryMergeConflictMarkers())
- return;
- if (tryMergeLessLess())
- return;
-
- if (Style.Language == FormatStyle::LK_JavaScript) {
- if (tryMergeTemplateString())
- return;
-
- static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
- static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
- tok::equal};
- static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
- tok::greaterequal};
- static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
- // FIXME: Investigate what token type gives the correct operator priority.
- if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
- return;
- if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
- return;
- if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
- return;
- if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
- return;
- }
- }
-
- bool tryMergeLessLess() {
- // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
- if (Tokens.size() < 3)
- return false;
-
- bool FourthTokenIsLess = false;
- if (Tokens.size() > 3)
- FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
-
- auto First = Tokens.end() - 3;
- if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
- First[0]->isNot(tok::less) || FourthTokenIsLess)
- return false;
-
- // Only merge if there currently is no whitespace between the two "<".
- if (First[1]->WhitespaceRange.getBegin() !=
- First[1]->WhitespaceRange.getEnd())
- return false;
-
- First[0]->Tok.setKind(tok::lessless);
- First[0]->TokenText = "<<";
- First[0]->ColumnWidth += 1;
- Tokens.erase(Tokens.end() - 2);
- return true;
- }
-
- bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
- if (Tokens.size() < Kinds.size())
- return false;
-
- SmallVectorImpl<FormatToken *>::const_iterator First =
- Tokens.end() - Kinds.size();
- if (!First[0]->is(Kinds[0]))
- return false;
- unsigned AddLength = 0;
- for (unsigned i = 1; i < Kinds.size(); ++i) {
- if (!First[i]->is(Kinds[i]) ||
- First[i]->WhitespaceRange.getBegin() !=
- First[i]->WhitespaceRange.getEnd())
- return false;
- AddLength += First[i]->TokenText.size();
- }
- Tokens.resize(Tokens.size() - Kinds.size() + 1);
- First[0]->TokenText = StringRef(First[0]->TokenText.data(),
- First[0]->TokenText.size() + AddLength);
- First[0]->ColumnWidth += AddLength;
- First[0]->Type = NewType;
- return true;
- }
-
- // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
- bool precedesOperand(FormatToken *Tok) {
- // NB: This is not entirely correct, as an r_paren can introduce an operand
- // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
- // corner case to not matter in practice, though.
- return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
- tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
- tok::colon, tok::question, tok::tilde) ||
- Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
- tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
- tok::kw_typeof, Keywords.kw_instanceof,
- Keywords.kw_in) ||
- Tok->isBinaryOperator();
- }
-
- bool canPrecedeRegexLiteral(FormatToken *Prev) {
- if (!Prev)
- return true;
-
- // Regex literals can only follow after prefix unary operators, not after
- // postfix unary operators. If the '++' is followed by a non-operand
- // introducing token, the slash here is the operand and not the start of a
- // regex.
- if (Prev->isOneOf(tok::plusplus, tok::minusminus))
- return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
-
- // The previous token must introduce an operand location where regex
- // literals can occur.
- if (!precedesOperand(Prev))
- return false;
-
- return true;
- }
-
- // Tries to parse a JavaScript Regex literal starting at the current token,
- // if that begins with a slash and is in a location where JavaScript allows
- // regex literals. Changes the current token to a regex literal and updates
- // its text if successful.
- void tryParseJSRegexLiteral() {
- FormatToken *RegexToken = Tokens.back();
- if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
- return;
-
- FormatToken *Prev = nullptr;
- for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
- // NB: Because previous pointers are not initialized yet, this cannot use
- // Token.getPreviousNonComment.
- if ((*I)->isNot(tok::comment)) {
- Prev = *I;
- break;
- }
- }
-
- if (!canPrecedeRegexLiteral(Prev))
- return;
-
- // 'Manually' lex ahead in the current file buffer.
- const char *Offset = Lex->getBufferLocation();
- const char *RegexBegin = Offset - RegexToken->TokenText.size();
- StringRef Buffer = Lex->getBuffer();
- bool InCharacterClass = false;
- bool HaveClosingSlash = false;
- for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
- // Regular expressions are terminated with a '/', which can only be
- // escaped using '\' or a character class between '[' and ']'.
- // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
- switch (*Offset) {
- case '\\':
- // Skip the escaped character.
- ++Offset;
- break;
- case '[':
- InCharacterClass = true;
- break;
- case ']':
- InCharacterClass = false;
- break;
- case '/':
- if (!InCharacterClass)
- HaveClosingSlash = true;
- break;
- }
- }
-
- RegexToken->Type = TT_RegexLiteral;
- // Treat regex literals like other string_literals.
- RegexToken->Tok.setKind(tok::string_literal);
- RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
- RegexToken->ColumnWidth = RegexToken->TokenText.size();
-
- resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
- }
-
- bool tryMergeTemplateString() {
- if (Tokens.size() < 2)
- return false;
-
- FormatToken *EndBacktick = Tokens.back();
- // Backticks get lexed as tok::unknown tokens. If a template string contains
- // a comment start, it gets lexed as a tok::comment, or tok::unknown if
- // unterminated.
- if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
- tok::char_constant, tok::unknown))
- return false;
- size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
- // Unknown token that's not actually a backtick, or a comment that doesn't
- // contain a backtick.
- if (CommentBacktickPos == StringRef::npos)
- return false;
-
- unsigned TokenCount = 0;
- bool IsMultiline = false;
- unsigned EndColumnInFirstLine =
- EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
- for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
- ++TokenCount;
- if (I[0]->IsMultiline)
- IsMultiline = true;
-
- // If there was a preceding template string, this must be the start of a
- // template string, not the end.
- if (I[0]->is(TT_TemplateString))
- return false;
-
- if (I[0]->isNot(tok::unknown) || I[0]->TokenText != "`") {
- // Keep track of the rhs offset of the last token to wrap across lines -
- // it's the rhs offset of the first line of the template string, used to
- // determine its width.
- if (I[0]->IsMultiline)
- EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
- // If the token has newlines, the token before it (if it exists) is the
- // rhs end of the previous line.
- if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
- EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
- IsMultiline = true;
- }
- continue;
- }
-
- Tokens.resize(Tokens.size() - TokenCount);
- Tokens.back()->Type = TT_TemplateString;
- const char *EndOffset =
- EndBacktick->TokenText.data() + 1 + CommentBacktickPos;
- if (CommentBacktickPos != 0) {
- // If the backtick was not the first character (e.g. in a comment),
- // re-lex after the backtick position.
- SourceLocation Loc = EndBacktick->Tok.getLocation();
- resetLexer(SourceMgr.getFileOffset(Loc) + CommentBacktickPos + 1);
- }
- Tokens.back()->TokenText =
- StringRef(Tokens.back()->TokenText.data(),
- EndOffset - Tokens.back()->TokenText.data());
-
- unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
- if (EndOriginalColumn == 0) {
- SourceLocation Loc = EndBacktick->Tok.getLocation();
- EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
- }
- // If the ` is further down within the token (e.g. in a comment).
- EndOriginalColumn += CommentBacktickPos;
-
- if (IsMultiline) {
- // ColumnWidth is from backtick to last token in line.
- // LastLineColumnWidth is 0 to backtick.
- // x = `some content
- // until here`;
- Tokens.back()->ColumnWidth =
- EndColumnInFirstLine - Tokens.back()->OriginalColumn;
- // +1 for the ` itself.
- Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
- Tokens.back()->IsMultiline = true;
- } else {
- // Token simply spans from start to end, +1 for the ` itself.
- Tokens.back()->ColumnWidth =
- EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
- }
- return true;
- }
- return false;
- }
-
- bool tryMerge_TMacro() {
- if (Tokens.size() < 4)
- return false;
- FormatToken *Last = Tokens.back();
- if (!Last->is(tok::r_paren))
- return false;
-
- FormatToken *String = Tokens[Tokens.size() - 2];
- if (!String->is(tok::string_literal) || String->IsMultiline)
- return false;
-
- if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
- return false;
-
- FormatToken *Macro = Tokens[Tokens.size() - 4];
- if (Macro->TokenText != "_T")
- return false;
-
- const char *Start = Macro->TokenText.data();
- const char *End = Last->TokenText.data() + Last->TokenText.size();
- String->TokenText = StringRef(Start, End - Start);
- String->IsFirst = Macro->IsFirst;
- String->LastNewlineOffset = Macro->LastNewlineOffset;
- String->WhitespaceRange = Macro->WhitespaceRange;
- String->OriginalColumn = Macro->OriginalColumn;
- String->ColumnWidth = encoding::columnWidthWithTabs(
- String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
- String->NewlinesBefore = Macro->NewlinesBefore;
- String->HasUnescapedNewline = Macro->HasUnescapedNewline;
-
- Tokens.pop_back();
- Tokens.pop_back();
- Tokens.pop_back();
- Tokens.back() = String;
- return true;
- }
-
- bool tryMergeConflictMarkers() {
- if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
- return false;
-
- // Conflict lines look like:
- // <marker> <text from the vcs>
- // For example:
- // >>>>>>> /file/in/file/system at revision 1234
- //
- // We merge all tokens in a line that starts with a conflict marker
- // into a single token with a special token type that the unwrapped line
- // parser will use to correctly rebuild the underlying code.
-
- FileID ID;
- // Get the position of the first token in the line.
- unsigned FirstInLineOffset;
- std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
- Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
- StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
- // Calculate the offset of the start of the current line.
- auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
- if (LineOffset == StringRef::npos) {
- LineOffset = 0;
- } else {
- ++LineOffset;
- }
-
- auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
- StringRef LineStart;
- if (FirstSpace == StringRef::npos) {
- LineStart = Buffer.substr(LineOffset);
- } else {
- LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
- }
-
- TokenType Type = TT_Unknown;
- if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
- Type = TT_ConflictStart;
- } else if (LineStart == "|||||||" || LineStart == "=======" ||
- LineStart == "====") {
- Type = TT_ConflictAlternative;
- } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
- Type = TT_ConflictEnd;
- }
-
- if (Type != TT_Unknown) {
- FormatToken *Next = Tokens.back();
-
- Tokens.resize(FirstInLineIndex + 1);
- // We do not need to build a complete token here, as we will skip it
- // during parsing anyway (as we must not touch whitespace around conflict
- // markers).
- Tokens.back()->Type = Type;
- Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
-
- Tokens.push_back(Next);
- return true;
- }
-
- return false;
- }
-
- FormatToken *getStashedToken() {
- // Create a synthesized second '>' or '<' token.
- Token Tok = FormatTok->Tok;
- StringRef TokenText = FormatTok->TokenText;
-
- unsigned OriginalColumn = FormatTok->OriginalColumn;
- FormatTok = new (Allocator.Allocate()) FormatToken;
- FormatTok->Tok = Tok;
- SourceLocation TokLocation =
- FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
- FormatTok->Tok.setLocation(TokLocation);
- FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
- FormatTok->TokenText = TokenText;
- FormatTok->ColumnWidth = 1;
- FormatTok->OriginalColumn = OriginalColumn + 1;
-
- return FormatTok;
- }
-
- FormatToken *getNextToken() {
- if (GreaterStashed) {
- GreaterStashed = false;
- return getStashedToken();
- }
- if (LessStashed) {
- LessStashed = false;
- return getStashedToken();
- }
-
- FormatTok = new (Allocator.Allocate()) FormatToken;
- readRawToken(*FormatTok);
- SourceLocation WhitespaceStart =
- FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
- FormatTok->IsFirst = IsFirstToken;
- IsFirstToken = false;
-
- // Consume and record whitespace until we find a significant token.
- unsigned WhitespaceLength = TrailingWhitespace;
- while (FormatTok->Tok.is(tok::unknown)) {
- StringRef Text = FormatTok->TokenText;
- auto EscapesNewline = [&](int pos) {
- // A '\r' here is just part of '\r\n'. Skip it.
- if (pos >= 0 && Text[pos] == '\r')
- --pos;
- // See whether there is an odd number of '\' before this.
- unsigned count = 0;
- for (; pos >= 0; --pos, ++count)
- if (Text[pos] != '\\')
- break;
- return count & 1;
- };
- // FIXME: This miscounts tok::unknown tokens that are not just
- // whitespace, e.g. a '`' character.
- for (int i = 0, e = Text.size(); i != e; ++i) {
- switch (Text[i]) {
- case '\n':
- ++FormatTok->NewlinesBefore;
- FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
- FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
- Column = 0;
- break;
- case '\r':
- FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
- Column = 0;
- break;
- case '\f':
- case '\v':
- Column = 0;
- break;
- case ' ':
- ++Column;
- break;
- case '\t':
- Column += Style.TabWidth - Column % Style.TabWidth;
- break;
- case '\\':
- if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
- FormatTok->Type = TT_ImplicitStringLiteral;
- break;
- default:
- FormatTok->Type = TT_ImplicitStringLiteral;
- break;
- }
- if (FormatTok->Type == TT_ImplicitStringLiteral)
- break;
- }
-
- if (FormatTok->is(TT_ImplicitStringLiteral))
- break;
- WhitespaceLength += FormatTok->Tok.getLength();
-
- readRawToken(*FormatTok);
- }
-
- // In case the token starts with escaped newlines, we want to
- // take them into account as whitespace - this pattern is quite frequent
- // in macro definitions.
- // FIXME: Add a more explicit test.
- while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
- FormatTok->TokenText[1] == '\n') {
- ++FormatTok->NewlinesBefore;
- WhitespaceLength += 2;
- FormatTok->LastNewlineOffset = 2;
- Column = 0;
- FormatTok->TokenText = FormatTok->TokenText.substr(2);
- }
-
- FormatTok->WhitespaceRange = SourceRange(
- WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
-
- FormatTok->OriginalColumn = Column;
-
- TrailingWhitespace = 0;
- if (FormatTok->Tok.is(tok::comment)) {
- // FIXME: Add the trimmed whitespace to Column.
- StringRef UntrimmedText = FormatTok->TokenText;
- FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
- TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
- } else if (FormatTok->Tok.is(tok::raw_identifier)) {
- IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
- FormatTok->Tok.setIdentifierInfo(&Info);
- FormatTok->Tok.setKind(Info.getTokenID());
- if (Style.Language == FormatStyle::LK_Java &&
- FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
- tok::kw_operator)) {
- FormatTok->Tok.setKind(tok::identifier);
- FormatTok->Tok.setIdentifierInfo(nullptr);
- } else if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
- tok::kw_operator)) {
- FormatTok->Tok.setKind(tok::identifier);
- FormatTok->Tok.setIdentifierInfo(nullptr);
- }
- } else if (FormatTok->Tok.is(tok::greatergreater)) {
- FormatTok->Tok.setKind(tok::greater);
- FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- GreaterStashed = true;
- } else if (FormatTok->Tok.is(tok::lessless)) {
- FormatTok->Tok.setKind(tok::less);
- FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
- LessStashed = true;
- }
-
- // Now FormatTok is the next non-whitespace token.
-
- StringRef Text = FormatTok->TokenText;
- size_t FirstNewlinePos = Text.find('\n');
- if (FirstNewlinePos == StringRef::npos) {
- // FIXME: ColumnWidth actually depends on the start column, we need to
- // take this into account when the token is moved.
- FormatTok->ColumnWidth =
- encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
- Column += FormatTok->ColumnWidth;
- } else {
- FormatTok->IsMultiline = true;
- // FIXME: ColumnWidth actually depends on the start column, we need to
- // take this into account when the token is moved.
- FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
- Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
-
- // The last line of the token always starts in column 0.
- // Thus, the length can be precomputed even in the presence of tabs.
- FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
- Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
- Encoding);
- Column = FormatTok->LastLineColumnWidth;
- }
-
- if (Style.Language == FormatStyle::LK_Cpp) {
- if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
- Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
- tok::pp_define) &&
- std::find(ForEachMacros.begin(), ForEachMacros.end(),
- FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
- FormatTok->Type = TT_ForEachMacro;
- } else if (FormatTok->is(tok::identifier)) {
- if (MacroBlockBeginRegex.match(Text)) {
- FormatTok->Type = TT_MacroBlockBegin;
- } else if (MacroBlockEndRegex.match(Text)) {
- FormatTok->Type = TT_MacroBlockEnd;
- }
- }
- }
-
- return FormatTok;
- }
-
- FormatToken *FormatTok;
- bool IsFirstToken;
- bool GreaterStashed, LessStashed;
- unsigned Column;
- unsigned TrailingWhitespace;
- std::unique_ptr<Lexer> Lex;
- SourceManager &SourceMgr;
- FileID ID;
- FormatStyle &Style;
- IdentifierTable IdentTable;
- AdditionalKeywords Keywords;
- encoding::Encoding Encoding;
- llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
- // Index (in 'Tokens') of the last token that starts a new line.
- unsigned FirstInLineIndex;
- SmallVector<FormatToken *, 16> Tokens;
- SmallVector<IdentifierInfo *, 8> ForEachMacros;
-
- bool FormattingDisabled;
-
- llvm::Regex MacroBlockBeginRegex;
- llvm::Regex MacroBlockEndRegex;
-
- void readRawToken(FormatToken &Tok) {
- Lex->LexFromRawLexer(Tok.Tok);
- Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
- Tok.Tok.getLength());
- // For formatting, treat unterminated string literals like normal string
- // literals.
- if (Tok.is(tok::unknown)) {
- if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
- Tok.Tok.setKind(tok::string_literal);
- Tok.IsUnterminatedLiteral = true;
- } else if (Style.Language == FormatStyle::LK_JavaScript &&
- Tok.TokenText == "''") {
- Tok.Tok.setKind(tok::char_constant);
- }
- }
-
- if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
- Tok.TokenText == "/* clang-format on */")) {
- FormattingDisabled = false;
- }
-
- Tok.Finalized = FormattingDisabled;
-
- if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
- Tok.TokenText == "/* clang-format off */")) {
- FormattingDisabled = true;
- }
- }
-
- void resetLexer(unsigned Offset) {
- StringRef Buffer = SourceMgr.getBufferData(ID);
- Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
- getFormattingLangOpts(Style), Buffer.begin(),
- Buffer.begin() + Offset, Buffer.end()));
- Lex->SetKeepWhitespaceMode(true);
- TrailingWhitespace = 0;
- }
-};
-
-static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
- switch (Language) {
- case FormatStyle::LK_Cpp:
- return "C++";
- case FormatStyle::LK_Java:
- return "Java";
- case FormatStyle::LK_JavaScript:
- return "JavaScript";
- case FormatStyle::LK_Proto:
- return "Proto";
- default:
- return "Unknown";
- }
-}
-
-class Formatter : public UnwrappedLineConsumer {
+class Formatter : public TokenAnalyzer {
public:
- Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
- ArrayRef<CharSourceRange> Ranges)
- : Style(Style), ID(ID), SourceMgr(SourceMgr),
- Whitespaces(SourceMgr, Style,
- inputUsesCRLF(SourceMgr.getBufferData(ID))),
- Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
- Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
- DEBUG(llvm::dbgs() << "File encoding: "
- << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
- : "unknown")
- << "\n");
- DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
- << "\n");
- }
+ Formatter(const Environment &Env, const FormatStyle &Style,
+ bool *IncompleteFormat)
+ : TokenAnalyzer(Env, Style), IncompleteFormat(IncompleteFormat) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
+ deriveLocalStyle(AnnotatedLines);
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
- tooling::Replacements format(bool *IncompleteFormat) {
- tooling::Replacements Result;
- FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
-
- UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
- *this);
- Parser.parse();
- assert(UnwrappedLines.rbegin()->empty());
- for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
- ++Run) {
- DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
- SmallVector<AnnotatedLine *, 16> AnnotatedLines;
- for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
- AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
- }
- tooling::Replacements RunResult =
- format(AnnotatedLines, Tokens, IncompleteFormat);
- DEBUG({
- llvm::dbgs() << "Replacements for run " << Run << ":\n";
- for (tooling::Replacements::iterator I = RunResult.begin(),
- E = RunResult.end();
- I != E; ++I) {
- llvm::dbgs() << I->toString() << "\n";
- }
- });
- for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- delete AnnotatedLines[i];
- }
- Result.insert(RunResult.begin(), RunResult.end());
- Whitespaces.reset();
- }
- return Result;
- }
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
+ requoteJSStringLiteral(AnnotatedLines, Result);
- tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
- FormatTokenLexer &Tokens,
- bool *IncompleteFormat) {
- TokenAnnotator Annotator(Style, Tokens.getKeywords());
- for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
- Annotator.annotate(*AnnotatedLines[i]);
- }
- deriveLocalStyle(AnnotatedLines);
for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
}
- computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
Annotator.setCommentLineLevels(AnnotatedLines);
- ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
- Whitespaces, Encoding,
+
+ WhitespaceManager Whitespaces(
+ Env.getSourceManager(), Style,
+ inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
+ ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
+ Env.getSourceManager(), Whitespaces, Encoding,
BinPackInconclusiveFunctions);
UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
IncompleteFormat)
@@ -1495,137 +828,80 @@ public:
}
private:
- // Determines which lines are affected by the SourceRanges given as input.
- // Returns \c true if at least one line between I and E or one of their
- // children is affected.
- bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
- SmallVectorImpl<AnnotatedLine *>::iterator E) {
- bool SomeLineAffected = false;
- const AnnotatedLine *PreviousLine = nullptr;
- while (I != E) {
- AnnotatedLine *Line = *I;
- Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
-
- // If a line is part of a preprocessor directive, it needs to be formatted
- // if any token within the directive is affected.
- if (Line->InPPDirective) {
- FormatToken *Last = Line->Last;
- SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
- while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
- Last = (*PPEnd)->Last;
- ++PPEnd;
- }
-
- if (affectsTokenRange(*Line->First, *Last,
- /*IncludeLeadingNewlines=*/false)) {
- SomeLineAffected = true;
- markAllAsAffected(I, PPEnd);
- }
- I = PPEnd;
+ // Wherever a token is a double/single-quoted string literal, generates a
+ // replacement with a single/double-quoted string literal, re-escaping the
+ // contents in the process.
+ void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
+ tooling::Replacements &Result) {
+ for (AnnotatedLine *Line : Lines) {
+ requoteJSStringLiteral(Line->Children, Result);
+ if (!Line->Affected)
continue;
- }
-
- if (nonPPLineAffected(Line, PreviousLine))
- SomeLineAffected = true;
-
- PreviousLine = Line;
- ++I;
- }
- return SomeLineAffected;
- }
-
- // Determines whether 'Line' is affected by the SourceRanges given as input.
- // Returns \c true if the line or one of its children is affected.
- bool nonPPLineAffected(AnnotatedLine *Line,
- const AnnotatedLine *PreviousLine) {
- bool SomeLineAffected = false;
- Line->ChildrenAffected =
- computeAffectedLines(Line->Children.begin(), Line->Children.end());
- if (Line->ChildrenAffected)
- SomeLineAffected = true;
-
- // Stores whether one of the line's tokens is directly affected.
- bool SomeTokenAffected = false;
- // Stores whether we need to look at the leading newlines of the next token
- // in order to determine whether it was affected.
- bool IncludeLeadingNewlines = false;
-
- // Stores whether the first child line of any of this line's tokens is
- // affected.
- bool SomeFirstChildAffected = false;
-
- for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
- // Determine whether 'Tok' was affected.
- if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
- SomeTokenAffected = true;
-
- // Determine whether the first child of 'Tok' was affected.
- if (!Tok->Children.empty() && Tok->Children.front()->Affected)
- SomeFirstChildAffected = true;
-
- IncludeLeadingNewlines = Tok->Children.empty();
- }
-
- // Was this line moved, i.e. has it previously been on the same line as an
- // affected line?
- bool LineMoved = PreviousLine && PreviousLine->Affected &&
- Line->First->NewlinesBefore == 0;
-
- bool IsContinuedComment =
- Line->First->is(tok::comment) && Line->First->Next == nullptr &&
- Line->First->NewlinesBefore < 2 && PreviousLine &&
- PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
-
- if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
- IsContinuedComment) {
- Line->Affected = true;
- SomeLineAffected = true;
- }
- return SomeLineAffected;
- }
-
- // Marks all lines between I and E as well as all their children as affected.
- void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
- SmallVectorImpl<AnnotatedLine *>::iterator E) {
- while (I != E) {
- (*I)->Affected = true;
- markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
- ++I;
- }
- }
-
- // Returns true if the range from 'First' to 'Last' intersects with one of the
- // input ranges.
- bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
- bool IncludeLeadingNewlines) {
- SourceLocation Start = First.WhitespaceRange.getBegin();
- if (!IncludeLeadingNewlines)
- Start = Start.getLocWithOffset(First.LastNewlineOffset);
- SourceLocation End = Last.getStartOfNonWhitespace();
- End = End.getLocWithOffset(Last.TokenText.size());
- CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
- return affectsCharSourceRange(Range);
- }
+ for (FormatToken *FormatTok = Line->First; FormatTok;
+ FormatTok = FormatTok->Next) {
+ StringRef Input = FormatTok->TokenText;
+ if (FormatTok->Finalized || !FormatTok->isStringLiteral() ||
+ // NB: testing for not starting with a double quote to avoid breaking
+ // `template strings`.
+ (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
+ !Input.startswith("\"")) ||
+ (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
+ !Input.startswith("\'")))
+ continue;
- // Returns true if one of the input ranges intersects the leading empty lines
- // before 'Tok'.
- bool affectsLeadingEmptyLines(const FormatToken &Tok) {
- CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
- Tok.WhitespaceRange.getBegin(),
- Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
- return affectsCharSourceRange(EmptyLineRange);
- }
+ // Change start and end quote.
+ bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
+ SourceLocation Start = FormatTok->Tok.getLocation();
+ auto Replace = [&](SourceLocation Start, unsigned Length,
+ StringRef ReplacementText) {
+ Result.insert(tooling::Replacement(Env.getSourceManager(), Start,
+ Length, ReplacementText));
+ };
+ Replace(Start, 1, IsSingle ? "'" : "\"");
+ Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
+ IsSingle ? "'" : "\"");
+
+ // Escape internal quotes.
+ size_t ColumnWidth = FormatTok->TokenText.size();
+ bool Escaped = false;
+ for (size_t i = 1; i < Input.size() - 1; i++) {
+ switch (Input[i]) {
+ case '\\':
+ if (!Escaped && i + 1 < Input.size() &&
+ ((IsSingle && Input[i + 1] == '"') ||
+ (!IsSingle && Input[i + 1] == '\''))) {
+ // Remove this '\'; it escapes a " or ' that no longer needs
+ // escaping.
+ ColumnWidth--;
+ Replace(Start.getLocWithOffset(i), 1, "");
+ continue;
+ }
+ Escaped = !Escaped;
+ break;
+ case '\"':
+ case '\'':
+ if (!Escaped && IsSingle == (Input[i] == '\'')) {
+ // Escape the quote.
+ Replace(Start.getLocWithOffset(i), 0, "\\");
+ ColumnWidth++;
+ }
+ Escaped = false;
+ break;
+ default:
+ Escaped = false;
+ break;
+ }
+ }
- // Returns true if 'Range' intersects with one of the input ranges.
- bool affectsCharSourceRange(const CharSourceRange &Range) {
- for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
- E = Ranges.end();
- I != E; ++I) {
- if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
- !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
- return true;
+ // For formatting, adjust ColumnWidth to take the escapes added and
+ // removed above into account.
+ // FIXME(martinprobst): this might conflict with code breaking a long
+ // string literal (which clang-format doesn't do, yet). For that to
+ // work, this code would have to modify TokenText directly.
+ FormatTok->ColumnWidth = ColumnWidth;
+ }
}
- return false;
}
static bool inputUsesCRLF(StringRef Text) {
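
The escaping walk above has to track backslash state carefully; the same rule as a self-contained sketch (a hypothetical helper, not the clang-format entry point), converting a double-quoted JS literal to single quotes:

#include <cstddef>
#include <string>

// Lit includes its delimiters; the result uses single quotes.
std::string requoteToSingle(const std::string &Lit) {
  std::string Out = "'";
  bool Escaped = false;
  for (std::size_t i = 1; i + 1 < Lit.size(); ++i) {
    char C = Lit[i];
    if (!Escaped && C == '\\' && i + 2 < Lit.size() && Lit[i + 1] == '"')
      continue; // drop the '\': '"' needs no escape inside '...'
    if (!Escaped && C == '\'')
      Out += '\\'; // a bare ' now needs escaping
    Escaped = (C == '\\') && !Escaped;
    Out += C;
  }
  Out += '\'';
  return Out;
}

For example, the literal "it's \"x\"" becomes 'it\'s "x"': the escape on the double quote is dropped and one is added on the single quote, exactly the two ColumnWidth adjustments made above.
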
@@ -1634,7 +910,7 @@ private:
bool
hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
- for (const AnnotatedLine* Line : Lines) {
+ for (const AnnotatedLine *Line : Lines) {
if (hasCpp03IncompatibleFormat(Line->Children))
return true;
for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
@@ -1652,7 +928,7 @@ private:
int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
int AlignmentDiff = 0;
- for (const AnnotatedLine* Line : Lines) {
+ for (const AnnotatedLine *Line : Lines) {
AlignmentDiff += countVariableAlignments(Line->Children);
for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
if (!Tok->is(TT_PointerOrReference))
@@ -1699,24 +975,219 @@ private:
HasBinPackedFunction || !HasOnePerLineFunction;
}
- void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
- assert(!UnwrappedLines.empty());
- UnwrappedLines.back().push_back(TheLine);
+ bool BinPackInconclusiveFunctions;
+ bool *IncompleteFormat;
+};
+
+// This class cleans up erroneous/redundant code around the given ranges in a
+// file.
+class Cleaner : public TokenAnalyzer {
+public:
+ Cleaner(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style),
+ DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
+
+ // FIXME: eliminate unused parameters.
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
+ // FIXME: in the current implementation the granularity of an affected
+ // range is an annotated line. However, this is not sufficient. Furthermore,
+ // redundant code introduced by replacements does not necessarily
+ // intersect with the ranges of the replacements that result in the
+ // redundancy. To determine if some redundant code is actually introduced
+ // by replacements (e.g. deletions), we need to come up with a more
+ // sophisticated way of computing affected ranges.
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+
+ checkEmptyNamespace(AnnotatedLines);
+
+ for (auto &Line : AnnotatedLines) {
+ if (Line->Affected) {
+ cleanupRight(Line->First, tok::comma, tok::comma);
+ cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma);
+ cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace);
+ cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace);
+ }
+ }
+
+ return generateFixes();
}
- void finishRun() override {
- UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+private:
+ bool containsOnlyComments(const AnnotatedLine &Line) {
+ for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) {
+ if (Tok->isNot(tok::comment))
+ return false;
+ }
+ return true;
}
- FormatStyle Style;
- FileID ID;
- SourceManager &SourceMgr;
- WhitespaceManager Whitespaces;
- SmallVector<CharSourceRange, 8> Ranges;
- SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+ // Iterate through all lines and remove any empty (nested) namespaces.
+ void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ auto &Line = *AnnotatedLines[i];
+ if (Line.startsWith(tok::kw_namespace) ||
+ Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
+ checkEmptyNamespace(AnnotatedLines, i, i);
+ }
+ }
- encoding::Encoding Encoding;
- bool BinPackInconclusiveFunctions;
+ for (auto Line : DeletedLines) {
+ FormatToken *Tok = AnnotatedLines[Line]->First;
+ while (Tok) {
+ deleteToken(Tok);
+ Tok = Tok->Next;
+ }
+ }
+ }
+
+ // Checks whether the namespace that starts at \p CurrentLine and its nested
+ // namespaces are empty, and deletes them if they are. It also sets
+ // \p NewLine to the last line checked.
+ // Returns true if the current namespace is empty.
+ bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ unsigned CurrentLine, unsigned &NewLine) {
+ unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
+ if (Style.BraceWrapping.AfterNamespace) {
+ // If the left brace is in a new line, we should consume it first so that
+ // it does not make the namespace non-empty.
+ // FIXME: error handling if there is no left brace.
+ if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) {
+ NewLine = CurrentLine;
+ return false;
+ }
+ } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) {
+ return false;
+ }
+ while (++CurrentLine < End) {
+ if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
+ break;
+
+ if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
+ AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
+ tok::kw_namespace)) {
+ if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine))
+ return false;
+ CurrentLine = NewLine;
+ continue;
+ }
+
+ if (containsOnlyComments(*AnnotatedLines[CurrentLine]))
+ continue;
+
+ // If there is anything other than comments or nested namespaces in the
+ // current namespace, the namespace cannot be empty.
+ NewLine = CurrentLine;
+ return false;
+ }
+
+ NewLine = CurrentLine;
+ if (CurrentLine >= End)
+ return false;
+
+ // Check if the empty namespace is actually affected by changed ranges.
+ if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
+ AnnotatedLines[InitLine]->First->Tok.getLocation(),
+ AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
+ return false;
+
+ for (unsigned i = InitLine; i <= CurrentLine; ++i) {
+ DeletedLines.insert(i);
+ }
+
+ return true;
+ }
+
+ // Checks pairs {start, start->next}, ..., {end->previous, end} and deletes
+ // one of the tokens in a pair if the left token has kind \p LK and the
+ // right token has kind \p RK. If \p DeleteLeft is true, the left token is
+ // deleted on a match; otherwise, the right token is deleted.
+ template <typename LeftKind, typename RightKind>
+ void cleanupPair(FormatToken *Start, LeftKind LK, RightKind RK,
+ bool DeleteLeft) {
+ auto NextNotDeleted = [this](const FormatToken &Tok) -> FormatToken * {
+ for (auto *Res = Tok.Next; Res; Res = Res->Next)
+ if (!Res->is(tok::comment) &&
+ DeletedTokens.find(Res) == DeletedTokens.end())
+ return Res;
+ return nullptr;
+ };
+ for (auto *Left = Start; Left;) {
+ auto *Right = NextNotDeleted(*Left);
+ if (!Right)
+ break;
+ if (Left->is(LK) && Right->is(RK)) {
+ deleteToken(DeleteLeft ? Left : Right);
+ // If the right token is deleted, we should keep the left token
+ // unchanged and pair it with the new right token.
+ if (!DeleteLeft)
+ continue;
+ }
+ Left = Right;
+ }
+ }
+
+ template <typename LeftKind, typename RightKind>
+ void cleanupLeft(FormatToken *Start, LeftKind LK, RightKind RK) {
+ cleanupPair(Start, LK, RK, /*DeleteLeft=*/true);
+ }
+
+ template <typename LeftKind, typename RightKind>
+ void cleanupRight(FormatToken *Start, LeftKind LK, RightKind RK) {
+ cleanupPair(Start, LK, RK, /*DeleteLeft=*/false);
+ }
+
+ // Delete the given token.
+ inline void deleteToken(FormatToken *Tok) {
+ if (Tok)
+ DeletedTokens.insert(Tok);
+ }
+
+ tooling::Replacements generateFixes() {
+ tooling::Replacements Fixes;
+ std::vector<FormatToken *> Tokens;
+ std::copy(DeletedTokens.begin(), DeletedTokens.end(),
+ std::back_inserter(Tokens));
+
+ // Merge multiple consecutive token deletions into one big deletion so that
+ // the number of replacements can be reduced. This makes computing affected
+ // ranges more efficient when we run reformat on the changed code.
+ unsigned Idx = 0;
+ while (Idx < Tokens.size()) {
+ unsigned St = Idx, End = Idx;
+ while ((End + 1) < Tokens.size() &&
+ Tokens[End]->Next == Tokens[End + 1]) {
+ End++;
+ }
+ auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
+ Tokens[End]->Tok.getEndLoc());
+ Fixes.insert(tooling::Replacement(Env.getSourceManager(), SR, ""));
+ Idx = End + 1;
+ }
+
+ return Fixes;
+ }
+
+ // Class for less-than comparison for the set `DeletedTokens`.
+ // We store tokens in the order they appear in the translation unit so that
+ // we do not need to sort them in `generateFixes()`.
+ struct FormatTokenLess {
+ FormatTokenLess(const SourceManager &SM) : SM(SM) {}
+
+ bool operator()(const FormatToken *LHS, const FormatToken *RHS) const {
+ return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
+ RHS->Tok.getLocation());
+ }
+ const SourceManager &SM;
+ };
+
+ // Tokens to be deleted.
+ std::set<FormatToken *, FormatTokenLess> DeletedTokens;
+ // The line numbers of lines to be deleted.
+ std::set<unsigned> DeletedLines;
};
struct IncludeDirective {
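
Callers reach the Cleaner above through the clang::format cleanup() entry point introduced with it; a hedged usage sketch (signature assumed from this import, input text illustrative):

#include "clang/Format/Format.h"
#include "clang/Tooling/Core/Replacement.h"

using namespace clang;

// The returned replacements delete the empty nested namespaces.
tooling::Replacements removeEmptyNamespaces() {
  StringRef Code = "namespace a {\nnamespace b {\n}\n}\n";
  format::FormatStyle Style = format::getLLVMStyle();
  return format::cleanup(
      Style, Code,
      {tooling::Range(0, static_cast<unsigned>(Code.size()))}, "input.cc");
}
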
@@ -1742,7 +1213,7 @@ static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
// Sorts a block of includes given by 'Includes' alphabetically, adding the
// necessary replacement to 'Replaces'. 'Includes' must be in strict source
// order.
-static void sortIncludes(const FormatStyle &Style,
+static void sortCppIncludes(const FormatStyle &Style,
const SmallVectorImpl<IncludeDirective> &Includes,
ArrayRef<tooling::Range> Ranges, StringRef FileName,
tooling::Replacements &Replaces, unsigned *Cursor) {
@@ -1752,21 +1223,15 @@ static void sortIncludes(const FormatStyle &Style,
SmallVector<unsigned, 16> Indices;
for (unsigned i = 0, e = Includes.size(); i != e; ++i)
Indices.push_back(i);
- std::sort(Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
- return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
- std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
- });
+ std::stable_sort(
+ Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
+ return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
+ std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
+ });
// If the #includes are out of order, we generate a single replacement fixing
// the entire block. Otherwise, no replacement is generated.
- bool OutOfOrder = false;
- for (unsigned i = 1, e = Indices.size(); i != e; ++i) {
- if (Indices[i] != i) {
- OutOfOrder = true;
- break;
- }
- }
- if (!OutOfOrder)
+ if (std::is_sorted(Indices.begin(), Indices.end()))
return;
std::string result;
@@ -1796,17 +1261,73 @@ static void sortIncludes(const FormatStyle &Style,
result.size(), result));
}
-tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
- ArrayRef<tooling::Range> Ranges,
- StringRef FileName, unsigned *Cursor) {
- tooling::Replacements Replaces;
- if (!Style.SortIncludes)
- return Replaces;
+namespace {
+
+// This class manages priorities of #include categories and calculates
+// priorities for headers.
+class IncludeCategoryManager {
+public:
+ IncludeCategoryManager(const FormatStyle &Style, StringRef FileName)
+ : Style(Style), FileName(FileName) {
+ FileStem = llvm::sys::path::stem(FileName);
+ for (const auto &Category : Style.IncludeCategories)
+ CategoryRegexs.emplace_back(Category.Regex);
+ IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
+ FileName.endswith(".cpp") || FileName.endswith(".c++") ||
+ FileName.endswith(".cxx") || FileName.endswith(".m") ||
+ FileName.endswith(".mm");
+ }
+
+ // Returns the priority of the category which \p IncludeName belongs to.
+ // If \p CheckMainHeader is true and \p IncludeName is a main header, returns
+ // 0. Otherwise, returns the priority of the matching category or INT_MAX.
+ int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) {
+ int Ret = INT_MAX;
+ for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
+ if (CategoryRegexs[i].match(IncludeName)) {
+ Ret = Style.IncludeCategories[i].Priority;
+ break;
+ }
+ if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
+ Ret = 0;
+ return Ret;
+ }
+
+private:
+ bool isMainHeader(StringRef IncludeName) const {
+ if (!IncludeName.startswith("\""))
+ return false;
+ StringRef HeaderStem =
+ llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
+ if (FileStem.startswith(HeaderStem)) {
+ llvm::Regex MainIncludeRegex(
+ (HeaderStem + Style.IncludeIsMainRegex).str());
+ if (MainIncludeRegex.match(FileStem))
+ return true;
+ }
+ return false;
+ }
+
+ const FormatStyle &Style;
+ bool IsMainFile;
+ StringRef FileName;
+ StringRef FileStem;
+ SmallVector<llvm::Regex, 4> CategoryRegexs;
+};
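getIncludePriority is first-match-wins over the configured category regexes,
with 0 reserved for the main header and INT_MAX for uncategorized headers. A
self-contained sketch of that lookup, substituting std::regex for llvm::Regex
(an assumption for portability, not the patch's implementation):

    #include <climits>
    #include <regex>
    #include <string>
    #include <vector>

    struct IncludeCategory { std::regex Pattern; int Priority; };

    // First matching category wins; unmatched headers sort last.
    static int priorityFor(const std::string &IncludeName,
                           const std::vector<IncludeCategory> &Categories) {
      for (const auto &C : Categories)
        if (std::regex_search(IncludeName, C.Pattern))
          return C.Priority;
      return INT_MAX;
    }

    // priorityFor("\"llvm/ADT/StringRef.h\"", {{std::regex("^\"llvm/"), 2}})
    // yields 2; priorityFor("<vector>", {}) yields INT_MAX.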
+
+const char IncludeRegexPattern[] =
+ R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+
+} // anonymous namespace
+tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName,
+ tooling::Replacements &Replaces,
+ unsigned *Cursor) {
unsigned Prev = 0;
unsigned SearchFrom = 0;
- llvm::Regex IncludeRegex(
- R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
+ llvm::Regex IncludeRegex(IncludeRegexPattern);
SmallVector<StringRef, 4> Matches;
SmallVector<IncludeDirective, 16> IncludesInBlock;
@@ -1817,19 +1338,9 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
//
// FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
// cases where the first #include is unlikely to be the main header.
- bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
- FileName.endswith(".cpp") || FileName.endswith(".c++") ||
- FileName.endswith(".cxx") || FileName.endswith(".m") ||
- FileName.endswith(".mm");
- StringRef FileStem = llvm::sys::path::stem(FileName);
+ IncludeCategoryManager Categories(Style, FileName);
bool FirstIncludeBlock = true;
bool MainIncludeFound = false;
-
- // Create pre-compiled regular expressions for the #include categories.
- SmallVector<llvm::Regex, 4> CategoryRegexs;
- for (const auto &Category : Style.IncludeCategories)
- CategoryRegexs.emplace_back(Category.Regex);
-
bool FormattingOff = false;
for (;;) {
@@ -1846,26 +1357,15 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
if (!FormattingOff && !Line.endswith("\\")) {
if (IncludeRegex.match(Line, &Matches)) {
StringRef IncludeName = Matches[2];
- int Category = INT_MAX;
- for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
- if (CategoryRegexs[i].match(IncludeName)) {
- Category = Style.IncludeCategories[i].Priority;
- break;
- }
- }
- if (IsSource && !MainIncludeFound && Category > 0 &&
- FirstIncludeBlock && IncludeName.startswith("\"")) {
- StringRef HeaderStem =
- llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
- if (FileStem.startswith(HeaderStem)) {
- Category = 0;
- MainIncludeFound = true;
- }
- }
+ int Category = Categories.getIncludePriority(
+ IncludeName,
+ /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock);
+ if (Category == 0)
+ MainIncludeFound = true;
IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
} else if (!IncludesInBlock.empty()) {
- sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
- Cursor);
+ sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
+ Cursor);
IncludesInBlock.clear();
FirstIncludeBlock = false;
}
@@ -1876,47 +1376,280 @@ tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
SearchFrom = Pos + 1;
}
if (!IncludesInBlock.empty())
- sortIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
+ sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
return Replaces;
}
-tooling::Replacements reformat(const FormatStyle &Style,
- SourceManager &SourceMgr, FileID ID,
- ArrayRef<CharSourceRange> Ranges,
+tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName, unsigned *Cursor) {
+ tooling::Replacements Replaces;
+ if (!Style.SortIncludes)
+ return Replaces;
+ if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript)
+ return sortJavaScriptImports(Style, Code, Ranges, FileName);
+ sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor);
+ return Replaces;
+}
+
+template <typename T>
+static llvm::Expected<tooling::Replacements>
+processReplacements(T ProcessFunc, StringRef Code,
+ const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ if (Replaces.empty())
+ return tooling::Replacements();
+
+ auto NewCode = applyAllReplacements(Code, Replaces);
+ if (!NewCode)
+ return NewCode.takeError();
+ std::vector<tooling::Range> ChangedRanges =
+ tooling::calculateChangedRanges(Replaces);
+ StringRef FileName = Replaces.begin()->getFilePath();
+
+ tooling::Replacements FormatReplaces =
+ ProcessFunc(Style, *NewCode, ChangedRanges, FileName);
+
+ return mergeReplacements(Replaces, FormatReplaces);
+}
+
+llvm::Expected<tooling::Replacements>
+formatReplacements(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ // We need to use a lambda here since there are two overloads of
+ // `sortIncludes`.
+ auto SortIncludes = [](const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) -> tooling::Replacements {
+ return sortIncludes(Style, Code, Ranges, FileName);
+ };
+ auto SortedReplaces =
+ processReplacements(SortIncludes, Code, Replaces, Style);
+ if (!SortedReplaces)
+ return SortedReplaces.takeError();
+
+ // We need to use a lambda here since there are two overloads of
+ // `reformat`.
+ auto Reformat = [](const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) -> tooling::Replacements {
+ return reformat(Style, Code, Ranges, FileName);
+ };
+ return processReplacements(Reformat, Code, *SortedReplaces, Style);
+}
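formatReplacements is what refactoring tools call after computing their edits:
it sorts includes and then reformats only the ranges the edits touched. A
hedged usage sketch (assumes `using namespace clang;` and the usual
Format/Tooling includes):

    // Apply `Edits` to `Code`, folding in include-sorting and reformatting
    // of the changed regions. Returns the new buffer or the first error.
    static llvm::Expected<std::string>
    applyAndFormat(llvm::StringRef Code, const tooling::Replacements &Edits,
                   const format::FormatStyle &Style) {
      auto Formatted = format::formatReplacements(Code, Edits, Style);
      if (!Formatted)
        return Formatted.takeError();
      return tooling::applyAllReplacements(Code, *Formatted);
    }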
+
+namespace {
+
+inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
+ return Replace.getOffset() == UINT_MAX &&
+ llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
+}
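The offset UINT_MAX is the in-band signal for "insert this header somewhere
appropriate": the replacement's text carries the full directive, and
fixCppIncludeInsertions() below computes the real offset. A sketch of
constructing such a request (hypothetical helper, assuming the usual
tooling::Replacement constructor):

    // Ask for `Header` (e.g. "<vector>" or "\"Foo.h\"") to be inserted into
    // the right #include block of `FileName`.
    static tooling::Replacement makeHeaderInsertion(llvm::StringRef FileName,
                                                    llvm::StringRef Header) {
      return tooling::Replacement(FileName, /*Offset=*/UINT_MAX, /*Length=*/0,
                                  ("#include " + Header + "\n").str());
    }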
+
+void skipComments(Lexer &Lex, Token &Tok) {
+ while (Tok.is(tok::comment))
+ if (Lex.LexFromRawLexer(Tok))
+ return;
+}
+
+// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
+// \p Tok will be the token after this directive; otherwise, it can be any token
+// after the given \p Tok (including \p Tok).
+bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) {
+ bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier) &&
+ Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier);
+ if (Matched)
+ Lex.LexFromRawLexer(Tok);
+ return Matched;
+}
+
+unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
+ StringRef Code,
+ const FormatStyle &Style) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+ const SourceManager &SourceMgr = Env->getSourceManager();
+ Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+ getFormattingLangOpts(Style));
+ Token Tok;
+ // Get the first token.
+ Lex.LexFromRawLexer(Tok);
+ skipComments(Lex, Tok);
+ unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
+ if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+ skipComments(Lex, Tok);
+ if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
+ return SourceMgr.getFileOffset(Tok.getLocation());
+ }
+ return AfterComments;
+}
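Concretely, for a guarded header the returned offset points just past the
guard's #define, so insertions land inside the guard (illustrative, not a test
from this patch):

    // Given:
    //   // banner comment
    //   #ifndef FOO_H
    //   #define FOO_H
    //   int x;             <-- new #includes are inserted before this line
    // Without a guard, the offset is simply the position after the leading
    // comment block.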
+
+// FIXME: we also need to insert a '\n' at the end of the code if we have an
+// insertion with offset Code.size(), and there is no '\n' at the end of the
+// code.
+// FIXME: do not insert headers into conditional #include blocks, e.g. #includes
+// surrounded by compile condition "#if...".
+// FIXME: insert empty lines between newly created blocks.
+tooling::Replacements
+fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ if (Style.Language != FormatStyle::LanguageKind::LK_Cpp)
+ return Replaces;
+
+ tooling::Replacements HeaderInsertions;
+ for (const auto &R : Replaces) {
+ if (isHeaderInsertion(R))
+ HeaderInsertions.insert(R);
+ else if (R.getOffset() == UINT_MAX)
+ llvm::errs() << "Insertions other than header #include insertion are "
+ "not supported! "
+ << R.getReplacementText() << "\n";
+ }
+ if (HeaderInsertions.empty())
+ return Replaces;
+ tooling::Replacements Result;
+ std::set_difference(Replaces.begin(), Replaces.end(),
+ HeaderInsertions.begin(), HeaderInsertions.end(),
+ std::inserter(Result, Result.begin()));
+
+ llvm::Regex IncludeRegex(IncludeRegexPattern);
+ llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
+ SmallVector<StringRef, 4> Matches;
+
+ StringRef FileName = Replaces.begin()->getFilePath();
+ IncludeCategoryManager Categories(Style, FileName);
+
+ // Record the offset of the end of the last include in each category.
+ std::map<int, int> CategoryEndOffsets;
+ // All possible priorities.
+ // Add 0 for main header and INT_MAX for headers that are not in any category.
+ std::set<int> Priorities = {0, INT_MAX};
+ for (const auto &Category : Style.IncludeCategories)
+ Priorities.insert(Category.Priority);
+ int FirstIncludeOffset = -1;
+ // All new headers should be inserted after this offset.
+ unsigned MinInsertOffset =
+ getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
+ StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
+ SmallVector<StringRef, 32> Lines;
+ TrimmedCode.split(Lines, '\n');
+ unsigned Offset = MinInsertOffset;
+ unsigned NextLineOffset;
+ std::set<StringRef> ExistingIncludes;
+ for (auto Line : Lines) {
+ NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
+ if (IncludeRegex.match(Line, &Matches)) {
+ StringRef IncludeName = Matches[2];
+ ExistingIncludes.insert(IncludeName);
+ int Category = Categories.getIncludePriority(
+ IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+ CategoryEndOffsets[Category] = NextLineOffset;
+ if (FirstIncludeOffset < 0)
+ FirstIncludeOffset = Offset;
+ }
+ Offset = NextLineOffset;
+ }
+
+ // Populate CategoryEndOffsets:
+ // - Ensure that CategoryEndOffsets[Highest] is always populated.
+ // - If CategoryEndOffsets[Priority] isn't set, use the next higher value
+ // that is set, up to CategoryEndOffsets[Highest].
+ auto Highest = Priorities.begin();
+ if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
+ if (FirstIncludeOffset >= 0)
+ CategoryEndOffsets[*Highest] = FirstIncludeOffset;
+ else
+ CategoryEndOffsets[*Highest] = MinInsertOffset;
+ }
+ // By this point, CategoryEndOffsets[Highest] is always set appropriately:
+ // - to an appropriate location before/after existing #includes, or
+ // - to right after the header guard, or
+ // - to the beginning of the file.
+ for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
+ if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
+ CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
+
+ for (const auto &R : HeaderInsertions) {
+ auto IncludeDirective = R.getReplacementText();
+ bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
+ assert(Matched && "Header insertion replacement must have replacement text "
+ "'#include ...'");
+ (void)Matched;
+ auto IncludeName = Matches[2];
+ if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) {
+ DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName
+ << "\n");
+ continue;
+ }
+ int Category =
+ Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true);
+ Offset = CategoryEndOffsets[Category];
+ std::string NewInclude = !IncludeDirective.endswith("\n")
+ ? (IncludeDirective + "\n").str()
+ : IncludeDirective.str();
+ Result.insert(tooling::Replacement(FileName, Offset, 0, NewInclude));
+ }
+ return Result;
+}
+
+} // anonymous namespace
+
+llvm::Expected<tooling::Replacements>
+cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ // We need to use a lambda here since there are two overloads of
+ // `cleanup`.
+ auto Cleanup = [](const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) -> tooling::Replacements {
+ return cleanup(Style, Code, Ranges, FileName);
+ };
+ // Make header insertion replacements insert new headers into correct blocks.
+ tooling::Replacements NewReplaces =
+ fixCppIncludeInsertions(Code, Replaces, Style);
+ return processReplacements(Cleanup, Code, NewReplaces, Style);
+}
+
+tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM,
+ FileID ID, ArrayRef<CharSourceRange> Ranges,
bool *IncompleteFormat) {
FormatStyle Expanded = expandPresets(Style);
if (Expanded.DisableFormat)
return tooling::Replacements();
- Formatter formatter(Expanded, SourceMgr, ID, Ranges);
- return formatter.format(IncompleteFormat);
+
+ Environment Env(SM, ID, Ranges);
+ Formatter Format(Env, Expanded, IncompleteFormat);
+ return Format.process();
}
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName, bool *IncompleteFormat) {
- if (Style.DisableFormat)
+ FormatStyle Expanded = expandPresets(Style);
+ if (Expanded.DisableFormat)
return tooling::Replacements();
- IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
- new vfs::InMemoryFileSystem);
- FileManager Files(FileSystemOptions(), InMemoryFileSystem);
- DiagnosticsEngine Diagnostics(
- IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
- new DiagnosticOptions);
- SourceManager SourceMgr(Diagnostics, Files);
- InMemoryFileSystem->addFile(
- FileName, 0, llvm::MemoryBuffer::getMemBuffer(
- Code, FileName, /*RequiresNullTerminator=*/false));
- FileID ID = SourceMgr.createFileID(Files.getFile(FileName), SourceLocation(),
- clang::SrcMgr::C_User);
- SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
- std::vector<CharSourceRange> CharRanges;
- for (const tooling::Range &Range : Ranges) {
- SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
- SourceLocation End = Start.getLocWithOffset(Range.getLength());
- CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
- }
- return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ Formatter Format(*Env, Expanded, IncompleteFormat);
+ return Format.process();
+}
+
+tooling::Replacements cleanup(const FormatStyle &Style, SourceManager &SM,
+ FileID ID, ArrayRef<CharSourceRange> Ranges) {
+ Environment Env(SM, ID, Ranges);
+ Cleaner Clean(Env, Style);
+ return Clean.process();
+}
+
+tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ Cleaner Clean(*Env, Style);
+ return Clean.process();
}
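Together these overloads give the two public entry points: one for callers
that already have a SourceManager, one that builds a virtual file around a
string. A minimal string-based driver (a sketch that formats the whole buffer;
assumes `using namespace clang;`):

    // Format an entire in-memory buffer with LLVM style.
    static std::string formatBuffer(llvm::StringRef Code) {
      format::FormatStyle Style = format::getLLVMStyle();
      tooling::Range WholeFile(0, Code.size());
      bool Incomplete = false;
      tooling::Replacements R =
          format::reformat(Style, Code, {WholeFile}, "<stdin>", &Incomplete);
      if (auto Changed = tooling::applyAllReplacements(Code, R))
        return *Changed;
      else {
        llvm::consumeError(Changed.takeError());
        return Code.str();
      }
    }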
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -1930,7 +1663,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
LangOpts.Bool = 1;
LangOpts.ObjC1 = 1;
LangOpts.ObjC2 = 1;
- LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
+ LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
LangOpts.DeclSpecKeyword = 1; // To get __declspec.
return LangOpts;
}
@@ -1960,7 +1693,10 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
}
FormatStyle getStyle(StringRef StyleName, StringRef FileName,
- StringRef FallbackStyle) {
+ StringRef FallbackStyle, vfs::FileSystem *FS) {
+ if (!FS) {
+ FS = vfs::getRealFileSystem().get();
+ }
FormatStyle Style = getLLVMStyle();
Style.Language = getLanguageByFileName(FileName);
if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
@@ -1991,28 +1727,34 @@ FormatStyle getStyle(StringRef StyleName, StringRef FileName,
llvm::sys::fs::make_absolute(Path);
for (StringRef Directory = Path; !Directory.empty();
Directory = llvm::sys::path::parent_path(Directory)) {
- if (!llvm::sys::fs::is_directory(Directory))
+
+ auto Status = FS->status(Directory);
+ if (!Status ||
+ Status->getType() != llvm::sys::fs::file_type::directory_file) {
continue;
+ }
+
SmallString<128> ConfigFile(Directory);
llvm::sys::path::append(ConfigFile, ".clang-format");
DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
- bool IsFile = false;
- // Ignore errors from is_regular_file: we only need to know if we can read
- // the file or not.
- llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
+ Status = FS->status(ConfigFile.str());
+ bool IsFile =
+ Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
if (!IsFile) {
// Try _clang-format too, since dotfiles are not commonly used on Windows.
ConfigFile = Directory;
llvm::sys::path::append(ConfigFile, "_clang-format");
DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
- llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
+ Status = FS->status(ConfigFile.str());
+ IsFile = Status &&
+ (Status->getType() == llvm::sys::fs::file_type::regular_file);
}
if (IsFile) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
- llvm::MemoryBuffer::getFile(ConfigFile.c_str());
+ FS->getBufferForFile(ConfigFile.str());
if (std::error_code EC = Text.getError()) {
llvm::errs() << EC.message() << "\n";
break;
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
index d6cd450d892e..2ae4ddcfd08a 100644
--- a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
@@ -53,6 +53,7 @@ bool FormatToken::isSimpleTypeSpecifier() const {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_wchar_t:
case tok::kw_bool:
case tok::kw___underlying_type:
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.h b/contrib/llvm/tools/clang/lib/Format/FormatToken.h
index b683660f350a..43b162513620 100644
--- a/contrib/llvm/tools/clang/lib/Format/FormatToken.h
+++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.h
@@ -54,6 +54,7 @@ namespace format {
TYPE(JsComputedPropertyName) \
TYPE(JsFatArrow) \
TYPE(JsTypeColon) \
+ TYPE(JsTypeOperator) \
TYPE(JsTypeOptionalQuestion) \
TYPE(LambdaArrow) \
TYPE(LambdaLSquare) \
@@ -144,7 +145,7 @@ struct FormatToken {
/// \brief Whether the token text contains newlines (escaped or not).
bool IsMultiline = false;
- /// \brief Indicates that this is the first token.
+ /// \brief Indicates that this is the first token of the file.
bool IsFirst = false;
/// \brief Whether there must be a line break before this token.
@@ -296,6 +297,20 @@ struct FormatToken {
}
template <typename T> bool isNot(T Kind) const { return !is(Kind); }
+ /// \c true if this token starts a sequence with the given tokens in order,
+ /// following the ``Next`` pointers, ignoring comments.
+ template <typename A, typename... Ts>
+ bool startsSequence(A K1, Ts... Tokens) const {
+ return startsSequenceInternal(K1, Tokens...);
+ }
+
+ /// \c true if this token ends a sequence with the given tokens in order,
+ /// following the ``Previous`` pointers, ignoring comments.
+ template <typename A, typename... Ts>
+ bool endsSequence(A K1, Ts... Tokens) const {
+ return endsSequenceInternal(K1, Tokens...);
+ }
+
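A usage sketch for the new sequence helpers (hypothetical check, skipping
interleaved comments as documented):

    // True for tokens beginning `using namespace ...`, even when a comment
    // sits between the two keywords.
    static bool startsUsingNamespace(const FormatToken &Tok) {
      return Tok.startsSequence(tok::kw_using, tok::kw_namespace);
    }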
bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
@@ -428,6 +443,34 @@ private:
// Disallow copying.
FormatToken(const FormatToken &) = delete;
void operator=(const FormatToken &) = delete;
+
+ template <typename A, typename... Ts>
+ bool startsSequenceInternal(A K1, Ts... Tokens) const {
+ if (is(tok::comment) && Next)
+ return Next->startsSequenceInternal(K1, Tokens...);
+ return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
+ }
+
+ template <typename A>
+ bool startsSequenceInternal(A K1) const {
+ if (is(tok::comment) && Next)
+ return Next->startsSequenceInternal(K1);
+ return is(K1);
+ }
+
+ template <typename A, typename... Ts>
+ bool endsSequenceInternal(A K1) const {
+ if (is(tok::comment) && Previous)
+ return Previous->endsSequenceInternal(K1);
+ return is(K1);
+ }
+
+ template <typename A, typename... Ts>
+ bool endsSequenceInternal(A K1, Ts... Tokens) const {
+ if (is(tok::comment) && Previous)
+ return Previous->endsSequenceInternal(K1, Tokens...);
+ return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
+ }
};
class ContinuationIndenter;
@@ -528,17 +571,24 @@ struct AdditionalKeywords {
kw_final = &IdentTable.get("final");
kw_override = &IdentTable.get("override");
kw_in = &IdentTable.get("in");
+ kw_of = &IdentTable.get("of");
kw_CF_ENUM = &IdentTable.get("CF_ENUM");
kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
kw_NS_ENUM = &IdentTable.get("NS_ENUM");
kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
+ kw_as = &IdentTable.get("as");
+ kw_async = &IdentTable.get("async");
+ kw_await = &IdentTable.get("await");
kw_finally = &IdentTable.get("finally");
+ kw_from = &IdentTable.get("from");
kw_function = &IdentTable.get("function");
kw_import = &IdentTable.get("import");
kw_is = &IdentTable.get("is");
kw_let = &IdentTable.get("let");
+ kw_type = &IdentTable.get("type");
kw_var = &IdentTable.get("var");
+ kw_yield = &IdentTable.get("yield");
kw_abstract = &IdentTable.get("abstract");
kw_assert = &IdentTable.get("assert");
@@ -571,6 +621,7 @@ struct AdditionalKeywords {
IdentifierInfo *kw_final;
IdentifierInfo *kw_override;
IdentifierInfo *kw_in;
+ IdentifierInfo *kw_of;
IdentifierInfo *kw_CF_ENUM;
IdentifierInfo *kw_CF_OPTIONS;
IdentifierInfo *kw_NS_ENUM;
@@ -578,12 +629,18 @@ struct AdditionalKeywords {
IdentifierInfo *kw___except;
// JavaScript keywords.
+ IdentifierInfo *kw_as;
+ IdentifierInfo *kw_async;
+ IdentifierInfo *kw_await;
IdentifierInfo *kw_finally;
+ IdentifierInfo *kw_from;
IdentifierInfo *kw_function;
IdentifierInfo *kw_import;
IdentifierInfo *kw_is;
IdentifierInfo *kw_let;
+ IdentifierInfo *kw_type;
IdentifierInfo *kw_var;
+ IdentifierInfo *kw_yield;
// Java keywords.
IdentifierInfo *kw_abstract;
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp
new file mode 100644
index 000000000000..9778f84732d6
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp
@@ -0,0 +1,597 @@
+//===--- FormatTokenLexer.cpp - Lex FormatTokens ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements FormatTokenLexer, which tokenizes a source file
+/// into a FormatToken stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatTokenLexer.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+namespace clang {
+namespace format {
+
+FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ const FormatStyle &Style,
+ encoding::Encoding Encoding)
+ : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
+ LessStashed(false), Column(0), TrailingWhitespace(0),
+ SourceMgr(SourceMgr), ID(ID), Style(Style),
+ IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
+ Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
+ MacroBlockBeginRegex(Style.MacroBlockBegin),
+ MacroBlockEndRegex(Style.MacroBlockEnd) {
+ Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
+ getFormattingLangOpts(Style)));
+ Lex->SetKeepWhitespaceMode(true);
+
+ for (const std::string &ForEachMacro : Style.ForEachMacros)
+ ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
+ std::sort(ForEachMacros.begin(), ForEachMacros.end());
+}
+
+ArrayRef<FormatToken *> FormatTokenLexer::lex() {
+ assert(Tokens.empty());
+ assert(FirstInLineIndex == 0);
+ do {
+ Tokens.push_back(getNextToken());
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ tryParseJSRegexLiteral();
+ tryParseTemplateString();
+ }
+ tryMergePreviousTokens();
+ if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
+ FirstInLineIndex = Tokens.size() - 1;
+ } while (Tokens.back()->Tok.isNot(tok::eof));
+ return Tokens;
+}
+
+void FormatTokenLexer::tryMergePreviousTokens() {
+ if (tryMerge_TMacro())
+ return;
+ if (tryMergeConflictMarkers())
+ return;
+ if (tryMergeLessLess())
+ return;
+
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
+ static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
+ tok::equal};
+ static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
+ tok::greaterequal};
+ static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
+ // FIXME: Investigate what token type gives the correct operator priority.
+ if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
+ return;
+ }
+}
+
+bool FormatTokenLexer::tryMergeLessLess() {
+ // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
+ if (Tokens.size() < 3)
+ return false;
+
+ bool FourthTokenIsLess = false;
+ if (Tokens.size() > 3)
+ FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
+
+ auto First = Tokens.end() - 3;
+ if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
+ First[0]->isNot(tok::less) || FourthTokenIsLess)
+ return false;
+
+ // Only merge if there currently is no whitespace between the two "<".
+ if (First[1]->WhitespaceRange.getBegin() !=
+ First[1]->WhitespaceRange.getEnd())
+ return false;
+
+ First[0]->Tok.setKind(tok::lessless);
+ First[0]->TokenText = "<<";
+ First[0]->ColumnWidth += 1;
+ Tokens.erase(Tokens.end() - 2);
+ return true;
+}
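The guards above keep the merge conservative (illustrative cases):

    //   a < < b     ->  a << b      // re-merged after earlier stashing
    //   a < < < b   stays as-is     // runs of three or more '<' are never
    //                               // merged, so stashed template closers
    //                               // are left alone
    //   a <  < b    stays as-is     // whitespace between the two '<' blocks
    //                               // the merge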
+
+bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
+ TokenType NewType) {
+ if (Tokens.size() < Kinds.size())
+ return false;
+
+ SmallVectorImpl<FormatToken *>::const_iterator First =
+ Tokens.end() - Kinds.size();
+ if (!First[0]->is(Kinds[0]))
+ return false;
+ unsigned AddLength = 0;
+ for (unsigned i = 1; i < Kinds.size(); ++i) {
+ if (!First[i]->is(Kinds[i]) ||
+ First[i]->WhitespaceRange.getBegin() !=
+ First[i]->WhitespaceRange.getEnd())
+ return false;
+ AddLength += First[i]->TokenText.size();
+ }
+ Tokens.resize(Tokens.size() - Kinds.size() + 1);
+ First[0]->TokenText = StringRef(First[0]->TokenText.data(),
+ First[0]->TokenText.size() + AddLength);
+ First[0]->ColumnWidth += AddLength;
+ First[0]->Type = NewType;
+ return true;
+}
+
+// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
+ // NB: This is not entirely correct, as an r_paren can introduce an operand
+ // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
+ // corner case to not matter in practice, though.
+ return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
+ tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
+ tok::colon, tok::question, tok::tilde) ||
+ Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
+ tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
+ tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
+ Tok->isBinaryOperator();
+}
+
+bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
+ if (!Prev)
+ return true;
+
+ // Regex literals can only follow after prefix unary operators, not after
+ // postfix unary operators. If the '++' is followed by a non-operand
+ // introducing token, the slash here is the operand and not the start of a
+ // regex.
+ if (Prev->isOneOf(tok::plusplus, tok::minusminus))
+ return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
+
+ // The previous token must introduce an operand location where regex
+ // literals can occur.
+ if (!precedesOperand(Prev))
+ return false;
+
+ return true;
+}
+
+// Tries to parse a JavaScript Regex literal starting at the current token,
+// if that begins with a slash and is in a location where JavaScript allows
+// regex literals. Changes the current token to a regex literal and updates
+// its text if successful.
+void FormatTokenLexer::tryParseJSRegexLiteral() {
+ FormatToken *RegexToken = Tokens.back();
+ if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
+ return;
+
+ FormatToken *Prev = nullptr;
+ for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
+ // NB: Because previous pointers are not initialized yet, this cannot use
+ // Token.getPreviousNonComment.
+ if ((*I)->isNot(tok::comment)) {
+ Prev = *I;
+ break;
+ }
+ }
+
+ if (!canPrecedeRegexLiteral(Prev))
+ return;
+
+ // 'Manually' lex ahead in the current file buffer.
+ const char *Offset = Lex->getBufferLocation();
+ const char *RegexBegin = Offset - RegexToken->TokenText.size();
+ StringRef Buffer = Lex->getBuffer();
+ bool InCharacterClass = false;
+ bool HaveClosingSlash = false;
+ for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
+ // Regular expressions are terminated with a '/', which can only be
+ // escaped using '\' or a character class between '[' and ']'.
+ // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
+ switch (*Offset) {
+ case '\\':
+ // Skip the escaped character.
+ ++Offset;
+ break;
+ case '[':
+ InCharacterClass = true;
+ break;
+ case ']':
+ InCharacterClass = false;
+ break;
+ case '/':
+ if (!InCharacterClass)
+ HaveClosingSlash = true;
+ break;
+ }
+ }
+
+ RegexToken->Type = TT_RegexLiteral;
+ // Treat regex literals like other string_literals.
+ RegexToken->Tok.setKind(tok::string_literal);
+ RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
+ RegexToken->ColumnWidth = RegexToken->TokenText.size();
+
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+}
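This decision resolves JavaScript's lexical ambiguity between division and a
regex literal (illustrative cases; the r_paren case is the documented false
negative above):

    //   x = /abc/.test(s);        // '=' precedes an operand -> regex literal
    //   x = a / b / c;            // identifiers precede      -> division
    //   if (cond) /abc/.exec(s);  // r_paren: rare corner case, mis-lexed as
    //                             // division (see NB in precedesOperand)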
+
+void FormatTokenLexer::tryParseTemplateString() {
+ FormatToken *BacktickToken = Tokens.back();
+ if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
+ return;
+
+ // 'Manually' lex ahead in the current file buffer.
+ const char *Offset = Lex->getBufferLocation();
+ const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
+ for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
+ if (*Offset == '\\')
+ ++Offset; // Skip the escaped character.
+ }
+
+ StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
+ BacktickToken->Type = TT_TemplateString;
+ BacktickToken->Tok.setKind(tok::string_literal);
+ BacktickToken->TokenText = LiteralText;
+
+ // Adjust width for potentially multiline string literals.
+ size_t FirstBreak = LiteralText.find('\n');
+ StringRef FirstLineText = FirstBreak == StringRef::npos
+ ? LiteralText
+ : LiteralText.substr(0, FirstBreak);
+ BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
+ FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
+ size_t LastBreak = LiteralText.rfind('\n');
+ if (LastBreak != StringRef::npos) {
+ BacktickToken->IsMultiline = true;
+ unsigned StartColumn = 0; // The template tail spans the entire line.
+ BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
+ Style.TabWidth, Encoding);
+ }
+
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+}
+
+bool FormatTokenLexer::tryMerge_TMacro() {
+ if (Tokens.size() < 4)
+ return false;
+ FormatToken *Last = Tokens.back();
+ if (!Last->is(tok::r_paren))
+ return false;
+
+ FormatToken *String = Tokens[Tokens.size() - 2];
+ if (!String->is(tok::string_literal) || String->IsMultiline)
+ return false;
+
+ if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
+ return false;
+
+ FormatToken *Macro = Tokens[Tokens.size() - 4];
+ if (Macro->TokenText != "_T")
+ return false;
+
+ const char *Start = Macro->TokenText.data();
+ const char *End = Last->TokenText.data() + Last->TokenText.size();
+ String->TokenText = StringRef(Start, End - Start);
+ String->IsFirst = Macro->IsFirst;
+ String->LastNewlineOffset = Macro->LastNewlineOffset;
+ String->WhitespaceRange = Macro->WhitespaceRange;
+ String->OriginalColumn = Macro->OriginalColumn;
+ String->ColumnWidth = encoding::columnWidthWithTabs(
+ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+ String->NewlinesBefore = Macro->NewlinesBefore;
+ String->HasUnescapedNewline = Macro->HasUnescapedNewline;
+
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.back() = String;
+ return true;
+}
+
+bool FormatTokenLexer::tryMergeConflictMarkers() {
+ if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
+ return false;
+
+ // Conflict lines look like:
+ // <marker> <text from the vcs>
+ // For example:
+ // >>>>>>> /file/in/file/system at revision 1234
+ //
+ // We merge all tokens in a line that starts with a conflict marker
+ // into a single token with a special token type that the unwrapped line
+ // parser will use to correctly rebuild the underlying code.
+
+ FileID ID;
+ // Get the position of the first token in the line.
+ unsigned FirstInLineOffset;
+ std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
+ Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
+ StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
+ // Calculate the offset of the start of the current line.
+ auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
+ if (LineOffset == StringRef::npos) {
+ LineOffset = 0;
+ } else {
+ ++LineOffset;
+ }
+
+ auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
+ StringRef LineStart;
+ if (FirstSpace == StringRef::npos) {
+ LineStart = Buffer.substr(LineOffset);
+ } else {
+ LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
+ }
+
+ TokenType Type = TT_Unknown;
+ if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
+ Type = TT_ConflictStart;
+ } else if (LineStart == "|||||||" || LineStart == "=======" ||
+ LineStart == "====") {
+ Type = TT_ConflictAlternative;
+ } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
+ Type = TT_ConflictEnd;
+ }
+
+ if (Type != TT_Unknown) {
+ FormatToken *Next = Tokens.back();
+
+ Tokens.resize(FirstInLineIndex + 1);
+ // We do not need to build a complete token here, as we will skip it
+ // during parsing anyway (as we must not touch whitespace around conflict
+ // markers).
+ Tokens.back()->Type = Type;
+ Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
+
+ Tokens.push_back(Next);
+ return true;
+ }
+
+ return false;
+}
+
+FormatToken *FormatTokenLexer::getStashedToken() {
+ // Create a synthesized second '>' or '<' token.
+ Token Tok = FormatTok->Tok;
+ StringRef TokenText = FormatTok->TokenText;
+
+ unsigned OriginalColumn = FormatTok->OriginalColumn;
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ FormatTok->Tok = Tok;
+ SourceLocation TokLocation =
+ FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
+ FormatTok->Tok.setLocation(TokLocation);
+ FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
+ FormatTok->TokenText = TokenText;
+ FormatTok->ColumnWidth = 1;
+ FormatTok->OriginalColumn = OriginalColumn + 1;
+
+ return FormatTok;
+}
+
+FormatToken *FormatTokenLexer::getNextToken() {
+ if (GreaterStashed) {
+ GreaterStashed = false;
+ return getStashedToken();
+ }
+ if (LessStashed) {
+ LessStashed = false;
+ return getStashedToken();
+ }
+
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ readRawToken(*FormatTok);
+ SourceLocation WhitespaceStart =
+ FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
+ FormatTok->IsFirst = IsFirstToken;
+ IsFirstToken = false;
+
+ // Consume and record whitespace until we find a significant token.
+ unsigned WhitespaceLength = TrailingWhitespace;
+ while (FormatTok->Tok.is(tok::unknown)) {
+ StringRef Text = FormatTok->TokenText;
+ auto EscapesNewline = [&](int pos) {
+ // A '\r' here is just part of '\r\n'. Skip it.
+ if (pos >= 0 && Text[pos] == '\r')
+ --pos;
+ // See whether there is an odd number of '\' before this.
+ unsigned count = 0;
+ for (; pos >= 0; --pos, ++count)
+ if (Text[pos] != '\\')
+ break;
+ return count & 1;
+ };
+ // FIXME: This miscounts tok::unknown tokens that are not just
+ // whitespace, e.g. a '`' character.
+ for (int i = 0, e = Text.size(); i != e; ++i) {
+ switch (Text[i]) {
+ case '\n':
+ ++FormatTok->NewlinesBefore;
+ FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\r':
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\f':
+ case '\v':
+ Column = 0;
+ break;
+ case ' ':
+ ++Column;
+ break;
+ case '\t':
+ Column += Style.TabWidth - Column % Style.TabWidth;
+ break;
+ case '\\':
+ if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ default:
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ }
+ if (FormatTok->Type == TT_ImplicitStringLiteral)
+ break;
+ }
+
+ if (FormatTok->is(TT_ImplicitStringLiteral))
+ break;
+ WhitespaceLength += FormatTok->Tok.getLength();
+
+ readRawToken(*FormatTok);
+ }
+
+ // In case the token starts with escaped newlines, we want to
+ // take them into account as whitespace - this pattern is quite frequent
+ // in macro definitions.
+ // FIXME: Add a more explicit test.
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+ FormatTok->TokenText[1] == '\n') {
+ ++FormatTok->NewlinesBefore;
+ WhitespaceLength += 2;
+ FormatTok->LastNewlineOffset = 2;
+ Column = 0;
+ FormatTok->TokenText = FormatTok->TokenText.substr(2);
+ }
+
+ FormatTok->WhitespaceRange = SourceRange(
+ WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+
+ FormatTok->OriginalColumn = Column;
+
+ TrailingWhitespace = 0;
+ if (FormatTok->Tok.is(tok::comment)) {
+ // FIXME: Add the trimmed whitespace to Column.
+ StringRef UntrimmedText = FormatTok->TokenText;
+ FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
+ TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
+ } else if (FormatTok->Tok.is(tok::raw_identifier)) {
+ IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
+ FormatTok->Tok.setIdentifierInfo(&Info);
+ FormatTok->Tok.setKind(Info.getTokenID());
+ if (Style.Language == FormatStyle::LK_Java &&
+ FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
+ tok::kw_operator)) {
+ FormatTok->Tok.setKind(tok::identifier);
+ FormatTok->Tok.setIdentifierInfo(nullptr);
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
+ tok::kw_operator)) {
+ FormatTok->Tok.setKind(tok::identifier);
+ FormatTok->Tok.setIdentifierInfo(nullptr);
+ }
+ } else if (FormatTok->Tok.is(tok::greatergreater)) {
+ FormatTok->Tok.setKind(tok::greater);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ GreaterStashed = true;
+ } else if (FormatTok->Tok.is(tok::lessless)) {
+ FormatTok->Tok.setKind(tok::less);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ LessStashed = true;
+ }
+
+ // Now FormatTok is the next non-whitespace token.
+
+ StringRef Text = FormatTok->TokenText;
+ size_t FirstNewlinePos = Text.find('\n');
+ if (FirstNewlinePos == StringRef::npos) {
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth =
+ encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
+ Column += FormatTok->ColumnWidth;
+ } else {
+ FormatTok->IsMultiline = true;
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
+
+ // The last line of the token always starts in column 0.
+ // Thus, the length can be precomputed even in the presence of tabs.
+ FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
+ Column = FormatTok->LastLineColumnWidth;
+ }
+
+ if (Style.Language == FormatStyle::LK_Cpp) {
+ if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
+ Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp_define) &&
+ std::find(ForEachMacros.begin(), ForEachMacros.end(),
+ FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
+ FormatTok->Type = TT_ForEachMacro;
+ } else if (FormatTok->is(tok::identifier)) {
+ if (MacroBlockBeginRegex.match(Text)) {
+ FormatTok->Type = TT_MacroBlockBegin;
+ } else if (MacroBlockEndRegex.match(Text)) {
+ FormatTok->Type = TT_MacroBlockEnd;
+ }
+ }
+ }
+
+ return FormatTok;
+}
+
+void FormatTokenLexer::readRawToken(FormatToken &Tok) {
+ Lex->LexFromRawLexer(Tok.Tok);
+ Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+ Tok.Tok.getLength());
+ // For formatting, treat unterminated string literals like normal string
+ // literals.
+ if (Tok.is(tok::unknown)) {
+ if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
+ Tok.Tok.setKind(tok::string_literal);
+ Tok.IsUnterminatedLiteral = true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.TokenText == "''") {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+ }
+
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.is(tok::char_constant)) {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+
+ if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
+ Tok.TokenText == "/* clang-format on */")) {
+ FormattingDisabled = false;
+ }
+
+ Tok.Finalized = FormattingDisabled;
+
+ if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
+ Tok.TokenText == "/* clang-format off */")) {
+ FormattingDisabled = true;
+ }
+}
+
+void FormatTokenLexer::resetLexer(unsigned Offset) {
+ StringRef Buffer = SourceMgr.getBufferData(ID);
+ Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
+ getFormattingLangOpts(Style), Buffer.begin(),
+ Buffer.begin() + Offset, Buffer.end()));
+ Lex->SetKeepWhitespaceMode(true);
+ TrailingWhitespace = 0;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h
new file mode 100644
index 000000000000..fa8c8882574f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h
@@ -0,0 +1,97 @@
+//===--- FormatTokenLexer.h - Format C++ code -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains FormatTokenLexer, which tokenizes a source file
+/// into a token stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+namespace clang {
+namespace format {
+
+class FormatTokenLexer {
+public:
+ FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ const FormatStyle &Style, encoding::Encoding Encoding);
+
+ ArrayRef<FormatToken *> lex();
+
+ const AdditionalKeywords &getKeywords() { return Keywords; }
+
+private:
+ void tryMergePreviousTokens();
+
+ bool tryMergeLessLess();
+
+ bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
+
+ // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+ bool precedesOperand(FormatToken *Tok);
+
+ bool canPrecedeRegexLiteral(FormatToken *Prev);
+
+ // Tries to parse a JavaScript Regex literal starting at the current token,
+ // if that begins with a slash and is in a location where JavaScript allows
+ // regex literals. Changes the current token to a regex literal and updates
+ // its text if successful.
+ void tryParseJSRegexLiteral();
+
+ void tryParseTemplateString();
+
+ bool tryMerge_TMacro();
+
+ bool tryMergeConflictMarkers();
+
+ FormatToken *getStashedToken();
+
+ FormatToken *getNextToken();
+
+ FormatToken *FormatTok;
+ bool IsFirstToken;
+ bool GreaterStashed, LessStashed;
+ unsigned Column;
+ unsigned TrailingWhitespace;
+ std::unique_ptr<Lexer> Lex;
+ const SourceManager &SourceMgr;
+ FileID ID;
+ const FormatStyle &Style;
+ IdentifierTable IdentTable;
+ AdditionalKeywords Keywords;
+ encoding::Encoding Encoding;
+ llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+ // Index (in 'Tokens') of the last token that starts a new line.
+ unsigned FirstInLineIndex;
+ SmallVector<FormatToken *, 16> Tokens;
+ SmallVector<IdentifierInfo *, 8> ForEachMacros;
+
+ bool FormattingDisabled;
+
+ llvm::Regex MacroBlockBeginRegex;
+ llvm::Regex MacroBlockEndRegex;
+
+ void readRawToken(FormatToken &Tok);
+
+ void resetLexer(unsigned Offset);
+};
+
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp
new file mode 100644
index 000000000000..32d5d756a3f0
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp
@@ -0,0 +1,442 @@
+//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a sort operation for JavaScript ES6 imports.
+///
+//===----------------------------------------------------------------------===//
+
+#include "SortJavaScriptImports.h"
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
+#include "TokenAnnotator.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <string>
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+class FormatTokenLexer;
+
+using clang::format::FormatStyle;
+
+// An imported symbol in a JavaScript ES6 import/export, possibly aliased.
+struct JsImportedSymbol {
+ StringRef Symbol;
+ StringRef Alias;
+ SourceRange Range;
+
+ bool operator==(const JsImportedSymbol &RHS) const {
+ // Ignore Range for comparison, it is only used to stitch code together,
+ // but imports at different code locations are still conceptually the same.
+ return Symbol == RHS.Symbol && Alias == RHS.Alias;
+ }
+};
+
+// An ES6 module reference.
+//
+// ES6 implements a module system, where individual modules (~= source files)
+// can reference other modules, either importing symbols from them, or exporting
+// symbols from them:
+// import {foo} from 'foo';
+// export {foo};
+// export {bar} from 'bar';
+//
+// `export`s with URLs are syntactic sugar for an import of the symbol from the
+// URL, followed by an export of the symbol, allowing this code to treat both
+// statements more or less identically, with the exception being that `export`s
+// are sorted last.
+//
+// imports and exports support individual symbols, but also a wildcard syntax:
+// import * as prefix from 'foo';
+// export * from 'bar';
+//
+// This struct represents both exports and imports to build up the information
+// required for sorting module references.
+struct JsModuleReference {
+ bool IsExport = false;
+ // Module references are sorted into these categories, in order.
+ enum ReferenceCategory {
+ SIDE_EFFECT, // "import 'something';"
+ ABSOLUTE, // from 'something'
+ RELATIVE_PARENT, // from '../*'
+ RELATIVE, // from './*'
+ };
+ ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
+ // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
+ StringRef URL;
+ // Prefix from "import * as prefix". Empty for symbol imports and `export *`.
+ // Implies an empty names list.
+ StringRef Prefix;
+ // Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
+ SmallVector<JsImportedSymbol, 1> Symbols;
+ // Textual position of the import/export, including preceding and trailing
+ // comments.
+ SourceRange Range;
+};
+
+bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
+ if (LHS.IsExport != RHS.IsExport)
+ return LHS.IsExport < RHS.IsExport;
+ if (LHS.Category != RHS.Category)
+ return LHS.Category < RHS.Category;
+ if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
+ // Side effect imports might be ordering sensitive. Consider them equal so
+ // that they maintain their relative order in the stable sort below.
+ // This retains transitivity because LHS.Category == RHS.Category here.
+ return false;
+ // Empty URLs sort *last* (for export {...};).
+ if (LHS.URL.empty() != RHS.URL.empty())
+ return LHS.URL.empty() < RHS.URL.empty();
+ if (int Res = LHS.URL.compare_lower(RHS.URL))
+ return Res < 0;
+ // '*' imports (with prefix) sort before {a, b, ...} imports.
+ if (LHS.Prefix.empty() != RHS.Prefix.empty())
+ return LHS.Prefix.empty() < RHS.Prefix.empty();
+ if (LHS.Prefix != RHS.Prefix)
+ return LHS.Prefix > RHS.Prefix;
+ return false;
+}
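Given this comparison, a mixed block sorts as follows (illustrative):

    //   import 'polyfill';            // side-effect imports first, kept in
    //                                 // their original relative order
    //   import {a} from 'absolute';   // ABSOLUTE
    //   import {b} from '../parent';  // RELATIVE_PARENT
    //   import {c} from './sibling';  // RELATIVE
    //   export {d} from './sibling';  // exports sort after all imports
    //   export {e};                   // empty URL sorts last among exports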
+
+// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
+// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
+// structure, making it messy to sort them using regular expressions.
+class JavaScriptImportSorter : public TokenAnalyzer {
+public:
+ JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style),
+ FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens, tooling::Replacements &Result) override {
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+
+ const AdditionalKeywords &Keywords = Tokens.getKeywords();
+ SmallVector<JsModuleReference, 16> References;
+ AnnotatedLine *FirstNonImportLine;
+ std::tie(References, FirstNonImportLine) =
+ parseModuleReferences(Keywords, AnnotatedLines);
+
+ if (References.empty())
+ return Result;
+
+ SmallVector<unsigned, 16> Indices;
+ for (unsigned i = 0, e = References.size(); i != e; ++i)
+ Indices.push_back(i);
+ std::stable_sort(Indices.begin(), Indices.end(),
+ [&](unsigned LHSI, unsigned RHSI) {
+ return References[LHSI] < References[RHSI];
+ });
+ bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end());
+
+ std::string ReferencesText;
+ bool SymbolsInOrder = true;
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ JsModuleReference Reference = References[Indices[i]];
+ if (appendReference(ReferencesText, Reference))
+ SymbolsInOrder = false;
+ if (i + 1 < e) {
+ // Insert breaks between imports and exports.
+ ReferencesText += "\n";
+ // Separate imports groups with two line breaks, but keep all exports
+ // in a single group.
+ if (!Reference.IsExport &&
+ (Reference.IsExport != References[Indices[i + 1]].IsExport ||
+ Reference.Category != References[Indices[i + 1]].Category))
+ ReferencesText += "\n";
+ }
+ }
+
+ if (ReferencesInOrder && SymbolsInOrder)
+ return Result;
+
+ SourceRange InsertionPoint = References[0].Range;
+ InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
+
+ // The loop above might collapse previously existing line breaks between
+ // import blocks, and thus shrink the file. SortIncludes must not shrink
+ // overall source length as there is currently no re-calculation of ranges
+ // after applying source sorting.
+ // This loop just backfills trailing spaces after the imports, which are
+ // harmless and will be stripped by the subsequent formatting pass.
+ // FIXME: A better long term fix is to re-calculate Ranges after sorting.
+ unsigned PreviousSize = getSourceText(InsertionPoint).size();
+ while (ReferencesText.size() < PreviousSize) {
+ ReferencesText += " ";
+ }
+
+ // Separate references from the main code body of the file.
+ if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
+ ReferencesText += "\n";
+
+ DEBUG(llvm::dbgs() << "Replacing imports:\n"
+ << getSourceText(InsertionPoint) << "\nwith:\n"
+ << ReferencesText << "\n");
+ Result.insert(tooling::Replacement(
+ Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
+ ReferencesText));
+
+ return Result;
+ }
+
+private:
+ FormatToken *Current;
+ FormatToken *LineEnd;
+
+ FormatToken invalidToken;
+
+ StringRef FileContents;
+
+ void skipComments() { Current = skipComments(Current); }
+
+ FormatToken *skipComments(FormatToken *Tok) {
+ while (Tok && Tok->is(tok::comment))
+ Tok = Tok->Next;
+ return Tok;
+ }
+
+ void nextToken() {
+ Current = Current->Next;
+ skipComments();
+ if (!Current || Current == LineEnd->Next) {
+ // Set the current token to an invalid token, so that further parsing on
+ // this line fails.
+ invalidToken.Tok.setKind(tok::unknown);
+ Current = &invalidToken;
+ }
+ }
+
+ StringRef getSourceText(SourceRange Range) {
+ return getSourceText(Range.getBegin(), Range.getEnd());
+ }
+
+ StringRef getSourceText(SourceLocation Begin, SourceLocation End) {
+ const SourceManager &SM = Env.getSourceManager();
+ return FileContents.substr(SM.getFileOffset(Begin),
+ SM.getFileOffset(End) - SM.getFileOffset(Begin));
+ }
+
+ // Appends ``Reference`` to ``Buffer``, returning true if text within the
+ // ``Reference`` changed (e.g. symbol order).
+ bool appendReference(std::string &Buffer, JsModuleReference &Reference) {
+ // Sort the individual symbols within the import.
+ // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
+ SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
+ std::stable_sort(
+ Symbols.begin(), Symbols.end(),
+ [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) {
+ return LHS.Symbol.compare_lower(RHS.Symbol) < 0;
+ });
+ if (Symbols == Reference.Symbols) {
+ // No change in symbol order.
+ StringRef ReferenceStmt = getSourceText(Reference.Range);
+ Buffer += ReferenceStmt;
+ return false;
+ }
+ // Stitch together the module reference start...
+ SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin();
+ SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd();
+ Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart);
+ // ... then the references in order ...
+ for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
+ if (I != Symbols.begin())
+ Buffer += ",";
+ Buffer += getSourceText(I->Range);
+ }
+ // ... followed by the module reference end.
+ Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd());
+ return true;
+ }
+
+ // Parses module references in the given lines. Returns the module references,
+ // and a pointer to the first "main code" line if that is adjacent to the
+ // affected lines of module references, nullptr otherwise.
+ std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine*>
+ parseModuleReferences(const AdditionalKeywords &Keywords,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ SmallVector<JsModuleReference, 16> References;
+ SourceLocation Start;
+ bool FoundLines = false;
+ AnnotatedLine *FirstNonImportLine = nullptr;
+ for (auto Line : AnnotatedLines) {
+ if (!Line->Affected) {
+ // Only sort the first contiguous block of affected lines.
+ if (FoundLines)
+ break;
+ else
+ continue;
+ }
+ Current = Line->First;
+ LineEnd = Line->Last;
+ skipComments();
+ if (Start.isInvalid() || References.empty())
+ // After the first file level comment, consider line comments to be part
+ // of the import that immediately follows them by using the previously
+ // set Start.
+ Start = Line->First->Tok.getLocation();
+ if (!Current)
+ continue; // Only comments on this line.
+ FoundLines = true;
+ JsModuleReference Reference;
+ Reference.Range.setBegin(Start);
+ if (!parseModuleReference(Keywords, Reference)) {
+ FirstNonImportLine = Line;
+ break;
+ }
+ Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
+ DEBUG({
+ llvm::dbgs() << "JsModuleReference: {"
+ << "is_export: " << Reference.IsExport
+ << ", cat: " << Reference.Category
+ << ", url: " << Reference.URL
+ << ", prefix: " << Reference.Prefix;
+ for (size_t i = 0; i < Reference.Symbols.size(); ++i)
+ llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
+ << Reference.Symbols[i].Alias;
+ llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
+ llvm::dbgs() << "}\n";
+ });
+ References.push_back(Reference);
+ Start = SourceLocation();
+ }
+ return std::make_pair(References, FirstNonImportLine);
+ }
+
+ // Parses a JavaScript/ECMAScript 6 module reference.
+ // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
+ // for grammar EBNF (production ModuleItem).
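+ // A few illustrative forms this parser accepts (examples, not exhaustive):
+ //   import 'url';                      (side-effect import)
+ //   import * as prefix from 'url';     (star binding)
+ //   import {sym as alias} from 'url';  (named bindings)
+ //   export {sym} from 'url';           (re-export)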
+ bool parseModuleReference(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
+ return false;
+ Reference.IsExport = Current->is(tok::kw_export);
+
+ nextToken();
+ if (Current->isStringLiteral() && !Reference.IsExport) {
+ // "import 'side-effect';"
+ Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
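+ // Strip the surrounding quotes to extract the URL.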
+ Reference.URL =
+ Current->TokenText.substr(1, Current->TokenText.size() - 2);
+ return true;
+ }
+
+ if (!parseModuleBindings(Keywords, Reference))
+ return false;
+ nextToken();
+
+ if (Current->is(Keywords.kw_from)) {
+ // imports have a 'from' clause, exports might not.
+ nextToken();
+ if (!Current->isStringLiteral())
+ return false;
+ // URL = TokenText without the quotes.
+ Reference.URL =
+ Current->TokenText.substr(1, Current->TokenText.size() - 2);
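+ // Classify by path prefix, e.g. '../x' -> RELATIVE_PARENT,
+ // './x' -> RELATIVE, 'x' -> ABSOLUTE.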
+ if (Reference.URL.startswith(".."))
+ Reference.Category =
+ JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
+ else if (Reference.URL.startswith("."))
+ Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
+ else
+ Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
+ } else {
+ // References without a URL (e.g. plain "export {x};") group with the
+ // relative imports.
+ Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
+ }
+ return true;
+ }
+
+ bool parseModuleBindings(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (parseStarBinding(Keywords, Reference))
+ return true;
+ return parseNamedBindings(Keywords, Reference);
+ }
+
+ bool parseStarBinding(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ // * as prefix from '...';
+ if (Current->isNot(tok::star))
+ return false;
+ nextToken();
+ if (Current->isNot(Keywords.kw_as))
+ return false;
+ nextToken();
+ if (Current->isNot(tok::identifier))
+ return false;
+ Reference.Prefix = Current->TokenText;
+ return true;
+ }
+
+ bool parseNamedBindings(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (Current->isNot(tok::l_brace))
+ return false;
+
+ // {sym as alias, sym2 as ...} from '...';
+ nextToken();
+ while (true) {
+ if (Current->is(tok::r_brace))
+ return true;
+ if (Current->isNot(tok::identifier))
+ return false;
+
+ JsImportedSymbol Symbol;
+ Symbol.Symbol = Current->TokenText;
+ // Make sure to include any preceding comments.
+ Symbol.Range.setBegin(
+ Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin());
+ nextToken();
+
+ if (Current->is(Keywords.kw_as)) {
+ nextToken();
+ if (Current->isNot(tok::identifier))
+ return false;
+ Symbol.Alias = Current->TokenText;
+ nextToken();
+ }
+ Symbol.Range.setEnd(Current->Tok.getLocation());
+ Reference.Symbols.push_back(Symbol);
+
+ if (Current->is(tok::r_brace))
+ return true;
+ if (Current->isNot(tok::comma))
+ return false;
+ nextToken();
+ }
+ }
+};
+
+tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ // FIXME: Cursor support.
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ JavaScriptImportSorter Sorter(*Env, Style);
+ return Sorter.process();
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h
new file mode 100644
index 000000000000..f22a051008f0
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h
@@ -0,0 +1,36 @@
+//===--- SortJavaScriptImports.h - Sort ES6 Imports -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a sorter for JavaScript ES6 imports.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
+#define LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
+
+#include "clang/Basic/LLVM.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+// Sort JavaScript ES6 imports/exports in ``Code``. The generated replacements
+// only monotonically increase the length of the given code.
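+// For example, given "import {b} from './b'; import {a} from './a';" on
+// separate lines, the two statements are reordered so './a' comes first.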
+tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName);
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp
new file mode 100644
index 000000000000..89ac35f3e842
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp
@@ -0,0 +1,138 @@
+//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an abstract TokenAnalyzer and associated helper
+/// classes. TokenAnalyzer can be extended to generate replacements based on
+/// an annotated and pre-processed token stream.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TokenAnalyzer.h"
+#include "AffectedRangeManager.h"
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+// This sets up a virtual file system with a file \p FileName containing
+// \p Code.
+std::unique_ptr<Environment>
+Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges) {
+ // This is referenced by `FileMgr` and will be released by `FileMgr` when it
+ // is deleted.
+ IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+ new vfs::InMemoryFileSystem);
+ // This is passed to `SM` by reference, so the pointer has to be kept alive
+ // in `Environment` so that `FileMgr` outlives this function's scope.
+ std::unique_ptr<FileManager> FileMgr(
+ new FileManager(FileSystemOptions(), InMemoryFileSystem));
+ // This is passed to `SM` by reference, so the pointer has to be kept alive
+ // in `Environment` for the same reason as above.
+ std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
+ IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
+ new DiagnosticOptions));
+ // This will be stored by reference, so the pointer has to be stored in
+ // `Environment` for the same reason as above.
+ std::unique_ptr<SourceManager> VirtualSM(
+ new SourceManager(*Diagnostics, *FileMgr));
+ InMemoryFileSystem->addFile(
+ FileName, 0, llvm::MemoryBuffer::getMemBuffer(
+ Code, FileName, /*RequiresNullTerminator=*/false));
+ FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
+ SourceLocation(), clang::SrcMgr::C_User);
+ assert(ID.isValid());
+ SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
+ std::vector<CharSourceRange> CharRanges;
+ for (const tooling::Range &Range : Ranges) {
+ SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+ SourceLocation End = Start.getLocWithOffset(Range.getLength());
+ CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+ }
+ return llvm::make_unique<Environment>(ID, std::move(FileMgr),
+ std::move(VirtualSM),
+ std::move(Diagnostics), CharRanges);
+}
+
+TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
+ : Style(Style), Env(Env),
+ AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
+ UnwrappedLines(1),
+ Encoding(encoding::detectEncoding(
+ Env.getSourceManager().getBufferData(Env.getFileID()))) {
+ DEBUG(
+ llvm::dbgs() << "File encoding: "
+ << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
+ << "\n");
+ DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+ << "\n");
+}
+
+tooling::Replacements TokenAnalyzer::process() {
+ tooling::Replacements Result;
+ FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
+ Encoding);
+
+ UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);
+ Parser.parse();
+ assert(UnwrappedLines.rbegin()->empty());
+ for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
+ DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+ SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+
+ TokenAnnotator Annotator(Style, Tokens.getKeywords());
+ for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+ AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+ Annotator.annotate(*AnnotatedLines.back());
+ }
+
+ tooling::Replacements RunResult =
+ analyze(Annotator, AnnotatedLines, Tokens, Result);
+
+ DEBUG({
+ llvm::dbgs() << "Replacements for run " << Run << ":\n";
+ for (tooling::Replacements::iterator I = RunResult.begin(),
+ E = RunResult.end();
+ I != E; ++I) {
+ llvm::dbgs() << I->toString() << "\n";
+ }
+ });
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ delete AnnotatedLines[i];
+ }
+ Result.insert(RunResult.begin(), RunResult.end());
+ }
+ return Result;
+}
+
+void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
+ assert(!UnwrappedLines.empty());
+ UnwrappedLines.back().push_back(TheLine);
+}
+
+void TokenAnalyzer::finishRun() {
+ UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h
new file mode 100644
index 000000000000..c1aa9c594fc3
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h
@@ -0,0 +1,108 @@
+//===--- TokenAnalyzer.h - Analyze Token Streams ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares an abstract TokenAnalyzer, and associated helper
+/// classes. TokenAnalyzer can be extended to generate replacements based on
+/// an annotated and pre-processed token stream.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
+#define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
+
+#include "AffectedRangeManager.h"
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+class Environment {
+public:
+ Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
+ : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
+
+ Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
+ std::unique_ptr<SourceManager> VirtualSM,
+ std::unique_ptr<DiagnosticsEngine> Diagnostics,
+ const std::vector<CharSourceRange> &CharRanges)
+ : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
+ SM(*VirtualSM), FileMgr(std::move(FileMgr)),
+ VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
+
+ // This sets up a virtual file system with a file \p FileName containing
+ // \p Code.
+ static std::unique_ptr<Environment>
+ CreateVirtualEnvironment(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges);
+
+ FileID getFileID() const { return ID; }
+
+ StringRef getFileName() const { return FileName; }
+
+ ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
+
+ const SourceManager &getSourceManager() const { return SM; }
+
+private:
+ FileID ID;
+ StringRef FileName;
+ SmallVector<CharSourceRange, 8> CharRanges;
+ SourceManager &SM;
+
+ // The order of these fields is important - they should be in the same order
+ // as they are created in `CreateVirtualEnvironment` so that they can be
+ // deleted in the reverse order of their creation.
+ std::unique_ptr<FileManager> FileMgr;
+ std::unique_ptr<SourceManager> VirtualSM;
+ std::unique_ptr<DiagnosticsEngine> Diagnostics;
+};
+
+class TokenAnalyzer : public UnwrappedLineConsumer {
+public:
+ TokenAnalyzer(const Environment &Env, const FormatStyle &Style);
+
+ tooling::Replacements process();
+
+protected:
+ virtual tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens, tooling::Replacements &Result) = 0;
+
+ void consumeUnwrappedLine(const UnwrappedLine &TheLine) override;
+
+ void finishRun() override;
+
+ FormatStyle Style;
+ // Stores Style, FileID and SourceManager etc.
+ const Environment &Env;
+ // AffectedRangeMgr stores ranges to be fixed.
+ AffectedRangeManager AffectedRangeMgr;
+ SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+ encoding::Encoding Encoding;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
index 8fbb43b7996d..4a90522e6e31 100644
--- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
@@ -42,11 +42,24 @@ public:
private:
bool parseAngle() {
- if (!CurrentToken)
+ if (!CurrentToken || !CurrentToken->Previous)
+ return false;
+ if (NonTemplateLess.count(CurrentToken->Previous))
return false;
+
+ const FormatToken& Previous = *CurrentToken->Previous;
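+ // A literal or a closing parenthesis right before '<' makes a comparison
+ // much more likely than a template opener, e.g. "1 < x" or "f(x) < y".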
+ if (Previous.Previous) {
+ if (Previous.Previous->Tok.isLiteral())
+ return false;
+ if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
+ (!Previous.Previous->MatchingParen ||
+ !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
+ return false;
+ }
+
FormatToken *Left = CurrentToken->Previous;
Left->ParentBracket = Contexts.back().ContextKind;
- ScopedContextCreator ContextCreator(*this, tok::less, 10);
+ ScopedContextCreator ContextCreator(*this, tok::less, 12);
// If this angle is in the context of an expression, we need to be more
// hesitant to detect it as opening template parameters.
@@ -121,6 +134,10 @@ private:
if (Left->is(TT_OverloadedOperatorLParen)) {
Contexts.back().IsExpression = false;
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ Line.startsWith(Keywords.kw_type, tok::identifier)) {
+ // type X = (...);
+ Contexts.back().IsExpression = false;
} else if (Left->Previous &&
(Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
tok::kw_if, tok::kw_while, tok::l_paren,
@@ -128,6 +145,16 @@ private:
Left->Previous->is(TT_BinaryOperator))) {
// static_assert, if and while usually contain expressions.
Contexts.back().IsExpression = true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
+ (Left->Previous->is(Keywords.kw_function) ||
+ (Left->Previous->endsSequence(tok::identifier,
+ Keywords.kw_function)))) {
+ // function(...) or function f(...)
+ Contexts.back().IsExpression = false;
+ } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
+ Left->Previous->is(TT_JsTypeColon)) {
+ // let x: (SomeType);
+ Contexts.back().IsExpression = false;
} else if (Left->Previous && Left->Previous->is(tok::r_square) &&
Left->Previous->MatchingParen &&
Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
@@ -159,8 +186,8 @@ private:
Left->Type = TT_ObjCMethodExpr;
}
- bool MightBeFunctionType = CurrentToken->isOneOf(tok::star, tok::amp) &&
- !Contexts[Contexts.size() - 2].IsExpression;
+ bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
+ bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp);
bool HasMultipleLines = false;
bool HasMultipleParametersOnALine = false;
bool MightBeObjCForRangeLoop =
@@ -187,14 +214,15 @@ private:
if (CurrentToken->Previous->is(TT_PointerOrReference) &&
CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
tok::coloncolon))
- MightBeFunctionType = true;
+ ProbablyFunctionType = true;
+ if (CurrentToken->is(tok::comma))
+ MightBeFunctionType = false;
if (CurrentToken->Previous->is(TT_BinaryOperator))
Contexts.back().IsExpression = true;
if (CurrentToken->is(tok::r_paren)) {
- if (MightBeFunctionType && CurrentToken->Next &&
+ if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
(CurrentToken->Next->is(tok::l_paren) ||
- (CurrentToken->Next->is(tok::l_square) &&
- Line.MustBeDeclaration)))
+ (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
Left->Type = TT_FunctionTypeLParen;
Left->MatchingParen = CurrentToken;
CurrentToken->MatchingParen = Left;
@@ -299,9 +327,9 @@ private:
Left->Type = TT_JsComputedPropertyName;
} else if (Style.Language == FormatStyle::LK_Proto ||
(Parent &&
- Parent->isOneOf(TT_BinaryOperator, tok::at, tok::comma,
- tok::l_paren, tok::l_square, tok::question,
- tok::colon, tok::kw_return,
+ Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
+ tok::comma, tok::l_paren, tok::l_square,
+ tok::question, tok::colon, tok::kw_return,
// Should only be relevant to JavaScript:
tok::kw_default))) {
Left->Type = TT_ArrayInitializerLSquare;
@@ -396,7 +424,8 @@ private:
(!Contexts.back().ColonIsDictLiteral ||
Style.Language != FormatStyle::LK_Cpp)) ||
Style.Language == FormatStyle::LK_Proto) &&
- Previous->Tok.getIdentifierInfo())
+ (Previous->Tok.getIdentifierInfo() ||
+ Previous->is(tok::string_literal)))
Previous->Type = TT_SelectorName;
if (CurrentToken->is(tok::colon) ||
Style.Language == FormatStyle::LK_JavaScript)
@@ -410,7 +439,7 @@ private:
}
void updateParameterCount(FormatToken *Left, FormatToken *Current) {
- if (Current->is(tok::l_brace) && !Current->is(TT_DictLiteral))
+ if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
++Left->BlockParameterCount;
if (Current->is(tok::comma)) {
++Left->ParameterCount;
@@ -491,7 +520,7 @@ private:
Tok->Type = TT_BitFieldColon;
} else if (Contexts.size() == 1 &&
!Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
- if (Tok->Previous->is(tok::r_paren))
+ if (Tok->Previous->isOneOf(tok::r_paren, tok::kw_noexcept))
Tok->Type = TT_CtorInitializerColon;
else
Tok->Type = TT_InheritanceColon;
@@ -504,6 +533,14 @@ private:
Tok->Type = TT_InlineASMColon;
}
break;
+ case tok::pipe:
+ case tok::amp:
+ // | and & in declarations/type expressions represent union and
+ // intersection types, respectively.
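+ // e.g. "let x: Foo|Bar;" (union) or "let y: Foo&Bar;" (intersection).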
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ !Contexts.back().IsExpression)
+ Tok->Type = TT_JsTypeOperator;
+ break;
case tok::kw_if:
case tok::kw_while:
if (CurrentToken && CurrentToken->is(tok::l_paren)) {
@@ -513,6 +550,9 @@ private:
}
break;
case tok::kw_for:
+ if (Style.Language == FormatStyle::LK_JavaScript && Tok->Previous &&
+ Tok->Previous->is(tok::period))
+ break;
Contexts.back().ColonIsForRangeExpr = true;
next();
if (!parseParens())
@@ -550,11 +590,7 @@ private:
return false;
break;
case tok::less:
- if (!NonTemplateLess.count(Tok) &&
- (!Tok->Previous ||
- (!Tok->Previous->Tok.isLiteral() &&
- !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) &&
- parseAngle()) {
+ if (parseAngle()) {
Tok->Type = TT_TemplateOpener;
} else {
Tok->Type = TT_BinaryOperator;
@@ -603,7 +639,7 @@ private:
}
// Declarations cannot be conditional expressions, this can only be part
// of a type declaration.
- if (Line.MustBeDeclaration &&
+ if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
Style.Language == FormatStyle::LK_JavaScript)
break;
parseConditional();
@@ -666,10 +702,24 @@ private:
}
LineType parsePreprocessorDirective() {
+ bool IsFirstToken = CurrentToken->IsFirst;
LineType Type = LT_PreprocessorDirective;
next();
if (!CurrentToken)
return Type;
+
+ if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
+ // JavaScript files can contain shebang lines of the form:
+ // #!/usr/bin/env node
+ // Treat these like C++ #include directives.
+ while (CurrentToken) {
+ // Tokens cannot be comments here.
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ return LT_ImportStatement;
+ }
+
if (CurrentToken->Tok.is(tok::numeric_constant)) {
CurrentToken->SpacesRequiredBefore = 1;
return Type;
@@ -745,11 +795,29 @@ public:
bool KeywordVirtualFound = false;
bool ImportStatement = false;
+
+ // import {...} from '...';
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ CurrentToken->is(Keywords.kw_import))
+ ImportStatement = true;
+
while (CurrentToken) {
if (CurrentToken->is(tok::kw_virtual))
KeywordVirtualFound = true;
- if (isImportStatement(*CurrentToken))
- ImportStatement = true;
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ // export {...} from '...';
+ // An export followed by "from 'some string';" is a re-export from
+ // another module identified by a URI and is treated as a
+ // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
+ // Just "export {...};" or "export class ..." should not be treated as
+ // an import in this sense.
+ if (Line.First->is(tok::kw_export) &&
+ CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
+ CurrentToken->Next->isStringLiteral())
+ ImportStatement = true;
+ if (isClosureImportStatement(*CurrentToken))
+ ImportStatement = true;
+ }
if (!consumeToken())
return LT_Invalid;
}
@@ -769,15 +837,15 @@ public:
}
private:
- bool isImportStatement(const FormatToken &Tok) {
+ bool isClosureImportStatement(const FormatToken &Tok) {
// FIXME: Closure-library specific stuff should not be hard-coded but be
// configurable.
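+ // Matches e.g. "goog.require('foo.bar');" or "goog.module('baz');".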
- return Style.Language == FormatStyle::LK_JavaScript &&
- Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
+ return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
Tok.Next->Next->TokenText == "provide" ||
Tok.Next->Next->TokenText == "require" ||
- Tok.Next->Next->TokenText == "setTestOnly") &&
+ Tok.Next->Next->TokenText == "setTestOnly" ||
+ Tok.Next->Next->TokenText == "forwardDeclare") &&
Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
}
@@ -853,6 +921,9 @@ private:
void modifyContext(const FormatToken &Current) {
if (Current.getPrecedence() == prec::Assignment &&
!Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
+ // Type aliases use `type X = ...;` in TypeScript.
+ !(Style.Language == FormatStyle::LK_JavaScript &&
+ Line.startsWith(Keywords.kw_type, tok::identifier)) &&
(!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
Contexts.back().IsExpression = true;
if (!Line.startsWith(TT_UnaryOperator)) {
@@ -882,17 +953,17 @@ private:
Contexts.back().IsExpression = false;
} else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
+ } else if (Current.Previous &&
+ Current.Previous->is(TT_CtorInitializerColon)) {
+ Contexts.back().IsExpression = true;
+ Contexts.back().InCtorInitializer = true;
} else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
for (FormatToken *Previous = Current.Previous;
Previous && Previous->isOneOf(tok::star, tok::amp);
Previous = Previous->Previous)
Previous->Type = TT_PointerOrReference;
- if (Line.MustBeDeclaration)
- Contexts.back().IsExpression = Contexts.front().InCtorInitializer;
- } else if (Current.Previous &&
- Current.Previous->is(TT_CtorInitializerColon)) {
- Contexts.back().IsExpression = true;
- Contexts.back().InCtorInitializer = true;
+ if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
+ Contexts.back().IsExpression = false;
} else if (Current.is(tok::kw_new)) {
Contexts.back().CanBeExpression = false;
} else if (Current.isOneOf(tok::semi, tok::exclaim)) {
@@ -938,7 +1009,7 @@ private:
Current.Type = TT_UnaryOperator;
} else if (Current.is(tok::question)) {
if (Style.Language == FormatStyle::LK_JavaScript &&
- Line.MustBeDeclaration) {
+ Line.MustBeDeclaration && !Contexts.back().IsExpression) {
// In JavaScript, `interface X { foo?(): bar; }` is an optional method
// on the interface, not a ternary expression.
Current.Type = TT_JsTypeOptionalQuestion;
@@ -964,7 +1035,8 @@ private:
Current.Type = TT_CastRParen;
if (Current.MatchingParen && Current.Next &&
!Current.Next->isBinaryOperator() &&
- !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace))
+ !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
+ tok::period, tok::arrow, tok::coloncolon))
if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
if (BeforeParen->is(tok::identifier) &&
BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
@@ -1035,6 +1107,9 @@ private:
if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
return false;
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.Previous->is(Keywords.kw_in))
+ return false;
// Skip "const" as it does not have an influence on whether this is a name.
FormatToken *PreviousNotConst = Tok.Previous;
@@ -1078,7 +1153,7 @@ private:
FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
if (LeftOfParens) {
- // If there is an opening parenthesis left of the current parentheses,
+ // If there is a closing parenthesis left of the current parentheses,
// look past it as these might be chained casts.
if (LeftOfParens->is(tok::r_paren)) {
if (!LeftOfParens->MatchingParen ||
@@ -1097,7 +1172,7 @@ private:
// Certain other tokens right before the parentheses are also signals that
// this cannot be a cast.
if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
- TT_TemplateCloser))
+ TT_TemplateCloser, tok::ellipsis))
return false;
}
@@ -1131,9 +1206,9 @@ private:
if (!LeftOfParens)
return false;
- // If the following token is an identifier, this is a cast. All cases where
- // this can be something else are handled above.
- if (Tok.Next->is(tok::identifier))
+ // If the following token is an identifier or 'this', this is a cast. All
+ // cases where this can be something else are handled above.
+ if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
return true;
if (!Tok.Next->Next)
@@ -1390,11 +1465,15 @@ private:
Style.Language == FormatStyle::LK_JavaScript) &&
Current->is(Keywords.kw_instanceof))
return prec::Relational;
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Current->is(Keywords.kw_in))
+ return prec::Relational;
if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
return Current->getPrecedence();
if (Current->isOneOf(tok::period, tok::arrow))
return PrecedenceArrowAndPeriod;
- if (Style.Language == FormatStyle::LK_Java &&
+ if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
Keywords.kw_throws))
return 0;
@@ -1508,7 +1587,8 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
-static bool isFunctionDeclarationName(const FormatToken &Current) {
+static bool isFunctionDeclarationName(const FormatToken &Current,
+ const AnnotatedLine &Line) {
auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
for (; Next; Next = Next->Next) {
if (Next->is(TT_OverloadedOperatorLParen))
@@ -1528,6 +1608,7 @@ static bool isFunctionDeclarationName(const FormatToken &Current) {
return nullptr;
};
+ // Find the parentheses of the parameter list.
const FormatToken *Next = Current.Next;
if (Current.is(tok::kw_operator)) {
if (Current.Previous && Current.Previous->is(tok::coloncolon))
@@ -1557,14 +1638,22 @@ static bool isFunctionDeclarationName(const FormatToken &Current) {
}
}
- if (!Next || !Next->is(tok::l_paren))
+ // Check whether the parameter list can belong to a function declaration.
+ if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
return false;
+ // If the line ends with "{", this is likely a function definition.
+ if (Line.Last->is(tok::l_brace))
+ return true;
if (Next->Next == Next->MatchingParen)
+ return true; // Empty parentheses.
+ // If there is an &/&& after the r_paren, this is likely a function.
+ if (Next->MatchingParen->Next &&
+ Next->MatchingParen->Next->is(TT_PointerOrReference))
return true;
for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
Tok = Tok->Next) {
if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
- Tok->isOneOf(TT_PointerOrReference, TT_StartOfName))
+ Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
return true;
if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
Tok->Tok.isLiteral())
@@ -1610,7 +1699,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
FormatToken *Current = Line.First->Next;
bool InFunctionDecl = Line.MightBeFunctionDecl;
while (Current) {
- if (isFunctionDeclarationName(*Current))
+ if (isFunctionDeclarationName(*Current, Line))
Current->Type = TT_FunctionDeclarationName;
if (Current->is(TT_LineComment)) {
if (Current->Previous->BlockKind == BK_BracedInit &&
@@ -1736,7 +1825,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
if (Style.Language == FormatStyle::LK_Proto)
return 1;
if (Left.is(tok::r_square))
- return 25;
+ return 200;
// Slightly prefer formatting local lambda definitions like functions.
if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
return 35;
@@ -1768,6 +1857,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
return 500;
if (Left.isOneOf(tok::kw_class, tok::kw_struct))
return 5000;
+ if (Left.is(tok::comment))
+ return 1000;
if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon))
return 2;
@@ -1910,15 +2001,14 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
return false;
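+ // e.g. GNU case ranges ("case 1 ... 5:" or "case a ... b:") need a space
+ // before the ellipsis.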
if (Right.is(tok::ellipsis))
- return Left.Tok.isLiteral();
+ return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
+ Left.Previous->is(tok::kw_case));
if (Left.is(tok::l_square) && Right.is(tok::amp))
return false;
if (Right.is(TT_PointerOrReference))
- return (Left.is(tok::r_paren) && Left.MatchingParen &&
- (Left.MatchingParen->is(TT_OverloadedOperatorLParen) ||
- (Left.MatchingParen->Previous &&
- Left.MatchingParen->Previous->is(TT_FunctionDeclarationName)))) ||
- (Left.Tok.isLiteral() ||
+ return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) ||
+ (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous &&
+ Left.Previous->is(tok::r_paren)) ||
(!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
(Style.PointerAlignment != FormatStyle::PAS_Left ||
Line.IsMultiVariableDeclStmt)));
@@ -2021,8 +2111,14 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
return true;
} else if (Style.Language == FormatStyle::LK_JavaScript) {
- if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, TT_JsFatArrow,
- Keywords.kw_in))
+ if (Left.is(TT_JsFatArrow))
+ return true;
+ if (Right.is(tok::star) &&
+ Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
+ return false;
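+ // e.g. "function* gen()" and "yield* other" keep the star attached.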
+ if (Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
+ Keywords.kw_of, tok::kw_const) &&
+ (!Left.Previous || !Left.Previous->is(tok::period)))
return true;
if (Left.is(tok::kw_default) && Left.Previous &&
Left.Previous->is(tok::kw_export))
@@ -2031,6 +2127,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
return true;
if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
return false;
+ if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
+ return false;
if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
return false;
@@ -2043,6 +2141,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
// locations that should have whitespace following are identified by the
// above set of follower tokens.
return false;
+ // Postfix non-null assertion operator, as in `foo!.bar()`.
+ if (Right.is(tok::exclaim) && (Left.isOneOf(tok::identifier, tok::r_paren,
+ tok::r_square, tok::r_brace) ||
+ Left.Tok.isLiteral()))
+ return false;
} else if (Style.Language == FormatStyle::LK_Java) {
if (Left.is(tok::r_square) && Right.is(tok::l_brace))
return true;
@@ -2111,10 +2214,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (!Style.SpaceBeforeAssignmentOperators &&
Right.getPrecedence() == prec::Assignment)
return false;
- if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
+ if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
return (Left.is(TT_TemplateOpener) &&
Style.Standard == FormatStyle::LS_Cpp03) ||
- !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) ||
+ !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren,
+ tok::l_square) ||
Left.isOneOf(TT_TemplateCloser, TT_TemplateOpener));
if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
return Style.SpacesInAngles;
@@ -2152,8 +2256,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
if (Style.Language == FormatStyle::LK_JavaScript) {
// FIXME: This might apply to other languages and token kinds.
- if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
- Left.Previous->is(tok::char_constant))
+ if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
+ Left.Previous->is(tok::string_literal))
return true;
if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
Left.Previous && Left.Previous->is(tok::equal) &&
@@ -2239,9 +2343,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
(Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
(Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
- if (Style.Language == FormatStyle::LK_Proto && Left.isNot(tok::l_brace) &&
- Right.is(TT_SelectorName))
- return true;
if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
return true;
@@ -2268,12 +2369,20 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
Keywords.kw_implements))
return true;
} else if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Left.is(tok::kw_return))
+ return false; // Otherwise a semicolon is inserted.
if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
return false;
if (Left.is(TT_JsTypeColon))
return true;
if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
return false;
+ if (Left.is(Keywords.kw_in))
+ return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
+ if (Right.is(Keywords.kw_in))
+ return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
+ if (Right.is(Keywords.kw_as))
+ return false; // Must not break before 'as' in 'x as type' casts.
}
if (Left.is(tok::at))
@@ -2390,7 +2499,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
Left.getPrecedence() == prec::Assignment))
return true;
return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
- tok::kw_class, tok::kw_struct) ||
+ tok::kw_class, tok::kw_struct, tok::comment) ||
Right.isMemberAccess() ||
Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
tok::colon, tok::l_square, tok::at) ||
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
index 5329f1f3f2fc..baa68ded9740 100644
--- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
@@ -83,7 +83,15 @@ public:
/// \c true if this line starts with the given tokens in order, ignoring
/// comments.
template <typename... Ts> bool startsWith(Ts... Tokens) const {
- return startsWith(First, Tokens...);
+ return First && First->startsSequence(Tokens...);
+ }
+
+ /// \c true if this line ends with the given tokens in reversed order,
+ /// ignoring comments.
+ /// For example, given tokens [T1, T2, T3, ...], the function returns true if
+ /// this line is like "... T3 T2 T1".
+ template <typename... Ts> bool endsWith(Ts... Tokens) const {
+ return Last && Last->endsSequence(Tokens...);
}
/// \c true if this line looks like a function definition instead of a
@@ -122,18 +130,6 @@ private:
// Disallow copying.
AnnotatedLine(const AnnotatedLine &) = delete;
void operator=(const AnnotatedLine &) = delete;
-
- template <typename A, typename... Ts>
- bool startsWith(FormatToken *Tok, A K1) const {
- while (Tok && Tok->is(tok::comment))
- Tok = Tok->Next;
- return Tok && Tok->is(K1);
- }
-
- template <typename A, typename... Ts>
- bool startsWith(FormatToken *Tok, A K1, Ts... Tokens) const {
- return startsWith(Tok, K1) && startsWith(Tok->Next, Tokens...);
- }
};
/// \brief Determines extra information about the tokens comprising an
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
index f65056907963..35035ea8afba 100644
--- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -847,7 +847,9 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
bool FitsIntoOneLine =
TheLine.Last->TotalLength + Indent <= ColumnLimit ||
- TheLine.Type == LT_ImportStatement;
+ (TheLine.Type == LT_ImportStatement &&
+ (Style.Language != FormatStyle::LK_JavaScript ||
+ !Style.JavaScriptWrapImports));
if (Style.ColumnLimit == 0)
NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
@@ -863,7 +865,9 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
// If no token in the current line is affected, we still need to format
// affected children.
if (TheLine.ChildrenAffected)
- format(TheLine.Children, DryRun);
+ for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
+ if (!Tok->Children.empty())
+ format(Tok->Children, DryRun);
// Adapt following lines on the current indent level to the same level
// unless the current \c AnnotatedLine is not at the beginning of a line.
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
index 7b8f6e652416..2fe72987bc7c 100644
--- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
@@ -363,6 +363,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
//
// We exclude + and - as they can be ObjC visibility modifiers.
ProbablyBracedList =
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in)) ||
NextTok->isOneOf(tok::comma, tok::period, tok::colon,
tok::r_paren, tok::r_square, tok::l_brace,
tok::l_square, tok::l_paren, tok::ellipsis) ||
@@ -428,6 +430,9 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
++Line->Level;
parseLevel(/*HasOpeningBrace=*/true);
+ if (eof())
+ return;
+
if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
: !FormatTok->is(tok::r_brace)) {
Line->Level = InitialLevel;
@@ -658,6 +663,85 @@ static bool tokenCanStartNewLine(const clang::Token &Tok) {
Tok.isNot(tok::kw_noexcept);
}
+static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
+ const FormatToken *FormatTok) {
+ // FIXME: This returns true for C/C++ keywords like 'struct'.
+ return FormatTok->is(tok::identifier) &&
+ (FormatTok->Tok.getIdentifierInfo() == nullptr ||
+ !FormatTok->isOneOf(Keywords.kw_in, Keywords.kw_of, Keywords.kw_as,
+ Keywords.kw_async, Keywords.kw_await,
+ Keywords.kw_yield, Keywords.kw_finally,
+ Keywords.kw_function, Keywords.kw_import,
+ Keywords.kw_is, Keywords.kw_let, Keywords.kw_var,
+ Keywords.kw_abstract, Keywords.kw_extends,
+ Keywords.kw_implements, Keywords.kw_instanceof,
+ Keywords.kw_interface, Keywords.kw_throws));
+}
+
+static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
+ const FormatToken *FormatTok) {
+ return FormatTok->Tok.isLiteral() || mustBeJSIdent(Keywords, FormatTok);
+}
+
+// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
+// when encountered after a value (see mustBeJSIdentOrValue).
+static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
+ const FormatToken *FormatTok) {
+ return FormatTok->isOneOf(
+ tok::kw_return, Keywords.kw_yield,
+ // conditionals
+ tok::kw_if, tok::kw_else,
+ // loops
+ tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
+ // switch/case
+ tok::kw_switch, tok::kw_case,
+ // exceptions
+ tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
+ // declaration
+ tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
+ Keywords.kw_async, Keywords.kw_function,
+ // import/export
+ Keywords.kw_import, tok::kw_export);
+}
+
+// readTokenWithJavaScriptASI reads the next token and terminates the current
+// line if JavaScript Automatic Semicolon Insertion must
+// happen between the current token and the next token.
+//
+// This method is conservative - it cannot cover all edge cases of JavaScript,
+// but only aims to correctly handle certain well-known cases. It *must not*
+// terminate a line in speculative cases.
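+//
+// For example, in
+//   const a = 1
+//   let b = 2
+// the literal "1" ends a value and "let" begins a declaration, so the first
+// line is terminated here even without a semicolon.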
+void UnwrappedLineParser::readTokenWithJavaScriptASI() {
+ FormatToken *Previous = FormatTok;
+ readToken();
+ FormatToken *Next = FormatTok;
+
+ bool IsOnSameLine =
+ CommentsBeforeNextToken.empty()
+ ? Next->NewlinesBefore == 0
+ : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
+ if (IsOnSameLine)
+ return;
+
+ bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
+ if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
+ // If the token before the previous one is an '@', the previous token is an
+ // annotation and can precede another identifier/value.
+ const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
+ if (PrePrevious->is(tok::at))
+ return;
+ }
+ if (Next->is(tok::exclaim) && PreviousMustBeValue)
+ addUnwrappedLine();
+ bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
+ if (NextMustBeValue && (PreviousMustBeValue ||
+ Previous->isOneOf(tok::r_square, tok::r_paren,
+ tok::plusplus, tok::minusminus)))
+ addUnwrappedLine();
+ if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
+ addUnwrappedLine();
+}
+
void UnwrappedLineParser::parseStructuralElement() {
assert(!FormatTok->is(tok::l_brace));
if (Style.Language == FormatStyle::LK_TableGen &&
@@ -798,10 +882,23 @@ void UnwrappedLineParser::parseStructuralElement() {
/*MunchSemi=*/false);
return;
}
- if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->is(Keywords.kw_import)) {
- parseJavaScriptEs6ImportExport();
- return;
+ if (FormatTok->is(Keywords.kw_import)) {
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ parseJavaScriptEs6ImportExport();
+ return;
+ }
+ if (Style.Language == FormatStyle::LK_Proto) {
+ nextToken();
+ if (FormatTok->is(tok::kw_public))
+ nextToken();
+ if (!FormatTok->is(tok::string_literal))
+ return;
+ nextToken();
+ if (FormatTok->is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ return;
+ }
}
if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
Keywords.kw_slots, Keywords.kw_qslots)) {
@@ -818,6 +915,7 @@ void UnwrappedLineParser::parseStructuralElement() {
break;
}
do {
+ const FormatToken *Previous = getPreviousToken();
switch (FormatTok->Tok.getKind()) {
case tok::at:
nextToken();
@@ -825,6 +923,12 @@ void UnwrappedLineParser::parseStructuralElement() {
parseBracedList();
break;
case tok::kw_enum:
+ // Ignore if this is part of "template <enum ...".
+ if (Previous && Previous->is(tok::less)) {
+ nextToken();
+ break;
+ }
+
// parseEnum falls through and does not yet add an unwrapped line as an
// enum definition can start a structural element.
if (!parseEnum())
@@ -922,18 +1026,35 @@ void UnwrappedLineParser::parseStructuralElement() {
// Parse function literal unless 'function' is the first token in a line
// in which case this should be treated as a free-standing function.
if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
+ (FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async,
+ Keywords.kw_function)) &&
+ Line->Tokens.size() > 0) {
tryToParseJSFunction();
break;
}
if ((Style.Language == FormatStyle::LK_JavaScript ||
Style.Language == FormatStyle::LK_Java) &&
FormatTok->is(Keywords.kw_interface)) {
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ // In JavaScript/TypeScript, "interface" can be used as a standalone
+ // identifier, e.g. in `var interface = 1;`. If "interface" is
+ // followed by another identifier, it is very likely to be an actual
+ // interface declaration.
+ unsigned StoredPosition = Tokens->getPosition();
+ FormatToken *Next = Tokens->getNextToken();
+ FormatTok = Tokens->setPosition(StoredPosition);
+ if (Next && !mustBeJSIdent(Keywords, Next)) {
+ nextToken();
+ break;
+ }
+ }
parseRecord();
addUnwrappedLine();
return;
}
+ // See if the following token should start a new unwrapped line.
StringRef Text = FormatTok->TokenText;
nextToken();
if (Line->Tokens.size() == 1 &&
@@ -941,6 +1062,7 @@ void UnwrappedLineParser::parseStructuralElement() {
// not labels.
Style.Language != FormatStyle::LK_JavaScript) {
if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
+ Line->Tokens.begin()->Tok->MustBreakBefore = true;
parseLabel();
return;
}
@@ -1093,8 +1215,17 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
}
void UnwrappedLineParser::tryToParseJSFunction() {
+ assert(FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
+ if (FormatTok->is(Keywords.kw_async))
+ nextToken();
+ // Consume "function".
nextToken();
+ // Consume * (generator function).
+ if (FormatTok->is(tok::star))
+ nextToken();
+
// Consume function name.
if (FormatTok->is(tok::identifier))
nextToken();
@@ -1139,7 +1270,8 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
// replace this by using parseAssigmentExpression() inside.
do {
if (Style.Language == FormatStyle::LK_JavaScript) {
- if (FormatTok->is(Keywords.kw_function)) {
+ if (FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
tryToParseJSFunction();
continue;
}
@@ -1237,7 +1369,8 @@ void UnwrappedLineParser::parseParens() {
break;
case tok::identifier:
if (Style.Language == FormatStyle::LK_JavaScript &&
- FormatTok->is(Keywords.kw_function))
+ (FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
tryToParseJSFunction();
else
nextToken();
@@ -1315,6 +1448,8 @@ void UnwrappedLineParser::parseIfThenElse() {
addUnwrappedLine();
++Line->Level;
parseStructuralElement();
+ if (FormatTok->is(tok::eof))
+ addUnwrappedLine();
--Line->Level;
}
} else if (NeedsUnwrappedLine) {
@@ -1503,6 +1638,10 @@ void UnwrappedLineParser::parseLabel() {
addUnwrappedLine();
}
Line->Level = OldLineLevel;
+ if (FormatTok->isNot(tok::l_brace)) {
+ parseStructuralElement();
+ addUnwrappedLine();
+ }
}
void UnwrappedLineParser::parseCaseLabel() {
@@ -1550,7 +1689,8 @@ bool UnwrappedLineParser::parseEnum() {
// In TypeScript, "enum" can also be used as property name, e.g. in interface
// declarations. An "enum" keyword followed by a colon would be a syntax
// error and thus assume it is just an identifier.
- if (Style.Language == FormatStyle::LK_JavaScript && FormatTok->is(tok::colon))
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->isOneOf(tok::colon, tok::question))
return false;
// Eat up enum class ...
@@ -1795,28 +1935,31 @@ void UnwrappedLineParser::parseObjCProtocol() {
}
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
- assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
+ bool IsImport = FormatTok->is(Keywords.kw_import);
+ assert(IsImport || FormatTok->is(tok::kw_export));
nextToken();
// Consume the "default" in "export default class/function".
if (FormatTok->is(tok::kw_default))
nextToken();
- // Consume "function" and "default function", so that these get parsed as
- // free-standing JS functions, i.e. do not require a trailing semicolon.
+ // Consume "async function", "function" and "default function", so that these
+ // get parsed as free-standing JS functions, i.e. do not require a trailing
+ // semicolon.
+ if (FormatTok->is(Keywords.kw_async))
+ nextToken();
if (FormatTok->is(Keywords.kw_function)) {
nextToken();
return;
}
- // Consume the "abstract" in "export abstract class".
- if (FormatTok->is(Keywords.kw_abstract))
- nextToken();
-
- if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
- Keywords.kw_interface, Keywords.kw_let,
- Keywords.kw_var))
- return; // Fall through to parsing the corresponding structure.
+ // For imports, `export *`, `export {...}`, consume the rest of the line up
+ // to the terminating `;`. For everything else, just return and continue
+ // parsing the structural element, i.e. the declaration or expression for
+ // `export default`.
+ if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
+ !FormatTok->isStringLiteral())
+ return;
while (!eof() && FormatTok->isNot(tok::semi)) {
if (FormatTok->is(tok::l_brace)) {
@@ -1895,7 +2038,10 @@ void UnwrappedLineParser::nextToken() {
return;
flushComments(isOnNewLine(*FormatTok));
pushToken(FormatTok);
- readToken();
+ if (Style.Language != FormatStyle::LK_JavaScript)
+ readToken();
+ else
+ readTokenWithJavaScriptASI();
}
const FormatToken *UnwrappedLineParser::getPreviousToken() {
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
index 6d40ab4f3120..9c78d33632c6 100644
--- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
@@ -81,6 +81,7 @@ private:
void parsePPElse();
void parsePPEndIf();
void parsePPUnknown();
+ void readTokenWithJavaScriptASI();
void parseStructuralElement();
bool tryToParseBracedList();
bool parseBracedList(bool ContinueOnSemicolons = false);
diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
index d6e6ed2c2baa..9cdba9df10a9 100644
--- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
+++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
@@ -372,16 +372,20 @@ void WhitespaceManager::alignTrailingComments() {
unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
Changes[i].OriginalWhitespaceRange.getEnd());
for (unsigned j = i + 1; j != e; ++j) {
- if (Changes[j].Kind != tok::comment) { // Skip over comments.
- unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
- Changes[j].OriginalWhitespaceRange.getEnd());
- // The start of the next token was previously aligned with the
- // start of this comment.
- WasAlignedWithStartOfNextLine =
- CommentColumn == NextColumn ||
- CommentColumn == NextColumn + Style.IndentWidth;
- break;
- }
+ if (Changes[j].Kind == tok::comment ||
+ Changes[j].Kind == tok::unknown)
+ // Skip over comments and unknown tokens. "unknown" tokens are used for
+ // the continuation of multiline comments.
+ continue;
+
+ unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
+ Changes[j].OriginalWhitespaceRange.getEnd());
+ // The start of the next token was previously aligned with the
+ // start of this comment.
+ WasAlignedWithStartOfNextLine =
+ CommentColumn == NextColumn ||
+ CommentColumn == NextColumn + Style.IndentWidth;
+ break;
}
}
if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
@@ -554,6 +558,14 @@ void WhitespaceManager::appendIndentText(std::string &Text,
}
Text.append(Spaces, ' ');
break;
+ case FormatStyle::UT_ForContinuationAndIndentation:
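+ // Use tabs only for whitespace that starts at column 0 (indentation);
+ // continuation within a line stays spaces. e.g. with TabWidth=4, six
+ // columns of indentation become one tab plus two spaces.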
+ if (WhitespaceStartColumn == 0) {
+ unsigned Tabs = Spaces / Style.TabWidth;
+ Text.append(Tabs, '\t');
+ Spaces -= Tabs * Style.TabWidth;
+ }
+ Text.append(Spaces, ' ');
+ break;
}
}
diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
index 9ca9db6f7488..3562347a0e60 100644
--- a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
+++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
@@ -37,7 +37,7 @@ namespace format {
/// There may be multiple calls to \c breakToken for a given token.
class WhitespaceManager {
public:
- WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style,
+ WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
bool UseCRLF)
: SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
@@ -203,7 +203,7 @@ private:
unsigned Spaces, unsigned WhitespaceStartColumn);
SmallVector<Change, 16> Changes;
- SourceManager &SourceMgr;
+ const SourceManager &SourceMgr;
tooling::Replacements Replaces;
const FormatStyle &Style;
bool UseCRLF;
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp
index 52776b6a8483..de72ea57e35b 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp
@@ -35,9 +35,9 @@ namespace {
typedef RecursiveASTVisitor<ASTPrinter> base;
public:
- ASTPrinter(raw_ostream *Out = nullptr, bool Dump = false,
+ ASTPrinter(std::unique_ptr<raw_ostream> Out = nullptr, bool Dump = false,
StringRef FilterString = "", bool DumpLookups = false)
- : Out(Out ? *Out : llvm::outs()), Dump(Dump),
+ : Out(Out ? *Out : llvm::outs()), OwnedOut(std::move(Out)), Dump(Dump),
FilterString(FilterString), DumpLookups(DumpLookups) {}
void HandleTranslationUnit(ASTContext &Context) override {
@@ -94,6 +94,7 @@ namespace {
}
raw_ostream &Out;
+ std::unique_ptr<raw_ostream> OwnedOut;
bool Dump;
std::string FilterString;
bool DumpLookups;
@@ -122,9 +123,11 @@ namespace {
};
} // end anonymous namespace
-std::unique_ptr<ASTConsumer> clang::CreateASTPrinter(raw_ostream *Out,
- StringRef FilterString) {
- return llvm::make_unique<ASTPrinter>(Out, /*Dump=*/false, FilterString);
+std::unique_ptr<ASTConsumer>
+clang::CreateASTPrinter(std::unique_ptr<raw_ostream> Out,
+ StringRef FilterString) {
+ return llvm::make_unique<ASTPrinter>(std::move(Out), /*Dump=*/false,
+ FilterString);
}
std::unique_ptr<ASTConsumer> clang::CreateASTDumper(StringRef FilterString,
@@ -268,7 +271,7 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
// Print the parameters.
Out << "(";
bool PrintComma = false;
- for (auto I : FD->params()) {
+ for (auto I : FD->parameters()) {
if (PrintComma)
Out << ", ";
else
@@ -290,13 +293,12 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
// Print the parameters.
Out << "(";
bool PrintComma = false;
- for (FunctionDecl::param_const_iterator I = D->param_begin(),
- E = D->param_end(); I != E; ++I) {
+ for (ParmVarDecl *Parameter : D->parameters()) {
if (PrintComma)
Out << ", ";
else
PrintComma = true;
- Out << **I;
+ Out << *Parameter;
}
Out << ")";
@@ -320,13 +322,12 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
// Print the parameters.
Out << "(";
bool PrintComma = false;
- for (FunctionDecl::param_const_iterator I = D->param_begin(),
- E = D->param_end(); I != E; ++I) {
+ for (ParmVarDecl *Parameter : D->parameters()) {
if (PrintComma)
Out << ", ";
else
PrintComma = true;
- Out << **I;
+ Out << *Parameter;
}
Out << ")";
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp
index b499fa2b0e68..51064da270cc 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ASTMerge.cpp
@@ -83,14 +83,13 @@ void ASTMergeAction::EndSourceFileAction() {
return AdaptedAction->EndSourceFileAction();
}
-ASTMergeAction::ASTMergeAction(FrontendAction *AdaptedAction,
+ASTMergeAction::ASTMergeAction(std::unique_ptr<FrontendAction> adaptedAction,
ArrayRef<std::string> ASTFiles)
- : AdaptedAction(AdaptedAction), ASTFiles(ASTFiles.begin(), ASTFiles.end()) {
+    : AdaptedAction(std::move(adaptedAction)),
+      ASTFiles(ASTFiles.begin(), ASTFiles.end()) {
assert(AdaptedAction && "ASTMergeAction needs an action to adapt");
}
ASTMergeAction::~ASTMergeAction() {
- delete AdaptedAction;
}
bool ASTMergeAction::usesPreprocessorOnly() const {
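
Moving AdaptedAction into a std::unique_ptr is why the explicit delete disappears from the destructor above: the implicitly generated destructor now releases the action. The same pattern in miniature (Action and Adapter are hypothetical types):

#include <memory>

struct Action { virtual ~Action() = default; };

// Before: a raw "Action *Inner;" plus "delete Inner;" in ~Adapter().
// After: the unique_ptr member makes the default destructor correct.
class Adapter {
  std::unique_ptr<Action> Inner;
public:
  explicit Adapter(std::unique_ptr<Action> A) : Inner(std::move(A)) {}
  // ~Adapter() = default; -- no manual delete needed.
};

int main() { Adapter A(std::make_unique<Action>()); }
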
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp
index e6ba29201f85..76fd00a132b4 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp
@@ -920,17 +920,17 @@ class PrecompilePreambleConsumer : public PCHGenerator {
unsigned &Hash;
std::vector<Decl *> TopLevelDecls;
PrecompilePreambleAction *Action;
- raw_ostream *Out;
+ std::unique_ptr<raw_ostream> Out;
public:
PrecompilePreambleConsumer(ASTUnit &Unit, PrecompilePreambleAction *Action,
const Preprocessor &PP, StringRef isysroot,
- raw_ostream *Out)
+ std::unique_ptr<raw_ostream> Out)
: PCHGenerator(PP, "", nullptr, isysroot, std::make_shared<PCHBuffer>(),
ArrayRef<llvm::IntrusiveRefCntPtr<ModuleFileExtension>>(),
/*AllowASTWithErrors=*/true),
Unit(Unit), Hash(Unit.getCurrentTopLevelHashValue()), Action(Action),
- Out(Out) {
+ Out(std::move(Out)) {
Hash = 0;
}
@@ -982,8 +982,9 @@ PrecompilePreambleAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {
std::string Sysroot;
std::string OutputFile;
- raw_ostream *OS = GeneratePCHAction::ComputeASTConsumerArguments(
- CI, InFile, Sysroot, OutputFile);
+ std::unique_ptr<raw_ostream> OS =
+ GeneratePCHAction::ComputeASTConsumerArguments(CI, InFile, Sysroot,
+ OutputFile);
if (!OS)
return nullptr;
@@ -994,7 +995,7 @@ PrecompilePreambleAction::CreateASTConsumer(CompilerInstance &CI,
llvm::make_unique<MacroDefinitionTrackerPPCallbacks>(
Unit.getCurrentTopLevelHashValue()));
return llvm::make_unique<PrecompilePreambleConsumer>(
- Unit, this, CI.getPreprocessor(), Sysroot, OS);
+ Unit, this, CI.getPreprocessor(), Sysroot, std::move(OS));
}
static bool isNonDriverDiag(const StoredDiagnostic &StoredDiag) {
@@ -1040,7 +1041,7 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
// Create the compiler instance to use for building the AST.
std::unique_ptr<CompilerInstance> Clang(
- new CompilerInstance(PCHContainerOps));
+ new CompilerInstance(std::move(PCHContainerOps)));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1138,11 +1139,9 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
if (!Act->BeginSourceFile(*Clang.get(), Clang->getFrontendOpts().Inputs[0]))
goto error;
- if (SavedMainFileBuffer) {
- std::string ModName = getPreambleFile(this);
+ if (SavedMainFileBuffer)
TranslateStoredDiagnostics(getFileManager(), getSourceManager(),
PreambleDiagnostics, StoredDiagnostics);
- }
if (!Act->Execute())
goto error;
@@ -1380,7 +1379,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
// First, make a record of those files that have been overridden via
// remapping or unsaved_files.
- llvm::StringMap<PreambleFileHash> OverriddenFiles;
+ std::map<llvm::sys::fs::UniqueID, PreambleFileHash> OverriddenFiles;
for (const auto &R : PreprocessorOpts.RemappedFiles) {
if (AnyFileChanged)
break;
@@ -1393,24 +1392,38 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
break;
}
- OverriddenFiles[R.first] = PreambleFileHash::createForFile(
+ OverriddenFiles[Status.getUniqueID()] = PreambleFileHash::createForFile(
Status.getSize(), Status.getLastModificationTime().toEpochTime());
}
for (const auto &RB : PreprocessorOpts.RemappedFileBuffers) {
if (AnyFileChanged)
break;
- OverriddenFiles[RB.first] =
+
+ vfs::Status Status;
+ if (FileMgr->getNoncachedStatValue(RB.first, Status)) {
+ AnyFileChanged = true;
+ break;
+ }
+
+ OverriddenFiles[Status.getUniqueID()] =
PreambleFileHash::createForMemoryBuffer(RB.second);
}
// Check whether anything has changed.
- for (llvm::StringMap<PreambleFileHash>::iterator
+ for (llvm::StringMap<PreambleFileHash>::iterator
F = FilesInPreamble.begin(), FEnd = FilesInPreamble.end();
!AnyFileChanged && F != FEnd;
++F) {
- llvm::StringMap<PreambleFileHash>::iterator Overridden
- = OverriddenFiles.find(F->first());
+ vfs::Status Status;
+ if (FileMgr->getNoncachedStatValue(F->first(), Status)) {
+ // If we can't stat the file, assume that something horrible happened.
+ AnyFileChanged = true;
+ break;
+ }
+
+ std::map<llvm::sys::fs::UniqueID, PreambleFileHash>::iterator Overridden
+ = OverriddenFiles.find(Status.getUniqueID());
if (Overridden != OverriddenFiles.end()) {
// This file was remapped; check whether the newly-mapped file
// matches up with the previous mapping.
@@ -1420,13 +1433,9 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
}
// The file was not remapped; check whether it has changed on disk.
- vfs::Status Status;
- if (FileMgr->getNoncachedStatValue(F->first(), Status)) {
- // If we can't stat the file, assume that something horrible happened.
- AnyFileChanged = true;
- } else if (Status.getSize() != uint64_t(F->second.Size) ||
- Status.getLastModificationTime().toEpochTime() !=
- uint64_t(F->second.ModTime))
+ if (Status.getSize() != uint64_t(F->second.Size) ||
+ Status.getLastModificationTime().toEpochTime() !=
+ uint64_t(F->second.ModTime))
AnyFileChanged = true;
}
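
Keying OverriddenFiles by llvm::sys::fs::UniqueID instead of by file name means two different spellings of the same path (symlinks, "./" prefixes, case-insensitive filesystems) collapse to a single entry, so the preamble-validity check compares file identities rather than strings. A hedged sketch of that comparison using the real llvm::sys::fs::status API (sameFile is a hypothetical helper):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

// Two paths name the same file iff their UniqueIDs (device + inode on
// POSIX) match, regardless of how the paths are spelled.
static bool sameFile(const llvm::Twine &A, const llvm::Twine &B) {
  llvm::sys::fs::file_status SA, SB;
  if (llvm::sys::fs::status(A, SA) || llvm::sys::fs::status(B, SB))
    return false; // treat a stat failure as "not the same file"
  return SA.getUniqueID() == SB.getUniqueID();
}

int main() {
  llvm::outs() << (sameFile("/tmp/x.h", "/tmp/../tmp/x.h") ? "same"
                                                           : "different")
               << '\n';
}
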
@@ -1506,7 +1515,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
// Create the compiler instance to use for building the precompiled preamble.
std::unique_ptr<CompilerInstance> Clang(
- new CompilerInstance(PCHContainerOps));
+ new CompilerInstance(std::move(PCHContainerOps)));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1723,7 +1732,7 @@ ASTUnit *ASTUnit::create(CompilerInvocation *CI,
ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
CompilerInvocation *CI,
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
- IntrusiveRefCntPtr<DiagnosticsEngine> Diags, ASTFrontendAction *Action,
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diags, FrontendAction *Action,
ASTUnit *Unit, bool Persistent, StringRef ResourceFilesPath,
bool OnlyLocalDecls, bool CaptureDiagnostics,
unsigned PrecompilePreambleAfterNParses, bool CacheCodeCompletionResults,
@@ -1768,7 +1777,7 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
// Create the compiler instance to use for building the AST.
std::unique_ptr<CompilerInstance> Clang(
- new CompilerInstance(PCHContainerOps));
+ new CompilerInstance(std::move(PCHContainerOps)));
// Recover resources if we crash before exiting this method.
llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance>
@@ -1812,7 +1821,7 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
// Create the source manager.
Clang->setSourceManager(&AST->getSourceManager());
- ASTFrontendAction *Act = Action;
+ FrontendAction *Act = Action;
std::unique_ptr<TopLevelDeclTrackerAction> TrackerAct;
if (!Act) {
@@ -1888,7 +1897,7 @@ bool ASTUnit::LoadFromCompilerInvocation(
llvm::CrashRecoveryContextCleanupRegistrar<llvm::MemoryBuffer>
MemBufferCleanup(OverrideMainBuffer.get());
- return Parse(PCHContainerOps, std::move(OverrideMainBuffer));
+ return Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer));
}
std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
@@ -1921,7 +1930,7 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
llvm::CrashRecoveryContextReleaseRefCleanup<DiagnosticsEngine> >
DiagCleanup(Diags.get());
- if (AST->LoadFromCompilerInvocation(PCHContainerOps,
+ if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps),
PrecompilePreambleAfterNParses))
return nullptr;
return AST;
@@ -2004,7 +2013,7 @@ ASTUnit *ASTUnit::LoadFromCommandLine(
llvm::CrashRecoveryContextCleanupRegistrar<ASTUnit>
ASTUnitCleanup(AST.get());
- if (AST->LoadFromCompilerInvocation(PCHContainerOps,
+ if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps),
PrecompilePreambleAfterNParses)) {
// Some error occurred, if caller wants to examine diagnostics, pass it the
// ASTUnit.
@@ -2054,7 +2063,8 @@ bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
getDiagnostics().setNumWarnings(NumWarningsInPreamble);
// Parse the sources
- bool Result = Parse(PCHContainerOps, std::move(OverrideMainBuffer));
+ bool Result =
+ Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer));
// If we're caching global code-completion results, and the top-level
// declarations have changed, clear out the code-completion cache.
@@ -2500,7 +2510,8 @@ static bool serializeUnit(ASTWriter &Writer,
}
bool ASTUnit::serialize(raw_ostream &OS) {
- bool hasErrors = getDiagnostics().hasErrorOccurred();
+ // For serialization we are lenient if the errors were only of the warn-as-error kind.
+ bool hasErrors = getDiagnostics().hasUncompilableErrorOccurred();
if (WriterData)
return serializeUnit(WriterData->Writer, WriterData->Buffer,
@@ -2814,7 +2825,7 @@ const FileEntry *ASTUnit::getPCHFile() {
}
bool ASTUnit::isModuleFile() {
- return isMainFileAST() && !ASTFileLangOpts.CurrentModule.empty();
+ return isMainFileAST() && ASTFileLangOpts.CompilingModule;
}
void ASTUnit::PreambleData::countLines() const {
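
The serialize() change above relaxes the guard from hasErrorOccurred() to hasUncompilableErrorOccurred(), so a translation unit whose only "errors" are warnings promoted by -Werror can still be written out. Roughly, as a sketch against the real DiagnosticsEngine API (canSerialize is a hypothetical helper):

#include "clang/Basic/Diagnostic.h"

// hasErrorOccurred() is true for any error, including -Werror promotions;
// hasUncompilableErrorOccurred() ignores those promotions, so warn-as-error
// units remain serializable.
static bool canSerialize(const clang::DiagnosticsEngine &Diags) {
  return !Diags.hasUncompilableErrorOccurred();
}
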
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CacheTokens.cpp b/contrib/llvm/tools/clang/lib/Frontend/CacheTokens.cpp
index 87f3d1725814..15b0adab7c5e 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CacheTokens.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CacheTokens.cpp
@@ -241,7 +241,7 @@ public:
: Out(out), PP(pp), idcount(0), CurStrOffset(0) {}
PTHMap &getPM() { return PM; }
- void GeneratePTH(const std::string &MainFile);
+ void GeneratePTH(StringRef MainFile);
};
} // end anonymous namespace
@@ -479,7 +479,7 @@ static void pwrite32le(raw_pwrite_stream &OS, uint32_t Val, uint64_t &Off) {
Off += 4;
}
-void PTHWriter::GeneratePTH(const std::string &MainFile) {
+void PTHWriter::GeneratePTH(StringRef MainFile) {
// Generate the prologue.
Out << "cfe-pth" << '\0';
Emit32(PTHManager::Version);
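
Switching GeneratePTH from const std::string& to StringRef follows the usual LLVM convention: a StringRef is just a pointer-length pair, so callers holding a literal or a substring no longer materialize a temporary std::string at the call site. In miniature (countSlashes is a hypothetical function):

#include "llvm/ADT/StringRef.h"
#include <string>

// Accepts any contiguous run of characters without copying.
static size_t countSlashes(llvm::StringRef Path) {
  return Path.count('/');
}

int main() {
  std::string S = "a/b/c";
  countSlashes(S);     // no copy, unlike const std::string& + a literal
  countSlashes("x/y"); // no temporary std::string constructed
  countSlashes(llvm::StringRef(S).drop_front(2)); // substrings are free
}
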
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp b/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
index 1c1081fbe08e..3f126615b1eb 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -18,6 +18,7 @@
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Parse/ParseAST.h"
+#include "clang/Sema/MultiplexExternalSemaSource.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/ASTWriter.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -25,51 +26,52 @@
using namespace clang;
namespace {
-class ChainedIncludesSource : public ExternalSemaSource {
+class ChainedIncludesSourceImpl : public ExternalSemaSource {
public:
- ~ChainedIncludesSource() override;
-
- ExternalSemaSource &getFinalReader() const { return *FinalReader; }
-
- std::vector<CompilerInstance *> CIs;
- IntrusiveRefCntPtr<ExternalSemaSource> FinalReader;
+ ChainedIncludesSourceImpl(std::vector<std::unique_ptr<CompilerInstance>> CIs)
+ : CIs(std::move(CIs)) {}
protected:
//===----------------------------------------------------------------------===//
// ExternalASTSource interface.
//===----------------------------------------------------------------------===//
- Decl *GetExternalDecl(uint32_t ID) override;
- Selector GetExternalSelector(uint32_t ID) override;
- uint32_t GetNumExternalSelectors() override;
- Stmt *GetExternalDeclStmt(uint64_t Offset) override;
- CXXCtorInitializer **GetExternalCXXCtorInitializers(uint64_t Offset) override;
- CXXBaseSpecifier *GetExternalCXXBaseSpecifiers(uint64_t Offset) override;
- bool FindExternalVisibleDeclsByName(const DeclContext *DC,
- DeclarationName Name) override;
- void
- FindExternalLexicalDecls(const DeclContext *DC,
- llvm::function_ref<bool(Decl::Kind)> IsKindWeWant,
- SmallVectorImpl<Decl *> &Result) override;
- void CompleteType(TagDecl *Tag) override;
- void CompleteType(ObjCInterfaceDecl *Class) override;
- void StartedDeserializing() override;
- void FinishedDeserializing() override;
- void StartTranslationUnit(ASTConsumer *Consumer) override;
- void PrintStats() override;
-
/// Return the amount of memory used by memory buffers, breaking down
/// by heap-backed versus mmap'ed memory.
- void getMemoryBufferSizes(MemoryBufferSizes &sizes) const override;
+ void getMemoryBufferSizes(MemoryBufferSizes &sizes) const override {
+ for (unsigned i = 0, e = CIs.size(); i != e; ++i) {
+ if (const ExternalASTSource *eSrc =
+ CIs[i]->getASTContext().getExternalSource()) {
+ eSrc->getMemoryBufferSizes(sizes);
+ }
+ }
+ }
- //===----------------------------------------------------------------------===//
- // ExternalSemaSource interface.
- //===----------------------------------------------------------------------===//
+private:
+ std::vector<std::unique_ptr<CompilerInstance>> CIs;
+};
+
+/// Members of ChainedIncludesSource, factored out so we can initialize
+/// them before we initialize the ExternalSemaSource base class.
+struct ChainedIncludesSourceMembers {
+ ChainedIncludesSourceMembers(
+ std::vector<std::unique_ptr<CompilerInstance>> CIs,
+ IntrusiveRefCntPtr<ExternalSemaSource> FinalReader)
+ : Impl(std::move(CIs)), FinalReader(std::move(FinalReader)) {}
+ ChainedIncludesSourceImpl Impl;
+ IntrusiveRefCntPtr<ExternalSemaSource> FinalReader;
+};
- void InitializeSema(Sema &S) override;
- void ForgetSema() override;
- void ReadMethodPool(Selector Sel) override;
- bool LookupUnqualified(LookupResult &R, Scope *S) override;
+/// Use MultiplexExternalSemaSource to dispatch all ExternalSemaSource
+/// calls to the final reader.
+class ChainedIncludesSource
+ : private ChainedIncludesSourceMembers,
+ public MultiplexExternalSemaSource {
+public:
+ ChainedIncludesSource(std::vector<std::unique_ptr<CompilerInstance>> CIs,
+ IntrusiveRefCntPtr<ExternalSemaSource> FinalReader)
+ : ChainedIncludesSourceMembers(std::move(CIs), std::move(FinalReader)),
+ MultiplexExternalSemaSource(Impl, *this->FinalReader) {}
};
}
@@ -107,18 +109,13 @@ createASTReader(CompilerInstance &CI, StringRef pchFile,
return nullptr;
}
-ChainedIncludesSource::~ChainedIncludesSource() {
- for (unsigned i = 0, e = CIs.size(); i != e; ++i)
- delete CIs[i];
-}
-
IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
CompilerInstance &CI, IntrusiveRefCntPtr<ExternalSemaSource> &Reader) {
std::vector<std::string> &includes = CI.getPreprocessorOpts().ChainedIncludes;
assert(!includes.empty() && "No '-chain-include' in options!");
- IntrusiveRefCntPtr<ChainedIncludesSource> source(new ChainedIncludesSource());
+ std::vector<std::unique_ptr<CompilerInstance>> CIs;
InputKind IK = CI.getFrontendOpts().Inputs[0].getKind();
SmallVector<std::unique_ptr<llvm::MemoryBuffer>, 4> SerialBufs;
@@ -164,7 +161,7 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
ArrayRef<llvm::IntrusiveRefCntPtr<ModuleFileExtension>> Extensions;
auto consumer = llvm::make_unique<PCHGenerator>(
Clang->getPreprocessor(), "-", nullptr, /*isysroot=*/"", Buffer,
- Extensions);
+ Extensions, /*AllowASTWithErrors=*/true);
Clang->getASTContext().setASTMutationListener(
consumer->GetASTMutationListener());
Clang->setASTConsumer(std::move(consumer));
@@ -206,7 +203,7 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
SerialBufs.push_back(llvm::MemoryBuffer::getMemBufferCopy(
StringRef(serialAST.data(), serialAST.size())));
serialAST.clear();
- source->CIs.push_back(Clang.release());
+ CIs.push_back(std::move(Clang));
}
assert(!SerialBufs.empty());
@@ -216,83 +213,6 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
if (!Reader)
return nullptr;
- source->FinalReader = Reader;
- return source;
-}
-
-//===----------------------------------------------------------------------===//
-// ExternalASTSource interface.
-//===----------------------------------------------------------------------===//
-
-Decl *ChainedIncludesSource::GetExternalDecl(uint32_t ID) {
- return getFinalReader().GetExternalDecl(ID);
-}
-Selector ChainedIncludesSource::GetExternalSelector(uint32_t ID) {
- return getFinalReader().GetExternalSelector(ID);
-}
-uint32_t ChainedIncludesSource::GetNumExternalSelectors() {
- return getFinalReader().GetNumExternalSelectors();
+ return IntrusiveRefCntPtr<ChainedIncludesSource>(
+ new ChainedIncludesSource(std::move(CIs), Reader));
}
-Stmt *ChainedIncludesSource::GetExternalDeclStmt(uint64_t Offset) {
- return getFinalReader().GetExternalDeclStmt(Offset);
-}
-CXXBaseSpecifier *
-ChainedIncludesSource::GetExternalCXXBaseSpecifiers(uint64_t Offset) {
- return getFinalReader().GetExternalCXXBaseSpecifiers(Offset);
-}
-CXXCtorInitializer **
-ChainedIncludesSource::GetExternalCXXCtorInitializers(uint64_t Offset) {
- return getFinalReader().GetExternalCXXCtorInitializers(Offset);
-}
-bool
-ChainedIncludesSource::FindExternalVisibleDeclsByName(const DeclContext *DC,
- DeclarationName Name) {
- return getFinalReader().FindExternalVisibleDeclsByName(DC, Name);
-}
-void ChainedIncludesSource::FindExternalLexicalDecls(
- const DeclContext *DC, llvm::function_ref<bool(Decl::Kind)> IsKindWeWant,
- SmallVectorImpl<Decl *> &Result) {
- return getFinalReader().FindExternalLexicalDecls(DC, IsKindWeWant, Result);
-}
-void ChainedIncludesSource::CompleteType(TagDecl *Tag) {
- return getFinalReader().CompleteType(Tag);
-}
-void ChainedIncludesSource::CompleteType(ObjCInterfaceDecl *Class) {
- return getFinalReader().CompleteType(Class);
-}
-void ChainedIncludesSource::StartedDeserializing() {
- return getFinalReader().StartedDeserializing();
-}
-void ChainedIncludesSource::FinishedDeserializing() {
- return getFinalReader().FinishedDeserializing();
-}
-void ChainedIncludesSource::StartTranslationUnit(ASTConsumer *Consumer) {
- return getFinalReader().StartTranslationUnit(Consumer);
-}
-void ChainedIncludesSource::PrintStats() {
- return getFinalReader().PrintStats();
-}
-void ChainedIncludesSource::getMemoryBufferSizes(MemoryBufferSizes &sizes)const{
- for (unsigned i = 0, e = CIs.size(); i != e; ++i) {
- if (const ExternalASTSource *eSrc =
- CIs[i]->getASTContext().getExternalSource()) {
- eSrc->getMemoryBufferSizes(sizes);
- }
- }
-
- getFinalReader().getMemoryBufferSizes(sizes);
-}
-
-void ChainedIncludesSource::InitializeSema(Sema &S) {
- return getFinalReader().InitializeSema(S);
-}
-void ChainedIncludesSource::ForgetSema() {
- return getFinalReader().ForgetSema();
-}
-void ChainedIncludesSource::ReadMethodPool(Selector Sel) {
- getFinalReader().ReadMethodPool(Sel);
-}
-bool ChainedIncludesSource::LookupUnqualified(LookupResult &R, Scope *S) {
- return getFinalReader().LookupUnqualified(R, S);
-}
-
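
The rewritten ChainedIncludesSource inherits privately from a plain members struct before the MultiplexExternalSemaSource base, the classic base-from-member idiom: base subobjects are constructed in declaration order, so Impl and FinalReader are alive by the time the multiplexer base constructor needs references to them. A stripped-down sketch (Source, Multiplexer, Members, and Combined are all hypothetical stand-ins):

#include <iostream>

struct Source { virtual ~Source() = default; };

// The real base needs fully-constructed Source references at construction
// time, just like MultiplexExternalSemaSource does.
struct Multiplexer {
  Multiplexer(Source &A, Source &B) { (void)A; (void)B; }
};

// Step 1: park the members in a struct...
struct Members {
  Source Impl;
  Source Final;
};

// Step 2: ...inherit it *before* the base that consumes the members.
// Private base subobjects are built in declaration order, so Impl and
// Final exist when Multiplexer's constructor runs.
class Combined : private Members, public Multiplexer {
public:
  Combined() : Members(), Multiplexer(Impl, Final) {}
};

int main() { Combined C; }
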
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp
index 3edcf5d654b9..8b00a3d00879 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInstance.cpp
@@ -48,6 +48,7 @@
#include <sys/stat.h>
#include <system_error>
#include <time.h>
+#include <utility>
using namespace clang;
@@ -55,7 +56,8 @@ CompilerInstance::CompilerInstance(
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
bool BuildingModule)
: ModuleLoader(BuildingModule), Invocation(new CompilerInvocation()),
- ModuleManager(nullptr), ThePCHContainerOperations(PCHContainerOps),
+ ModuleManager(nullptr),
+ ThePCHContainerOperations(std::move(PCHContainerOps)),
BuildGlobalModuleIndex(false), HaveFullGlobalModuleIndex(false),
ModuleBuildFailed(false) {}
@@ -125,7 +127,7 @@ IntrusiveRefCntPtr<ASTReader> CompilerInstance::getModuleManager() const {
return ModuleManager;
}
void CompilerInstance::setModuleManager(IntrusiveRefCntPtr<ASTReader> Reader) {
- ModuleManager = Reader;
+ ModuleManager = std::move(Reader);
}
std::shared_ptr<ModuleDependencyCollector>
@@ -135,7 +137,7 @@ CompilerInstance::getModuleDepCollector() const {
void CompilerInstance::setModuleDepCollector(
std::shared_ptr<ModuleDependencyCollector> Collector) {
- ModuleDepCollector = Collector;
+ ModuleDepCollector = std::move(Collector);
}
// Diagnostics
@@ -349,30 +351,34 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
AttachDependencyGraphGen(*PP, DepOpts.DOTOutputFile,
getHeaderSearchOpts().Sysroot);
- for (auto &Listener : DependencyCollectors)
- Listener->attachToPreprocessor(*PP);
-
// If we don't have a collector, but we are collecting module dependencies,
// then we're the top level compiler instance and need to create one.
- if (!ModuleDepCollector && !DepOpts.ModuleDependencyOutputDir.empty())
+ if (!ModuleDepCollector && !DepOpts.ModuleDependencyOutputDir.empty()) {
ModuleDepCollector = std::make_shared<ModuleDependencyCollector>(
DepOpts.ModuleDependencyOutputDir);
+ }
+
+ if (ModuleDepCollector)
+ addDependencyCollector(ModuleDepCollector);
+
+ for (auto &Listener : DependencyCollectors)
+ Listener->attachToPreprocessor(*PP);
// Handle generating header include information, if requested.
if (DepOpts.ShowHeaderIncludes)
- AttachHeaderIncludeGen(*PP, DepOpts.ExtraDeps);
+ AttachHeaderIncludeGen(*PP, DepOpts);
if (!DepOpts.HeaderIncludeOutputFile.empty()) {
StringRef OutputPath = DepOpts.HeaderIncludeOutputFile;
if (OutputPath == "-")
OutputPath = "";
- AttachHeaderIncludeGen(*PP, DepOpts.ExtraDeps,
+ AttachHeaderIncludeGen(*PP, DepOpts,
/*ShowAllHeaders=*/true, OutputPath,
/*ShowDepth=*/false);
}
if (DepOpts.PrintShowIncludes) {
- AttachHeaderIncludeGen(*PP, DepOpts.ExtraDeps,
- /*ShowAllHeaders=*/false, /*OutputPath=*/"",
+ AttachHeaderIncludeGen(*PP, DepOpts,
+ /*ShowAllHeaders=*/true, /*OutputPath=*/"",
/*ShowDepth=*/true, /*MSStyle=*/true);
}
}
@@ -467,7 +473,7 @@ IntrusiveRefCntPtr<ASTReader> CompilerInstance::createPCHExternalASTSource(
// Code Completion
static bool EnableCodeCompletion(Preprocessor &PP,
- const std::string &Filename,
+ StringRef Filename,
unsigned Line,
unsigned Column) {
// Tell the source manager to chop off the given file at a specific
@@ -536,15 +542,11 @@ void CompilerInstance::createSema(TranslationUnitKind TUKind,
// Output Files
void CompilerInstance::addOutputFile(OutputFile &&OutFile) {
- assert(OutFile.OS && "Attempt to add empty stream to output list!");
OutputFiles.push_back(std::move(OutFile));
}
void CompilerInstance::clearOutputFiles(bool EraseFiles) {
for (OutputFile &OF : OutputFiles) {
- // Manually close the stream before we rename it.
- OF.OS.reset();
-
if (!OF.TempFilename.empty()) {
if (EraseFiles) {
llvm::sys::fs::remove(OF.TempFilename);
@@ -564,13 +566,12 @@ void CompilerInstance::clearOutputFiles(bool EraseFiles) {
}
} else if (!OF.Filename.empty() && EraseFiles)
llvm::sys::fs::remove(OF.Filename);
-
}
OutputFiles.clear();
NonSeekStream.reset();
}
-raw_pwrite_stream *
+std::unique_ptr<raw_pwrite_stream>
CompilerInstance::createDefaultOutputFile(bool Binary, StringRef InFile,
StringRef Extension) {
return createOutputFile(getFrontendOpts().OutputFile, Binary,
@@ -578,14 +579,11 @@ CompilerInstance::createDefaultOutputFile(bool Binary, StringRef InFile,
/*UseTemporary=*/true);
}
-llvm::raw_null_ostream *CompilerInstance::createNullOutputFile() {
- auto OS = llvm::make_unique<llvm::raw_null_ostream>();
- llvm::raw_null_ostream *Ret = OS.get();
- addOutputFile(OutputFile("", "", std::move(OS)));
- return Ret;
+std::unique_ptr<raw_pwrite_stream> CompilerInstance::createNullOutputFile() {
+ return llvm::make_unique<llvm::raw_null_ostream>();
}
-raw_pwrite_stream *
+std::unique_ptr<raw_pwrite_stream>
CompilerInstance::createOutputFile(StringRef OutputPath, bool Binary,
bool RemoveFileOnSignal, StringRef InFile,
StringRef Extension, bool UseTemporary,
@@ -601,13 +599,12 @@ CompilerInstance::createOutputFile(StringRef OutputPath, bool Binary,
return nullptr;
}
- raw_pwrite_stream *Ret = OS.get();
// Add the output file -- but don't try to remove "-", since this means we are
// writing to stdout.
- addOutputFile(OutputFile((OutputPathName != "-") ? OutputPathName : "",
- TempPathName, std::move(OS)));
+ addOutputFile(
+ OutputFile((OutputPathName != "-") ? OutputPathName : "", TempPathName));
- return Ret;
+ return OS;
}
std::unique_ptr<llvm::raw_pwrite_stream> CompilerInstance::createOutputFile(
@@ -712,16 +709,17 @@ std::unique_ptr<llvm::raw_pwrite_stream> CompilerInstance::createOutputFile(
// Initialization Utilities
bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input){
- return InitializeSourceManager(Input, getDiagnostics(),
- getFileManager(), getSourceManager(),
- getFrontendOpts());
+ return InitializeSourceManager(
+ Input, getDiagnostics(), getFileManager(), getSourceManager(),
+ hasPreprocessor() ? &getPreprocessor().getHeaderSearchInfo() : nullptr,
+ getDependencyOutputOpts(), getFrontendOpts());
}
-bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input,
- DiagnosticsEngine &Diags,
- FileManager &FileMgr,
- SourceManager &SourceMgr,
- const FrontendOptions &Opts) {
+// static
+bool CompilerInstance::InitializeSourceManager(
+ const FrontendInputFile &Input, DiagnosticsEngine &Diags,
+ FileManager &FileMgr, SourceManager &SourceMgr, HeaderSearch *HS,
+ DependencyOutputOptions &DepOpts, const FrontendOptions &Opts) {
SrcMgr::CharacteristicKind
Kind = Input.isSystem() ? SrcMgr::C_System : SrcMgr::C_User;
@@ -737,7 +735,35 @@ bool CompilerInstance::InitializeSourceManager(const FrontendInputFile &Input,
// Figure out where to get and map in the main file.
if (InputFile != "-") {
- const FileEntry *File = FileMgr.getFile(InputFile, /*OpenFile=*/true);
+ const FileEntry *File;
+ if (Opts.FindPchSource.empty()) {
+ File = FileMgr.getFile(InputFile, /*OpenFile=*/true);
+ } else {
+ // When building a pch file in clang-cl mode, the .h file is built as if
+ // it was included by a cc file. Since the driver doesn't know about
+ // all include search directories, the frontend must search the input
+ // file through HeaderSearch here, as if it had been included by the
+ // cc file at Opts.FindPchSource.
+ const FileEntry *FindFile = FileMgr.getFile(Opts.FindPchSource);
+ if (!FindFile) {
+ Diags.Report(diag::err_fe_error_reading) << Opts.FindPchSource;
+ return false;
+ }
+ const DirectoryLookup *UnusedCurDir;
+ SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 16>
+ Includers;
+ Includers.push_back(std::make_pair(FindFile, FindFile->getDir()));
+ File = HS->LookupFile(InputFile, SourceLocation(), /*isAngled=*/false,
+ /*FromDir=*/nullptr,
+ /*CurDir=*/UnusedCurDir, Includers,
+ /*SearchPath=*/nullptr,
+ /*RelativePath=*/nullptr,
+ /*RequestingModule=*/nullptr,
+ /*SuggestedModule=*/nullptr, /*SkipCache=*/true);
+ // Also add the header to /showIncludes output.
+ if (File)
+ DepOpts.ShowIncludesPretendHeader = File->getName();
+ }
if (!File) {
Diags.Report(diag::err_fe_error_reading) << InputFile;
return false;
@@ -803,8 +829,9 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
// Create TargetInfo for the other side of CUDA compilation.
if (getLangOpts().CUDA && !getFrontendOpts().AuxTriple.empty()) {
- std::shared_ptr<TargetOptions> TO(new TargetOptions);
+ auto TO = std::make_shared<TargetOptions>();
TO->Triple = getFrontendOpts().AuxTriple;
+ TO->HostTriple = getTarget().getTriple().str();
setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO));
}
@@ -1050,7 +1077,7 @@ static bool compileAndLoadModule(CompilerInstance &ImportingInstance,
switch (Locked) {
case llvm::LockFileManager::LFS_Error:
Diags.Report(ModuleNameLoc, diag::err_module_lock_failure)
- << Module->Name;
+ << Module->Name << Locked.getErrorMessage();
return false;
case llvm::LockFileManager::LFS_Owned:
@@ -1290,8 +1317,6 @@ void CompilerInstance::createModuleManager() {
if (TheDependencyFileGenerator)
TheDependencyFileGenerator->AttachToASTReader(*ModuleManager);
- if (ModuleDepCollector)
- ModuleDepCollector->attachToASTReader(*ModuleManager);
for (auto &Listener : DependencyCollectors)
Listener->attachToASTReader(*ModuleManager);
}
@@ -1386,8 +1411,7 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
// when both the preprocessor and parser see the same import declaration.
if (ImportLoc.isValid() && LastModuleImportLoc == ImportLoc) {
// Make the named module visible.
- if (LastModuleImportResult && ModuleName != getLangOpts().CurrentModule &&
- ModuleName != getLangOpts().ImplementationOfModule)
+ if (LastModuleImportResult && ModuleName != getLangOpts().CurrentModule)
ModuleManager->makeModuleVisible(LastModuleImportResult, Visibility,
ImportLoc);
return LastModuleImportResult;
@@ -1401,8 +1425,7 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
if (Known != KnownModules.end()) {
// Retrieve the cached top-level module.
Module = Known->second;
- } else if (ModuleName == getLangOpts().CurrentModule ||
- ModuleName == getLangOpts().ImplementationOfModule) {
+ } else if (ModuleName == getLangOpts().CurrentModule) {
// This is the module we're building.
Module = PP->getHeaderSearchInfo().lookupModule(ModuleName);
Known = KnownModules.insert(std::make_pair(Path[0].first, Module)).first;
@@ -1580,10 +1603,6 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
}
}
- // Don't make the module visible if we are in the implementation.
- if (ModuleName == getLangOpts().ImplementationOfModule)
- return ModuleLoadResult(Module, false);
-
// Make the named module visible, if it's not already part of the module
// we are parsing.
if (ModuleName != getLangOpts().CurrentModule) {
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
index 237a44704096..c6948ebfc4b4 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -33,6 +33,7 @@
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
+#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ScopedPrinter.h"
#include <atomic>
#include <memory>
#include <sys/stat.h>
@@ -375,6 +377,46 @@ static void parseSanitizerKinds(StringRef FlagName,
}
}
+// Set the profile kind for -fprofile-instrument.
+static void setPGOInstrumentor(CodeGenOptions &Opts, ArgList &Args,
+ DiagnosticsEngine &Diags) {
+ Arg *A = Args.getLastArg(OPT_fprofile_instrument_EQ);
+ if (A == nullptr)
+ return;
+ StringRef S = A->getValue();
+ unsigned I = llvm::StringSwitch<unsigned>(S)
+ .Case("none", CodeGenOptions::ProfileNone)
+ .Case("clang", CodeGenOptions::ProfileClangInstr)
+ .Case("llvm", CodeGenOptions::ProfileIRInstr)
+ .Default(~0U);
+ if (I == ~0U) {
+ Diags.Report(diag::err_drv_invalid_pgo_instrumentor) << A->getAsString(Args)
+ << S;
+ return;
+ }
+ CodeGenOptions::ProfileInstrKind Instrumentor =
+ static_cast<CodeGenOptions::ProfileInstrKind>(I);
+ Opts.setProfileInstr(Instrumentor);
+}
+
+// Set the profile kind using -fprofile-instrument-use-path.
+static void setPGOUseInstrumentor(CodeGenOptions &Opts,
+ const Twine &ProfileName) {
+ auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName);
+ // On error, return silently and let Clang PGOUse report the error message.
+ if (auto E = ReaderOrErr.takeError()) {
+ llvm::consumeError(std::move(E));
+ Opts.setProfileUse(CodeGenOptions::ProfileClangInstr);
+ return;
+ }
+ std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
+ std::move(ReaderOrErr.get());
+ if (PGOReader->isIRLevelProfile())
+ Opts.setProfileUse(CodeGenOptions::ProfileIRInstr);
+ else
+ Opts.setProfileUse(CodeGenOptions::ProfileClangInstr);
+}
+
static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
DiagnosticsEngine &Diags,
const TargetOptions &TargetOpts) {
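
setPGOInstrumentor and setPGOUseInstrumentor above split PGO into a front-end ("clang") and an IR-level ("llvm") flavor, and sniff an existing profile to pick the matching use-phase kind. A hedged sketch of just the selection logic with plain enums (ProfileKind and parseInstrumentor are hypothetical; the strings mirror the StringSwitch above, not the real CodeGenOptions API):

#include <cassert>
#include <string>

enum class ProfileKind { None, ClangInstr, IRInstr };

// Unknown values are an error; "clang" selects front-end counters,
// "llvm" selects IR-level counters.
static ProfileKind parseInstrumentor(const std::string &S, bool &Err) {
  Err = false;
  if (S == "none")  return ProfileKind::None;
  if (S == "clang") return ProfileKind::ClangInstr;
  if (S == "llvm")  return ProfileKind::IRInstr;
  Err = true;
  return ProfileKind::None;
}

int main() {
  bool Err;
  assert(parseInstrumentor("llvm", Err) == ProfileKind::IRInstr && !Err);
  parseInstrumentor("gcc", Err);
  assert(Err); // would become err_drv_invalid_pgo_instrumentor
}
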
@@ -400,8 +442,17 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
: CodeGenOptions::OnlyAlwaysInlining);
// -fno-inline-functions overrides OptimizationLevel > 1.
Opts.NoInline = Args.hasArg(OPT_fno_inline);
- Opts.setInlining(Args.hasArg(OPT_fno_inline_functions) ?
- CodeGenOptions::OnlyAlwaysInlining : Opts.getInlining());
+ if (Arg* InlineArg = Args.getLastArg(options::OPT_finline_functions,
+ options::OPT_finline_hint_functions,
+ options::OPT_fno_inline_functions)) {
+ const Option& InlineOpt = InlineArg->getOption();
+ if (InlineOpt.matches(options::OPT_finline_functions))
+ Opts.setInlining(CodeGenOptions::NormalInlining);
+ else if (InlineOpt.matches(options::OPT_finline_hint_functions))
+ Opts.setInlining(CodeGenOptions::OnlyHintInlining);
+ else
+ Opts.setInlining(CodeGenOptions::OnlyAlwaysInlining);
+ }
if (Arg *A = Args.getLastArg(OPT_fveclib)) {
StringRef Name = A->getValue();
@@ -416,34 +467,36 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
if (Arg *A = Args.getLastArg(OPT_debug_info_kind_EQ)) {
unsigned Val =
llvm::StringSwitch<unsigned>(A->getValue())
- .Case("line-tables-only", CodeGenOptions::DebugLineTablesOnly)
- .Case("limited", CodeGenOptions::LimitedDebugInfo)
- .Case("standalone", CodeGenOptions::FullDebugInfo)
+ .Case("line-tables-only", codegenoptions::DebugLineTablesOnly)
+ .Case("limited", codegenoptions::LimitedDebugInfo)
+ .Case("standalone", codegenoptions::FullDebugInfo)
.Default(~0U);
if (Val == ~0U)
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< A->getValue();
else
- Opts.setDebugInfo(static_cast<CodeGenOptions::DebugInfoKind>(Val));
+ Opts.setDebugInfo(static_cast<codegenoptions::DebugInfoKind>(Val));
}
if (Arg *A = Args.getLastArg(OPT_debugger_tuning_EQ)) {
unsigned Val = llvm::StringSwitch<unsigned>(A->getValue())
- .Case("gdb", CodeGenOptions::DebuggerKindGDB)
- .Case("lldb", CodeGenOptions::DebuggerKindLLDB)
- .Case("sce", CodeGenOptions::DebuggerKindSCE)
+ .Case("gdb", unsigned(llvm::DebuggerKind::GDB))
+ .Case("lldb", unsigned(llvm::DebuggerKind::LLDB))
+ .Case("sce", unsigned(llvm::DebuggerKind::SCE))
.Default(~0U);
if (Val == ~0U)
Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
<< A->getValue();
else
- Opts.setDebuggerTuning(static_cast<CodeGenOptions::DebuggerKind>(Val));
+ Opts.setDebuggerTuning(static_cast<llvm::DebuggerKind>(Val));
}
Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 0, Diags);
Opts.DebugColumnInfo = Args.hasArg(OPT_dwarf_column_info);
Opts.EmitCodeView = Args.hasArg(OPT_gcodeview);
+ Opts.WholeProgramVTables = Args.hasArg(OPT_fwhole_program_vtables);
+ Opts.LTOVisibilityPublicStd = Args.hasArg(OPT_flto_visibility_public_std);
Opts.SplitDwarfFile = Args.getLastArgValue(OPT_split_dwarf_file);
Opts.DebugTypeExtRefs = Args.hasArg(OPT_dwarf_ext_refs);
- Opts.DebugExplicitImport = Triple.isPS4CPU();
+ Opts.DebugExplicitImport = Triple.isPS4CPU();
for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ))
Opts.DebugPrefixMap.insert(StringRef(Arg).split('='));
@@ -471,20 +524,26 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
getAllNoBuiltinFuncValues(Args, Opts.NoBuiltinFuncs);
Opts.UnrollLoops =
Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops,
- (Opts.OptimizationLevel > 1 && !Opts.OptimizeSize));
+ (Opts.OptimizationLevel > 1));
Opts.RerollLoops = Args.hasArg(OPT_freroll_loops);
Opts.DisableIntegratedAS = Args.hasArg(OPT_fno_integrated_as);
Opts.Autolink = !Args.hasArg(OPT_fno_autolink);
Opts.SampleProfileFile = Args.getLastArgValue(OPT_fprofile_sample_use_EQ);
- Opts.ProfileInstrGenerate = Args.hasArg(OPT_fprofile_instr_generate) ||
- Args.hasArg(OPT_fprofile_instr_generate_EQ);
- Opts.InstrProfileOutput = Args.getLastArgValue(OPT_fprofile_instr_generate_EQ);
- Opts.InstrProfileInput = Args.getLastArgValue(OPT_fprofile_instr_use_EQ);
+
+ setPGOInstrumentor(Opts, Args, Diags);
+ Opts.InstrProfileOutput =
+ Args.getLastArgValue(OPT_fprofile_instrument_path_EQ);
+ Opts.ProfileInstrumentUsePath =
+ Args.getLastArgValue(OPT_fprofile_instrument_use_path_EQ);
+ if (!Opts.ProfileInstrumentUsePath.empty())
+ setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath);
+
Opts.CoverageMapping =
Args.hasFlag(OPT_fcoverage_mapping, OPT_fno_coverage_mapping, false);
Opts.DumpCoverageMapping = Args.hasArg(OPT_dump_coverage_mapping);
Opts.AsmVerbose = Args.hasArg(OPT_masm_verbose);
+ Opts.AssumeSaneOperatorNew = !Args.hasArg(OPT_fno_assume_sane_operator_new);
Opts.ObjCAutoRefCountExceptions = Args.hasArg(OPT_fobjc_arc_exceptions);
Opts.CXAAtExit = !Args.hasArg(OPT_fno_use_cxa_atexit);
Opts.CXXCtorDtorAliases = Args.hasArg(OPT_mconstructor_aliases);
@@ -493,22 +552,9 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.DisableFPElim =
(Args.hasArg(OPT_mdisable_fp_elim) || Args.hasArg(OPT_pg));
Opts.DisableFree = Args.hasArg(OPT_disable_free);
+ Opts.DiscardValueNames = Args.hasArg(OPT_discard_value_names);
Opts.DisableTailCalls = Args.hasArg(OPT_mdisable_tail_calls);
Opts.FloatABI = Args.getLastArgValue(OPT_mfloat_abi);
- if (Arg *A = Args.getLastArg(OPT_meabi)) {
- StringRef Value = A->getValue();
- llvm::EABI EABIVersion = llvm::StringSwitch<llvm::EABI>(Value)
- .Case("default", llvm::EABI::Default)
- .Case("4", llvm::EABI::EABI4)
- .Case("5", llvm::EABI::EABI5)
- .Case("gnu", llvm::EABI::GNU)
- .Default(llvm::EABI::Unknown);
- if (EABIVersion == llvm::EABI::Unknown)
- Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
- << Value;
- else
- Opts.EABIVersion = Value;
- }
Opts.LessPreciseFPMAD = Args.hasArg(OPT_cl_mad_enable);
Opts.LimitFloatPrecision = Args.getLastArgValue(OPT_mlimit_float_precision);
Opts.NoInfsFPMath = (Args.hasArg(OPT_menable_no_infinities) ||
@@ -518,7 +564,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Args.hasArg(OPT_cl_unsafe_math_optimizations) ||
Args.hasArg(OPT_cl_finite_math_only) ||
Args.hasArg(OPT_cl_fast_relaxed_math));
- Opts.NoSignedZeros = Args.hasArg(OPT_fno_signed_zeros);
+ Opts.NoSignedZeros = (Args.hasArg(OPT_fno_signed_zeros) ||
+ Args.hasArg(OPT_cl_no_signed_zeros));
Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
Opts.BackendOptions = Args.getAllArgValues(OPT_backend_option);
@@ -557,9 +604,11 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.MergeFunctions = Args.hasArg(OPT_fmerge_functions);
+ Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables);
+
Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ);
const Arg *A = Args.getLastArg(OPT_flto, OPT_flto_EQ);
- Opts.EmitFunctionSummary = A && A->containsValue("thin");
+ Opts.EmitSummaryIndex = A && A->containsValue("thin");
if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) {
if (IK != IK_LLVM_IR)
Diags.Report(diag::err_drv_argument_only_allowed_with)
@@ -597,11 +646,54 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
}
}
}
+ // Handle -fembed-bitcode option.
+ if (Arg *A = Args.getLastArg(OPT_fembed_bitcode_EQ)) {
+ StringRef Name = A->getValue();
+ unsigned Model = llvm::StringSwitch<unsigned>(Name)
+ .Case("off", CodeGenOptions::Embed_Off)
+ .Case("all", CodeGenOptions::Embed_All)
+ .Case("bitcode", CodeGenOptions::Embed_Bitcode)
+ .Case("marker", CodeGenOptions::Embed_Marker)
+ .Default(~0U);
+ if (Model == ~0U) {
+ Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
+ Success = false;
+ } else
+ Opts.setEmbedBitcode(
+ static_cast<CodeGenOptions::EmbedBitcodeKind>(Model));
+ }
+ // FIXME: For backend options that are not yet recorded as function
+ // attributes in the IR, keep track of them so we can embed them in a
+ // separate data section and use them when building the bitcode.
+ if (Opts.getEmbedBitcode() == CodeGenOptions::Embed_All) {
+ for (const auto &A : Args) {
+ // Do not encode output and input.
+ if (A->getOption().getID() == options::OPT_o ||
+ A->getOption().getID() == options::OPT_INPUT ||
+ A->getOption().getID() == options::OPT_x ||
+ A->getOption().getID() == options::OPT_fembed_bitcode ||
+ (A->getOption().getGroup().isValid() &&
+ A->getOption().getGroup().getID() == options::OPT_W_Group))
+ continue;
+ ArgStringList ASL;
+ A->render(Args, ASL);
+ for (const auto &arg : ASL) {
+ StringRef ArgStr(arg);
+ Opts.CmdArgs.insert(Opts.CmdArgs.end(), ArgStr.begin(), ArgStr.end());
+ // Use '\0' to separate each command-line option.
+ Opts.CmdArgs.push_back('\0');
+ }
+ }
+ }
Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
+ Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument);
+ Opts.XRayInstructionThreshold =
+ getLastArgIntValue(Args, OPT_fxray_instruction_threshold_, 200, Diags);
Opts.InstrumentForProfiling = Args.hasArg(OPT_pg);
Opts.EmitOpenCLArgMetadata = Args.hasArg(OPT_cl_kernel_arg_info);
Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections);
+ Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations);
Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir);
for (auto A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) {
unsigned LinkFlags = llvm::Linker::Flags::None;
@@ -618,11 +710,15 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.SanitizeCoverageTraceCmp = Args.hasArg(OPT_fsanitize_coverage_trace_cmp);
Opts.SanitizeCoverage8bitCounters =
Args.hasArg(OPT_fsanitize_coverage_8bit_counters);
+ Opts.SanitizeCoverageTracePC = Args.hasArg(OPT_fsanitize_coverage_trace_pc);
Opts.SanitizeMemoryTrackOrigins =
getLastArgIntValue(Args, OPT_fsanitize_memory_track_origins_EQ, 0, Diags);
Opts.SanitizeMemoryUseAfterDtor =
Args.hasArg(OPT_fsanitize_memory_use_after_dtor);
Opts.SanitizeCfiCrossDso = Args.hasArg(OPT_fsanitize_cfi_cross_dso);
+ Opts.SanitizeStats = Args.hasArg(OPT_fsanitize_stats);
+ Opts.SanitizeAddressUseAfterScope =
+ Args.hasArg(OPT_fsanitize_address_use_after_scope);
Opts.SSPBufferSize =
getLastArgIntValue(Args, OPT_stack_protector_buffer_size, 8, Diags);
Opts.StackRealignment = Args.hasArg(OPT_mstackrealign);
@@ -697,6 +793,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
}
Opts.DependentLibraries = Args.getAllArgValues(OPT_dependent_lib);
+ Opts.LinkerOptions = Args.getAllArgValues(OPT_linker_option);
bool NeedLocTracking = false;
if (Arg *A = Args.getLastArg(OPT_Rpass_EQ)) {
@@ -725,8 +822,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
// If the user requested a flag that requires source locations available in
// the backend, make sure that the backend tracks source location information.
- if (NeedLocTracking && Opts.getDebugInfo() == CodeGenOptions::NoDebugInfo)
- Opts.setDebugInfo(CodeGenOptions::LocTrackingOnly);
+ if (NeedLocTracking && Opts.getDebugInfo() == codegenoptions::NoDebugInfo)
+ Opts.setDebugInfo(codegenoptions::LocTrackingOnly);
Opts.RewriteMapFiles = Args.getAllArgValues(OPT_frewrite_map_file);
@@ -742,6 +839,11 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.CudaGpuBinaryFileNames =
Args.getAllArgValues(OPT_fcuda_include_gpubinary);
+ Opts.Backchain = Args.hasArg(OPT_mbackchain);
+
+ Opts.EmitCheckPathComponentsToStrip = getLastArgIntValue(
+ Args, OPT_fsanitize_undefined_strip_path_components_EQ, 0, Diags);
+
return Success;
}
@@ -771,8 +873,51 @@ static void ParseDependencyOutputArgs(DependencyOutputOptions &Opts,
ModuleFiles.end());
}
+static bool parseShowColorsArgs(const ArgList &Args, bool DefaultColor) {
+ // Color diagnostics default to auto ("on" if the terminal supports it) in
+ // the driver, but default to off in cc1, needing an explicit
+ // OPT_fdiagnostics_color. Support both clang's -f[no-]color-diagnostics and
+ // gcc's -f[no-]diagnostics-color[=never|always|auto].
+ enum {
+ Colors_On,
+ Colors_Off,
+ Colors_Auto
+ } ShowColors = DefaultColor ? Colors_Auto : Colors_Off;
+ for (Arg *A : Args) {
+ const Option &O = A->getOption();
+ if (!O.matches(options::OPT_fcolor_diagnostics) &&
+ !O.matches(options::OPT_fdiagnostics_color) &&
+ !O.matches(options::OPT_fno_color_diagnostics) &&
+ !O.matches(options::OPT_fno_diagnostics_color) &&
+ !O.matches(options::OPT_fdiagnostics_color_EQ))
+ continue;
+
+ if (O.matches(options::OPT_fcolor_diagnostics) ||
+ O.matches(options::OPT_fdiagnostics_color)) {
+ ShowColors = Colors_On;
+ } else if (O.matches(options::OPT_fno_color_diagnostics) ||
+ O.matches(options::OPT_fno_diagnostics_color)) {
+ ShowColors = Colors_Off;
+ } else {
+ assert(O.matches(options::OPT_fdiagnostics_color_EQ));
+ StringRef Value(A->getValue());
+ if (Value == "always")
+ ShowColors = Colors_On;
+ else if (Value == "never")
+ ShowColors = Colors_Off;
+ else if (Value == "auto")
+ ShowColors = Colors_Auto;
+ }
+ }
+ if (ShowColors == Colors_On ||
+ (ShowColors == Colors_Auto && llvm::sys::Process::StandardErrHasColors()))
+ return true;
+ return false;
+}
+
bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
- DiagnosticsEngine *Diags) {
+ DiagnosticsEngine *Diags,
+ bool DefaultDiagColor) {
using namespace options;
bool Success = true;
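
parseShowColorsArgs scans every color-related flag and lets the last one win, then resolves Colors_Auto against whether stderr is a color-capable terminal. The precedence rule in miniature (showColors operates on plain strings instead of the Option table, and isTerminal is a stand-in for llvm::sys::Process::StandardErrHasColors):

#include <cassert>
#include <string>
#include <vector>

enum Colors { Colors_On, Colors_Off, Colors_Auto };

// The last matching flag wins, exactly as in the loop above.
static bool showColors(const std::vector<std::string> &Args,
                       bool isTerminal) {
  Colors C = Colors_Off; // cc1 default
  for (const std::string &A : Args) {
    if (A == "-fcolor-diagnostics" || A == "-fdiagnostics-color" ||
        A == "-fdiagnostics-color=always")
      C = Colors_On;
    else if (A == "-fno-color-diagnostics" || A == "-fno-diagnostics-color" ||
             A == "-fdiagnostics-color=never")
      C = Colors_Off;
    else if (A == "-fdiagnostics-color=auto")
      C = Colors_Auto;
  }
  return C == Colors_On || (C == Colors_Auto && isTerminal);
}

int main() {
  // The later flag overrides the earlier one.
  assert(showColors({"-fdiagnostics-color=never", "-fcolor-diagnostics"},
                    /*isTerminal=*/false));
  assert(!showColors({"-fdiagnostics-color=auto"}, /*isTerminal=*/false));
}
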
@@ -785,7 +930,7 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
Opts.Pedantic = Args.hasArg(OPT_pedantic);
Opts.PedanticErrors = Args.hasArg(OPT_pedantic_errors);
Opts.ShowCarets = !Args.hasArg(OPT_fno_caret_diagnostics);
- Opts.ShowColors = Args.hasArg(OPT_fcolor_diagnostics);
+ Opts.ShowColors = parseShowColorsArgs(Args, DefaultDiagColor);
Opts.ShowColumn = Args.hasFlag(OPT_fshow_column,
OPT_fno_show_column,
/*Default=*/true);
@@ -999,18 +1144,10 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
Opts.Plugins.emplace_back(A->getValue(0));
Opts.ProgramAction = frontend::PluginAction;
Opts.ActionName = A->getValue();
-
- for (const Arg *AA : Args.filtered(OPT_plugin_arg))
- if (AA->getValue(0) == Opts.ActionName)
- Opts.PluginArgs.emplace_back(AA->getValue(1));
}
-
Opts.AddPluginActions = Args.getAllArgValues(OPT_add_plugin);
- Opts.AddPluginArgs.resize(Opts.AddPluginActions.size());
- for (int i = 0, e = Opts.AddPluginActions.size(); i != e; ++i)
- for (const Arg *A : Args.filtered(OPT_plugin_arg))
- if (A->getValue(0) == Opts.AddPluginActions[i])
- Opts.AddPluginArgs[i].emplace_back(A->getValue(1));
+ for (const Arg *AA : Args.filtered(OPT_plugin_arg))
+ Opts.PluginArgs[AA->getValue(0)].emplace_back(AA->getValue(1));
for (const std::string &Arg :
Args.getAllArgValues(OPT_ftest_module_file_extension_EQ)) {
@@ -1063,6 +1200,7 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
Opts.ModuleFiles = Args.getAllArgValues(OPT_fmodule_file);
Opts.ModulesEmbedFiles = Args.getAllArgValues(OPT_fmodules_embed_file_EQ);
Opts.ModulesEmbedAllFiles = Args.hasArg(OPT_fmodules_embed_all_files);
+ Opts.IncludeTimestamps = !Args.hasArg(OPT_fno_pch_timestamp);
Opts.CodeCompleteOpts.IncludeMacros
= Args.hasArg(OPT_code_completion_macros);
@@ -1077,6 +1215,7 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
= Args.getLastArgValue(OPT_foverride_record_layout_EQ);
Opts.AuxTriple =
llvm::Triple::normalize(Args.getLastArgValue(OPT_aux_triple));
+ Opts.FindPchSource = Args.getLastArgValue(OPT_find_pch_source_EQ);
if (const Arg *A = Args.getLastArg(OPT_arcmt_check,
OPT_arcmt_modify,
@@ -1164,6 +1303,7 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
.Case("objective-c++-header", IK_ObjCXX)
.Cases("ast", "pcm", IK_AST)
.Case("ir", IK_LLVM_IR)
+ .Case("renderscript", IK_RenderScript)
.Default(IK_None);
if (DashX == IK_None)
Diags.Report(diag::err_drv_invalid_value)
@@ -1243,6 +1383,8 @@ static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args) {
// Add -I..., -F..., and -index-header-map options in order.
bool IsIndexHeaderMap = false;
+ bool IsSysrootSpecified =
+ Args.hasArg(OPT__sysroot_EQ) || Args.hasArg(OPT_isysroot);
for (const Arg *A : Args.filtered(OPT_I, OPT_F, OPT_index_header_map)) {
if (A->getOption().matches(OPT_index_header_map)) {
// -index-header-map applies to the next -I or -F.
@@ -1253,8 +1395,18 @@ static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args) {
frontend::IncludeDirGroup Group =
IsIndexHeaderMap ? frontend::IndexHeaderMap : frontend::Angled;
- Opts.AddPath(A->getValue(), Group,
- /*IsFramework=*/A->getOption().matches(OPT_F), true);
+ bool IsFramework = A->getOption().matches(OPT_F);
+ std::string Path = A->getValue();
+
+ if (IsSysrootSpecified && !IsFramework && A->getValue()[0] == '=') {
+ SmallString<32> Buffer;
+ llvm::sys::path::append(Buffer, Opts.Sysroot,
+ llvm::StringRef(A->getValue()).substr(1));
+ Path = Buffer.str();
+ }
+
+ Opts.AddPath(Path.c_str(), Group, IsFramework,
+ /*IgnoreSysroot*/ true);
IsIndexHeaderMap = false;
}
@@ -1309,7 +1461,16 @@ static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args) {
Opts.AddVFSOverlayFile(A->getValue());
}
+bool isOpenCL(LangStandard::Kind LangStd) {
+ return LangStd == LangStandard::lang_opencl ||
+ LangStd == LangStandard::lang_opencl11 ||
+ LangStd == LangStandard::lang_opencl12 ||
+ LangStd == LangStandard::lang_opencl20;
+}
+
void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
+ const llvm::Triple &T,
+ PreprocessorOptions &PPOpts,
LangStandard::Kind LangStd) {
// Set some properties which depend solely on the input kind; it would be nice
// to move these to the language standard, and have the driver resolve the
@@ -1342,7 +1503,11 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
case IK_PreprocessedC:
case IK_ObjC:
case IK_PreprocessedObjC:
- LangStd = LangStandard::lang_gnu11;
+ // The PS4 uses C99 as the default C standard.
+ if (T.isPS4())
+ LangStd = LangStandard::lang_gnu99;
+ else
+ LangStd = LangStandard::lang_gnu11;
break;
case IK_CXX:
case IK_PreprocessedCXX:
@@ -1350,6 +1515,9 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
case IK_PreprocessedObjCXX:
LangStd = LangStandard::lang_gnucxx98;
break;
+ case IK_RenderScript:
+ LangStd = LangStandard::lang_c99;
+ break;
}
}
@@ -1368,7 +1536,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
Opts.ImplicitInt = Std.hasImplicitInt();
// Set OpenCL Version.
- Opts.OpenCL = LangStd == LangStandard::lang_opencl || IK == IK_OpenCL;
+ Opts.OpenCL = isOpenCL(LangStd) || IK == IK_OpenCL;
if (LangStd == LangStandard::lang_opencl)
Opts.OpenCLVersion = 100;
else if (LangStd == LangStandard::lang_opencl11)
@@ -1386,11 +1554,22 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
Opts.LaxVectorConversions = 0;
Opts.DefaultFPContract = 1;
Opts.NativeHalfType = 1;
+ Opts.NativeHalfArgsAndReturns = 1;
+ // Include default header file for OpenCL.
+ if (Opts.IncludeDefaultHeader) {
+ PPOpts.Includes.push_back("opencl-c.h");
+ }
}
Opts.CUDA = IK == IK_CUDA || IK == IK_PreprocessedCuda ||
LangStd == LangStandard::lang_cuda;
+ Opts.RenderScript = IK == IK_RenderScript;
+ if (Opts.RenderScript) {
+ Opts.NativeHalfType = 1;
+ Opts.NativeHalfArgsAndReturns = 1;
+ }
+
// OpenCL and C++ both have bool, true, false keywords.
Opts.Bool = Opts.OpenCL || Opts.CPlusPlus;
@@ -1425,6 +1604,8 @@ static Visibility parseVisibility(Arg *arg, ArgList &args,
}
static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
+ const TargetOptions &TargetOpts,
+ PreprocessorOptions &PPOpts,
DiagnosticsEngine &Diags) {
// FIXME: Cleanup per-file based stuff.
LangStandard::Kind LangStd = LangStandard::lang_unspecified;
@@ -1432,6 +1613,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
LangStd = llvm::StringSwitch<LangStandard::Kind>(A->getValue())
#define LANGSTANDARD(id, name, desc, features) \
.Case(name, LangStandard::lang_##id)
+#define LANGSTANDARD_ALIAS(id, alias) \
+ .Case(alias, LangStandard::lang_##id)
#include "clang/Frontend/LangStandards.def"
.Default(LangStandard::lang_unspecified);
if (LangStd == LangStandard::lang_unspecified)
@@ -1459,7 +1642,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
<< A->getAsString(Args) << "C++/ObjC++";
break;
case IK_OpenCL:
- if (!Std.isC99())
+ if (!isOpenCL(LangStd))
Diags.Report(diag::err_drv_argument_not_allowed_with)
<< A->getAsString(Args) << "OpenCL";
break;
@@ -1480,10 +1663,10 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) {
LangStandard::Kind OpenCLLangStd
= llvm::StringSwitch<LangStandard::Kind>(A->getValue())
- .Case("CL", LangStandard::lang_opencl)
- .Case("CL1.1", LangStandard::lang_opencl11)
- .Case("CL1.2", LangStandard::lang_opencl12)
- .Case("CL2.0", LangStandard::lang_opencl20)
+ .Cases("cl", "CL", LangStandard::lang_opencl)
+ .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11)
+ .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12)
+ .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20)
.Default(LangStandard::lang_unspecified);
if (OpenCLLangStd == LangStandard::lang_unspecified) {
@@ -1494,7 +1677,22 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
LangStd = OpenCLLangStd;
}
- CompilerInvocation::setLangDefaults(Opts, IK, LangStd);
+ Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
+
+ llvm::Triple T(TargetOpts.Triple);
+ CompilerInvocation::setLangDefaults(Opts, IK, T, PPOpts, LangStd);
+
+ // -cl-strict-aliasing needs to emit a diagnostic for OpenCL versions above
+ // 1.0. The option was added for compatibility with OpenCL 1.0 and should be
+ // considered deprecated for any later version.
+ if (Args.getLastArg(OPT_cl_strict_aliasing)
+ && Opts.OpenCLVersion > 100) {
+ std::string VerSpec = llvm::to_string(Opts.OpenCLVersion / 100) +
+ std::string(".") +
+ llvm::to_string((Opts.OpenCLVersion % 100) / 10);
+ Diags.Report(diag::warn_option_invalid_ocl_version)
+ << VerSpec << Args.getLastArg(OPT_cl_strict_aliasing)->getAsString(Args);
+ }
// We abuse '-f[no-]gnu-keywords' to force overriding all GNU-extension
// keywords. This behavior is provided by GCC's poorly named '-fasm' flag,
@@ -1510,14 +1708,17 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (Args.hasArg(OPT_fcuda_is_device))
Opts.CUDAIsDevice = 1;
- if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device))
- Opts.CUDAAllowHostCallsFromHostDevice = 1;
+ if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
+ Opts.CUDAAllowVariadicFunctions = 1;
+
+ if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
+ Opts.CUDAHostDeviceConstexpr = 0;
- if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
- Opts.CUDADisableTargetCallChecks = 1;
+ if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_flush_denormals_to_zero))
+ Opts.CUDADeviceFlushDenormalsToZero = 1;
- if (Args.hasArg(OPT_fcuda_target_overloads))
- Opts.CUDATargetOverloads = 1;
+ if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
+ Opts.CUDADeviceApproxTranscendentals = 1;
if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
@@ -1662,11 +1863,13 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.ObjCExceptions = Args.hasArg(OPT_fobjc_exceptions);
Opts.CXXExceptions = Args.hasArg(OPT_fcxx_exceptions);
Opts.SjLjExceptions = Args.hasArg(OPT_fsjlj_exceptions);
+ Opts.ExternCNoUnwind = Args.hasArg(OPT_fexternc_nounwind);
Opts.TraditionalCPP = Args.hasArg(OPT_traditional_cpp);
- Opts.RTTI = !Args.hasArg(OPT_fno_rtti);
+ Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti);
Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data);
- Opts.Blocks = Args.hasArg(OPT_fblocks);
+ Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
+ && Opts.OpenCLVersion >= 200);
Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional);
Opts.Coroutines = Args.hasArg(OPT_fcoroutines);
Opts.Modules = Args.hasArg(OPT_fmodules);
@@ -1689,7 +1892,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (!Opts.NoBuiltin)
getAllNoBuiltinFuncValues(Args, Opts.NoBuiltinFuncs);
Opts.NoMathBuiltin = Args.hasArg(OPT_fno_math_builtin);
- Opts.AssumeSaneOperatorNew = !Args.hasArg(OPT_fno_assume_sane_operator_new);
Opts.SizedDeallocation = Args.hasArg(OPT_fsized_deallocation);
Opts.ConceptsTS = Args.hasArg(OPT_fconcepts_ts);
Opts.HeinousExtensions = Args.hasArg(OPT_fheinous_gnu_extensions);
@@ -1718,8 +1920,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.EmitAllDecls = Args.hasArg(OPT_femit_all_decls);
Opts.PackStruct = getLastArgIntValue(Args, OPT_fpack_struct_EQ, 0, Diags);
Opts.MaxTypeAlign = getLastArgIntValue(Args, OPT_fmax_type_align_EQ, 0, Diags);
+ Opts.AlignDouble = Args.hasArg(OPT_malign_double);
Opts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
- Opts.PIELevel = getLastArgIntValue(Args, OPT_pie_level, 0, Diags);
+ Opts.PIE = Args.hasArg(OPT_pic_is_pie);
Opts.Static = Args.hasArg(OPT_static_define);
Opts.DumpRecordLayoutsSimple = Args.hasArg(OPT_fdump_record_layouts_simple);
Opts.DumpRecordLayouts = Opts.DumpRecordLayoutsSimple
@@ -1729,7 +1932,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.NoBitFieldTypeAlign = Args.hasArg(OPT_fno_bitfield_type_align);
Opts.SinglePrecisionConstants = Args.hasArg(OPT_cl_single_precision_constant);
Opts.FastRelaxedMath = Args.hasArg(OPT_cl_fast_relaxed_math);
- Opts.MRTD = Args.hasArg(OPT_mrtd);
Opts.HexagonQdsp6Compat = Args.hasArg(OPT_mqdsp6_compat);
Opts.FakeAddressSpaceMap = Args.hasArg(OPT_ffake_address_space_map);
Opts.ParseUnknownAnytype = Args.hasArg(OPT_funknown_anytype);
@@ -1737,14 +1939,16 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.DebuggerCastResultToId = Args.hasArg(OPT_fdebugger_cast_result_to_id);
Opts.DebuggerObjCLiteral = Args.hasArg(OPT_fdebugger_objc_literal);
Opts.ApplePragmaPack = Args.hasArg(OPT_fapple_pragma_pack);
- Opts.CurrentModule = Args.getLastArgValue(OPT_fmodule_name);
+ Opts.CurrentModule = Args.getLastArgValue(OPT_fmodule_name_EQ);
Opts.AppExt = Args.hasArg(OPT_fapplication_extension);
- Opts.ImplementationOfModule =
- Args.getLastArgValue(OPT_fmodule_implementation_of);
Opts.ModuleFeatures = Args.getAllArgValues(OPT_fmodule_feature);
std::sort(Opts.ModuleFeatures.begin(), Opts.ModuleFeatures.end());
Opts.NativeHalfType |= Args.hasArg(OPT_fnative_half_type);
- Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns);
+ Opts.NativeHalfArgsAndReturns |= Args.hasArg(OPT_fnative_half_arguments_and_returns);
+ // Enable HalfArgsAndReturns if present in Args or if NativeHalfArgsAndReturns
+ // is enabled.
+ Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns)
+ | Opts.NativeHalfArgsAndReturns;
Opts.GNUAsm = !Args.hasArg(OPT_fno_gnu_inline_asm);
// __declspec is enabled by default for the PS4 by the driver, and also
@@ -1758,12 +1962,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Args.hasFlag(OPT_fdeclspec, OPT_fno_declspec,
(Opts.MicrosoftExt || Opts.Borland || Opts.CUDA));
- if (!Opts.CurrentModule.empty() && !Opts.ImplementationOfModule.empty() &&
- Opts.CurrentModule != Opts.ImplementationOfModule) {
- Diags.Report(diag::err_conflicting_module_names)
- << Opts.CurrentModule << Opts.ImplementationOfModule;
- }
-
// For now, we only support local submodule visibility in C++ (because we
// heavily depend on the ODR for merging redefinitions).
if (Opts.ModulesLocalVisibility && !Opts.CPlusPlus)
@@ -1810,15 +2008,79 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
Opts.setMSPointerToMemberRepresentationMethod(InheritanceModel);
}
+ // Check for MS default calling conventions being specified.
+ if (Arg *A = Args.getLastArg(OPT_fdefault_calling_conv_EQ)) {
+ LangOptions::DefaultCallingConvention DefaultCC =
+ llvm::StringSwitch<LangOptions::DefaultCallingConvention>(
+ A->getValue())
+ .Case("cdecl", LangOptions::DCC_CDecl)
+ .Case("fastcall", LangOptions::DCC_FastCall)
+ .Case("stdcall", LangOptions::DCC_StdCall)
+ .Case("vectorcall", LangOptions::DCC_VectorCall)
+ .Default(LangOptions::DCC_None);
+ if (DefaultCC == LangOptions::DCC_None)
+ Diags.Report(diag::err_drv_invalid_value)
+ << "-fdefault-calling-conv=" << A->getValue();
+
+ llvm::Triple T(TargetOpts.Triple);
+ llvm::Triple::ArchType Arch = T.getArch();
+ bool emitError = (DefaultCC == LangOptions::DCC_FastCall ||
+ DefaultCC == LangOptions::DCC_StdCall) &&
+ Arch != llvm::Triple::x86;
+ emitError |= DefaultCC == LangOptions::DCC_VectorCall &&
+ !(Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64);
+ if (emitError)
+ Diags.Report(diag::err_drv_argument_not_allowed_with)
+ << A->getSpelling() << T.getTriple();
+ else
+ Opts.setDefaultCallingConv(DefaultCC);
+ }
+
+ // -mrtd option
+ if (Arg *A = Args.getLastArg(OPT_mrtd)) {
+ if (Opts.getDefaultCallingConv() != LangOptions::DCC_None)
+ Diags.Report(diag::err_drv_argument_not_allowed_with)
+ << A->getSpelling() << "-fdefault-calling-conv";
+ else {
+ llvm::Triple T(TargetOpts.Triple);
+ if (T.getArch() != llvm::Triple::x86)
+ Diags.Report(diag::err_drv_argument_not_allowed_with)
+ << A->getSpelling() << T.getTriple();
+ else
+ Opts.setDefaultCallingConv(LangOptions::DCC_StdCall);
+ }
+ }
+
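An illustrative sketch of the llvm::Triple checks above: fastcall/stdcall defaults are only accepted on 32-bit x86, vectorcall also on x86-64 (the triples below are examples, not from the patch):

#include "llvm/ADT/Triple.h"
#include <cassert>

int main() {
  llvm::Triple T32("i686-pc-windows-msvc");
  llvm::Triple T64("x86_64-pc-windows-msvc");
  // -fdefault-calling-conv=stdcall would be accepted for T32 only;
  // -fdefault-calling-conv=vectorcall for either.
  assert(T32.getArch() == llvm::Triple::x86);
  assert(T64.getArch() == llvm::Triple::x86_64);
}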
// Check if -fopenmp is specified.
- Opts.OpenMP = Args.hasArg(options::OPT_fopenmp);
+ Opts.OpenMP = Args.hasArg(options::OPT_fopenmp) ? 1 : 0;
Opts.OpenMPUseTLS =
Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
Opts.OpenMPIsDevice =
Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+ if (Opts.OpenMP) {
+ int Version =
+ getLastArgIntValue(Args, OPT_fopenmp_version_EQ, Opts.OpenMP, Diags);
+ if (Version != 0)
+ Opts.OpenMP = Version;
+ // Provide a diagnostic when a given target is not expected to be an OpenMP
+ // device or host.
+ if (!Opts.OpenMPIsDevice) {
+ switch (T.getArch()) {
+ default:
+ break;
+ // Add unsupported host targets here:
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ Diags.Report(clang::diag::err_drv_omp_host_target_not_supported)
+ << TargetOpts.Triple;
+ break;
+ }
+ }
+ }
+
// Get the OpenMP target triples if any.
- if (Arg *A = Args.getLastArg(options::OPT_omptargets_EQ)) {
+ if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
for (unsigned i = 0; i < A->getNumValues(); ++i) {
llvm::Triple TT(A->getValue(i));
@@ -1832,7 +2094,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
 // Get the OpenMP host IR file path, if any, and report if a nonexistent
 // file is found
- if (Arg *A = Args.getLastArg(options::OPT_omp_host_ir_file_path)) {
+ if (Arg *A = Args.getLastArg(options::OPT_fopenmp_host_ir_file_path)) {
Opts.OMPHostIRFile = A->getValue();
if (!llvm::sys::fs::exists(Opts.OMPHostIRFile))
Diags.Report(clang::diag::err_drv_omp_host_ir_file_not_found)
@@ -1939,10 +2201,6 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
for (const Arg *A : Args.filtered(OPT_chain_include))
Opts.ChainedIncludes.emplace_back(A->getValue());
- // Include 'altivec.h' if -faltivec option present
- if (Args.hasArg(OPT_faltivec))
- Opts.Includes.emplace_back("altivec.h");
-
for (const Arg *A : Args.filtered(OPT_remap_file)) {
std::pair<StringRef, StringRef> Split = StringRef(A->getValue()).split(';');
@@ -2020,9 +2278,24 @@ static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
Opts.UseLineDirectives = Args.hasArg(OPT_fuse_line_directives);
}
-static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args) {
+static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args,
+ DiagnosticsEngine &Diags) {
using namespace options;
Opts.ABI = Args.getLastArgValue(OPT_target_abi);
+ if (Arg *A = Args.getLastArg(OPT_meabi)) {
+ StringRef Value = A->getValue();
+ llvm::EABI EABIVersion = llvm::StringSwitch<llvm::EABI>(Value)
+ .Case("default", llvm::EABI::Default)
+ .Case("4", llvm::EABI::EABI4)
+ .Case("5", llvm::EABI::EABI5)
+ .Case("gnu", llvm::EABI::GNU)
+ .Default(llvm::EABI::Unknown);
+ if (EABIVersion == llvm::EABI::Unknown)
+ Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
+ << Value;
+ else
+ Opts.EABIVersion = Value;
+ }
Opts.CPU = Args.getLastArgValue(OPT_target_cpu);
Opts.FPMath = Args.getLastArgValue(OPT_mfpmath);
Opts.FeaturesAsWritten = Args.getAllArgValues(OPT_target_feature);
@@ -2047,6 +2320,7 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
InputArgList Args =
Opts->ParseArgs(llvm::makeArrayRef(ArgBegin, ArgEnd), MissingArgIndex,
MissingArgCount, IncludedFlagsBitmask);
+ LangOptions &LangOpts = *Res.getLangOpts();
// Check for missing argument error.
if (MissingArgCount) {
@@ -2064,12 +2338,13 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
Success &= ParseAnalyzerArgs(*Res.getAnalyzerOpts(), Args, Diags);
Success &= ParseMigratorArgs(Res.getMigratorOpts(), Args);
ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), Args);
- Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags);
- ParseCommentArgs(Res.getLangOpts()->CommentOpts, Args);
+ Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags,
+ false /*DefaultDiagColor*/);
+ ParseCommentArgs(LangOpts.CommentOpts, Args);
ParseFileSystemArgs(Res.getFileSystemOpts(), Args);
// FIXME: We shouldn't have to pass the DashX option around here
InputKind DashX = ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags);
- ParseTargetArgs(Res.getTargetOpts(), Args);
+ ParseTargetArgs(Res.getTargetOpts(), Args, Diags);
Success &= ParseCodeGenArgs(Res.getCodeGenOpts(), Args, DashX, Diags,
Res.getTargetOpts());
ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args);
@@ -2078,15 +2353,39 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
 // PassManager in BackendUtil.cpp. They need to be initialized no matter
// what the input type is.
if (Args.hasArg(OPT_fobjc_arc))
- Res.getLangOpts()->ObjCAutoRefCount = 1;
+ LangOpts.ObjCAutoRefCount = 1;
+ // PIClevel and PIELevel are needed during code generation and this should be
+ // set regardless of the input type.
+ LangOpts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
+ LangOpts.PIE = Args.hasArg(OPT_pic_is_pie);
parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
- Diags, Res.getLangOpts()->Sanitize);
+ Diags, LangOpts.Sanitize);
} else {
 // Other LangOpts are only initialized when the input is not AST or LLVM IR.
- ParseLangArgs(*Res.getLangOpts(), Args, DashX, Diags);
+ ParseLangArgs(LangOpts, Args, DashX, Res.getTargetOpts(),
+ Res.getPreprocessorOpts(), Diags);
if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC)
- Res.getLangOpts()->ObjCExceptions = 1;
+ LangOpts.ObjCExceptions = 1;
+ }
+
+ if (LangOpts.CUDA) {
+ // During CUDA device-side compilation, the aux triple is the
+ // triple used for host compilation.
+ if (LangOpts.CUDAIsDevice)
+ Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+
+ // Set default FP_CONTRACT to FAST.
+ if (!Args.hasArg(OPT_ffp_contract))
+ Res.getCodeGenOpts().setFPContractMode(CodeGenOptions::FPC_Fast);
}
+
+ // FIXME: Override value name discarding when asan or msan is used because the
+ // backend passes depend on the name of the alloca in order to print out
+ // names.
+ Res.getCodeGenOpts().DiscardValueNames &=
+ !LangOpts.Sanitize.has(SanitizerKind::Address) &&
+ !LangOpts.Sanitize.has(SanitizerKind::Memory);
+
// FIXME: ParsePreprocessorArgs uses the FileManager to read the contents of
// PCH file and find the original header name. Remove the need to do that in
// ParsePreprocessorArgs and remove the FileManager
@@ -2098,59 +2397,6 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
return Success;
}
-namespace {
-
- class ModuleSignature {
- SmallVector<uint64_t, 16> Data;
- unsigned CurBit;
- uint64_t CurValue;
-
- public:
- ModuleSignature() : CurBit(0), CurValue(0) { }
-
- void add(uint64_t Value, unsigned Bits);
- void add(StringRef Value);
- void flush();
-
- llvm::APInt getAsInteger() const;
- };
-}
-
-void ModuleSignature::add(uint64_t Value, unsigned int NumBits) {
- CurValue |= Value << CurBit;
- if (CurBit + NumBits < 64) {
- CurBit += NumBits;
- return;
- }
-
- // Add the current word.
- Data.push_back(CurValue);
-
- if (CurBit)
- CurValue = Value >> (64-CurBit);
- else
- CurValue = 0;
- CurBit = (CurBit+NumBits) & 63;
-}
-
-void ModuleSignature::flush() {
- if (CurBit == 0)
- return;
-
- Data.push_back(CurValue);
- CurBit = 0;
- CurValue = 0;
-}
-
-void ModuleSignature::add(StringRef Value) {
- for (auto &c : Value)
- add(c, 8);
-}
-
-llvm::APInt ModuleSignature::getAsInteger() const {
- return llvm::APInt(Data.size() * 64, Data);
-}
-
std::string CompilerInvocation::getModuleHash() const {
// Note: For QoI reasons, the things we use as a hash here should all be
// dumped via the -module-info flag.
@@ -2217,7 +2463,7 @@ std::string CompilerInvocation::getModuleHash() const {
// Extend the signature with the module file extensions.
const FrontendOptions &frontendOpts = getFrontendOpts();
- for (auto ext : frontendOpts.ModuleFileExtensions) {
+ for (const auto &ext : frontendOpts.ModuleFileExtensions) {
code = ext->hashExtension(code);
}
@@ -2309,8 +2555,8 @@ createVFSFromCompilerInvocation(const CompilerInvocation &CI,
return IntrusiveRefCntPtr<vfs::FileSystem>();
}
- IntrusiveRefCntPtr<vfs::FileSystem> FS =
- vfs::getVFSFromYAML(std::move(Buffer.get()), /*DiagHandler*/ nullptr);
+ IntrusiveRefCntPtr<vfs::FileSystem> FS = vfs::getVFSFromYAML(
+ std::move(Buffer.get()), /*DiagHandler*/ nullptr, File);
if (!FS.get()) {
Diags.Report(diag::err_invalid_vfs_overlay) << File;
return IntrusiveRefCntPtr<vfs::FileSystem>();
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
index 301916422564..1e9e57afb6bd 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
@@ -60,25 +60,25 @@ clang::createInvocationFromCommandLine(ArrayRef<const char *> ArgList,
}
// We expect to get back exactly one command job, if we didn't something
- // failed. CUDA compilation is an exception as it creates multiple jobs. If
- // that's the case, we proceed with the first job. If caller needs particular
- // CUDA job, it should be controlled via --cuda-{host|device}-only option
- // passed to the driver.
+ // failed. Offload compilation is an exception as it creates multiple jobs. If
+ // that's the case, we proceed with the first job. If the caller needs a
+ // particular job, it should be controlled via options (e.g.
+ // --cuda-{host|device}-only for CUDA) passed to the driver.
const driver::JobList &Jobs = C->getJobs();
- bool CudaCompilation = false;
+ bool OffloadCompilation = false;
if (Jobs.size() > 1) {
for (auto &A : C->getActions()){
// On MacOSX real actions may end up being wrapped in BindArchAction
if (isa<driver::BindArchAction>(A))
- A = *A->begin();
- if (isa<driver::CudaDeviceAction>(A)) {
- CudaCompilation = true;
+ A = *A->input_begin();
+ if (isa<driver::OffloadAction>(A)) {
+ OffloadCompilation = true;
break;
}
}
}
if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) ||
- (Jobs.size() > 1 && !CudaCompilation)) {
+ (Jobs.size() > 1 && !OffloadCompilation)) {
SmallString<256> Msg;
llvm::raw_svector_ostream OS(Msg);
Jobs.Print(OS, "; ", true);
diff --git a/contrib/llvm/tools/clang/lib/Frontend/DependencyFile.cpp b/contrib/llvm/tools/clang/lib/Frontend/DependencyFile.cpp
index 93d4a8034696..a9b61282378d 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/DependencyFile.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/DependencyFile.cpp
@@ -177,7 +177,7 @@ public:
SeenMissingHeader(false),
IncludeModuleFiles(Opts.IncludeModuleFiles),
OutputFormat(Opts.OutputFormat) {
- for (auto ExtraDep : Opts.ExtraDeps) {
+ for (const auto &ExtraDep : Opts.ExtraDeps) {
AddFilename(ExtraDep);
}
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/DiagnosticRenderer.cpp b/contrib/llvm/tools/clang/lib/Frontend/DiagnosticRenderer.cpp
index caf1f0dce99f..586d2e6167b3 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/DiagnosticRenderer.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/DiagnosticRenderer.cpp
@@ -23,48 +23,6 @@
#include <algorithm>
using namespace clang;
-/// \brief Retrieve the name of the immediate macro expansion.
-///
-/// This routine starts from a source location, and finds the name of the macro
-/// responsible for its immediate expansion. It looks through any intervening
-/// macro argument expansions to compute this. It returns a StringRef which
-/// refers to the SourceManager-owned buffer of the source where that macro
-/// name is spelled. Thus, the result shouldn't out-live that SourceManager.
-///
-/// This differs from Lexer::getImmediateMacroName in that any macro argument
-/// location will result in the topmost function macro that accepted it.
-/// e.g.
-/// \code
-/// MAC1( MAC2(foo) )
-/// \endcode
-/// for location of 'foo' token, this function will return "MAC1" while
-/// Lexer::getImmediateMacroName will return "MAC2".
-static StringRef getImmediateMacroName(SourceLocation Loc,
- const SourceManager &SM,
- const LangOptions &LangOpts) {
- assert(Loc.isMacroID() && "Only reasonble to call this on macros");
- // Walk past macro argument expanions.
- while (SM.isMacroArgExpansion(Loc))
- Loc = SM.getImmediateExpansionRange(Loc).first;
-
- // If the macro's spelling has no FileID, then it's actually a token paste
- // or stringization (or similar) and not a macro at all.
- if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc))))
- return StringRef();
-
- // Find the spelling location of the start of the non-argument expansion
- // range. This is where the macro name was spelled in order to begin
- // expanding this macro.
- Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).first);
-
- // Dig out the buffer where the macro name was spelled and the extents of the
- // name so that we can render it into the expansion note.
- std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
- unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
- StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
- return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
-}
-
DiagnosticRenderer::DiagnosticRenderer(const LangOptions &LangOpts,
DiagnosticOptions *DiagOpts)
: LangOpts(LangOpts), DiagOpts(DiagOpts), LastLevel() {}
@@ -209,7 +167,8 @@ void DiagnosticRenderer::emitIncludeStack(SourceLocation Loc,
PresumedLoc PLoc,
DiagnosticsEngine::Level Level,
const SourceManager &SM) {
- SourceLocation IncludeLoc = PLoc.getIncludeLoc();
+ SourceLocation IncludeLoc =
+ PLoc.isInvalid() ? SourceLocation() : PLoc.getIncludeLoc();
// Skip redundant include stacks altogether.
if (LastIncludeLoc == IncludeLoc)
@@ -474,7 +433,8 @@ void DiagnosticRenderer::emitSingleMacroExpansion(
SmallString<100> MessageStorage;
llvm::raw_svector_ostream Message(MessageStorage);
- StringRef MacroName = getImmediateMacroName(Loc, SM, LangOpts);
+ StringRef MacroName =
+ Lexer::getImmediateMacroNameForDiagnostics(Loc, SM, LangOpts);
if (MacroName.empty())
Message << "expanded from here";
else
@@ -658,7 +618,7 @@ DiagnosticNoteRenderer::emitBuildingModuleLocation(SourceLocation Loc,
// Generate a note indicating the include location.
SmallString<200> MessageStorage;
llvm::raw_svector_ostream Message(MessageStorage);
- if (PLoc.getFilename())
+ if (PLoc.isValid())
Message << "while building module '" << ModuleName << "' imported from "
<< PLoc.getFilename() << ':' << PLoc.getLine() << ":";
else
diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp
index ecef92e0a7dd..d514d406d8b6 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp
@@ -141,28 +141,46 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!Consumer)
return nullptr;
- if (CI.getFrontendOpts().AddPluginActions.size() == 0)
+ // If there are no registered plugins, we don't need to wrap the consumer.
+ if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
return Consumer;
- // Make sure the non-plugin consumer is first, so that plugins can't
- // modifiy the AST.
+ // Collect the list of plugins that go before the main action (in Consumers)
+ // or after it (in AfterConsumers).
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
- Consumers.push_back(std::move(Consumer));
-
- for (size_t i = 0, e = CI.getFrontendOpts().AddPluginActions.size();
- i != e; ++i) {
- // This is O(|plugins| * |add_plugins|), but since both numbers are
- // way below 50 in practice, that's ok.
- for (FrontendPluginRegistry::iterator
- it = FrontendPluginRegistry::begin(),
- ie = FrontendPluginRegistry::end();
- it != ie; ++it) {
- if (it->getName() != CI.getFrontendOpts().AddPluginActions[i])
- continue;
- std::unique_ptr<PluginASTAction> P = it->instantiate();
- if (P->ParseArgs(CI, CI.getFrontendOpts().AddPluginArgs[i]))
- Consumers.push_back(P->CreateASTConsumer(CI, InFile));
+ std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
+ for (FrontendPluginRegistry::iterator it = FrontendPluginRegistry::begin(),
+ ie = FrontendPluginRegistry::end();
+ it != ie; ++it) {
+ std::unique_ptr<PluginASTAction> P = it->instantiate();
+ PluginASTAction::ActionType ActionType = P->getActionType();
+ if (ActionType == PluginASTAction::Cmdline) {
+ // This is O(|plugins| * |add_plugins|), but since both numbers are
+ // way below 50 in practice, that's ok.
+ for (size_t i = 0, e = CI.getFrontendOpts().AddPluginActions.size();
+ i != e; ++i) {
+ if (it->getName() == CI.getFrontendOpts().AddPluginActions[i]) {
+ ActionType = PluginASTAction::AddAfterMainAction;
+ break;
+ }
+ }
}
+ if ((ActionType == PluginASTAction::AddBeforeMainAction ||
+ ActionType == PluginASTAction::AddAfterMainAction) &&
+ P->ParseArgs(CI, CI.getFrontendOpts().PluginArgs[it->getName()])) {
+ std::unique_ptr<ASTConsumer> PluginConsumer = P->CreateASTConsumer(CI, InFile);
+ if (ActionType == PluginASTAction::AddBeforeMainAction) {
+ Consumers.push_back(std::move(PluginConsumer));
+ } else {
+ AfterConsumers.push_back(std::move(PluginConsumer));
+ }
+ }
+ }
+
+ // Add the main consumer to Consumers, then all the plugins that go after it.
+ Consumers.push_back(std::move(Consumer));
+ for (auto &C : AfterConsumers) {
+ Consumers.push_back(std::move(C));
}
return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
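A hypothetical plugin sketch (the name is illustrative) showing the getActionType() hook this code consults; returning AddAfterMainAction makes the plugin's consumer run after the main action without any -add-plugin flag:

#include "clang/AST/ASTConsumer.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include <memory>
#include <string>
#include <vector>

namespace {
class NopAction : public clang::PluginASTAction {
public:
  std::unique_ptr<clang::ASTConsumer>
  CreateASTConsumer(clang::CompilerInstance &, llvm::StringRef) override {
    return llvm::make_unique<clang::ASTConsumer>(); // no-op consumer
  }
  bool ParseArgs(const clang::CompilerInstance &,
                 const std::vector<std::string> &) override {
    return true;
  }
  // Run automatically, after the main AST action's consumer.
  ActionType getActionType() override { return AddAfterMainAction; }
};
} // namespace

static clang::FrontendPluginRegistry::Add<NopAction>
    X("nop-example", "illustrative no-op plugin");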
@@ -559,7 +577,10 @@ bool WrapperFrontendAction::BeginSourceFileAction(CompilerInstance &CI,
StringRef Filename) {
WrappedAction->setCurrentInput(getCurrentInput());
WrappedAction->setCompilerInstance(&CI);
- return WrappedAction->BeginSourceFileAction(CI, Filename);
+ auto Ret = WrappedAction->BeginSourceFileAction(CI, Filename);
+ // BeginSourceFileAction may change CurrentInput, e.g. during module builds.
+ setCurrentInput(WrappedAction->getCurrentInput());
+ return Ret;
}
void WrapperFrontendAction::ExecuteAction() {
WrappedAction->ExecuteAction();
@@ -587,6 +608,7 @@ bool WrapperFrontendAction::hasCodeCompletionSupport() const {
return WrappedAction->hasCodeCompletionSupport();
}
-WrapperFrontendAction::WrapperFrontendAction(FrontendAction *WrappedAction)
- : WrappedAction(WrappedAction) {}
+WrapperFrontendAction::WrapperFrontendAction(
+ std::unique_ptr<FrontendAction> WrappedAction)
+ : WrappedAction(std::move(WrappedAction)) {}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp
index 407ccea2e7d1..b1e806add8cc 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendActions.cpp
@@ -48,8 +48,9 @@ void InitOnlyAction::ExecuteAction() {
std::unique_ptr<ASTConsumer>
ASTPrintAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
- if (raw_ostream *OS = CI.createDefaultOutputFile(false, InFile))
- return CreateASTPrinter(OS, CI.getFrontendOpts().ASTDumpFilter);
+ if (std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(false, InFile))
+ return CreateASTPrinter(std::move(OS), CI.getFrontendOpts().ASTDumpFilter);
return nullptr;
}
@@ -80,7 +81,7 @@ std::unique_ptr<ASTConsumer>
GeneratePCHAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::string Sysroot;
std::string OutputFile;
- raw_pwrite_stream *OS =
+ std::unique_ptr<raw_pwrite_stream> OS =
ComputeASTConsumerArguments(CI, InFile, Sysroot, OutputFile);
if (!OS)
return nullptr;
@@ -92,16 +93,21 @@ GeneratePCHAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
Consumers.push_back(llvm::make_unique<PCHGenerator>(
CI.getPreprocessor(), OutputFile, nullptr, Sysroot,
- Buffer, CI.getFrontendOpts().ModuleFileExtensions));
+ Buffer, CI.getFrontendOpts().ModuleFileExtensions,
+ /*AllowASTWithErrors*/false,
+ /*IncludeTimestamps*/
+ +CI.getFrontendOpts().IncludeTimestamps));
Consumers.push_back(CI.getPCHContainerWriter().CreatePCHContainerGenerator(
- CI, InFile, OutputFile, OS, Buffer));
+ CI, InFile, OutputFile, std::move(OS), Buffer));
return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
}
-raw_pwrite_stream *GeneratePCHAction::ComputeASTConsumerArguments(
- CompilerInstance &CI, StringRef InFile, std::string &Sysroot,
- std::string &OutputFile) {
+std::unique_ptr<raw_pwrite_stream>
+GeneratePCHAction::ComputeASTConsumerArguments(CompilerInstance &CI,
+ StringRef InFile,
+ std::string &Sysroot,
+ std::string &OutputFile) {
Sysroot = CI.getHeaderSearchOpts().Sysroot;
if (CI.getFrontendOpts().RelocatablePCH && Sysroot.empty()) {
CI.getDiagnostics().Report(diag::err_relocatable_without_isysroot);
@@ -111,7 +117,7 @@ raw_pwrite_stream *GeneratePCHAction::ComputeASTConsumerArguments(
// We use createOutputFile here because this is exposed via libclang, and we
// must disable the RemoveFileOnSignal behavior.
// We use a temporary to avoid race conditions.
- raw_pwrite_stream *OS =
+ std::unique_ptr<raw_pwrite_stream> OS =
CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true,
/*RemoveFileOnSignal=*/false, InFile,
/*Extension=*/"", /*useTemporary=*/true);
@@ -127,7 +133,7 @@ GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {
std::string Sysroot;
std::string OutputFile;
- raw_pwrite_stream *OS =
+ std::unique_ptr<raw_pwrite_stream> OS =
ComputeASTConsumerArguments(CI, InFile, Sysroot, OutputFile);
if (!OS)
return nullptr;
@@ -142,7 +148,7 @@ GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
/*IncludeTimestamps=*/
+CI.getFrontendOpts().BuildingImplicitModule));
Consumers.push_back(CI.getPCHContainerWriter().CreatePCHContainerGenerator(
- CI, InFile, OutputFile, OS, Buffer));
+ CI, InFile, OutputFile, std::move(OS), Buffer));
return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
}
@@ -152,10 +158,10 @@ operator+=(SmallVectorImpl<char> &Includes, StringRef RHS) {
return Includes;
}
-static std::error_code addHeaderInclude(StringRef HeaderName,
- SmallVectorImpl<char> &Includes,
- const LangOptions &LangOpts,
- bool IsExternC) {
+static void addHeaderInclude(StringRef HeaderName,
+ SmallVectorImpl<char> &Includes,
+ const LangOptions &LangOpts,
+ bool IsExternC) {
if (IsExternC && LangOpts.CPlusPlus)
Includes += "extern \"C\" {\n";
if (LangOpts.ObjC1)
@@ -168,7 +174,6 @@ static std::error_code addHeaderInclude(StringRef HeaderName,
Includes += "\"\n";
if (IsExternC && LangOpts.CPlusPlus)
Includes += "}\n";
- return std::error_code();
}
/// \brief Collect the set of header includes needed to construct the given
@@ -194,38 +199,34 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
// file relative to the module build directory (the directory containing
// the module map file) so this will find the same file that we found
// while parsing the module map.
- if (std::error_code Err = addHeaderInclude(H.NameAsWritten, Includes,
- LangOpts, Module->IsExternC))
- return Err;
+ addHeaderInclude(H.NameAsWritten, Includes, LangOpts, Module->IsExternC);
}
}
// Note that Module->PrivateHeaders will not be a TopHeader.
if (Module::Header UmbrellaHeader = Module->getUmbrellaHeader()) {
Module->addTopHeader(UmbrellaHeader.Entry);
- if (Module->Parent) {
+ if (Module->Parent)
// Include the umbrella header for submodules.
- if (std::error_code Err = addHeaderInclude(UmbrellaHeader.NameAsWritten,
- Includes, LangOpts,
- Module->IsExternC))
- return Err;
- }
+ addHeaderInclude(UmbrellaHeader.NameAsWritten, Includes, LangOpts,
+ Module->IsExternC);
} else if (Module::DirectoryName UmbrellaDir = Module->getUmbrellaDir()) {
// Add all of the headers we find in this subdirectory.
std::error_code EC;
SmallString<128> DirNative;
llvm::sys::path::native(UmbrellaDir.Entry->getName(), DirNative);
- for (llvm::sys::fs::recursive_directory_iterator Dir(DirNative, EC),
- DirEnd;
- Dir != DirEnd && !EC; Dir.increment(EC)) {
+
+ vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
+ for (vfs::recursive_directory_iterator Dir(FS, DirNative, EC), End;
+ Dir != End && !EC; Dir.increment(EC)) {
// Check whether this entry has an extension typically associated with
// headers.
- if (!llvm::StringSwitch<bool>(llvm::sys::path::extension(Dir->path()))
+ if (!llvm::StringSwitch<bool>(llvm::sys::path::extension(Dir->getName()))
.Cases(".h", ".H", ".hh", ".hpp", true)
.Default(false))
continue;
- const FileEntry *Header = FileMgr.getFile(Dir->path());
+ const FileEntry *Header = FileMgr.getFile(Dir->getName());
// FIXME: This shouldn't happen unless there is a file system race. Is
// that worth diagnosing?
if (!Header)
@@ -238,7 +239,7 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
// Compute the relative path from the directory to this file.
SmallVector<StringRef, 16> Components;
- auto PathIt = llvm::sys::path::rbegin(Dir->path());
+ auto PathIt = llvm::sys::path::rbegin(Dir->getName());
for (int I = 0; I != Dir.level() + 1; ++I, ++PathIt)
Components.push_back(*PathIt);
SmallString<128> RelativeHeader(UmbrellaDir.NameAsWritten);
@@ -248,9 +249,7 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
// Include this header as part of the umbrella directory.
Module->addTopHeader(Header);
- if (std::error_code Err = addHeaderInclude(RelativeHeader, Includes,
- LangOpts, Module->IsExternC))
- return Err;
+ addHeaderInclude(RelativeHeader, Includes, LangOpts, Module->IsExternC);
}
if (EC)
@@ -270,6 +269,8 @@ collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr,
bool GenerateModuleAction::BeginSourceFileAction(CompilerInstance &CI,
StringRef Filename) {
+ CI.getLangOpts().CompilingModule = true;
+
// Find the module map file.
const FileEntry *ModuleMap =
CI.getFileManager().getFile(Filename, /*openFile*/true);
@@ -354,10 +355,9 @@ bool GenerateModuleAction::BeginSourceFileAction(CompilerInstance &CI,
SmallString<256> HeaderContents;
std::error_code Err = std::error_code();
if (Module::Header UmbrellaHeader = Module->getUmbrellaHeader())
- Err = addHeaderInclude(UmbrellaHeader.NameAsWritten, HeaderContents,
- CI.getLangOpts(), Module->IsExternC);
- if (!Err)
- Err = collectModuleHeaderIncludes(
+ addHeaderInclude(UmbrellaHeader.NameAsWritten, HeaderContents,
+ CI.getLangOpts(), Module->IsExternC);
+ Err = collectModuleHeaderIncludes(
CI.getLangOpts(), FileMgr,
CI.getPreprocessor().getHeaderSearchInfo().getModuleMap(), Module,
HeaderContents);
@@ -381,9 +381,11 @@ bool GenerateModuleAction::BeginSourceFileAction(CompilerInstance &CI,
return true;
}
-raw_pwrite_stream *GenerateModuleAction::ComputeASTConsumerArguments(
- CompilerInstance &CI, StringRef InFile, std::string &Sysroot,
- std::string &OutputFile) {
+std::unique_ptr<raw_pwrite_stream>
+GenerateModuleAction::ComputeASTConsumerArguments(CompilerInstance &CI,
+ StringRef InFile,
+ std::string &Sysroot,
+ std::string &OutputFile) {
// If no output file was provided, figure out where this module would go
// in the module cache.
if (CI.getFrontendOpts().OutputFile.empty()) {
@@ -396,7 +398,7 @@ raw_pwrite_stream *GenerateModuleAction::ComputeASTConsumerArguments(
// We use createOutputFile here because this is exposed via libclang, and we
// must disable the RemoveFileOnSignal behavior.
// We use a temporary to avoid race conditions.
- raw_pwrite_stream *OS =
+ std::unique_ptr<raw_pwrite_stream> OS =
CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true,
/*RemoveFileOnSignal=*/false, InFile,
/*Extension=*/"", /*useTemporary=*/true,
@@ -408,6 +410,9 @@ raw_pwrite_stream *GenerateModuleAction::ComputeASTConsumerArguments(
return OS;
}
+SyntaxOnlyAction::~SyntaxOnlyAction() {
+}
+
std::unique_ptr<ASTConsumer>
SyntaxOnlyAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
return llvm::make_unique<ASTConsumer>();
@@ -647,11 +652,12 @@ void DumpTokensAction::ExecuteAction() {
void GeneratePTHAction::ExecuteAction() {
CompilerInstance &CI = getCompilerInstance();
- raw_pwrite_stream *OS = CI.createDefaultOutputFile(true, getCurrentFile());
+ std::unique_ptr<raw_pwrite_stream> OS =
+ CI.createDefaultOutputFile(true, getCurrentFile());
if (!OS)
return;
- CacheTokens(CI.getPreprocessor(), OS);
+ CacheTokens(CI.getPreprocessor(), OS.get());
}
void PreprocessOnlyAction::ExecuteAction() {
@@ -707,14 +713,16 @@ void PrintPreprocessedAction::ExecuteAction() {
} else if (*cur == 0x0A) // LF
break;
- ++cur, ++next;
+ ++cur;
+ ++next;
}
}
- raw_ostream *OS = CI.createDefaultOutputFile(BinaryMode, getCurrentFile());
+ std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(BinaryMode, getCurrentFile());
if (!OS) return;
- DoPrintPreprocessedInput(CI.getPreprocessor(), OS,
+ DoPrintPreprocessedInput(CI.getPreprocessor(), OS.get(),
CI.getPreprocessorOutputOpts());
}
@@ -737,6 +745,7 @@ void PrintPreambleAction::ExecuteAction() {
case IK_PreprocessedObjCXX:
case IK_AST:
case IK_LLVM_IR:
+ case IK_RenderScript:
// We can't do anything with these.
return;
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/HeaderIncludeGen.cpp b/contrib/llvm/tools/clang/lib/Frontend/HeaderIncludeGen.cpp
index 0bc1169ba0a9..5bff4ecd0b46 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/HeaderIncludeGen.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/HeaderIncludeGen.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/Frontend/DependencyOutputOptions.h"
#include "clang/Frontend/Utils.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/FrontendDiagnostic.h"
@@ -19,6 +20,7 @@ namespace {
class HeaderIncludesCallback : public PPCallbacks {
SourceManager &SM;
raw_ostream *OutputFile;
+ const DependencyOutputOptions &DepOpts;
unsigned CurrentIncludeDepth;
bool HasProcessedPredefines;
bool OwnsOutputFile;
@@ -28,12 +30,13 @@ class HeaderIncludesCallback : public PPCallbacks {
public:
HeaderIncludesCallback(const Preprocessor *PP, bool ShowAllHeaders_,
- raw_ostream *OutputFile_, bool OwnsOutputFile_,
- bool ShowDepth_, bool MSStyle_)
- : SM(PP->getSourceManager()), OutputFile(OutputFile_),
- CurrentIncludeDepth(0), HasProcessedPredefines(false),
- OwnsOutputFile(OwnsOutputFile_), ShowAllHeaders(ShowAllHeaders_),
- ShowDepth(ShowDepth_), MSStyle(MSStyle_) {}
+ raw_ostream *OutputFile_,
+ const DependencyOutputOptions &DepOpts,
+ bool OwnsOutputFile_, bool ShowDepth_, bool MSStyle_)
+ : SM(PP->getSourceManager()), OutputFile(OutputFile_), DepOpts(DepOpts),
+ CurrentIncludeDepth(0), HasProcessedPredefines(false),
+ OwnsOutputFile(OwnsOutputFile_), ShowAllHeaders(ShowAllHeaders_),
+ ShowDepth(ShowDepth_), MSStyle(MSStyle_) {}
~HeaderIncludesCallback() override {
if (OwnsOutputFile)
@@ -46,38 +49,37 @@ public:
};
}
-static void PrintHeaderInfo(raw_ostream *OutputFile, const char* Filename,
+static void PrintHeaderInfo(raw_ostream *OutputFile, StringRef Filename,
bool ShowDepth, unsigned CurrentIncludeDepth,
bool MSStyle) {
- // Write to a temporary string to avoid unnecessary flushing on errs().
- SmallString<512> Pathname(Filename);
- if (!MSStyle)
- Lexer::Stringify(Pathname);
+ // Write to a temporary string to avoid unnecessary flushing on errs().
+ SmallString<512> Pathname(Filename);
+ if (!MSStyle)
+ Lexer::Stringify(Pathname);
- SmallString<256> Msg;
- if (MSStyle)
- Msg += "Note: including file:";
+ SmallString<256> Msg;
+ if (MSStyle)
+ Msg += "Note: including file:";
- if (ShowDepth) {
- // The main source file is at depth 1, so skip one dot.
- for (unsigned i = 1; i != CurrentIncludeDepth; ++i)
- Msg += MSStyle ? ' ' : '.';
+ if (ShowDepth) {
+ // The main source file is at depth 1, so skip one dot.
+ for (unsigned i = 1; i != CurrentIncludeDepth; ++i)
+ Msg += MSStyle ? ' ' : '.';
- if (!MSStyle)
- Msg += ' ';
- }
- Msg += Pathname;
- Msg += '\n';
+ if (!MSStyle)
+ Msg += ' ';
+ }
+ Msg += Pathname;
+ Msg += '\n';
- OutputFile->write(Msg.data(), Msg.size());
- OutputFile->flush();
+ *OutputFile << Msg;
+ OutputFile->flush();
}
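A standalone sketch of the output this produces, with ShowDepth folded in and the main source file at depth 1 (so a directly included header gets one dot):

#include <iostream>
#include <string>

static std::string prefix(unsigned Depth, bool MSStyle) {
  std::string Msg = MSStyle ? "Note: including file:" : "";
  // The main source file is at depth 1, so skip one dot.
  for (unsigned i = 1; i != Depth; ++i)
    Msg += MSStyle ? ' ' : '.';
  if (!MSStyle)
    Msg += ' ';
  return Msg;
}

int main() {
  std::cout << prefix(2, false) << "a.h\n"; // ". a.h"
  std::cout << prefix(3, false) << "b.h\n"; // ".. b.h"
}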
void clang::AttachHeaderIncludeGen(Preprocessor &PP,
- const std::vector<std::string> &ExtraHeaders,
- bool ShowAllHeaders,
- StringRef OutputPath, bool ShowDepth,
- bool MSStyle) {
+ const DependencyOutputOptions &DepOpts,
+ bool ShowAllHeaders, StringRef OutputPath,
+ bool ShowDepth, bool MSStyle) {
raw_ostream *OutputFile = MSStyle ? &llvm::outs() : &llvm::errs();
bool OwnsOutputFile = false;
@@ -97,20 +99,16 @@ void clang::AttachHeaderIncludeGen(Preprocessor &PP,
}
}
- // Print header info for extra headers, pretending they were discovered
- // by the regular preprocessor. The primary use case is to support
- // proper generation of Make / Ninja file dependencies for implicit includes,
- // such as sanitizer blacklists. It's only important for cl.exe
- // compatibility, the GNU way to generate rules is -M / -MM / -MD / -MMD.
- for (auto Header : ExtraHeaders) {
- PrintHeaderInfo(OutputFile, Header.c_str(), ShowDepth, 2, MSStyle);
- }
- PP.addPPCallbacks(llvm::make_unique<HeaderIncludesCallback>(&PP,
- ShowAllHeaders,
- OutputFile,
- OwnsOutputFile,
- ShowDepth,
- MSStyle));
+ // Print header info for extra headers, pretending they were discovered by
+ // the regular preprocessor. The primary use case is to support proper
+ // generation of Make / Ninja file dependencies for implicit includes, such
+ // as sanitizer blacklists. It's only important for cl.exe compatibility;
+ // the GNU way to generate rules is -M / -MM / -MD / -MMD.
+ for (const auto &Header : DepOpts.ExtraDeps)
+ PrintHeaderInfo(OutputFile, Header, ShowDepth, 2, MSStyle);
+ PP.addPPCallbacks(llvm::make_unique<HeaderIncludesCallback>(
+ &PP, ShowAllHeaders, OutputFile, DepOpts, OwnsOutputFile, ShowDepth,
+ MSStyle));
}
void HeaderIncludesCallback::FileChanged(SourceLocation Loc,
@@ -132,8 +130,13 @@ void HeaderIncludesCallback::FileChanged(SourceLocation Loc,
// We track when we are done with the predefines by watching for the first
// place where we drop back to a nesting depth of 1.
- if (CurrentIncludeDepth == 1 && !HasProcessedPredefines)
+ if (CurrentIncludeDepth == 1 && !HasProcessedPredefines) {
+ if (!DepOpts.ShowIncludesPretendHeader.empty()) {
+ PrintHeaderInfo(OutputFile, DepOpts.ShowIncludesPretendHeader,
+ ShowDepth, 2, MSStyle);
+ }
HasProcessedPredefines = true;
+ }
return;
} else
@@ -144,11 +147,20 @@ void HeaderIncludesCallback::FileChanged(SourceLocation Loc,
// line buffers.
bool ShowHeader = (HasProcessedPredefines ||
(ShowAllHeaders && CurrentIncludeDepth > 2));
+ unsigned IncludeDepth = CurrentIncludeDepth;
+ if (!HasProcessedPredefines)
+ --IncludeDepth; // Ignore indent from <built-in>.
+ else if (!DepOpts.ShowIncludesPretendHeader.empty())
+ ++IncludeDepth; // Pretend inclusion by ShowIncludesPretendHeader.
 // Dump the header include information if we are past the predefines buffer or
- // are showing all headers.
- if (ShowHeader && Reason == PPCallbacks::EnterFile) {
- PrintHeaderInfo(OutputFile, UserLoc.getFilename(),
- ShowDepth, CurrentIncludeDepth, MSStyle);
+ // are showing all headers and this isn't the magic implicit <command line>
+ // header.
+ // FIXME: Identify headers in a more robust way than comparing their name to
+ // "<command line>" and "<built-in>" in a bunch of places.
+ if (ShowHeader && Reason == PPCallbacks::EnterFile &&
+ UserLoc.getFilename() != StringRef("<command line>")) {
+ PrintHeaderInfo(OutputFile, UserLoc.getFilename(), ShowDepth, IncludeDepth,
+ MSStyle);
}
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitHeaderSearch.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitHeaderSearch.cpp
index 26bab0db5347..1b5c760f01b5 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/InitHeaderSearch.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/InitHeaderSearch.cpp
@@ -267,38 +267,39 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
}
case llvm::Triple::Haiku:
- AddPath("/boot/common/include", System, false);
- AddPath("/boot/develop/headers/os", System, false);
- AddPath("/boot/develop/headers/os/app", System, false);
- AddPath("/boot/develop/headers/os/arch", System, false);
- AddPath("/boot/develop/headers/os/device", System, false);
- AddPath("/boot/develop/headers/os/drivers", System, false);
- AddPath("/boot/develop/headers/os/game", System, false);
- AddPath("/boot/develop/headers/os/interface", System, false);
- AddPath("/boot/develop/headers/os/kernel", System, false);
- AddPath("/boot/develop/headers/os/locale", System, false);
- AddPath("/boot/develop/headers/os/mail", System, false);
- AddPath("/boot/develop/headers/os/media", System, false);
- AddPath("/boot/develop/headers/os/midi", System, false);
- AddPath("/boot/develop/headers/os/midi2", System, false);
- AddPath("/boot/develop/headers/os/net", System, false);
- AddPath("/boot/develop/headers/os/storage", System, false);
- AddPath("/boot/develop/headers/os/support", System, false);
- AddPath("/boot/develop/headers/os/translation", System, false);
- AddPath("/boot/develop/headers/os/add-ons/graphics", System, false);
- AddPath("/boot/develop/headers/os/add-ons/input_server", System, false);
- AddPath("/boot/develop/headers/os/add-ons/screen_saver", System, false);
- AddPath("/boot/develop/headers/os/add-ons/tracker", System, false);
- AddPath("/boot/develop/headers/os/be_apps/Deskbar", System, false);
- AddPath("/boot/develop/headers/os/be_apps/NetPositive", System, false);
- AddPath("/boot/develop/headers/os/be_apps/Tracker", System, false);
- AddPath("/boot/develop/headers/cpp", System, false);
- AddPath("/boot/develop/headers/cpp/i586-pc-haiku", System, false);
- AddPath("/boot/develop/headers/3rdparty", System, false);
- AddPath("/boot/develop/headers/bsd", System, false);
- AddPath("/boot/develop/headers/glibc", System, false);
- AddPath("/boot/develop/headers/posix", System, false);
- AddPath("/boot/develop/headers", System, false);
+ AddPath("/boot/system/non-packaged/develop/headers", System, false);
+ AddPath("/boot/system/develop/headers/os", System, false);
+ AddPath("/boot/system/develop/headers/os/app", System, false);
+ AddPath("/boot/system/develop/headers/os/arch", System, false);
+ AddPath("/boot/system/develop/headers/os/device", System, false);
+ AddPath("/boot/system/develop/headers/os/drivers", System, false);
+ AddPath("/boot/system/develop/headers/os/game", System, false);
+ AddPath("/boot/system/develop/headers/os/interface", System, false);
+ AddPath("/boot/system/develop/headers/os/kernel", System, false);
+ AddPath("/boot/system/develop/headers/os/locale", System, false);
+ AddPath("/boot/system/develop/headers/os/mail", System, false);
+ AddPath("/boot/system/develop/headers/os/media", System, false);
+ AddPath("/boot/system/develop/headers/os/midi", System, false);
+ AddPath("/boot/system/develop/headers/os/midi2", System, false);
+ AddPath("/boot/system/develop/headers/os/net", System, false);
+ AddPath("/boot/system/develop/headers/os/opengl", System, false);
+ AddPath("/boot/system/develop/headers/os/storage", System, false);
+ AddPath("/boot/system/develop/headers/os/support", System, false);
+ AddPath("/boot/system/develop/headers/os/translation", System, false);
+ AddPath("/boot/system/develop/headers/os/add-ons/graphics", System, false);
+ AddPath("/boot/system/develop/headers/os/add-ons/input_server", System, false);
+ AddPath("/boot/system/develop/headers/os/add-ons/mail_daemon", System, false);
+ AddPath("/boot/system/develop/headers/os/add-ons/registrar", System, false);
+ AddPath("/boot/system/develop/headers/os/add-ons/screen_saver", System, false);
+ AddPath("/boot/system/develop/headers/os/add-ons/tracker", System, false);
+ AddPath("/boot/system/develop/headers/os/be_apps/Deskbar", System, false);
+ AddPath("/boot/system/develop/headers/os/be_apps/NetPositive", System, false);
+ AddPath("/boot/system/develop/headers/os/be_apps/Tracker", System, false);
+ AddPath("/boot/system/develop/headers/3rdparty", System, false);
+ AddPath("/boot/system/develop/headers/bsd", System, false);
+ AddPath("/boot/system/develop/headers/glibc", System, false);
+ AddPath("/boot/system/develop/headers/posix", System, false);
+ AddPath("/boot/system/develop/headers", System, false);
break;
case llvm::Triple::RTEMS:
break;
@@ -326,7 +327,7 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
// <isysroot> gets prepended later in AddPath().
std::string BaseSDKPath = "";
if (!HasSysroot) {
- const char *envValue = getenv("SCE_PS4_SDK_DIR");
+ const char *envValue = getenv("SCE_ORBIS_SDK_DIR");
if (envValue)
BaseSDKPath = envValue;
else {
diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp
index 15aa54607ced..6b93c697d9b1 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp
@@ -408,6 +408,39 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
if (LangOpts.ObjC1)
Builder.defineMacro("__OBJC__");
+ // OpenCL v1.0/1.1 s6.9, v1.2/2.0 s6.10: Preprocessor Directives and Macros.
+ if (LangOpts.OpenCL) {
+ // OpenCL v1.0 and v1.1 do not have a predefined macro to indicate the
+ // language standard with which the program is compiled. __OPENCL_VERSION__
+ // is for the OpenCL version supported by the OpenCL device, which is not
+ // necessarily the language standard with which the program is compiled.
+ // A shared OpenCL header file requires a macro to indicate the language
+ // standard. As a workaround, __OPENCL_C_VERSION__ is defined for
+ // OpenCL v1.0 and v1.1.
+ switch (LangOpts.OpenCLVersion) {
+ case 100:
+ Builder.defineMacro("__OPENCL_C_VERSION__", "100");
+ break;
+ case 110:
+ Builder.defineMacro("__OPENCL_C_VERSION__", "110");
+ break;
+ case 120:
+ Builder.defineMacro("__OPENCL_C_VERSION__", "120");
+ break;
+ case 200:
+ Builder.defineMacro("__OPENCL_C_VERSION__", "200");
+ break;
+ default:
+ llvm_unreachable("Unsupported OpenCL version");
+ }
+ Builder.defineMacro("CL_VERSION_1_0", "100");
+ Builder.defineMacro("CL_VERSION_1_1", "110");
+ Builder.defineMacro("CL_VERSION_1_2", "120");
+ Builder.defineMacro("CL_VERSION_2_0", "200");
+
+ if (LangOpts.FastRelaxedMath)
+ Builder.defineMacro("__FAST_RELAXED_MATH__");
+ }
// Not "standard" per se, but available even with the -undef flag.
if (LangOpts.AsmPreprocessor)
Builder.defineMacro("__ASSEMBLER__");
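As a usage sketch, a shared OpenCL header can now key off the language-version macros defined above (the guarded declarations are placeholders):

/* Guard declarations by the OpenCL C standard in effect: */
#if defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ >= CL_VERSION_2_0
/* OpenCL C 2.0 declarations, e.g. ones using the generic address space */
#elif defined(__OPENCL_C_VERSION__) && __OPENCL_C_VERSION__ >= CL_VERSION_1_2
/* OpenCL C 1.2 declarations */
#endif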
@@ -793,8 +826,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
DefineFastIntType(64, true, TI, Builder);
DefineFastIntType(64, false, TI, Builder);
- if (const char *Prefix = TI.getUserLabelPrefix())
- Builder.defineMacro("__USER_LABEL_PREFIX__", Prefix);
+ char UserLabelPrefix[2] = {TI.getDataLayout().getGlobalPrefix(), 0};
+ Builder.defineMacro("__USER_LABEL_PREFIX__", UserLabelPrefix);
if (LangOpts.FastMath || LangOpts.FiniteMathOnly)
Builder.defineMacro("__FINITE_MATH_ONLY__", "1");
@@ -811,7 +844,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
// FIXME: This is target-dependent.
Builder.defineMacro("__GCC_ATOMIC_TEST_AND_SET_TRUEVAL", "1");
- // Used by libstdc++ to implement ATOMIC_<foo>_LOCK_FREE.
+ // Used by libc++ and libstdc++ to implement ATOMIC_<foo>_LOCK_FREE.
unsigned InlineWidthBits = TI.getMaxAtomicInlineWidth();
#define DEFINE_LOCK_FREE_MACRO(TYPE, Type) \
Builder.defineMacro("__GCC_ATOMIC_" #TYPE "_LOCK_FREE", \
@@ -840,10 +873,10 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (unsigned PICLevel = LangOpts.PICLevel) {
Builder.defineMacro("__PIC__", Twine(PICLevel));
Builder.defineMacro("__pic__", Twine(PICLevel));
- }
- if (unsigned PIELevel = LangOpts.PIELevel) {
- Builder.defineMacro("__PIE__", Twine(PIELevel));
- Builder.defineMacro("__pie__", Twine(PIELevel));
+ if (LangOpts.PIE) {
+ Builder.defineMacro("__PIE__", Twine(PICLevel));
+ Builder.defineMacro("__pie__", Twine(PICLevel));
+ }
}
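A small probe of what the change above means for user code: __PIE__ now mirrors the PIC level instead of a separate PIE level (the flag spellings are the cc1-level -pic-level / -pic-is-pie options parsed earlier in this patch):

#include <stdio.h>

int main(void) {
  /* With -pic-level 2 plus -pic-is-pie, both macros expand to 2. */
#if defined(__PIE__)
  printf("PIE, level %d\n", __PIE__);
#elif defined(__PIC__)
  printf("PIC, level %d, not PIE\n", __PIC__);
#else
  printf("neither PIC nor PIE\n");
#endif
  return 0;
}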
// Macros to control C99 numerics and <float.h>
@@ -889,13 +922,24 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
}
// OpenMP definition
- if (LangOpts.OpenMP) {
- // OpenMP 2.2:
- // In implementations that support a preprocessor, the _OPENMP
- // macro name is defined to have the decimal value yyyymm where
- // yyyy and mm are the year and the month designations of the
- // version of the OpenMP API that the implementation support.
+ // OpenMP 2.2:
+ // In implementations that support a preprocessor, the _OPENMP
+ // macro name is defined to have the decimal value yyyymm where
+ // yyyy and mm are the year and the month designations of the
+ // version of the OpenMP API that the implementation supports.
+ switch (LangOpts.OpenMP) {
+ case 0:
+ break;
+ case 40:
Builder.defineMacro("_OPENMP", "201307");
+ break;
+ case 45:
+ Builder.defineMacro("_OPENMP", "201511");
+ break;
+ default:
+ // Default version is OpenMP 3.1
+ Builder.defineMacro("_OPENMP", "201107");
+ break;
}
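For reference, a minimal probe of the value a translation unit observes under this switch: 201307 for -fopenmp-version=40, 201511 for 45, and 201107 (OpenMP 3.1) otherwise:

#include <stdio.h>

int main(void) {
#ifdef _OPENMP
  printf("_OPENMP = %d\n", _OPENMP);
#else
  printf("compiled without -fopenmp\n");
#endif
  return 0;
}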
 // CUDA device path compilation
@@ -905,6 +949,21 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__CUDA_ARCH__");
}
+ // We need to communicate this to our CUDA header wrapper, which in turn
+ // informs the proper CUDA headers of this choice.
+ if (LangOpts.CUDADeviceApproxTranscendentals || LangOpts.FastMath) {
+ Builder.defineMacro("__CLANG_CUDA_APPROX_TRANSCENDENTALS__");
+ }
+
+ // OpenCL definitions.
+ if (LangOpts.OpenCL) {
+#define OPENCLEXT(Ext) \
+ if (TI.getSupportedOpenCLOpts().is_##Ext##_supported( \
+ LangOpts.OpenCLVersion)) \
+ Builder.defineMacro(#Ext);
+#include "clang/Basic/OpenCLExtensions.def"
+ }
+
// Get other target #defines.
TI.getTargetDefines(LangOpts, Builder);
}
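The OPENCLEXT block above is the usual .def X-macro pattern; a generic, self-contained sketch of the same technique (the extension names are examples only):

#include <stdio.h>

/* Stand-in for a .def file such as clang/Basic/OpenCLExtensions.def: */
#define EXTENSIONS \
  OPENCLEXT(cl_khr_fp64) \
  OPENCLEXT(cl_khr_fp16)

int main(void) {
  /* Each expansion of EXTENSIONS applies the current OPENCLEXT definition. */
#define OPENCLEXT(Ext) printf("would define %s\n", #Ext);
  EXTENSIONS
#undef OPENCLEXT
  return 0;
}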
@@ -972,6 +1031,10 @@ void clang::InitializePreprocessor(
PP.getDiagnostics());
}
+ // Exit the command line and go back to <built-in> (2 is LC_LEAVE).
+ if (!PP.getLangOpts().AsmPreprocessor)
+ Builder.append("# 1 \"<built-in>\" 2");
+
// If -imacros are specified, include them now. These are processed before
// any -include directives.
for (unsigned i = 0, e = InitOpts.MacroIncludes.size(); i != e; ++i)
@@ -990,10 +1053,6 @@ void clang::InitializePreprocessor(
AddImplicitInclude(Builder, Path);
}
- // Exit the command line and go back to <built-in> (2 is LC_LEAVE).
- if (!PP.getLangOpts().AsmPreprocessor)
- Builder.append("# 1 \"<built-in>\" 2");
-
// Instruct the preprocessor to skip the preamble.
PP.setSkipMainFilePreamble(InitOpts.PrecompiledPreambleBytes.first,
InitOpts.PrecompiledPreambleBytes.second);
diff --git a/contrib/llvm/tools/clang/lib/Frontend/LayoutOverrideSource.cpp b/contrib/llvm/tools/clang/lib/Frontend/LayoutOverrideSource.cpp
index 924a64068fe4..06e9a7dc50b4 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/LayoutOverrideSource.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/LayoutOverrideSource.cpp
@@ -188,7 +188,7 @@ LayoutOverrideSource::layoutRecordType(const RecordDecl *Record,
return true;
}
-void LayoutOverrideSource::dump() {
+LLVM_DUMP_METHOD void LayoutOverrideSource::dump() {
raw_ostream &OS = llvm::errs();
for (llvm::StringMap<Layout>::iterator L = Layouts.begin(),
LEnd = Layouts.end();
diff --git a/contrib/llvm/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp b/contrib/llvm/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp
index 9768a164acbc..ca11f9b863bb 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/ModuleDependencyCollector.cpp
@@ -11,9 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/Basic/CharInfo.h"
#include "clang/Frontend/Utils.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Serialization/ASTReader.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
@@ -22,73 +24,202 @@
using namespace clang;
namespace {
-/// Private implementation for ModuleDependencyCollector
+/// Private implementations for ModuleDependencyCollector
class ModuleDependencyListener : public ASTReaderListener {
ModuleDependencyCollector &Collector;
-
- std::error_code copyToRoot(StringRef Src);
public:
ModuleDependencyListener(ModuleDependencyCollector &Collector)
: Collector(Collector) {}
bool needsInputFileVisitation() override { return true; }
bool needsSystemInputFileVisitation() override { return true; }
bool visitInputFile(StringRef Filename, bool IsSystem, bool IsOverridden,
- bool IsExplicitModule) override;
+ bool IsExplicitModule) override {
+ Collector.addFile(Filename);
+ return true;
+ }
+};
+
+struct ModuleDependencyMMCallbacks : public ModuleMapCallbacks {
+ ModuleDependencyCollector &Collector;
+ ModuleDependencyMMCallbacks(ModuleDependencyCollector &Collector)
+ : Collector(Collector) {}
+
+ void moduleMapAddHeader(StringRef HeaderPath) override {
+ if (llvm::sys::path::is_absolute(HeaderPath))
+ Collector.addFile(HeaderPath);
+ }
+ void moduleMapAddUmbrellaHeader(FileManager *FileMgr,
+ const FileEntry *Header) override {
+ StringRef HeaderFilename = Header->getName();
+ moduleMapAddHeader(HeaderFilename);
+ // The FileManager can find and cache the symbolic link for a framework
+ // header before its real path, this means a module can have some of its
+ // headers to use other paths. Although this is usually not a problem, it's
+ // inconsistent, and not collecting the original path header leads to
+ // umbrella clashes while rebuilding modules in the crash reproducer. For
+ // example:
+ // ApplicationServices.framework/Frameworks/ImageIO.framework/ImageIO.h
+ // instead of:
+ // ImageIO.framework/ImageIO.h
+ //
+ // FIXME: this shouldn't be necessary once we have FileName instances
+ // around instead of FileEntry ones. For now, make sure we collect all
+ // that we need for the reproducer to work correctly.
+ StringRef UmbreallDirFromHeader =
+ llvm::sys::path::parent_path(HeaderFilename);
+ StringRef UmbrellaDir = Header->getDir()->getName();
+ if (!UmbrellaDir.equals(UmbrellaDirFromHeader)) {
+ SmallString<128> AltHeaderFilename;
+ llvm::sys::path::append(AltHeaderFilename, UmbrellaDir,
+ llvm::sys::path::filename(HeaderFilename));
+ if (FileMgr->getFile(AltHeaderFilename))
+ moduleMapAddHeader(AltHeaderFilename);
+ }
+ }
};
+
+} // end anonymous namespace
+
+// TODO: move this to Support/Path.h and check for HAVE_REALPATH?
+static bool real_path(StringRef SrcPath, SmallVectorImpl<char> &RealPath) {
+#ifdef LLVM_ON_UNIX
+ char CanonicalPath[PATH_MAX];
+
+ // TODO: emit a warning in case this fails...?
+ if (!realpath(SrcPath.str().c_str(), CanonicalPath))
+ return false;
+
+ SmallString<256> RPath(CanonicalPath);
+ RealPath.swap(RPath);
+ return true;
+#else
+ // FIXME: Add support for systems without realpath.
+ return false;
+#endif
}
void ModuleDependencyCollector::attachToASTReader(ASTReader &R) {
R.addListener(llvm::make_unique<ModuleDependencyListener>(*this));
}
+void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
+ PP.getHeaderSearchInfo().getModuleMap().addModuleMapCallbacks(
+ llvm::make_unique<ModuleDependencyMMCallbacks>(*this));
+}
+
+static bool isCaseSensitivePath(StringRef Path) {
+ SmallString<256> TmpDest = Path, UpperDest, RealDest;
+ // Remove component traversals, links, etc.
+ if (!real_path(Path, TmpDest))
+ return true; // Current default value in vfs.yaml
+ Path = TmpDest;
+
+ // Change the path to all upper case and ask for its real path; if the
+ // latter exists and is equal to Path, it's not case sensitive. Default
+ // to case sensitive in the absence of realpath, since this is what the
+ // VFSWriter already expects when sensitivity isn't set up.
+ for (auto &C : Path)
+ UpperDest.push_back(toUppercase(C));
+ if (real_path(UpperDest, RealDest) && Path.equals(RealDest))
+ return false;
+ return true;
+}
+
void ModuleDependencyCollector::writeFileMap() {
if (Seen.empty())
return;
- SmallString<256> Dest = getDest();
- llvm::sys::path::append(Dest, "vfs.yaml");
+ StringRef VFSDir = getDest();
+
+ // Default to using relative overlay directories in the VFS yaml file.
+ // This allows crash reproducer scripts to work across machines.
+ VFSWriter.setOverlayDir(VFSDir);
+
+ // Explicitly set case sensitivity for the YAML writer. For that, find
+ // out the sensitivity of the path where the headers were all collected.
+ VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir));
+
+ // Do not rely on real path names when executing the crash reproducer
+ // scripts since we only want to use the files present in the VFS cache.
+ VFSWriter.setUseExternalNames(false);
std::error_code EC;
- llvm::raw_fd_ostream OS(Dest, EC, llvm::sys::fs::F_Text);
+ SmallString<256> YAMLPath = VFSDir;
+ llvm::sys::path::append(YAMLPath, "vfs.yaml");
+ llvm::raw_fd_ostream OS(YAMLPath, EC, llvm::sys::fs::F_Text);
if (EC) {
- setHasErrors();
+ HasErrors = true;
return;
}
VFSWriter.write(OS);
}
-std::error_code ModuleDependencyListener::copyToRoot(StringRef Src) {
+bool ModuleDependencyCollector::getRealPath(StringRef SrcPath,
+ SmallVectorImpl<char> &Result) {
+ using namespace llvm::sys;
+ SmallString<256> RealPath;
+ StringRef FileName = path::filename(SrcPath);
+ std::string Dir = path::parent_path(SrcPath).str();
+ auto DirWithSymLink = SymLinkMap.find(Dir);
+
+ // Use real_path to resolve any symbolic link component present in a path.
+ // Computing the real path is expensive; cache the result keyed on the
+ // parent directory.
+ if (DirWithSymLink == SymLinkMap.end()) {
+ if (!real_path(Dir, RealPath))
+ return false;
+ SymLinkMap[Dir] = RealPath.str();
+ } else {
+ RealPath = DirWithSymLink->second;
+ }
+
+ path::append(RealPath, FileName);
+ Result.swap(RealPath);
+ return true;
+}
+
+std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src) {
using namespace llvm::sys;
- // We need an absolute path to append to the root.
+ // We need an absolute src path to append to the root.
SmallString<256> AbsoluteSrc = Src;
fs::make_absolute(AbsoluteSrc);
- // Canonicalize to a native path to avoid mixed separator styles.
+ // Canonicalize src to a native path to avoid mixed separator styles.
path::native(AbsoluteSrc);
- // TODO: We probably need to handle .. as well as . in order to have valid
- // input to the YAMLVFSWriter.
- path::remove_dots(AbsoluteSrc);
+ // Remove redundant leading "./" pieces and consecutive separators.
+ AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc);
- // Build the destination path.
- SmallString<256> Dest = Collector.getDest();
- path::append(Dest, path::relative_path(AbsoluteSrc));
+ // Canonicalize the source path by removing "..", "." components.
+ SmallString<256> CanonicalPath = AbsoluteSrc;
+ path::remove_dots(CanonicalPath, /*remove_dot_dot=*/true);
+
+ // If a ".." component is present after a symlink component, remove_dots may
+ // lead to the wrong real destination path. Let the source be canonicalized
+ // like that but make sure we always use the real path for the destination.
+ SmallString<256> RealPath;
+ if (!getRealPath(AbsoluteSrc, RealPath))
+ RealPath = CanonicalPath;
+ SmallString<256> Dest = getDest();
+ path::append(Dest, path::relative_path(RealPath));
// Copy the file into place.
if (std::error_code EC = fs::create_directories(path::parent_path(Dest),
/*IgnoreExisting=*/true))
return EC;
- if (std::error_code EC = fs::copy_file(AbsoluteSrc, Dest))
+ if (std::error_code EC = fs::copy_file(RealPath, Dest))
return EC;
- // Use the absolute path under the root for the file mapping.
- Collector.addFileMapping(AbsoluteSrc, Dest);
+
+ // Always map a canonical src path to its real path in the YAML; by
+ // doing this we map different virtual src paths to the same entry in
+ // the VFS overlay, which is a way to emulate symlinks inside the VFS.
+ // This is also needed for correctness: not doing so can lead to module
+ // redefinition errors.
+ addFileMapping(CanonicalPath, Dest);
return std::error_code();
}
-bool ModuleDependencyListener::visitInputFile(StringRef Filename, bool IsSystem,
- bool IsOverridden,
- bool IsExplicitModule) {
- if (Collector.insertSeen(Filename))
+void ModuleDependencyCollector::addFile(StringRef Filename) {
+ if (insertSeen(Filename))
if (copyToRoot(Filename))
- Collector.setHasErrors();
- return true;
+ HasErrors = true;
}
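Two techniques in this file are easy to miss in diff form: getRealPath() pays for realpath() only once per parent directory by caching the result in SymLinkMap, and isCaseSensitivePath() probes the filesystem by resolving an upper-cased spelling of the path. A standalone sketch of the probe, assuming a POSIX realpath(); the helper names are illustrative, not clang's:

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <iostream>
#include <string>

static bool realPath(const std::string &P, std::string &Out) {
  char Buf[PATH_MAX];
  if (!::realpath(P.c_str(), Buf)) // resolves symlinks, ".", ".."
    return false;
  Out = Buf;
  return true;
}

static bool isCaseSensitivePath(const std::string &Path) {
  std::string Real, Upper, RealUpper;
  if (!realPath(Path, Real))
    return true; // same default as the diff: assume case sensitive
  for (char C : Real)
    Upper += static_cast<char>(toupper(static_cast<unsigned char>(C)));
  // On a case-insensitive filesystem the all-upper spelling resolves to
  // the same real path.
  if (realPath(Upper, RealUpper) && Real == RealUpper)
    return false;
  return true;
}

int main() {
  std::cout << (isCaseSensitivePath("/tmp") ? "sensitive" : "insensitive")
            << "\n";
}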
diff --git a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp
index f8b73e9034b3..17cdaee4be05 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/MultiplexConsumer.cpp
@@ -125,6 +125,8 @@ public:
void FunctionDefinitionInstantiated(const FunctionDecl *D) override;
void DeclarationMarkedUsed(const Decl *D) override;
void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
+ void DeclarationMarkedOpenMPDeclareTarget(const Decl *D,
+ const Attr *Attr) override;
void RedefinedHiddenDefinition(const NamedDecl *D, Module *M) override;
void AddedAttributeToRecord(const Attr *Attr,
const RecordDecl *Record) override;
@@ -219,6 +221,11 @@ void MultiplexASTMutationListener::DeclarationMarkedOpenMPThreadPrivate(
for (size_t i = 0, e = Listeners.size(); i != e; ++i)
Listeners[i]->DeclarationMarkedOpenMPThreadPrivate(D);
}
+void MultiplexASTMutationListener::DeclarationMarkedOpenMPDeclareTarget(
+ const Decl *D, const Attr *Attr) {
+ for (auto *L : Listeners)
+ L->DeclarationMarkedOpenMPDeclareTarget(D, Attr);
+}
void MultiplexASTMutationListener::RedefinedHiddenDefinition(const NamedDecl *D,
Module *M) {
for (auto *L : Listeners)
@@ -272,9 +279,9 @@ bool MultiplexConsumer::HandleTopLevelDecl(DeclGroupRef D) {
return Continue;
}
-void MultiplexConsumer::HandleInlineMethodDefinition(CXXMethodDecl *D) {
+void MultiplexConsumer::HandleInlineFunctionDefinition(FunctionDecl *D) {
for (auto &Consumer : Consumers)
- Consumer->HandleInlineMethodDefinition(D);
+ Consumer->HandleInlineFunctionDefinition(D);
}
void MultiplexConsumer::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
@@ -317,24 +324,14 @@ void MultiplexConsumer::HandleImplicitImportDecl(ImportDecl *D) {
Consumer->HandleImplicitImportDecl(D);
}
-void MultiplexConsumer::HandleLinkerOptionPragma(llvm::StringRef Opts) {
- for (auto &Consumer : Consumers)
- Consumer->HandleLinkerOptionPragma(Opts);
-}
-
-void MultiplexConsumer::HandleDetectMismatch(llvm::StringRef Name, llvm::StringRef Value) {
- for (auto &Consumer : Consumers)
- Consumer->HandleDetectMismatch(Name, Value);
-}
-
-void MultiplexConsumer::HandleDependentLibrary(llvm::StringRef Lib) {
+void MultiplexConsumer::CompleteTentativeDefinition(VarDecl *D) {
for (auto &Consumer : Consumers)
- Consumer->HandleDependentLibrary(Lib);
+ Consumer->CompleteTentativeDefinition(D);
}
-void MultiplexConsumer::CompleteTentativeDefinition(VarDecl *D) {
+void MultiplexConsumer::AssignInheritanceModel(CXXRecordDecl *RD) {
for (auto &Consumer : Consumers)
- Consumer->CompleteTentativeDefinition(D);
+ Consumer->AssignInheritanceModel(RD);
}
void MultiplexConsumer::HandleVTable(CXXRecordDecl *RD) {
@@ -355,6 +352,13 @@ void MultiplexConsumer::PrintStats() {
Consumer->PrintStats();
}
+bool MultiplexConsumer::shouldSkipFunctionBody(Decl *D) {
+ bool Skip = true;
+ for (auto &Consumer : Consumers)
+ Skip = Skip && Consumer->shouldSkipFunctionBody(D);
+ return Skip;
+}
+
void MultiplexConsumer::InitializeSema(Sema &S) {
for (auto &Consumer : Consumers)
if (SemaConsumer *SC = dyn_cast<SemaConsumer>(Consumer.get()))
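The MultiplexConsumer edits all follow one pattern: notification callbacks fan out over every registered consumer, while predicate callbacks such as shouldSkipFunctionBody() are AND-combined so a body is skipped only when every consumer agrees. A reduced sketch of the pattern; Consumer, Multiplexer, and Printer are illustrative names, not clang classes:

#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct Consumer {
  virtual ~Consumer() = default;
  virtual void handleDecl(int Id) = 0;
  virtual bool shouldSkipBody(int) { return true; } // permissive default
};

class Multiplexer : public Consumer {
  std::vector<std::unique_ptr<Consumer>> Consumers;
public:
  void add(std::unique_ptr<Consumer> C) { Consumers.push_back(std::move(C)); }
  // Notifications: forward to everyone.
  void handleDecl(int Id) override {
    for (auto &C : Consumers)
      C->handleDecl(Id);
  }
  // Predicates: skip only if *all* consumers want to skip.
  bool shouldSkipBody(int Id) override {
    bool Skip = true;
    for (auto &C : Consumers)
      Skip = Skip && C->shouldSkipBody(Id);
    return Skip;
  }
};

struct Printer : Consumer {
  void handleDecl(int Id) override { std::cout << "decl " << Id << "\n"; }
  bool shouldSkipBody(int Id) override { return Id != 0; }
};

int main() {
  Multiplexer M;
  M.add(std::make_unique<Printer>());
  M.handleDecl(1);                                              // "decl 1"
  std::cout << (M.shouldSkipBody(0) ? "skip" : "keep") << "\n"; // "keep"
}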
diff --git a/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp
index 5e1d77205098..2d4edde43280 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/PCHContainerOperations.cpp
@@ -13,23 +13,27 @@
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/AST/ASTConsumer.h"
+#include "clang/Lex/ModuleLoader.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Support/raw_ostream.h"
-#include "clang/Lex/ModuleLoader.h"
+#include <utility>
using namespace clang;
+PCHContainerWriter::~PCHContainerWriter() {}
+PCHContainerReader::~PCHContainerReader() {}
+
namespace {
/// \brief A PCHContainerGenerator that writes out the PCH to a flat file.
class RawPCHContainerGenerator : public ASTConsumer {
std::shared_ptr<PCHBuffer> Buffer;
- raw_pwrite_stream *OS;
+ std::unique_ptr<raw_pwrite_stream> OS;
public:
- RawPCHContainerGenerator(llvm::raw_pwrite_stream *OS,
+ RawPCHContainerGenerator(std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer)
- : Buffer(Buffer), OS(OS) {}
+ : Buffer(std::move(Buffer)), OS(std::move(OS)) {}
~RawPCHContainerGenerator() override = default;
@@ -49,9 +53,9 @@ public:
std::unique_ptr<ASTConsumer> RawPCHContainerWriter::CreatePCHContainerGenerator(
CompilerInstance &CI, const std::string &MainFileName,
- const std::string &OutputFileName, llvm::raw_pwrite_stream *OS,
+ const std::string &OutputFileName, std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const {
- return llvm::make_unique<RawPCHContainerGenerator>(OS, Buffer);
+ return llvm::make_unique<RawPCHContainerGenerator>(std::move(OS), Buffer);
}
void RawPCHContainerReader::ExtractPCH(
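The recurring signature change in this and the following Frontend files is an ownership fix: output streams are passed as std::unique_ptr and threaded through constructors with std::move instead of being borrowed as raw pointers, so the consumer that writes the stream also controls its lifetime. A minimal sketch of the shape using standard streams; BufferWriter is an illustrative stand-in:

#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>

class BufferWriter {
  std::unique_ptr<std::ostream> OS; // owned, not borrowed
public:
  explicit BufferWriter(std::unique_ptr<std::ostream> OS)
      : OS(std::move(OS)) {}
  void write(const std::string &Data) { *OS << Data; }
}; // the stream is destroyed (and flushed) with the writer

int main() {
  auto Stream = std::make_unique<std::ostringstream>();
  auto *Raw = Stream.get(); // non-owning handle, valid while the writer lives
  BufferWriter W(std::move(Stream));
  W.write("pch bytes");
  std::cout << Raw->str() << "\n";
}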
diff --git a/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index a58c935620a2..77b80e612fbf 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -326,8 +326,20 @@ void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
if (Imported) {
startNewLineIfNeeded();
MoveToLine(HashLoc);
- OS << "@import " << Imported->getFullModuleName() << ";"
- << " /* clang -E: implicit import for \"" << File->getName() << "\" */";
+ if (PP.getLangOpts().ObjC2) {
+ OS << "@import " << Imported->getFullModuleName() << ";"
+ << " /* clang -E: implicit import for \"" << File->getName()
+ << "\" */";
+ } else {
+ // FIXME: Preserve whether this was a
+ // #include/#include_next/#include_macros/#import.
+ OS << "#include "
+ << (IsAngled ? '<' : '"')
+ << FileName
+ << (IsAngled ? '>' : '"')
+ << " /* clang -E: implicit import for module "
+ << Imported->getFullModuleName() << " */";
+ }
// Since we want a newline after the @import, but not a #<line>, start a new
// line immediately.
EmittedTokensOnThisLine = true;
@@ -369,18 +381,16 @@ void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
setEmittedDirectiveOnThisLine();
}
-static void outputPrintable(llvm::raw_ostream& OS,
- const std::string &Str) {
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- unsigned char Char = Str[i];
- if (isPrintable(Char) && Char != '\\' && Char != '"')
- OS << (char)Char;
- else // Output anything hard as an octal escape.
- OS << '\\'
- << (char)('0'+ ((Char >> 6) & 7))
- << (char)('0'+ ((Char >> 3) & 7))
- << (char)('0'+ ((Char >> 0) & 7));
- }
+static void outputPrintable(raw_ostream &OS, StringRef Str) {
+ for (unsigned char Char : Str) {
+ if (isPrintable(Char) && Char != '\\' && Char != '"')
+ OS << (char)Char;
+ else // Output anything hard as an octal escape.
+ OS << '\\'
+ << (char)('0' + ((Char >> 6) & 7))
+ << (char)('0' + ((Char >> 3) & 7))
+ << (char)('0' + ((Char >> 0) & 7));
+ }
}
void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
@@ -547,8 +557,10 @@ void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
// If we have \n\r or \r\n, skip both and count as one line.
if (Len != 1 &&
(TokStr[1] == '\n' || TokStr[1] == '\r') &&
- TokStr[0] != TokStr[1])
- ++TokStr, --Len;
+ TokStr[0] != TokStr[1]) {
+ ++TokStr;
+ --Len;
+ }
}
if (NumNewlines == 0) return;
@@ -577,6 +589,15 @@ struct UnknownPragmaHandler : public PragmaHandler {
Callbacks->MoveToLine(PragmaTok.getLocation());
Callbacks->OS.write(Prefix, strlen(Prefix));
+ if (ShouldExpandTokens) {
+ // The first token has not been macro-expanded yet. Expand it, if
+ // required.
+ auto Toks = llvm::make_unique<Token[]>(1);
+ Toks[0] = PragmaTok;
+ PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
+ /*DisableMacroExpansion=*/false);
+ PP.Lex(PragmaTok);
+ }
Token PrevToken;
Token PrevPrevToken;
PrevToken.startToken();
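outputPrintable() above passes printable characters through and rewrites everything else (plus '\' and '"') as a three-digit octal escape, so pragma message strings survive re-lexing as valid C source. A self-contained sketch of the same transformation:

#include <cctype>
#include <iostream>
#include <string>

static std::string escapeForC(const std::string &S) {
  std::string Out;
  for (unsigned char C : S) {
    if (std::isprint(C) && C != '\\' && C != '"') {
      Out += static_cast<char>(C);
    } else {
      // Anything hard becomes a \NNN octal escape, one octal digit per
      // three bits, high bits first.
      Out += '\\';
      Out += static_cast<char>('0' + ((C >> 6) & 7));
      Out += static_cast<char>('0' + ((C >> 3) & 7));
      Out += static_cast<char>('0' + ((C >> 0) & 7));
    }
  }
  return Out;
}

int main() {
  std::cout << escapeForC("tab\there \"quoted\"\n") << "\n";
  // prints: tab\011here \042quoted\042\012
}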
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
index 8cf8adf37ed6..13d410e21381 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/FrontendActions.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
+#include <utility>
using namespace clang;
@@ -32,8 +33,9 @@ using namespace clang;
std::unique_ptr<ASTConsumer>
HTMLPrintAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
- if (raw_ostream *OS = CI.createDefaultOutputFile(false, InFile))
- return CreateHTMLPrinter(OS, CI.getPreprocessor());
+ if (std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(false, InFile))
+ return CreateHTMLPrinter(std::move(OS), CI.getPreprocessor());
return nullptr;
}
@@ -60,8 +62,8 @@ class FixItActionSuffixInserter : public FixItOptions {
public:
FixItActionSuffixInserter(std::string NewSuffix, bool FixWhatYouCan)
- : NewSuffix(NewSuffix) {
- this->FixWhatYouCan = FixWhatYouCan;
+ : NewSuffix(std::move(NewSuffix)) {
+ this->FixWhatYouCan = FixWhatYouCan;
}
std::string RewriteFilename(const std::string &Filename, int &fd) override {
@@ -151,15 +153,15 @@ bool FixItRecompile::BeginInvocation(CompilerInstance &CI) {
std::unique_ptr<ASTConsumer>
RewriteObjCAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
- if (raw_ostream *OS = CI.createDefaultOutputFile(false, InFile, "cpp")) {
+ if (std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(false, InFile, "cpp")) {
if (CI.getLangOpts().ObjCRuntime.isNonFragile())
- return CreateModernObjCRewriter(InFile, OS,
- CI.getDiagnostics(), CI.getLangOpts(),
- CI.getDiagnosticOpts().NoRewriteMacros,
- (CI.getCodeGenOpts().getDebugInfo() !=
- CodeGenOptions::NoDebugInfo));
- return CreateObjCRewriter(InFile, OS,
- CI.getDiagnostics(), CI.getLangOpts(),
+ return CreateModernObjCRewriter(
+ InFile, std::move(OS), CI.getDiagnostics(), CI.getLangOpts(),
+ CI.getDiagnosticOpts().NoRewriteMacros,
+ (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo));
+ return CreateObjCRewriter(InFile, std::move(OS), CI.getDiagnostics(),
+ CI.getLangOpts(),
CI.getDiagnosticOpts().NoRewriteMacros);
}
return nullptr;
@@ -173,25 +175,28 @@ RewriteObjCAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
void RewriteMacrosAction::ExecuteAction() {
CompilerInstance &CI = getCompilerInstance();
- raw_ostream *OS = CI.createDefaultOutputFile(true, getCurrentFile());
+ std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(true, getCurrentFile());
if (!OS) return;
- RewriteMacrosInInput(CI.getPreprocessor(), OS);
+ RewriteMacrosInInput(CI.getPreprocessor(), OS.get());
}
void RewriteTestAction::ExecuteAction() {
CompilerInstance &CI = getCompilerInstance();
- raw_ostream *OS = CI.createDefaultOutputFile(false, getCurrentFile());
+ std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(false, getCurrentFile());
if (!OS) return;
- DoRewriteTest(CI.getPreprocessor(), OS);
+ DoRewriteTest(CI.getPreprocessor(), OS.get());
}
void RewriteIncludesAction::ExecuteAction() {
CompilerInstance &CI = getCompilerInstance();
- raw_ostream *OS = CI.createDefaultOutputFile(true, getCurrentFile());
+ std::unique_ptr<raw_ostream> OS =
+ CI.createDefaultOutputFile(true, getCurrentFile());
if (!OS) return;
- RewriteIncludesInInput(CI.getPreprocessor(), OS,
+ RewriteIncludesInInput(CI.getPreprocessor(), OS.get(),
CI.getPreprocessorOutputOpts());
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/HTMLPrint.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/HTMLPrint.cpp
index 22ccfe6936b7..f5fad346124a 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/HTMLPrint.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/HTMLPrint.cpp
@@ -32,14 +32,14 @@ using namespace clang;
namespace {
class HTMLPrinter : public ASTConsumer {
Rewriter R;
- raw_ostream *Out;
+ std::unique_ptr<raw_ostream> Out;
Preprocessor &PP;
bool SyntaxHighlight, HighlightMacros;
public:
- HTMLPrinter(raw_ostream *OS, Preprocessor &pp,
+ HTMLPrinter(std::unique_ptr<raw_ostream> OS, Preprocessor &pp,
bool _SyntaxHighlight, bool _HighlightMacros)
- : Out(OS), PP(pp), SyntaxHighlight(_SyntaxHighlight),
+ : Out(std::move(OS)), PP(pp), SyntaxHighlight(_SyntaxHighlight),
HighlightMacros(_HighlightMacros) {}
void Initialize(ASTContext &context) override;
@@ -47,11 +47,10 @@ namespace {
};
}
-std::unique_ptr<ASTConsumer> clang::CreateHTMLPrinter(raw_ostream *OS,
- Preprocessor &PP,
- bool SyntaxHighlight,
- bool HighlightMacros) {
- return llvm::make_unique<HTMLPrinter>(OS, PP, SyntaxHighlight,
+std::unique_ptr<ASTConsumer>
+clang::CreateHTMLPrinter(std::unique_ptr<raw_ostream> OS, Preprocessor &PP,
+ bool SyntaxHighlight, bool HighlightMacros) {
+ return llvm::make_unique<HTMLPrinter>(std::move(OS), PP, SyntaxHighlight,
HighlightMacros);
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index ca8226251fd9..b761c34fcbde 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -450,7 +450,9 @@ bool InclusionRewriter::Process(FileID FileId,
WriteLineInfo(FileName, Line - 1, FileType, "");
StringRef LineInfoExtra;
SourceLocation Loc = HashToken.getLocation();
- if (const Module *Mod = FindModuleAtLocation(Loc))
+ if (const Module *Mod = PP.getLangOpts().ObjC2
+ ? FindModuleAtLocation(Loc)
+ : nullptr)
WriteImplicitModuleImport(Mod);
else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
// include and recursively process the file
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index be68d42affa1..ad217517d7d7 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -72,7 +72,7 @@ namespace {
Stmt *CurrentBody;
ParentMap *PropParentMap; // created lazily.
std::string InFileName;
- raw_ostream* OutFile;
+ std::unique_ptr<raw_ostream> OutFile;
std::string Preamble;
TypeDecl *ProtocolTypeDecl;
@@ -135,7 +135,6 @@ namespace {
SmallVector<DeclRefExpr *, 32> BlockDeclRefs;
-
// Block related declarations.
SmallVector<ValueDecl *, 8> BlockByCopyDecls;
llvm::SmallPtrSet<ValueDecl *, 8> BlockByCopyDeclsPtrSet;
@@ -186,6 +185,7 @@ namespace {
public:
llvm::DenseMap<ObjCMethodDecl*, std::string> MethodInternalNames;
+
// Top Level Driver code.
bool HandleTopLevelDecl(DeclGroupRef D) override {
for (DeclGroupRef::iterator I = D.begin(), E = D.end(); I != E; ++I) {
@@ -235,14 +235,13 @@ namespace {
RewriteObjCQualifiedInterfaceTypes(TD);
}
}
- return;
}
void HandleTopLevelSingleDecl(Decl *D);
void HandleDeclInMainFile(Decl *D);
- RewriteModernObjC(std::string inFile, raw_ostream *OS,
- DiagnosticsEngine &D, const LangOptions &LOpts,
- bool silenceMacroWarn, bool LineInfo);
+ RewriteModernObjC(std::string inFile, std::unique_ptr<raw_ostream> OS,
+ DiagnosticsEngine &D, const LangOptions &LOpts,
+ bool silenceMacroWarn, bool LineInfo);
~RewriteModernObjC() override {}
@@ -367,7 +366,6 @@ namespace {
Stmt *RewriteContinueStmt(ContinueStmt *S);
void RewriteCastExpr(CStyleCastExpr *CE);
void RewriteImplicitCastObjCExpr(CastExpr *IE);
- void RewriteLinkageSpec(LinkageSpecDecl *LSD);
// Computes ivar bitfield group no.
unsigned ObjCIvarBitfieldGroupNo(ObjCIvarDecl *IV);
@@ -448,9 +446,6 @@ namespace {
std::string &Result);
void RewriteObjCProtocolMetaData(ObjCProtocolDecl *Protocol,
std::string &Result);
- void RewriteObjCProtocolListMetaData(
- const ObjCList<ObjCProtocolDecl> &Prots,
- StringRef prefix, StringRef ClassName, std::string &Result);
void RewriteObjCClassMetaData(ObjCImplementationDecl *IDecl,
std::string &Result);
void RewriteClassSetupInitHook(std::string &Result);
@@ -523,7 +518,6 @@ namespace {
QualType getSuperStructType();
QualType getConstantStringStructType();
QualType convertFunctionTypeOfBlocks(const FunctionType *FT);
- bool BufferContainsPPDirectives(const char *startBuf, const char *endBuf);
void convertToUnqualifiedObjCType(QualType &T) {
if (T->isObjCQualifiedIdType()) {
@@ -562,6 +556,7 @@ namespace {
}
return false;
}
+
bool PointerTypeTakesAnyBlockArguments(QualType QT);
bool PointerTypeTakesAnyObjCQualifiedType(QualType QT);
void GetExtentOfArgList(const char *Name, const char *&LParen,
@@ -608,8 +603,7 @@ namespace {
/*Pascal=*/false, StrType, SourceLocation());
}
};
-
-}
+} // end anonymous namespace
void RewriteModernObjC::RewriteBlocksInFunctionProtoType(QualType funcType,
NamedDecl *D) {
@@ -644,12 +638,13 @@ static bool IsHeaderFile(const std::string &Filename) {
return Ext == "h" || Ext == "hh" || Ext == "H";
}
-RewriteModernObjC::RewriteModernObjC(std::string inFile, raw_ostream* OS,
- DiagnosticsEngine &D, const LangOptions &LOpts,
- bool silenceMacroWarn,
- bool LineInfo)
- : Diags(D), LangOpts(LOpts), InFileName(inFile), OutFile(OS),
- SilenceRewriteMacroWarning(silenceMacroWarn), GenerateLineInfo(LineInfo) {
+RewriteModernObjC::RewriteModernObjC(std::string inFile,
+ std::unique_ptr<raw_ostream> OS,
+ DiagnosticsEngine &D,
+ const LangOptions &LOpts,
+ bool silenceMacroWarn, bool LineInfo)
+ : Diags(D), LangOpts(LOpts), InFileName(inFile), OutFile(std::move(OS)),
+ SilenceRewriteMacroWarning(silenceMacroWarn), GenerateLineInfo(LineInfo) {
IsHeader = IsHeaderFile(inFile);
RewriteFailedDiag = Diags.getCustomDiagID(DiagnosticsEngine::Warning,
"rewriting sub-expression within a macro (may not be correct)");
@@ -665,10 +660,12 @@ RewriteModernObjC::RewriteModernObjC(std::string inFile, raw_ostream* OS,
}
std::unique_ptr<ASTConsumer> clang::CreateModernObjCRewriter(
- const std::string &InFile, raw_ostream *OS, DiagnosticsEngine &Diags,
- const LangOptions &LOpts, bool SilenceRewriteMacroWarning, bool LineInfo) {
- return llvm::make_unique<RewriteModernObjC>(
- InFile, OS, Diags, LOpts, SilenceRewriteMacroWarning, LineInfo);
+ const std::string &InFile, std::unique_ptr<raw_ostream> OS,
+ DiagnosticsEngine &Diags, const LangOptions &LOpts,
+ bool SilenceRewriteMacroWarning, bool LineInfo) {
+ return llvm::make_unique<RewriteModernObjC>(InFile, std::move(OS), Diags,
+ LOpts, SilenceRewriteMacroWarning,
+ LineInfo);
}
void RewriteModernObjC::InitializeCommon(ASTContext &context) {
@@ -743,10 +740,6 @@ void RewriteModernObjC::HandleTopLevelSingleDecl(Decl *D) {
if (PD->isThisDeclarationADefinition())
RewriteProtocolDecl(PD);
} else if (LinkageSpecDecl *LSD = dyn_cast<LinkageSpecDecl>(D)) {
- // FIXME. This will not work in all situations and leaving it out
- // is harmless.
- // RewriteLinkageSpec(LSD);
-
// Recurse into linkage specifications
for (DeclContext::decl_iterator DI = LSD->decls_begin(),
DIEnd = LSD->decls_end();
@@ -853,7 +846,6 @@ RewriteModernObjC::getIvarAccessString(ObjCIvarDecl *D) {
else
WriteInternalIvarName(ClassDecl, D, IvarOffsetName);
-
std::string S = "(*(";
QualType IvarT = D->getType();
if (D->isBitField())
@@ -1068,11 +1060,11 @@ static void RewriteOneForwardClassDecl(ObjCInterfaceDecl *ForwardDecl,
void RewriteModernObjC::RewriteForwardClassEpilogue(ObjCInterfaceDecl *ClassDecl,
const std::string &typedefString) {
- SourceLocation startLoc = ClassDecl->getLocStart();
- const char *startBuf = SM->getCharacterData(startLoc);
- const char *semiPtr = strchr(startBuf, ';');
- // Replace the @class with typedefs corresponding to the classes.
- ReplaceText(startLoc, semiPtr-startBuf+1, typedefString);
+ SourceLocation startLoc = ClassDecl->getLocStart();
+ const char *startBuf = SM->getCharacterData(startLoc);
+ const char *semiPtr = strchr(startBuf, ';');
+ // Replace the @class with typedefs corresponding to the classes.
+ ReplaceText(startLoc, semiPtr-startBuf+1, typedefString);
}
void RewriteModernObjC::RewriteForwardClassDecl(DeclGroupRef D) {
@@ -1147,7 +1139,7 @@ void RewriteModernObjC::RewriteCategoryDecl(ObjCCategoryDecl *CatDecl) {
ReplaceText(LocStart, 0, "// ");
}
- for (auto *I : CatDecl->properties())
+ for (auto *I : CatDecl->instance_properties())
RewriteProperty(I);
for (auto *I : CatDecl->instance_methods())
@@ -1171,7 +1163,7 @@ void RewriteModernObjC::RewriteProtocolDecl(ObjCProtocolDecl *PDecl) {
RewriteMethodDeclaration(I);
for (auto *I : PDecl->class_methods())
RewriteMethodDeclaration(I);
- for (auto *I : PDecl->properties())
+ for (auto *I : PDecl->instance_properties())
RewriteProperty(I);
// Lastly, comment out the @end.
@@ -1212,22 +1204,6 @@ RewriteModernObjC::RewriteForwardProtocolDecl(const SmallVectorImpl<Decl *> &DG)
ReplaceText(LocStart, 0, "// ");
}
-void
-RewriteModernObjC::RewriteLinkageSpec(LinkageSpecDecl *LSD) {
- SourceLocation LocStart = LSD->getExternLoc();
- if (LocStart.isInvalid())
- llvm_unreachable("Invalid extern SourceLocation");
-
- ReplaceText(LocStart, 0, "// ");
- if (!LSD->hasBraces())
- return;
- // FIXME. We don't rewrite well if '{' is not on same line as 'extern'.
- SourceLocation LocRBrace = LSD->getRBraceLoc();
- if (LocRBrace.isInvalid())
- llvm_unreachable("Invalid rbrace SourceLocation");
- ReplaceText(LocRBrace, 0, "// ");
-}
-
void RewriteModernObjC::RewriteTypeIntoString(QualType T, std::string &ResultStr,
const FunctionType *&FPRetType) {
if (T->isObjCQualifiedIdType())
@@ -1313,7 +1289,7 @@ void RewriteModernObjC::RewriteObjCMethodDecl(const ObjCInterfaceDecl *IDecl,
ResultStr += " _cmd";
// Method arguments.
- for (const auto *PDecl : OMD->params()) {
+ for (const auto *PDecl : OMD->parameters()) {
ResultStr += ", ";
if (PDecl->getType()->isObjCQualifiedIdType()) {
ResultStr += "id ";
@@ -1354,6 +1330,7 @@ void RewriteModernObjC::RewriteObjCMethodDecl(const ObjCInterfaceDecl *IDecl,
}
}
}
+
void RewriteModernObjC::RewriteImplementationDecl(Decl *OID) {
ObjCImplementationDecl *IMD = dyn_cast<ObjCImplementationDecl>(OID);
ObjCCategoryImplDecl *CID = dyn_cast<ObjCCategoryImplDecl>(OID);
@@ -1417,7 +1394,7 @@ void RewriteModernObjC::RewriteInterfaceDecl(ObjCInterfaceDecl *ClassDecl) {
// Mark this typedef as having been written into its c++ equivalent.
ObjCWrittenInterfaces.insert(ClassDecl->getCanonicalDecl());
- for (auto *I : ClassDecl->properties())
+ for (auto *I : ClassDecl->instance_properties())
RewriteProperty(I);
for (auto *I : ClassDecl->instance_methods())
RewriteMethodDeclaration(I);
@@ -1940,7 +1917,6 @@ void RewriteModernObjC::WarnAboutReturnGotoStmts(Stmt *S)
Diags.Report(Context->getFullLoc(S->getLocStart()),
TryFinallyContainsReturnDiag);
}
- return;
}
Stmt *RewriteModernObjC::RewriteObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S) {
@@ -2809,11 +2785,10 @@ Stmt *RewriteModernObjC::RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp) {
Context->UnsignedIntTy, SourceLocation());
MsgExprs.push_back(cnt);
-
SmallVector<QualType, 4> ArgTypes;
ArgTypes.push_back(Context->getObjCClassType());
ArgTypes.push_back(Context->getObjCSelType());
- for (const auto *PI : ArrayMethod->params())
+ for (const auto *PI : ArrayMethod->parameters())
ArgTypes.push_back(PI->getType());
QualType returnType = Exp->getType();
@@ -2921,8 +2896,6 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral
CK_BitCast,
DictLiteralKeyME);
-
-
// Synthesize a call to objc_msgSend().
SmallVector<Expr*, 32> MsgExprs;
SmallVector<Expr*, 4> ClsExprs;
@@ -2959,11 +2932,10 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral
Context->UnsignedIntTy, SourceLocation());
MsgExprs.push_back(cnt);
-
SmallVector<QualType, 8> ArgTypes;
ArgTypes.push_back(Context->getObjCClassType());
ArgTypes.push_back(Context->getObjCSelType());
- for (const auto *PI : DictMethod->params()) {
+ for (const auto *PI : DictMethod->parameters()) {
QualType T = PI->getType();
if (const PointerType* PT = T->getAs<PointerType>()) {
QualType PointeeTy = PT->getPointeeType();
@@ -3176,7 +3148,6 @@ Expr *RewriteModernObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFla
str += "\t memset((void*)&s, 0, sizeof(s));\n";
str += "\t else\n";
-
str += "\t s = (("; str += castType.getAsString(Context->getPrintingPolicy());
str += ")(void *)objc_msgSend_stret)(receiver, sel";
for (unsigned i = 2; i < ArgTypes.size(); i++) {
@@ -3188,7 +3159,6 @@ Expr *RewriteModernObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFla
}
str += ");\n";
-
str += "\t}\n";
str += "\t"; str += returnType.getAsString(Context->getPrintingPolicy());
str += " s;\n";
@@ -3530,7 +3500,7 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
ArgTypes.push_back(Context->getObjCSelType());
if (ObjCMethodDecl *OMD = Exp->getMethodDecl()) {
// Push any user argument types.
- for (const auto *PI : OMD->params()) {
+ for (const auto *PI : OMD->parameters()) {
QualType t = PI->getType()->isObjCQualifiedIdType()
? Context->getObjCIdType()
: PI->getType();
@@ -3635,33 +3605,6 @@ Stmt *RewriteModernObjC::RewriteObjCProtocolExpr(ObjCProtocolExpr *Exp) {
ProtocolExprDecls.insert(Exp->getProtocol()->getCanonicalDecl());
// delete Exp; leak for now, see RewritePropertyOrImplicitSetter() usage for more info.
return castExpr;
-
-}
-
-bool RewriteModernObjC::BufferContainsPPDirectives(const char *startBuf,
- const char *endBuf) {
- while (startBuf < endBuf) {
- if (*startBuf == '#') {
- // Skip whitespace.
- for (++startBuf; startBuf[0] == ' ' || startBuf[0] == '\t'; ++startBuf)
- ;
- if (!strncmp(startBuf, "if", strlen("if")) ||
- !strncmp(startBuf, "ifdef", strlen("ifdef")) ||
- !strncmp(startBuf, "ifndef", strlen("ifndef")) ||
- !strncmp(startBuf, "define", strlen("define")) ||
- !strncmp(startBuf, "undef", strlen("undef")) ||
- !strncmp(startBuf, "else", strlen("else")) ||
- !strncmp(startBuf, "elif", strlen("elif")) ||
- !strncmp(startBuf, "endif", strlen("endif")) ||
- !strncmp(startBuf, "pragma", strlen("pragma")) ||
- !strncmp(startBuf, "include", strlen("include")) ||
- !strncmp(startBuf, "import", strlen("import")) ||
- !strncmp(startBuf, "include_next", strlen("include_next")))
- return true;
- }
- startBuf++;
- }
- return false;
}
/// IsTagDefinedInsideClass - This routine checks that a named tagged type
@@ -3688,7 +3631,6 @@ bool RewriteModernObjC::IsTagDefinedInsideClass(ObjCContainerDecl *IDecl,
TagLocation = ED->getLocation();
return Context->getSourceManager().isBeforeInTranslationUnit(
IDecl->getLocation(), TagLocation);
-
}
return false;
}
@@ -3820,7 +3762,6 @@ void RewriteModernObjC::RewriteLocallyDefinedNamedAggregates(FieldDecl *fieldDec
if (IsNamedDefinition)
GlobalDefinedTags.insert(TD);
}
-
}
unsigned RewriteModernObjC::ObjCIvarBitfieldGroupNo(ObjCIvarDecl *IV) {
@@ -3911,7 +3852,6 @@ void RewriteModernObjC::ObjCIvarBitfieldGroupDecl(ObjCIvarDecl *IV,
Result += "__GRBF_";
unsigned GroupNo = ObjCIvarBitfieldGroupNo(IV);
Result += utostr(GroupNo);
- return;
}
/// ObjCIvarBitfieldGroupType - Names struct type for ivar bitfield group.
@@ -3924,7 +3864,6 @@ void RewriteModernObjC::ObjCIvarBitfieldGroupType(ObjCIvarDecl *IV,
Result += "__T_";
unsigned GroupNo = ObjCIvarBitfieldGroupNo(IV);
Result += utostr(GroupNo);
- return;
}
/// ObjCIvarBitfieldGroupOffset - Names symbol for ivar bitfield group field offset.
@@ -4063,7 +4002,6 @@ void RewriteModernObjC::RewriteIvarOffsetSymbols(ObjCInterfaceDecl *CDecl,
// Meta Data Emission
//===----------------------------------------------------------------------===//
-
/// RewriteImplementations - This routine rewrites all method implementations
/// and emits meta-data.
@@ -4543,8 +4481,6 @@ void RewriteModernObjC::GetBlockDeclRefExprs(Stmt *S) {
HasLocalVariableExternalStorage(DRE->getDecl()))
// FIXME: Handle enums.
BlockDeclRefs.push_back(DRE);
-
- return;
}
void RewriteModernObjC::GetInnerBlockDeclRefExprs(Stmt *S,
@@ -4572,8 +4508,6 @@ void RewriteModernObjC::GetInnerBlockDeclRefExprs(Stmt *S,
ImportedLocalExternalDecls.insert(Var);
}
}
-
- return;
}
/// convertObjCTypeToCStyleType - This routine converts such objc types
@@ -4658,7 +4592,7 @@ Stmt *RewriteModernObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp
= dyn_cast<PseudoObjectExpr>(BlockExp)) {
CPT = POE->getType()->castAs<BlockPointerType>();
} else {
- assert(1 && "RewriteBlockClass: Bad type");
+ assert(false && "RewriteBlockClass: Bad type");
}
assert(CPT && "RewriteBlockClass: Bad type");
const FunctionType *FT = CPT->getPointeeType()->getAs<FunctionType>();
@@ -4828,7 +4762,6 @@ void RewriteModernObjC::RewriteCastExpr(CStyleCastExpr *CE) {
break;
}
}
- return;
}
void RewriteModernObjC::RewriteImplicitCastObjCExpr(CastExpr *IC) {
@@ -4844,8 +4777,6 @@ void RewriteModernObjC::RewriteImplicitCastObjCExpr(CastExpr *IC) {
Str += TypeString;
Str += ")";
InsertText(IC->getSubExpr()->getLocStart(), Str);
-
- return;
}
void RewriteModernObjC::RewriteBlockPointerFunctionArgs(FunctionDecl *FD) {
@@ -4880,7 +4811,6 @@ void RewriteModernObjC::RewriteBlockPointerFunctionArgs(FunctionDecl *FD) {
break;
}
}
- return;
}
bool RewriteModernObjC::PointerTypeTakesAnyBlockArguments(QualType QT) {
@@ -5017,11 +4947,8 @@ void RewriteModernObjC::RewriteBlockPointerDecl(NamedDecl *ND) {
OrigLength++;
}
ReplaceText(Start, OrigLength, buf);
-
- return;
}
-
/// SynthesizeByrefCopyDestroyHelper - This routine synthesizes:
/// void __Block_byref_id_object_copy(struct Block_byref_id_object *dst,
/// struct Block_byref_id_object *src) {
@@ -5242,7 +5169,6 @@ void RewriteModernObjC::RewriteByRefVar(VarDecl *ND, bool firstDecl,
InsertText(separatorLoc, lastDecl ? "}" : "};\n");
}
- return;
}
void RewriteModernObjC::CollectBlockDeclRefInfo(BlockExpr *Exp) {
@@ -5284,7 +5210,6 @@ FunctionDecl *RewriteModernObjC::SynthBlockInitFunctionDecl(StringRef name) {
Stmt *RewriteModernObjC::SynthBlockInitExpr(BlockExpr *Exp,
const SmallVectorImpl<DeclRefExpr *> &InnerBlockDeclRefs) {
-
const BlockDecl *block = Exp->getBlockDecl();
Blocks.push_back(Exp);
@@ -5292,7 +5217,7 @@ Stmt *RewriteModernObjC::SynthBlockInitExpr(BlockExpr *Exp,
CollectBlockDeclRefInfo(Exp);
// Add inner imported variables now used in current block.
- int countOfInnerDecls = 0;
+ int countOfInnerDecls = 0;
if (!InnerBlockDeclRefs.empty()) {
for (unsigned i = 0; i < InnerBlockDeclRefs.size(); i++) {
DeclRefExpr *Exp = InnerBlockDeclRefs[i];
@@ -6995,7 +6920,8 @@ void RewriteModernObjC::RewriteObjCProtocolMetaData(ObjCProtocolDecl *PDecl,
PDecl->getNameAsString(), false);
// Protocol's property metadata.
- SmallVector<ObjCPropertyDecl *, 8> ProtocolProperties(PDecl->properties());
+ SmallVector<ObjCPropertyDecl *, 8> ProtocolProperties(
+ PDecl->instance_properties());
Write_prop_list_t_initializer(*this, Context, Result, ProtocolProperties,
/* Container */nullptr,
"_OBJC_PROTOCOL_PROPERTIES_",
@@ -7007,7 +6933,7 @@ void RewriteModernObjC::RewriteObjCProtocolMetaData(ObjCProtocolDecl *PDecl,
Result += "static ";
Result += "struct _protocol_t _OBJC_PROTOCOL_";
Result += PDecl->getNameAsString();
- Result += " __attribute__ ((used, section (\"__DATA,__datacoal_nt,coalesced\"))) = {\n";
+ Result += " __attribute__ ((used)) = {\n";
Result += "\t0,\n"; // id is; is null
Result += "\t\""; Result += PDecl->getNameAsString(); Result += "\",\n";
if (SuperProtocols.size() > 0) {
@@ -7072,52 +6998,6 @@ void RewriteModernObjC::RewriteObjCProtocolMetaData(ObjCProtocolDecl *PDecl,
// Mark this protocol as having been generated.
if (!ObjCSynthesizedProtocols.insert(PDecl->getCanonicalDecl()).second)
llvm_unreachable("protocol already synthesized");
-
-}
-
-void RewriteModernObjC::RewriteObjCProtocolListMetaData(
- const ObjCList<ObjCProtocolDecl> &Protocols,
- StringRef prefix, StringRef ClassName,
- std::string &Result) {
- if (Protocols.empty()) return;
-
- for (unsigned i = 0; i != Protocols.size(); i++)
- RewriteObjCProtocolMetaData(Protocols[i], Result);
-
- // Output the top lovel protocol meta-data for the class.
- /* struct _objc_protocol_list {
- struct _objc_protocol_list *next;
- int protocol_count;
- struct _objc_protocol *class_protocols[];
- }
- */
- Result += "\n";
- if (LangOpts.MicrosoftExt)
- Result += "__declspec(allocate(\".cat_cls_meth$B\")) ";
- Result += "static struct {\n";
- Result += "\tstruct _objc_protocol_list *next;\n";
- Result += "\tint protocol_count;\n";
- Result += "\tstruct _objc_protocol *class_protocols[";
- Result += utostr(Protocols.size());
- Result += "];\n} _OBJC_";
- Result += prefix;
- Result += "_PROTOCOLS_";
- Result += ClassName;
- Result += " __attribute__ ((used, section (\"__OBJC, __cat_cls_meth\")))= "
- "{\n\t0, ";
- Result += utostr(Protocols.size());
- Result += "\n";
-
- Result += "\t,{&_OBJC_PROTOCOL_";
- Result += Protocols[0]->getNameAsString();
- Result += " \n";
-
- for (unsigned i = 1; i != Protocols.size(); i++) {
- Result += "\t ,&_OBJC_PROTOCOL_";
- Result += Protocols[i]->getNameAsString();
- Result += "\n";
- }
- Result += "\t }\n};\n";
}
/// hasObjCExceptionAttribute - Return true if this class or any super
@@ -7208,19 +7088,18 @@ void RewriteModernObjC::RewriteObjCClassMetaData(ObjCImplementationDecl *IDecl,
IDecl->getNameAsString());
// Protocol's property metadata.
- SmallVector<ObjCPropertyDecl *, 8> ClassProperties(CDecl->properties());
+ SmallVector<ObjCPropertyDecl *, 8> ClassProperties(
+ CDecl->instance_properties());
Write_prop_list_t_initializer(*this, Context, Result, ClassProperties,
/* Container */IDecl,
"_OBJC_$_PROP_LIST_",
CDecl->getNameAsString());
-
// Data for initializing _class_ro_t metaclass meta-data
uint32_t flags = CLS_META;
std::string InstanceSize;
std::string InstanceStart;
-
bool classIsHidden = CDecl->getVisibility() == HiddenVisibility;
if (classIsHidden)
flags |= OBJC2_CLS_HIDDEN;
@@ -7288,7 +7167,6 @@ void RewriteModernObjC::RewriteObjCClassMetaData(ObjCImplementationDecl *IDecl,
if (ImplementationIsNonLazy(IDecl))
DefinedNonLazyClasses.push_back(CDecl);
-
}
void RewriteModernObjC::RewriteClassSetupInitHook(std::string &Result) {
@@ -7453,7 +7331,8 @@ void RewriteModernObjC::RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *IDecl,
FullCategoryName);
// Protocol's property metadata.
- SmallVector<ObjCPropertyDecl *, 8> ClassProperties(CDecl->properties());
+ SmallVector<ObjCPropertyDecl *, 8> ClassProperties(
+ CDecl->instance_properties());
Write_prop_list_t_initializer(*this, Context, Result, ClassProperties,
/* Container */IDecl,
"_OBJC_$_PROP_LIST_",
@@ -7470,7 +7349,6 @@ void RewriteModernObjC::RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *IDecl,
// Determine if this category is also "non-lazy".
if (ImplementationIsNonLazy(IDecl))
DefinedNonLazyCategories.push_back(CDecl);
-
}
void RewriteModernObjC::RewriteCategorySetupInitHook(std::string &Result) {
@@ -7705,4 +7583,4 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
return Replacement;
}
-#endif
+#endif // CLANG_ENABLE_OBJC_REWRITER
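One subtle fix repeated in both rewriter files: assert(1 && "...") is a tautology, since the constant 1 makes the whole condition true, so the "unreachable" branch was silently ignored; assert(false && "...") actually fires. A short demonstration:

#include <cassert>

int main() {
  assert(1 && "never fires"); // always passes: 1 && "msg" is true
  bool BadType = false;       // stand-in for the "Bad type" branch
  if (BadType)
    assert(false && "RewriteBlockClass: Bad type"); // aborts when reached
  return 0;
}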
diff --git a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index e0ddadb12306..5967e40bfed9 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -37,7 +37,6 @@ using llvm::utostr;
namespace {
class RewriteObjC : public ASTConsumer {
protected:
-
enum {
BLOCK_FIELD_IS_OBJECT = 3, /* id, NSObject, __attribute__((NSObject)),
block, ... */
@@ -72,7 +71,7 @@ namespace {
Stmt *CurrentBody;
ParentMap *PropParentMap; // created lazily.
std::string InFileName;
- raw_ostream* OutFile;
+ std::unique_ptr<raw_ostream> OutFile;
std::string Preamble;
TypeDecl *ProtocolTypeDecl;
@@ -158,14 +157,15 @@ namespace {
: R(R), SavedValue(R.DisableReplaceStmt) {
R.DisableReplaceStmt = true;
}
+
~DisableReplaceStmtScope() {
R.DisableReplaceStmt = SavedValue;
}
};
+
void InitializeCommon(ASTContext &context);
public:
-
// Top Level Driver code.
bool HandleTopLevelDecl(DeclGroupRef D) override {
for (DeclGroupRef::iterator I = D.begin(), E = D.end(); I != E; ++I) {
@@ -187,9 +187,10 @@ namespace {
}
return true;
}
+
void HandleTopLevelSingleDecl(Decl *D);
void HandleDeclInMainFile(Decl *D);
- RewriteObjC(std::string inFile, raw_ostream *OS,
+ RewriteObjC(std::string inFile, std::unique_ptr<raw_ostream> OS,
DiagnosticsEngine &D, const LangOptions &LOpts,
bool silenceMacroWarn);
@@ -505,12 +506,10 @@ namespace {
class RewriteObjCFragileABI : public RewriteObjC {
public:
-
- RewriteObjCFragileABI(std::string inFile, raw_ostream *OS,
- DiagnosticsEngine &D, const LangOptions &LOpts,
- bool silenceMacroWarn) : RewriteObjC(inFile, OS,
- D, LOpts,
- silenceMacroWarn) {}
+ RewriteObjCFragileABI(std::string inFile, std::unique_ptr<raw_ostream> OS,
+ DiagnosticsEngine &D, const LangOptions &LOpts,
+ bool silenceMacroWarn)
+ : RewriteObjC(inFile, std::move(OS), D, LOpts, silenceMacroWarn) {}
~RewriteObjCFragileABI() override {}
void Initialize(ASTContext &context) override;
@@ -540,7 +539,7 @@ namespace {
std::string &Result) override;
Stmt *RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) override;
};
-}
+} // end anonymous namespace
void RewriteObjC::RewriteBlocksInFunctionProtoType(QualType funcType,
NamedDecl *D) {
@@ -575,11 +574,11 @@ static bool IsHeaderFile(const std::string &Filename) {
return Ext == "h" || Ext == "hh" || Ext == "H";
}
-RewriteObjC::RewriteObjC(std::string inFile, raw_ostream* OS,
+RewriteObjC::RewriteObjC(std::string inFile, std::unique_ptr<raw_ostream> OS,
DiagnosticsEngine &D, const LangOptions &LOpts,
bool silenceMacroWarn)
- : Diags(D), LangOpts(LOpts), InFileName(inFile), OutFile(OS),
- SilenceRewriteMacroWarning(silenceMacroWarn) {
+ : Diags(D), LangOpts(LOpts), InFileName(inFile), OutFile(std::move(OS)),
+ SilenceRewriteMacroWarning(silenceMacroWarn) {
IsHeader = IsHeaderFile(inFile);
RewriteFailedDiag = Diags.getCustomDiagID(DiagnosticsEngine::Warning,
"rewriting sub-expression within a macro (may not be correct)");
@@ -590,11 +589,12 @@ RewriteObjC::RewriteObjC(std::string inFile, raw_ostream* OS,
}
std::unique_ptr<ASTConsumer>
-clang::CreateObjCRewriter(const std::string &InFile, raw_ostream *OS,
+clang::CreateObjCRewriter(const std::string &InFile,
+ std::unique_ptr<raw_ostream> OS,
DiagnosticsEngine &Diags, const LangOptions &LOpts,
bool SilenceRewriteMacroWarning) {
- return llvm::make_unique<RewriteObjCFragileABI>(InFile, OS, Diags, LOpts,
- SilenceRewriteMacroWarning);
+ return llvm::make_unique<RewriteObjCFragileABI>(
+ InFile, std::move(OS), Diags, LOpts, SilenceRewriteMacroWarning);
}
void RewriteObjC::InitializeCommon(ASTContext &context) {
@@ -969,7 +969,7 @@ void RewriteObjC::RewriteCategoryDecl(ObjCCategoryDecl *CatDecl) {
// FIXME: handle category headers that are declared across multiple lines.
ReplaceText(LocStart, 0, "// ");
- for (auto *I : CatDecl->properties())
+ for (auto *I : CatDecl->instance_properties())
RewriteProperty(I);
for (auto *I : CatDecl->instance_methods())
RewriteMethodDeclaration(I);
@@ -992,7 +992,7 @@ void RewriteObjC::RewriteProtocolDecl(ObjCProtocolDecl *PDecl) {
RewriteMethodDeclaration(I);
for (auto *I : PDecl->class_methods())
RewriteMethodDeclaration(I);
- for (auto *I : PDecl->properties())
+ for (auto *I : PDecl->instance_properties())
RewriteProperty(I);
// Lastly, comment out the @end.
@@ -1118,7 +1118,7 @@ void RewriteObjC::RewriteObjCMethodDecl(const ObjCInterfaceDecl *IDecl,
ResultStr += " _cmd";
// Method arguments.
- for (const auto *PDecl : OMD->params()) {
+ for (const auto *PDecl : OMD->parameters()) {
ResultStr += ", ";
if (PDecl->getType()->isObjCQualifiedIdType()) {
ResultStr += "id ";
@@ -1159,6 +1159,7 @@ void RewriteObjC::RewriteObjCMethodDecl(const ObjCInterfaceDecl *IDecl,
}
}
}
+
void RewriteObjC::RewriteImplementationDecl(Decl *OID) {
ObjCImplementationDecl *IMD = dyn_cast<ObjCImplementationDecl>(OID);
ObjCCategoryImplDecl *CID = dyn_cast<ObjCCategoryImplDecl>(OID);
@@ -1210,7 +1211,7 @@ void RewriteObjC::RewriteInterfaceDecl(ObjCInterfaceDecl *ClassDecl) {
}
RewriteObjCInternalStruct(ClassDecl, ResultStr);
- for (auto *I : ClassDecl->properties())
+ for (auto *I : ClassDecl->instance_properties())
RewriteProperty(I);
for (auto *I : ClassDecl->instance_methods())
RewriteMethodDeclaration(I);
@@ -1720,7 +1721,6 @@ void RewriteObjC::WarnAboutReturnGotoStmts(Stmt *S)
Diags.Report(Context->getFullLoc(S->getLocStart()),
TryFinallyContainsReturnDiag);
}
- return;
}
void RewriteObjC::HasReturnStmts(Stmt *S, bool &hasReturns)
@@ -1730,32 +1730,29 @@ void RewriteObjC::HasReturnStmts(Stmt *S, bool &hasReturns)
if (SubStmt)
HasReturnStmts(SubStmt, hasReturns);
- if (isa<ReturnStmt>(S))
- hasReturns = true;
- return;
+ if (isa<ReturnStmt>(S))
+ hasReturns = true;
}
void RewriteObjC::RewriteTryReturnStmts(Stmt *S) {
- // Perform a bottom up traversal of all children.
- for (Stmt *SubStmt : S->children())
- if (SubStmt) {
- RewriteTryReturnStmts(SubStmt);
- }
- if (isa<ReturnStmt>(S)) {
- SourceLocation startLoc = S->getLocStart();
- const char *startBuf = SM->getCharacterData(startLoc);
-
- const char *semiBuf = strchr(startBuf, ';');
- assert((*semiBuf == ';') && "RewriteTryReturnStmts: can't find ';'");
- SourceLocation onePastSemiLoc = startLoc.getLocWithOffset(semiBuf-startBuf+1);
+ // Perform a bottom up traversal of all children.
+ for (Stmt *SubStmt : S->children())
+ if (SubStmt) {
+ RewriteTryReturnStmts(SubStmt);
+ }
+ if (isa<ReturnStmt>(S)) {
+ SourceLocation startLoc = S->getLocStart();
+ const char *startBuf = SM->getCharacterData(startLoc);
+ const char *semiBuf = strchr(startBuf, ';');
+ assert((*semiBuf == ';') && "RewriteTryReturnStmts: can't find ';'");
+ SourceLocation onePastSemiLoc = startLoc.getLocWithOffset(semiBuf-startBuf+1);
- std::string buf;
- buf = "{ objc_exception_try_exit(&_stack); return";
+ std::string buf;
+ buf = "{ objc_exception_try_exit(&_stack); return";
- ReplaceText(startLoc, 6, buf);
- InsertText(onePastSemiLoc, "}");
- }
- return;
+ ReplaceText(startLoc, 6, buf);
+ InsertText(onePastSemiLoc, "}");
+ }
}
void RewriteObjC::RewriteSyncReturnStmts(Stmt *S, std::string syncExitBuf) {
@@ -1780,7 +1777,6 @@ void RewriteObjC::RewriteSyncReturnStmts(Stmt *S, std::string syncExitBuf) {
ReplaceText(startLoc, 6, buf);
InsertText(onePastSemiLoc, "}");
}
- return;
}
Stmt *RewriteObjC::RewriteObjCTryStmt(ObjCAtTryStmt *S) {
@@ -2287,7 +2283,6 @@ void RewriteObjC::RewriteBlockPointerTypeVariable(std::string& Str,
}
}
-
void RewriteObjC::RewriteBlockLiteralFunctionDecl(FunctionDecl *FD) {
SourceLocation FunLocStart = FD->getTypeSpecStartLoc();
const FunctionType *funcType = FD->getType()->getAs<FunctionType>();
@@ -2615,10 +2610,8 @@ CallExpr *RewriteObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFlavo
CallExpr *STCE = new (Context) CallExpr(
*Context, PE, MsgExprs, FT->getReturnType(), VK_RValue, SourceLocation());
return STCE;
-
}
-
Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
SourceLocation StartLoc,
SourceLocation EndLoc) {
@@ -2924,7 +2917,7 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp,
ArgTypes.push_back(Context->getObjCSelType());
if (ObjCMethodDecl *OMD = Exp->getMethodDecl()) {
// Push any user argument types.
- for (const auto *PI : OMD->params()) {
+ for (const auto *PI : OMD->parameters()) {
QualType t = PI->getType()->isObjCQualifiedIdType()
? Context->getObjCIdType()
: PI->getType();
@@ -3059,7 +3052,6 @@ Stmt *RewriteObjC::RewriteObjCProtocolExpr(ObjCProtocolExpr *Exp) {
ProtocolExprDecls.insert(Exp->getProtocol()->getCanonicalDecl());
// delete Exp; leak for now, see RewritePropertyOrImplicitSetter() usage for more info.
return castExpr;
-
}
bool RewriteObjC::BufferContainsPPDirectives(const char *startBuf,
@@ -3224,7 +3216,6 @@ void RewriteObjC::RewriteObjCInternalStruct(ObjCInterfaceDecl *CDecl,
// Meta Data Emission
//===----------------------------------------------------------------------===//
-
/// RewriteImplementations - This routine rewrites all method implementations
/// and emits meta-data.
@@ -3665,8 +3656,6 @@ void RewriteObjC::GetBlockDeclRefExprs(Stmt *S) {
HasLocalVariableExternalStorage(DRE->getDecl()))
// FIXME: Handle enums.
BlockDeclRefs.push_back(DRE);
-
- return;
}
void RewriteObjC::GetInnerBlockDeclRefExprs(Stmt *S,
@@ -3694,8 +3683,6 @@ void RewriteObjC::GetInnerBlockDeclRefExprs(Stmt *S,
ImportedLocalExternalDecls.insert(Var);
}
}
-
- return;
}
/// convertFunctionTypeOfBlocks - This routine converts a function type
@@ -3761,7 +3748,7 @@ Stmt *RewriteObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp) {
= dyn_cast<PseudoObjectExpr>(BlockExp)) {
CPT = POE->getType()->castAs<BlockPointerType>();
} else {
- assert(1 && "RewriteBlockClass: Bad type");
+ assert(false && "RewriteBlockClass: Bad type");
}
assert(CPT && "RewriteBlockClass: Bad type");
const FunctionType *FT = CPT->getPointeeType()->getAs<FunctionType>();
@@ -3931,7 +3918,6 @@ void RewriteObjC::RewriteCastExpr(CStyleCastExpr *CE) {
break;
}
}
- return;
}
void RewriteObjC::RewriteBlockPointerFunctionArgs(FunctionDecl *FD) {
@@ -3966,7 +3952,6 @@ void RewriteObjC::RewriteBlockPointerFunctionArgs(FunctionDecl *FD) {
break;
}
}
- return;
}
bool RewriteObjC::PointerTypeTakesAnyBlockArguments(QualType QT) {
@@ -4103,11 +4088,8 @@ void RewriteObjC::RewriteBlockPointerDecl(NamedDecl *ND) {
OrigLength++;
}
ReplaceText(Start, OrigLength, buf);
-
- return;
}
-
/// SynthesizeByrefCopyDestroyHelper - This routine synthesizes:
/// void __Block_byref_id_object_copy(struct Block_byref_id_object *dst,
/// struct Block_byref_id_object *src) {
@@ -4328,7 +4310,6 @@ void RewriteObjC::RewriteByRefVar(VarDecl *ND) {
InsertText(semiLoc, "}");
}
- return;
}
void RewriteObjC::CollectBlockDeclRefInfo(BlockExpr *Exp) {
@@ -4494,7 +4475,6 @@ Stmt *RewriteObjC::SynthBlockInitExpr(BlockExpr *Exp,
Exp = new (Context) UnaryOperator(Exp, UO_AddrOf, QT, VK_RValue,
OK_Ordinary, SourceLocation());
}
-
}
InitExprs.push_back(Exp);
}
@@ -5241,7 +5221,6 @@ void RewriteObjCFragileABI::RewriteObjCProtocolMetaData(
// Mark this protocol as having been generated.
if (!ObjCSynthesizedProtocols.insert(PDecl->getCanonicalDecl()).second)
llvm_unreachable("protocol already synthesized");
-
}
void RewriteObjCFragileABI::RewriteObjCProtocolListMetaData(
@@ -5910,4 +5889,4 @@ Stmt *RewriteObjCFragileABI::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
return Replacement;
}
-#endif
+#endif // CLANG_ENABLE_OBJC_REWRITER
diff --git a/contrib/llvm/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp b/contrib/llvm/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
index 1bf10d276945..5c42406876b6 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
+#include <utility>
#include <vector>
using namespace clang;
@@ -147,7 +148,7 @@ class SDiagsWriter : public DiagnosticConsumer {
explicit SDiagsWriter(IntrusiveRefCntPtr<SharedState> State)
: LangOpts(nullptr), OriginalInstance(false), MergeChildRecords(false),
- State(State) {}
+ State(std::move(State)) {}
public:
SDiagsWriter(StringRef File, DiagnosticOptions *Diags, bool MergeChildRecords)
diff --git a/contrib/llvm/tools/clang/lib/Frontend/TestModuleFileExtension.cpp b/contrib/llvm/tools/clang/lib/Frontend/TestModuleFileExtension.cpp
index d1b20c4a80b3..b43d45f7ae46 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/TestModuleFileExtension.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/TestModuleFileExtension.cpp
@@ -38,9 +38,7 @@ void TestModuleFileExtension::Writer::writeExtensionContents(
OS << "Hello from " << Ext->BlockName << " v" << Ext->MajorVersion << "."
<< Ext->MinorVersion;
}
- SmallVector<uint64_t, 4> Record;
- Record.push_back(FIRST_EXTENSION_RECORD_ID);
- Record.push_back(Message.size());
+ uint64_t Record[] = {FIRST_EXTENSION_RECORD_ID, Message.size()};
Stream.EmitRecordWithBlob(Abbrev, Record, Message);
}
diff --git a/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp b/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp
index d4e156d44582..977af079a77a 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp
@@ -819,7 +819,15 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
switch (DiagOpts->getFormat()) {
case DiagnosticOptions::Clang:
case DiagnosticOptions::Vi: OS << ':'; break;
- case DiagnosticOptions::MSVC: OS << ") : "; break;
+ case DiagnosticOptions::MSVC:
+ // MSVC2013 and before print 'file(4) : error'. MSVC2015 gets rid of the
+ // space and prints 'file(4): error'.
+ OS << ')';
+ if (LangOpts.MSCompatibilityVersion &&
+ !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
+ OS << ' ';
+ OS << ": ";
+ break;
}
if (DiagOpts->ShowSourceRanges && !Ranges.empty()) {
@@ -875,7 +883,7 @@ void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
void TextDiagnostic::emitIncludeLocation(SourceLocation Loc,
PresumedLoc PLoc,
const SourceManager &SM) {
- if (DiagOpts->ShowLocation && PLoc.getFilename())
+ if (DiagOpts->ShowLocation && PLoc.isValid())
OS << "In file included from " << PLoc.getFilename() << ':'
<< PLoc.getLine() << ":\n";
else
@@ -885,7 +893,7 @@ void TextDiagnostic::emitIncludeLocation(SourceLocation Loc,
void TextDiagnostic::emitImportLocation(SourceLocation Loc, PresumedLoc PLoc,
StringRef ModuleName,
const SourceManager &SM) {
- if (DiagOpts->ShowLocation && PLoc.getFilename())
+ if (DiagOpts->ShowLocation && PLoc.isValid())
OS << "In module '" << ModuleName << "' imported from "
<< PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
else
@@ -896,7 +904,7 @@ void TextDiagnostic::emitBuildingModuleLocation(SourceLocation Loc,
PresumedLoc PLoc,
StringRef ModuleName,
const SourceManager &SM) {
- if (DiagOpts->ShowLocation && PLoc.getFilename())
+ if (DiagOpts->ShowLocation && PLoc.isValid())
OS << "While building module '" << ModuleName << "' imported from "
<< PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
else
@@ -1082,10 +1090,13 @@ void TextDiagnostic::emitSnippetAndCaret(
// Get information about the buffer it points into.
bool Invalid = false;
- const char *BufStart = SM.getBufferData(FID, &Invalid).data();
+ StringRef BufData = SM.getBufferData(FID, &Invalid);
if (Invalid)
return;
+ const char *BufStart = BufData.data();
+ const char *BufEnd = BufStart + BufData.size();
+
unsigned LineNo = SM.getLineNumber(FID, FileOffset);
unsigned ColNo = SM.getColumnNumber(FID, FileOffset);
@@ -1101,15 +1112,20 @@ void TextDiagnostic::emitSnippetAndCaret(
// Compute the line end. Scan forward from the error position to the end of
// the line.
const char *LineEnd = TokPtr;
- while (*LineEnd != '\n' && *LineEnd != '\r' && *LineEnd != '\0')
+ while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd)
++LineEnd;
// Arbitrarily stop showing snippets when the line is too long.
if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint)
return;
+ // Trim trailing null-bytes.
+ StringRef Line(LineStart, LineEnd - LineStart);
+ while (Line.size() > ColNo && Line.back() == '\0')
+ Line = Line.drop_back();
+
// Copy the line of code into an std::string for ease of manipulation.
- std::string SourceLine(LineStart, LineEnd);
+ std::string SourceLine(Line.begin(), Line.end());
// Build the byte to column map.
const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop);
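The trimming loop above deliberately keeps at least ColNo characters, so a caret that points into a run of embedded NUL bytes still lands on a printed column. The same idea in stand-alone form, with std::string_view in place of StringRef and the inputs assumed:

    #include <cstddef>
    #include <string>
    #include <string_view>

    // Drop trailing NUL bytes from a source line, but never trim past the
    // caret column, so the caret always points at a visible position.
    static std::string printableLine(std::string_view Line,
                                     std::size_t CaretCol) {
      while (Line.size() > CaretCol && Line.back() == '\0')
        Line.remove_suffix(1);
      return std::string(Line);
    }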
diff --git a/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index 79cf0049a7b2..509c326d1597 100644
--- a/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/contrib/llvm/tools/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -31,33 +31,34 @@
using namespace clang;
using namespace llvm::opt;
-static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) {
+static std::unique_ptr<FrontendAction>
+CreateFrontendBaseAction(CompilerInstance &CI) {
using namespace clang::frontend;
StringRef Action("unknown");
(void)Action;
switch (CI.getFrontendOpts().ProgramAction) {
- case ASTDeclList: return new ASTDeclListAction();
- case ASTDump: return new ASTDumpAction();
- case ASTPrint: return new ASTPrintAction();
- case ASTView: return new ASTViewAction();
- case DumpRawTokens: return new DumpRawTokensAction();
- case DumpTokens: return new DumpTokensAction();
- case EmitAssembly: return new EmitAssemblyAction();
- case EmitBC: return new EmitBCAction();
- case EmitHTML: return new HTMLPrintAction();
- case EmitLLVM: return new EmitLLVMAction();
- case EmitLLVMOnly: return new EmitLLVMOnlyAction();
- case EmitCodeGenOnly: return new EmitCodeGenOnlyAction();
- case EmitObj: return new EmitObjAction();
- case FixIt: return new FixItAction();
- case GenerateModule: return new GenerateModuleAction;
- case GeneratePCH: return new GeneratePCHAction;
- case GeneratePTH: return new GeneratePTHAction();
- case InitOnly: return new InitOnlyAction();
- case ParseSyntaxOnly: return new SyntaxOnlyAction();
- case ModuleFileInfo: return new DumpModuleInfoAction();
- case VerifyPCH: return new VerifyPCHAction();
+ case ASTDeclList: return llvm::make_unique<ASTDeclListAction>();
+ case ASTDump: return llvm::make_unique<ASTDumpAction>();
+ case ASTPrint: return llvm::make_unique<ASTPrintAction>();
+ case ASTView: return llvm::make_unique<ASTViewAction>();
+ case DumpRawTokens: return llvm::make_unique<DumpRawTokensAction>();
+ case DumpTokens: return llvm::make_unique<DumpTokensAction>();
+ case EmitAssembly: return llvm::make_unique<EmitAssemblyAction>();
+ case EmitBC: return llvm::make_unique<EmitBCAction>();
+ case EmitHTML: return llvm::make_unique<HTMLPrintAction>();
+ case EmitLLVM: return llvm::make_unique<EmitLLVMAction>();
+ case EmitLLVMOnly: return llvm::make_unique<EmitLLVMOnlyAction>();
+ case EmitCodeGenOnly: return llvm::make_unique<EmitCodeGenOnlyAction>();
+ case EmitObj: return llvm::make_unique<EmitObjAction>();
+ case FixIt: return llvm::make_unique<FixItAction>();
+ case GenerateModule: return llvm::make_unique<GenerateModuleAction>();
+ case GeneratePCH: return llvm::make_unique<GeneratePCHAction>();
+ case GeneratePTH: return llvm::make_unique<GeneratePTHAction>();
+ case InitOnly: return llvm::make_unique<InitOnlyAction>();
+ case ParseSyntaxOnly: return llvm::make_unique<SyntaxOnlyAction>();
+ case ModuleFileInfo: return llvm::make_unique<DumpModuleInfoAction>();
+ case VerifyPCH: return llvm::make_unique<VerifyPCHAction>();
case PluginAction: {
for (FrontendPluginRegistry::iterator it =
@@ -65,9 +66,11 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) {
it != ie; ++it) {
if (it->getName() == CI.getFrontendOpts().ActionName) {
std::unique_ptr<PluginASTAction> P(it->instantiate());
- if (!P->ParseArgs(CI, CI.getFrontendOpts().PluginArgs))
+ if ((P->getActionType() != PluginASTAction::ReplaceAction &&
+ P->getActionType() != PluginASTAction::Cmdline) ||
+ !P->ParseArgs(CI, CI.getFrontendOpts().PluginArgs[it->getName()]))
return nullptr;
- return P.release();
+ return std::move(P);
}
}
@@ -76,32 +79,33 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) {
return nullptr;
}
- case PrintDeclContext: return new DeclContextPrintAction();
- case PrintPreamble: return new PrintPreambleAction();
+ case PrintDeclContext: return llvm::make_unique<DeclContextPrintAction>();
+ case PrintPreamble: return llvm::make_unique<PrintPreambleAction>();
case PrintPreprocessedInput: {
if (CI.getPreprocessorOutputOpts().RewriteIncludes)
- return new RewriteIncludesAction();
- return new PrintPreprocessedAction();
+ return llvm::make_unique<RewriteIncludesAction>();
+ return llvm::make_unique<PrintPreprocessedAction>();
}
- case RewriteMacros: return new RewriteMacrosAction();
- case RewriteTest: return new RewriteTestAction();
+ case RewriteMacros: return llvm::make_unique<RewriteMacrosAction>();
+ case RewriteTest: return llvm::make_unique<RewriteTestAction>();
#ifdef CLANG_ENABLE_OBJC_REWRITER
- case RewriteObjC: return new RewriteObjCAction();
+ case RewriteObjC: return llvm::make_unique<RewriteObjCAction>();
#else
case RewriteObjC: Action = "RewriteObjC"; break;
#endif
#ifdef CLANG_ENABLE_ARCMT
- case MigrateSource: return new arcmt::MigrateSourceAction();
+ case MigrateSource:
+ return llvm::make_unique<arcmt::MigrateSourceAction>();
#else
case MigrateSource: Action = "MigrateSource"; break;
#endif
#ifdef CLANG_ENABLE_STATIC_ANALYZER
- case RunAnalysis: return new ento::AnalysisAction();
+ case RunAnalysis: return llvm::make_unique<ento::AnalysisAction>();
#else
case RunAnalysis: Action = "RunAnalysis"; break;
#endif
- case RunPreprocessorOnly: return new PreprocessOnlyAction();
+ case RunPreprocessorOnly: return llvm::make_unique<PreprocessOnlyAction>();
}
#if !defined(CLANG_ENABLE_ARCMT) || !defined(CLANG_ENABLE_STATIC_ANALYZER) \
@@ -113,16 +117,17 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) {
#endif
}
-static FrontendAction *CreateFrontendAction(CompilerInstance &CI) {
+static std::unique_ptr<FrontendAction>
+CreateFrontendAction(CompilerInstance &CI) {
// Create the underlying action.
- FrontendAction *Act = CreateFrontendBaseAction(CI);
+ std::unique_ptr<FrontendAction> Act = CreateFrontendBaseAction(CI);
if (!Act)
return nullptr;
const FrontendOptions &FEOpts = CI.getFrontendOpts();
if (FEOpts.FixAndRecompile) {
- Act = new FixItRecompile(Act);
+ Act = llvm::make_unique<FixItRecompile>(std::move(Act));
}
#ifdef CLANG_ENABLE_ARCMT
@@ -133,13 +138,13 @@ static FrontendAction *CreateFrontendAction(CompilerInstance &CI) {
case FrontendOptions::ARCMT_None:
break;
case FrontendOptions::ARCMT_Check:
- Act = new arcmt::CheckAction(Act);
+ Act = llvm::make_unique<arcmt::CheckAction>(std::move(Act));
break;
case FrontendOptions::ARCMT_Modify:
- Act = new arcmt::ModifyAction(Act);
+ Act = llvm::make_unique<arcmt::ModifyAction>(std::move(Act));
break;
case FrontendOptions::ARCMT_Migrate:
- Act = new arcmt::MigrateAction(Act,
+ Act = llvm::make_unique<arcmt::MigrateAction>(std::move(Act),
FEOpts.MTMigrateDir,
FEOpts.ARCMTMigrateReportOut,
FEOpts.ARCMTMigrateEmitARCErrors);
@@ -147,8 +152,9 @@ static FrontendAction *CreateFrontendAction(CompilerInstance &CI) {
}
if (FEOpts.ObjCMTAction != FrontendOptions::ObjCMT_None) {
- Act = new arcmt::ObjCMigrateAction(Act, FEOpts.MTMigrateDir,
- FEOpts.ObjCMTAction);
+ Act = llvm::make_unique<arcmt::ObjCMigrateAction>(std::move(Act),
+ FEOpts.MTMigrateDir,
+ FEOpts.ObjCMTAction);
}
}
#endif
@@ -156,7 +162,8 @@ static FrontendAction *CreateFrontendAction(CompilerInstance &CI) {
// If there are any AST files to merge, create a frontend action
// adaptor to perform the merge.
if (!FEOpts.ASTMergeFiles.empty())
- Act = new ASTMergeAction(Act, FEOpts.ASTMergeFiles);
+ Act = llvm::make_unique<ASTMergeAction>(std::move(Act),
+ FEOpts.ASTMergeFiles);
return Act;
}
@@ -189,6 +196,18 @@ bool clang::ExecuteCompilerInvocation(CompilerInstance *Clang) {
<< Path << Error;
}
+ // Check if any of the loaded plugins replaces the main AST action
+ for (FrontendPluginRegistry::iterator it = FrontendPluginRegistry::begin(),
+ ie = FrontendPluginRegistry::end();
+ it != ie; ++it) {
+ std::unique_ptr<PluginASTAction> P(it->instantiate());
+ if (P->getActionType() == PluginASTAction::ReplaceAction) {
+ Clang->getFrontendOpts().ProgramAction = clang::frontend::PluginAction;
+ Clang->getFrontendOpts().ActionName = it->getName();
+ break;
+ }
+ }
+
// Honor -mllvm.
//
// FIXME: Remove this, one day.
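The switch from raw new to llvm::make_unique above makes the ownership chain explicit: each adaptor takes the previous action by value and owns it. A reduced sketch of the pattern with hypothetical names:

    #include <memory>
    #include <utility>

    struct Action {
      virtual ~Action() = default;
      virtual void run() = 0;
    };

    struct BaseAction : Action {
      void run() override {}
    };

    // An adaptor that owns an inner action, the way FixItRecompile and the
    // ARC migration actions wrap the base frontend action.
    struct WrapperAction : Action {
      explicit WrapperAction(std::unique_ptr<Action> Inner)
          : Inner(std::move(Inner)) {}
      void run() override { Inner->run(); }
      std::unique_ptr<Action> Inner;
    };

    static std::unique_ptr<Action> createAction(bool Wrap) {
      std::unique_ptr<Action> Act = std::make_unique<BaseAction>();
      if (Wrap) // the old Act moves into the wrapper, which becomes Act
        Act = std::make_unique<WrapperAction>(std::move(Act));
      return Act;
    }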
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_cmath.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_cmath.h
new file mode 100644
index 000000000000..ae7ff2f8d306
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_cmath.h
@@ -0,0 +1,148 @@
+/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __CLANG_CUDA_CMATH_H__
+#define __CLANG_CUDA_CMATH_H__
+#ifndef __CUDA__
+#error "This file is for CUDA compilation only."
+#endif
+
+// CUDA lets us use various std math functions on the device side. This file
+// works in concert with __clang_cuda_math_forward_declares.h to provide them.
+//
+// Specifically, the forward-declares header declares __device__ overloads for
+// these functions in the global namespace, then pulls them into namespace std
+// with 'using' statements. Then this file implements those functions, after
+// the implementations have been pulled in.
+//
+// It's important that we declare the functions in the global namespace and pull
+// them into namespace std with using statements, as opposed to simply declaring
+// these functions in namespace std, because our device functions need to
+// overload the standard library functions, which may be declared in the global
+// namespace or in std, depending on the degree of conformance of the stdlib
+// implementation. Declaring in the global namespace and pulling into namespace
+// std covers all of the known knowns.
+
+#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
+
+__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
+__DEVICE__ long abs(long __n) { return ::labs(__n); }
+__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
+__DEVICE__ double abs(double __x) { return ::fabs(__x); }
+__DEVICE__ float acos(float __x) { return ::acosf(__x); }
+__DEVICE__ float asin(float __x) { return ::asinf(__x); }
+__DEVICE__ float atan(float __x) { return ::atanf(__x); }
+__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
+__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
+__DEVICE__ float cos(float __x) { return ::cosf(__x); }
+__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
+__DEVICE__ float exp(float __x) { return ::expf(__x); }
+__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
+__DEVICE__ float floor(float __x) { return ::floorf(__x); }
+__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
+__DEVICE__ int fpclassify(float __x) {
+ return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
+ FP_ZERO, __x);
+}
+__DEVICE__ int fpclassify(double __x) {
+ return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
+ FP_ZERO, __x);
+}
+__DEVICE__ float frexp(float __arg, int *__exp) {
+ return ::frexpf(__arg, __exp);
+}
+__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ bool isgreater(float __x, float __y) {
+ return __builtin_isgreater(__x, __y);
+}
+__DEVICE__ bool isgreater(double __x, double __y) {
+ return __builtin_isgreater(__x, __y);
+}
+__DEVICE__ bool isgreaterequal(float __x, float __y) {
+ return __builtin_isgreaterequal(__x, __y);
+}
+__DEVICE__ bool isgreaterequal(double __x, double __y) {
+ return __builtin_isgreaterequal(__x, __y);
+}
+__DEVICE__ bool isless(float __x, float __y) {
+ return __builtin_isless(__x, __y);
+}
+__DEVICE__ bool isless(double __x, double __y) {
+ return __builtin_isless(__x, __y);
+}
+__DEVICE__ bool islessequal(float __x, float __y) {
+ return __builtin_islessequal(__x, __y);
+}
+__DEVICE__ bool islessequal(double __x, double __y) {
+ return __builtin_islessequal(__x, __y);
+}
+__DEVICE__ bool islessgreater(float __x, float __y) {
+ return __builtin_islessgreater(__x, __y);
+}
+__DEVICE__ bool islessgreater(double __x, double __y) {
+ return __builtin_islessgreater(__x, __y);
+}
+__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
+__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
+__DEVICE__ bool isunordered(float __x, float __y) {
+ return __builtin_isunordered(__x, __y);
+}
+__DEVICE__ bool isunordered(double __x, double __y) {
+ return __builtin_isunordered(__x, __y);
+}
+__DEVICE__ float ldexp(float __arg, int __exp) {
+ return ::ldexpf(__arg, __exp);
+}
+__DEVICE__ float log(float __x) { return ::logf(__x); }
+__DEVICE__ float log10(float __x) { return ::log10f(__x); }
+__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
+__DEVICE__ float nexttoward(float __from, float __to) {
+ return __builtin_nexttowardf(__from, __to);
+}
+__DEVICE__ double nexttoward(double __from, double __to) {
+ return __builtin_nexttoward(__from, __to);
+}
+__DEVICE__ float pow(float __base, float __exp) {
+ return ::powf(__base, __exp);
+}
+__DEVICE__ float pow(float __base, int __iexp) {
+ return ::powif(__base, __iexp);
+}
+__DEVICE__ double pow(double __base, int __iexp) {
+ return ::powi(__base, __iexp);
+}
+__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
+__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
+__DEVICE__ float sin(float __x) { return ::sinf(__x); }
+__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
+__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
+__DEVICE__ float tan(float __x) { return ::tanf(__x); }
+__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+
+#undef __DEVICE__
+
+#endif
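The fpclassify overloads above are built directly on __builtin_fpclassify, which takes the five classification constants followed by the value to classify. The builtin also works in ordinary host code under clang or gcc; a small runnable check:

    #include <cmath>
    #include <cstdio>

    // Same construction as the header's fpclassify, runnable on the host.
    static int classify(double X) {
      return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
                                  FP_SUBNORMAL, FP_ZERO, X);
    }

    int main() {
      std::printf("%d %d\n", classify(0.0) == FP_ZERO,
                  classify(1.0) == FP_NORMAL);
    }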
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_intrinsics.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_intrinsics.h
new file mode 100644
index 000000000000..3df41fa290d3
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -0,0 +1,322 @@
+/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __CLANG_CUDA_INTRINSICS_H__
+#define __CLANG_CUDA_INTRINSICS_H__
+#ifndef __CUDA__
+#error "This file is for CUDA compilation only."
+#endif
+
+// sm_30 intrinsics: __shfl_{up,down,xor}.
+
+#define __SM_30_INTRINSICS_H__
+#define __SM_30_INTRINSICS_HPP__
+
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+
+#pragma push_macro("__MAKE_SHUFFLES")
+#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask) \
+ inline __device__ int __FnName(int __in, int __offset, \
+ int __width = warpSize) { \
+ return __IntIntrinsic(__in, __offset, \
+ ((warpSize - __width) << 8) | (__Mask)); \
+ } \
+ inline __device__ float __FnName(float __in, int __offset, \
+ int __width = warpSize) { \
+ return __FloatIntrinsic(__in, __offset, \
+ ((warpSize - __width) << 8) | (__Mask)); \
+ } \
+ inline __device__ unsigned int __FnName(unsigned int __in, int __offset, \
+ int __width = warpSize) { \
+ return static_cast<unsigned int>( \
+ ::__FnName(static_cast<int>(__in), __offset, __width)); \
+ } \
+ inline __device__ long long __FnName(long long __in, int __offset, \
+ int __width = warpSize) { \
+ struct __Bits { \
+ int __a, __b; \
+ }; \
+ _Static_assert(sizeof(__in) == sizeof(__Bits), ""); \
+ _Static_assert(sizeof(__Bits) == 2 * sizeof(int), ""); \
+ __Bits __tmp; \
+ memcpy(&__tmp, &__in, sizeof(__in)); \
+ __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \
+ __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \
+ long long __out; \
+ memcpy(&__out, &__tmp, sizeof(__tmp)); \
+ return __out; \
+ } \
+ inline __device__ unsigned long long __FnName( \
+ unsigned long long __in, int __offset, int __width = warpSize) { \
+ return static_cast<unsigned long long>( \
+ ::__FnName(static_cast<long long>(__in), __offset, __width)); \
+ } \
+ inline __device__ double __FnName(double __in, int __offset, \
+ int __width = warpSize) { \
+ long long __tmp; \
+ _Static_assert(sizeof(__tmp) == sizeof(__in)); \
+ memcpy(&__tmp, &__in, sizeof(__in)); \
+ __tmp = ::__FnName(__tmp, __offset, __width); \
+ double __out; \
+ memcpy(&__out, &__tmp, sizeof(__out)); \
+ return __out; \
+ }
+
+__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f);
+// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
+// maxLane.
+__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0);
+__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f);
+__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f);
+
+#pragma pop_macro("__MAKE_SHUFFLES")
+
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
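The long long overloads above shuffle a 64-bit value by splitting it into two 32-bit halves with memcpy (a well-defined bit-cast), shuffling each half, and reassembling. A host-runnable model of that round trip, with transform32 as a stand-in for the per-lane shuffle:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static std::uint32_t transform32(std::uint32_t V) { return V + 1; }

    static std::uint64_t shuffle64(std::uint64_t In) {
      std::uint32_t Halves[2];
      static_assert(sizeof(Halves) == sizeof(In), "size mismatch");
      std::memcpy(Halves, &In, sizeof(In)); // destination first, source second
      Halves[0] = transform32(Halves[0]);
      Halves[1] = transform32(Halves[1]);
      std::uint64_t Out;
      std::memcpy(&Out, Halves, sizeof(Out));
      return Out;
    }

    int main() {
      std::printf("%llx\n", (unsigned long long)shuffle64(0x100000001ULL));
    }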
+
+// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.
+
+// Prevent the vanilla sm_32 intrinsics header from being included.
+#define __SM_32_INTRINSICS_H__
+#define __SM_32_INTRINSICS_HPP__
+
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
+
+inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }
+inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }
+inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }
+inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }
+inline __device__ long long __ldg(const long long *ptr) {
+ return __nvvm_ldg_ll(ptr);
+}
+inline __device__ unsigned char __ldg(const unsigned char *ptr) {
+ return __nvvm_ldg_uc(ptr);
+}
+inline __device__ unsigned short __ldg(const unsigned short *ptr) {
+ return __nvvm_ldg_us(ptr);
+}
+inline __device__ unsigned int __ldg(const unsigned int *ptr) {
+ return __nvvm_ldg_ui(ptr);
+}
+inline __device__ unsigned long __ldg(const unsigned long *ptr) {
+ return __nvvm_ldg_ul(ptr);
+}
+inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {
+ return __nvvm_ldg_ull(ptr);
+}
+inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }
+inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }
+
+inline __device__ char2 __ldg(const char2 *ptr) {
+ typedef char c2 __attribute__((ext_vector_type(2)));
+ // We can assume that ptr is aligned at least to char2's alignment, but the
+ // load will assume that ptr is aligned to c2's alignment. This is only
+ // safe if alignof(c2) <= alignof(char2).
+ c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));
+ char2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ char4 __ldg(const char4 *ptr) {
+ typedef char c4 __attribute__((ext_vector_type(4)));
+ c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));
+ char4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ short2 __ldg(const short2 *ptr) {
+ typedef short s2 __attribute__((ext_vector_type(2)));
+ s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));
+ short2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ short4 __ldg(const short4 *ptr) {
+ typedef short s4 __attribute__((ext_vector_type(4)));
+ s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));
+ short4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ int2 __ldg(const int2 *ptr) {
+ typedef int i2 __attribute__((ext_vector_type(2)));
+ i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));
+ int2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ int4 __ldg(const int4 *ptr) {
+ typedef int i4 __attribute__((ext_vector_type(4)));
+ i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));
+ int4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ longlong2 __ldg(const longlong2 *ptr) {
+ typedef long long ll2 __attribute__((ext_vector_type(2)));
+ ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));
+ longlong2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+
+inline __device__ uchar2 __ldg(const uchar2 *ptr) {
+ typedef unsigned char uc2 __attribute__((ext_vector_type(2)));
+ uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));
+ uchar2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ uchar4 __ldg(const uchar4 *ptr) {
+ typedef unsigned char uc4 __attribute__((ext_vector_type(4)));
+ uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));
+ uchar4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ ushort2 __ldg(const ushort2 *ptr) {
+ typedef unsigned short us2 __attribute__((ext_vector_type(2)));
+ us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));
+ ushort2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ ushort4 __ldg(const ushort4 *ptr) {
+ typedef unsigned short us4 __attribute__((ext_vector_type(4)));
+ us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));
+ ushort4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ uint2 __ldg(const uint2 *ptr) {
+ typedef unsigned int ui2 __attribute__((ext_vector_type(2)));
+ ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));
+ uint2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ uint4 __ldg(const uint4 *ptr) {
+ typedef unsigned int ui4 __attribute__((ext_vector_type(4)));
+ ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));
+ uint4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {
+ typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));
+ ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));
+ ulonglong2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+
+inline __device__ float2 __ldg(const float2 *ptr) {
+ typedef float f2 __attribute__((ext_vector_type(2)));
+ f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));
+ float2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+inline __device__ float4 __ldg(const float4 *ptr) {
+ typedef float f4 __attribute__((ext_vector_type(4)));
+ f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));
+ float4 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ ret.z = rv[2];
+ ret.w = rv[3];
+ return ret;
+}
+inline __device__ double2 __ldg(const double2 *ptr) {
+ typedef double d2 __attribute__((ext_vector_type(2)));
+ d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));
+ double2 ret;
+ ret.x = rv[0];
+ ret.y = rv[1];
+ return ret;
+}
+
+// TODO: Implement these as intrinsics, so the backend can work its magic on
+// these. Alternatively, we could implement these as plain C and try to get
+// llvm to recognize the relevant patterns.
+inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,
+ unsigned shiftWidth) {
+ unsigned result;
+ asm("shf.l.wrap.b32 %0, %1, %2, %3;"
+ : "=r"(result)
+ : "r"(low32), "r"(high32), "r"(shiftWidth));
+ return result;
+}
+inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,
+ unsigned shiftWidth) {
+ unsigned result;
+ asm("shf.l.clamp.b32 %0, %1, %2, %3;"
+ : "=r"(result)
+ : "r"(low32), "r"(high32), "r"(shiftWidth));
+ return result;
+}
+inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,
+ unsigned shiftWidth) {
+ unsigned result;
+ asm("shf.r.wrap.b32 %0, %1, %2, %3;"
+ : "=r"(result)
+ : "r"(low32), "r"(high32), "r"(shiftWidth));
+ return result;
+}
+inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
+ unsigned shiftWidth) {
+ unsigned ret;
+ asm("shf.r.clamp.b32 %0, %1, %2, %3;"
+ : "=r"(ret)
+ : "r"(low32), "r"(high32), "r"(shiftWidth));
+ return ret;
+}
+
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
+
+#endif // __CLANG_CUDA_INTRINSICS_H__
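The funnel shifts concatenate high32:low32 into a 64-bit value, shift, and return the most significant 32 bits; the 'wrap' variants take the shift count modulo 32 while the 'clamp' variants saturate it at 32. A portable C++ model of __funnelshift_l for intuition (the real version compiles to a single shf.l.wrap.b32):

    #include <cstdint>

    static std::uint32_t funnelshift_l(std::uint32_t Low32,
                                       std::uint32_t High32,
                                       std::uint32_t Shift) {
      Shift &= 31; // wrap semantics: only the low five bits of the count matter
      if (Shift == 0)
        return High32;
      return (High32 << Shift) | (Low32 >> (32 - Shift));
    }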
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_math_forward_declares.h
new file mode 100644
index 000000000000..3f2834d95000
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_math_forward_declares.h
@@ -0,0 +1,263 @@
+/*===- __clang_cuda_math_forward_declares.h - Prototypes of __device__ math fns -===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __CLANG_CUDA_MATH_FORWARD_DECLARES_H__
+#define __CLANG_CUDA_MATH_FORWARD_DECLARES_H__
+#ifndef __CUDA__
+#error "This file is for CUDA compilation only."
+#endif
+
+// This file forward-declares some math functions we (or the CUDA headers)
+// will define later. We need to do this, and do it before cmath is included,
+// because the standard library may have constexpr math functions. In the
+// absence of a prior __device__ decl, those constexpr functions may become
+// implicitly host+device. host+device functions can't be overloaded, so that
+// would preclude the use of our own __device__ overloads for these functions.
+
+#pragma push_macro("__DEVICE__")
+#define __DEVICE__ \
+ static __inline__ __attribute__((always_inline)) __attribute__((device))
+
+__DEVICE__ double abs(double);
+__DEVICE__ float abs(float);
+__DEVICE__ int abs(int);
+__DEVICE__ long abs(long);
+__DEVICE__ long long abs(long long);
+__DEVICE__ double acos(double);
+__DEVICE__ float acos(float);
+__DEVICE__ double acosh(double);
+__DEVICE__ float acosh(float);
+__DEVICE__ double asin(double);
+__DEVICE__ float asin(float);
+__DEVICE__ double asinh(double);
+__DEVICE__ float asinh(float);
+__DEVICE__ double atan2(double, double);
+__DEVICE__ float atan2(float, float);
+__DEVICE__ double atan(double);
+__DEVICE__ float atan(float);
+__DEVICE__ double atanh(double);
+__DEVICE__ float atanh(float);
+__DEVICE__ double cbrt(double);
+__DEVICE__ float cbrt(float);
+__DEVICE__ double ceil(double);
+__DEVICE__ float ceil(float);
+__DEVICE__ double copysign(double, double);
+__DEVICE__ float copysign(float, float);
+__DEVICE__ double cos(double);
+__DEVICE__ float cos(float);
+__DEVICE__ double cosh(double);
+__DEVICE__ float cosh(float);
+__DEVICE__ double erfc(double);
+__DEVICE__ float erfc(float);
+__DEVICE__ double erf(double);
+__DEVICE__ float erf(float);
+__DEVICE__ double exp2(double);
+__DEVICE__ float exp2(float);
+__DEVICE__ double exp(double);
+__DEVICE__ float exp(float);
+__DEVICE__ double expm1(double);
+__DEVICE__ float expm1(float);
+__DEVICE__ double fabs(double);
+__DEVICE__ float fabs(float);
+__DEVICE__ double fdim(double, double);
+__DEVICE__ float fdim(float, float);
+__DEVICE__ double floor(double);
+__DEVICE__ float floor(float);
+__DEVICE__ double fma(double, double, double);
+__DEVICE__ float fma(float, float, float);
+__DEVICE__ double fmax(double, double);
+__DEVICE__ float fmax(float, float);
+__DEVICE__ double fmin(double, double);
+__DEVICE__ float fmin(float, float);
+__DEVICE__ double fmod(double, double);
+__DEVICE__ float fmod(float, float);
+__DEVICE__ int fpclassify(double);
+__DEVICE__ int fpclassify(float);
+__DEVICE__ double frexp(double, int *);
+__DEVICE__ float frexp(float, int *);
+__DEVICE__ double hypot(double, double);
+__DEVICE__ float hypot(float, float);
+__DEVICE__ int ilogb(double);
+__DEVICE__ int ilogb(float);
+__DEVICE__ bool isfinite(double);
+__DEVICE__ bool isfinite(float);
+__DEVICE__ bool isgreater(double, double);
+__DEVICE__ bool isgreaterequal(double, double);
+__DEVICE__ bool isgreaterequal(float, float);
+__DEVICE__ bool isgreater(float, float);
+__DEVICE__ bool isinf(double);
+__DEVICE__ bool isinf(float);
+__DEVICE__ bool isless(double, double);
+__DEVICE__ bool islessequal(double, double);
+__DEVICE__ bool islessequal(float, float);
+__DEVICE__ bool isless(float, float);
+__DEVICE__ bool islessgreater(double, double);
+__DEVICE__ bool islessgreater(float, float);
+__DEVICE__ bool isnan(double);
+__DEVICE__ bool isnan(float);
+__DEVICE__ bool isnormal(double);
+__DEVICE__ bool isnormal(float);
+__DEVICE__ bool isunordered(double, double);
+__DEVICE__ bool isunordered(float, float);
+__DEVICE__ long labs(long);
+__DEVICE__ double ldexp(double, int);
+__DEVICE__ float ldexp(float, int);
+__DEVICE__ double lgamma(double);
+__DEVICE__ float lgamma(float);
+__DEVICE__ long long llabs(long long);
+__DEVICE__ long long llrint(double);
+__DEVICE__ long long llrint(float);
+__DEVICE__ double log10(double);
+__DEVICE__ float log10(float);
+__DEVICE__ double log1p(double);
+__DEVICE__ float log1p(float);
+__DEVICE__ double log2(double);
+__DEVICE__ float log2(float);
+__DEVICE__ double logb(double);
+__DEVICE__ float logb(float);
+__DEVICE__ double log(double);
+__DEVICE__ float log(float);
+__DEVICE__ long lrint(double);
+__DEVICE__ long lrint(float);
+__DEVICE__ long lround(double);
+__DEVICE__ long lround(float);
+__DEVICE__ double modf(double, double *);
+__DEVICE__ float modf(float, float *);
+__DEVICE__ double nan(const char *);
+__DEVICE__ float nanf(const char *);
+__DEVICE__ double nearbyint(double);
+__DEVICE__ float nearbyint(float);
+__DEVICE__ double nextafter(double, double);
+__DEVICE__ float nextafter(float, float);
+__DEVICE__ double nexttoward(double, double);
+__DEVICE__ float nexttoward(float, float);
+__DEVICE__ double pow(double, double);
+__DEVICE__ double pow(double, int);
+__DEVICE__ float pow(float, float);
+__DEVICE__ float pow(float, int);
+__DEVICE__ double remainder(double, double);
+__DEVICE__ float remainder(float, float);
+__DEVICE__ double remquo(double, double, int *);
+__DEVICE__ float remquo(float, float, int *);
+__DEVICE__ double rint(double);
+__DEVICE__ float rint(float);
+__DEVICE__ double round(double);
+__DEVICE__ float round(float);
+__DEVICE__ double scalbln(double, long);
+__DEVICE__ float scalbln(float, long);
+__DEVICE__ double scalbn(double, int);
+__DEVICE__ float scalbn(float, int);
+__DEVICE__ bool signbit(double);
+__DEVICE__ bool signbit(float);
+__DEVICE__ double sin(double);
+__DEVICE__ float sin(float);
+__DEVICE__ double sinh(double);
+__DEVICE__ float sinh(float);
+__DEVICE__ double sqrt(double);
+__DEVICE__ float sqrt(float);
+__DEVICE__ double tan(double);
+__DEVICE__ float tan(float);
+__DEVICE__ double tanh(double);
+__DEVICE__ float tanh(float);
+__DEVICE__ double tgamma(double);
+__DEVICE__ float tgamma(float);
+__DEVICE__ double trunc(double);
+__DEVICE__ float trunc(float);
+
+namespace std {
+using ::abs;
+using ::acos;
+using ::acosh;
+using ::asin;
+using ::asinh;
+using ::atan;
+using ::atan2;
+using ::atanh;
+using ::cbrt;
+using ::ceil;
+using ::copysign;
+using ::cos;
+using ::cosh;
+using ::erf;
+using ::erfc;
+using ::exp;
+using ::exp2;
+using ::expm1;
+using ::fabs;
+using ::fdim;
+using ::floor;
+using ::fma;
+using ::fmax;
+using ::fmin;
+using ::fmod;
+using ::fpclassify;
+using ::frexp;
+using ::hypot;
+using ::ilogb;
+using ::isfinite;
+using ::isgreater;
+using ::isgreaterequal;
+using ::isinf;
+using ::isless;
+using ::islessequal;
+using ::islessgreater;
+using ::isnan;
+using ::isnormal;
+using ::isunordered;
+using ::labs;
+using ::ldexp;
+using ::lgamma;
+using ::llabs;
+using ::llrint;
+using ::log;
+using ::log10;
+using ::log1p;
+using ::log2;
+using ::logb;
+using ::lrint;
+using ::lround;
+using ::modf;
+using ::nan;
+using ::nanf;
+using ::nearbyint;
+using ::nextafter;
+using ::nexttoward;
+using ::pow;
+using ::remainder;
+using ::remquo;
+using ::rint;
+using ::round;
+using ::scalbln;
+using ::scalbn;
+using ::signbit;
+using ::sin;
+using ::sinh;
+using ::sqrt;
+using ::tan;
+using ::tanh;
+using ::tgamma;
+using ::trunc;
+} // namespace std
+
+#pragma pop_macro("__DEVICE__")
+
+#endif
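The declare-globally-then-pull-into-std strategy this header relies on can be exercised in plain host C++; myfabs and mymath below are hypothetical stand-ins for the real overloads and for namespace std:

    #include <cstdio>

    double myfabs(double X) { return X < 0 ? -X : X; } // global overload
    namespace mymath { using ::myfabs; } // the header does this into std

    int main() {
      // Both spellings now resolve to the same overload set.
      std::printf("%f %f\n", ::myfabs(-1.0), mymath::myfabs(-2.0));
    }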
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 8e5f0331cb38..6445f9b76b8f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -42,10 +42,14 @@
#if defined(__CUDA__) && defined(__clang__)
+// Include some forward declares that must come before cmath.
+#include <__clang_cuda_math_forward_declares.h>
+
// Include some standard headers to avoid CUDA headers including them
// while some required macros (like __THROW) are in a weird state.
-#include <stdlib.h>
#include <cmath>
+#include <cstdlib>
+#include <stdlib.h>
// Preserve common macros that will be changed below by us or by CUDA
// headers.
@@ -79,17 +83,15 @@
// definitions from .hpp files.
#define __DEVICE_FUNCTIONS_H__
#define __MATH_FUNCTIONS_H__
+#define __COMMON_FUNCTIONS_H__
#undef __CUDACC__
#define __CUDABE__
// Disables definitions of device-side runtime support stubs in
// cuda_device_runtime_api.h
-#define __CUDADEVRT_INTERNAL__
+#include "driver_types.h"
#include "host_config.h"
#include "host_defines.h"
-#include "driver_types.h"
-#include "common_functions.h"
-#undef __CUDADEVRT_INTERNAL__
#undef __CUDABE__
#define __CUDACC__
@@ -100,11 +102,11 @@
// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does
// not have at the moment. Emulate them with a builtin memcpy/memset.
-#define __nvvm_memcpy(s,d,n,a) __builtin_memcpy(s,d,n)
-#define __nvvm_memset(d,c,n,a) __builtin_memset(d,c,n)
+#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)
+#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)
-#include "crt/host_runtime.h"
#include "crt/device_runtime.h"
+#include "crt/host_runtime.h"
// device_runtime.h defines __cxa_* macros that will conflict with
// cxxabi.h.
// FIXME: redefine these as __device__ functions.
@@ -140,7 +142,20 @@
#pragma push_macro("__forceinline__")
#define __forceinline__ __device__ __inline__ __attribute__((always_inline))
#include "device_functions.hpp"
+
+// math_functions.hpp uses the __USE_FAST_MATH__ macro to determine whether we
+// get the slow-but-accurate or fast-but-inaccurate versions of functions like
+// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.
+//
+// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.
+// slow divides), so we need to scope our define carefully here.
+#pragma push_macro("__USE_FAST_MATH__")
+#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
+#define __USE_FAST_MATH__ 1
+#endif
#include "math_functions.hpp"
+#pragma pop_macro("__USE_FAST_MATH__")
+
#include "math_functions_dbl_ptx3.hpp"
#pragma pop_macro("__forceinline__")
@@ -152,21 +167,21 @@
// Alas, additional overloads for these functions are hard to get to.
// Considering that we only need these overloads for a few functions,
// we can provide them here.
-static inline float rsqrt(float a) { return rsqrtf(a); }
-static inline float rcbrt(float a) { return rcbrtf(a); }
-static inline float sinpi(float a) { return sinpif(a); }
-static inline float cospi(float a) { return cospif(a); }
-static inline void sincospi(float a, float *b, float *c) {
- return sincospi(a, b, c);
+static inline float rsqrt(float __a) { return rsqrtf(__a); }
+static inline float rcbrt(float __a) { return rcbrtf(__a); }
+static inline float sinpi(float __a) { return sinpif(__a); }
+static inline float cospi(float __a) { return cospif(__a); }
+static inline void sincospi(float __a, float *__b, float *__c) {
+ return sincospif(__a, __b, __c);
}
-static inline float erfcinv(float a) { return erfcinvf(a); }
-static inline float normcdfinv(float a) { return normcdfinvf(a); }
-static inline float normcdf(float a) { return normcdff(a); }
-static inline float erfcx(float a) { return erfcxf(a); }
+static inline float erfcinv(float __a) { return erfcinvf(__a); }
+static inline float normcdfinv(float __a) { return normcdfinvf(__a); }
+static inline float normcdf(float __a) { return normcdff(__a); }
+static inline float erfcx(float __a) { return erfcxf(__a); }
// For some reason the single-argument variant is not always declared by
// CUDA headers. Alas, device_functions.hpp included below needs it.
-static inline __device__ void __brkpt(int c) { __brkpt(); }
+static inline __device__ void __brkpt(int __c) { __brkpt(); }
// Now include *.hpp with definitions of various GPU functions. Alas,
// a lot of things get declared/defined with __host__ attribute which
@@ -178,17 +193,34 @@ static inline __device__ void __brkpt(int c) { __brkpt(); }
#undef __CUDABE__
#define __CUDACC__
#undef __DEVICE_FUNCTIONS_HPP__
-#include "device_functions.hpp"
#include "device_atomic_functions.hpp"
+#include "device_functions.hpp"
#include "sm_20_atomic_functions.hpp"
-#include "sm_32_atomic_functions.hpp"
#include "sm_20_intrinsics.hpp"
-// sm_30_intrinsics.h has declarations that use default argument, so
-// we have to include it and it will in turn include .hpp
-#include "sm_30_intrinsics.h"
-#include "sm_32_intrinsics.hpp"
+#include "sm_32_atomic_functions.hpp"
+
+// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the
+// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to
+// define them using builtins so that the optimizer can reason about and across
+// these instructions. In particular, using intrinsics for ldg gets us the
+// [addr+imm] addressing mode, which, although it doesn't actually exist in the
+// hardware, seems to generate faster machine code because ptxas can more easily
+// reason about our code.
+
#undef __MATH_FUNCTIONS_HPP__
+
+// math_functions.hpp defines ::signbit as a __host__ __device__ function. This
+// conflicts with libstdc++'s constexpr ::signbit, so we have to rename
+// math_functions.hpp's ::signbit. It's guarded by #undef signbit, but that's
+// conditional on __GNUC__. :)
+#pragma push_macro("signbit")
+#pragma push_macro("__GNUC__")
+#undef __GNUC__
+#define signbit __ignored_cuda_signbit
#include "math_functions.hpp"
+#pragma pop_macro("__GNUC__")
+#pragma pop_macro("signbit")
+
#pragma pop_macro("__host__")
#include "texture_indirect_functions.h"
@@ -200,17 +232,85 @@ static inline __device__ void __brkpt(int c) { __brkpt(); }
// Set up compiler macros expected to be seen during compilation.
#undef __CUDABE__
#define __CUDACC__
-#define __NVCC__
-
-#if defined(__CUDA_ARCH__)
-// We need to emit IR declaration for non-existing __nvvm_reflect() to
-// let backend know that it should be treated as const nothrow
-// function which is what NVVMReflect pass expects to see.
-extern "C" __device__ __attribute__((const)) int __nvvm_reflect(const void *);
-static __device__ __attribute__((used)) int __nvvm_reflect_anchor() {
- return __nvvm_reflect("NONE");
+
+extern "C" {
+// Device-side CUDA system calls.
+// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls
+// We need these declarations and wrappers for device-side
+// malloc/free/printf calls to work without relying on
+// -fcuda-disable-target-call-checks option.
+__device__ int vprintf(const char *, const char *);
+__device__ void free(void *) __attribute((nothrow));
+__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));
+__device__ void __assertfail(const char *__message, const char *__file,
+ unsigned __line, const char *__function,
+ size_t __charSize) __attribute__((noreturn));
+
+// In order for standard assert() macro on linux to work we need to
+// provide device-side __assert_fail()
+__device__ static inline void __assert_fail(const char *__message,
+ const char *__file, unsigned __line,
+ const char *__function) {
+ __assertfail(__message, __file, __line, __function, sizeof(char));
}
-#endif
+
+// Clang will convert printf into vprintf, but we still need
+// device-side declaration for it.
+__device__ int printf(const char *, ...);
+} // extern "C"
+
+// We also need device-side std::malloc and std::free.
+namespace std {
+__device__ static inline void free(void *__ptr) { ::free(__ptr); }
+__device__ static inline void *malloc(size_t __size) {
+ return ::malloc(__size);
+}
+} // namespace std
+
+// Out-of-line implementations from cuda_builtin_vars.h. These need to come
+// after we've pulled in the definition of uint3 and dim3.
+
+__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {
+ uint3 ret;
+ ret.x = x;
+ ret.y = y;
+ ret.z = z;
+ return ret;
+}
+
+__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {
+ uint3 ret;
+ ret.x = x;
+ ret.y = y;
+ ret.z = z;
+ return ret;
+}
+
+__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {
+ return dim3(x, y, z);
+}
+
+__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {
+ return dim3(x, y, z);
+}
+
+#include <__clang_cuda_cmath.h>
+#include <__clang_cuda_intrinsics.h>
+
+// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host
+// mode, giving them their "proper" types of dim3 and uint3. This is
+// incompatible with the types we give in cuda_builtin_vars.h. As a hack,
+// force-include the header (nvcc doesn't include it by default) but redefine
+// dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are only
+// used here for the redeclarations of blockDim and threadIdx.)
+#pragma push_macro("dim3")
+#pragma push_macro("uint3")
+#define dim3 __cuda_builtin_blockDim_t
+#define uint3 __cuda_builtin_threadIdx_t
+#include "curand_mtgp32_kernel.h"
+#pragma pop_macro("dim3")
+#pragma pop_macro("uint3")
#endif // __CUDA__
#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__
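The wrapper leans heavily on push_macro/pop_macro to scope macro redefinitions around individual includes. The pragma pair works the same way in ordinary code; a tiny host-runnable demonstration:

    #include <cstdio>

    #define WIDTH 4
    #pragma push_macro("WIDTH") // save the current definition of WIDTH
    #undef WIDTH
    #define WIDTH 8
    static const int Inner = WIDTH; // sees 8
    #pragma pop_macro("WIDTH") // restore the saved definition
    static const int Outer = WIDTH; // sees 4 again

    int main() { std::printf("%d %d\n", Inner, Outer); }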
diff --git a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
index 100799ebfdb8..211518eb2884 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_aes.h
@@ -28,36 +28,121 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
+/// \brief Performs a single round of AES encryption, transforming the state
+/// value from the first source operand using a 128-bit round key value
+/// contained in the second source operand, and writes the result to the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VAESENC instruction.
+///
+/// \param __V
+/// A 128-bit integer vector containing the state value.
+/// \param __R
+/// A 128-bit integer vector containing the round key value.
+/// \returns A 128-bit integer vector containing the encrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesenc_si128(__m128i __V, __m128i __R)
{
- return (__m128i)__builtin_ia32_aesenc128(__V, __R);
+ return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);
}
+/// \brief Performs the final round of AES encryption, transforming the state
+/// value from the first source operand using a 128-bit round key value
+/// contained in the second source operand, and writes the result to the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VAESENCLAST instruction.
+///
+/// \param __V
+/// A 128-bit integer vector containing the state value.
+/// \param __R
+/// A 128-bit integer vector containing the round key value.
+/// \returns A 128-bit integer vector containing the encrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesenclast_si128(__m128i __V, __m128i __R)
{
- return (__m128i)__builtin_ia32_aesenclast128(__V, __R);
+ return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);
}
+/// \brief Performs a single round of AES decryption using the Equivalent
+/// Inverse Cipher, transforming the state value from the first source
+/// operand using a 128-bit round key value contained in the second source
+/// operand, and writes the result to the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VAESDEC instruction.
+///
+/// \param __V
+/// A 128-bit integer vector containing the state value.
+/// \param __R
+/// A 128-bit integer vector containing the round key value.
+/// \returns A 128-bit integer vector containing the decrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesdec_si128(__m128i __V, __m128i __R)
{
- return (__m128i)__builtin_ia32_aesdec128(__V, __R);
+ return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);
}
+/// \brief Performs the final round of AES decryption using the Equivalent
+/// Inverse Cipher, transforming the state value from the first source
+/// operand using a 128-bit round key value contained in the second source
+/// operand, and writes the result to the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VAESDECLAST instruction.
+///
+/// \param __V
+/// A 128-bit integer vector containing the state value.
+/// \param __R
+/// A 128-bit integer vector containing the round key value.
+/// \returns A 128-bit integer vector containing the decrypted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesdeclast_si128(__m128i __V, __m128i __R)
{
- return (__m128i)__builtin_ia32_aesdeclast128(__V, __R);
+ return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);
}
+/// \brief Applies the AES InvMixColumns() transformation to an expanded key
+/// contained in the source operand, and writes the result to the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VAESIMC instruction.
+///
+/// \param __V
+/// A 128-bit integer vector containing the expanded key.
+/// \returns A 128-bit integer vector containing the transformed value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_aesimc_si128(__m128i __V)
{
- return (__m128i)__builtin_ia32_aesimc128(__V);
+ return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);
}
+/// \brief Generates a round key for AES encryption, operating on 128-bit data
+/// specified in the first source operand and using an 8-bit round constant
+/// specified by the second source operand, and writes the result to the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c AESKEYGENASSIST instruction.
+///
+/// \param C
+/// A 128-bit integer vector that is used to generate the AES encryption key.
+/// \param R
+/// An 8-bit round constant used to generate the AES encryption key.
+/// \returns A 128-bit round key for AES encryption.
#define _mm_aeskeygenassist_si128(C, R) \
(__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))
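Each of these intrinsics performs one AES round, so a full AES-128 block encryption is the usual ten-round chain. A sketch assuming the eleven round keys are already expanded (key expansion itself would use _mm_aeskeygenassist_si128; compile with -maes):

    #include <wmmintrin.h>

    // Encrypt one 128-bit block with pre-expanded AES-128 round keys RK[0..10].
    static __m128i aes128_encrypt_block(__m128i Block, const __m128i RK[11]) {
      Block = _mm_xor_si128(Block, RK[0]); // initial AddRoundKey
      for (int I = 1; I < 10; ++I)
        Block = _mm_aesenc_si128(Block, RK[I]); // rounds 1 through 9
      return _mm_aesenclast_si128(Block, RK[10]); // final round, no MixColumns
    }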
diff --git a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
index 68e944e92198..d4e073f40688 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__wmmintrin_pclmul.h
@@ -23,6 +23,34 @@
#ifndef _WMMINTRIN_PCLMUL_H
#define _WMMINTRIN_PCLMUL_H
+/// \brief Multiplies two 64-bit integer values, which are selected from source
+/// operands using the immediate-value operand. The multiplication is a
+/// carry-less multiplication, and the 128-bit integer product is stored in
+/// the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPCLMULQDQ instruction.
+///
+/// \param __X
+/// A 128-bit vector of [2 x i64] containing one of the source operands.
+/// \param __Y
+/// A 128-bit vector of [2 x i64] containing one of the source operands.
+/// \param __I
+/// An immediate value specifying which 64-bit values to select from the
+/// operands.
+/// Bit 0 selects a value from operand __X, and bit 4 selects a value from
+/// operand __Y:
+/// Bit[0]=0 indicates that bits[63:0] of operand __X are used.
+/// Bit[0]=1 indicates that bits[127:64] of operand __X are used.
+/// Bit[4]=0 indicates that bits[63:0] of operand __Y are used.
+/// Bit[4]=1 indicates that bits[127:64] of operand __Y are used.
+/// \returns The 128-bit integer vector containing the result of the carry-less
+/// multiplication of the selected 64-bit values.
#define _mm_clmulepi64_si128(__X, __Y, __I) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
(__v2di)(__m128i)(__Y), (char)(__I)))
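Bits 0 and 4 of the immediate pick which 64-bit lane of each operand participates, so the four useful immediates are 0x00, 0x01, 0x10, and 0x11. For example, multiplying the two low lanes (compile with -mpclmul):

    #include <wmmintrin.h>

    // Carry-less multiply of the low 64-bit lanes of X and Y; 0x11 would
    // select the high lane of both operands instead.
    static __m128i clmul_low(__m128i X, __m128i Y) {
      return _mm_clmulepi64_si128(X, Y, 0x00);
    }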
diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h
index a5b4f7434d1c..74a1914ce83b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/altivec.h
+++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h
@@ -36,67 +36,65 @@
#define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__))
-static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a,
- vector signed char __b,
- vector unsigned char __c);
+static __inline__ vector signed char __ATTRS_o_ai vec_perm(
+ vector signed char __a, vector signed char __b, vector unsigned char __c);
-static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a,
- vector unsigned char __b,
- vector unsigned char __c);
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_perm(vector unsigned char __a, vector unsigned char __b,
+ vector unsigned char __c);
-static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
- vector bool char __b,
- vector unsigned char __c);
+static __inline__ vector bool char __ATTRS_o_ai
+vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c);
-static vector short __ATTRS_o_ai vec_perm(vector signed short __a,
- vector signed short __b,
- vector unsigned char __c);
+static __inline__ vector short __ATTRS_o_ai vec_perm(vector signed short __a,
+ vector signed short __b,
+ vector unsigned char __c);
-static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned char __c);
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_perm(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned char __c);
-static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a,
- vector bool short __b,
- vector unsigned char __c);
+static __inline__ vector bool short __ATTRS_o_ai vec_perm(
+ vector bool short __a, vector bool short __b, vector unsigned char __c);
-static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
- vector unsigned char __c);
+static __inline__ vector pixel __ATTRS_o_ai vec_perm(vector pixel __a,
+ vector pixel __b,
+ vector unsigned char __c);
-static vector int __ATTRS_o_ai vec_perm(vector signed int __a,
- vector signed int __b,
- vector unsigned char __c);
+static __inline__ vector int __ATTRS_o_ai vec_perm(vector signed int __a,
+ vector signed int __b,
+ vector unsigned char __c);
-static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a,
- vector unsigned int __b,
- vector unsigned char __c);
+static __inline__ vector unsigned int __ATTRS_o_ai vec_perm(
+ vector unsigned int __a, vector unsigned int __b, vector unsigned char __c);
-static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a,
- vector bool int __b,
- vector unsigned char __c);
+static __inline__ vector bool int __ATTRS_o_ai
+vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c);
-static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
- vector unsigned char __c);
+static __inline__ vector float __ATTRS_o_ai vec_perm(vector float __a,
+ vector float __b,
+ vector unsigned char __c);
#ifdef __VSX__
-static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a,
- vector signed long long __b,
- vector unsigned char __c);
+static __inline__ vector long long __ATTRS_o_ai
+vec_perm(vector signed long long __a, vector signed long long __b,
+ vector unsigned char __c);
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_perm(vector unsigned long long __a, vector unsigned long long __b,
vector unsigned char __c);
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_perm(vector bool long long __a, vector bool long long __b,
vector unsigned char __c);
-static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
- vector unsigned char __c);
+static __inline__ vector double __ATTRS_o_ai vec_perm(vector double __a,
+ vector double __b,
+ vector unsigned char __c);
#endif
-static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
- vector unsigned char __b);
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_xor(vector unsigned char __a, vector unsigned char __b);
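All of these forward declarations rely on __ATTRS_o_ai for clang's overloading
of the vec_* names. As a sketch of why the added __inline__ matters, assuming
__ATTRS_o_ai carries its conventional altivec.h expansion (the macro itself is
not shown in this hunk), compiled with -maltivec:

/* Presumed expansion (assumption; defined near the top of altivec.h): */
#define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__))

/* With __inline__ spelled explicitly, each overload is an inline
   definition in C89, C99, gnu* and C++ modes alike, so including the
   header never emits an out-of-line vec_* symbol: */
static __inline__ vector signed char __ATTRS_o_ai
vec_demo(vector signed char __a) { /* hypothetical overload */
  return __a;
}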
/* vec_abs */
@@ -104,36 +102,41 @@ static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
#define __builtin_altivec_abs_v8hi vec_abs
#define __builtin_altivec_abs_v4si vec_abs
-static vector signed char __ATTRS_o_ai vec_abs(vector signed char __a) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_abs(vector signed char __a) {
return __builtin_altivec_vmaxsb(__a, -__a);
}
-static vector signed short __ATTRS_o_ai vec_abs(vector signed short __a) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_abs(vector signed short __a) {
return __builtin_altivec_vmaxsh(__a, -__a);
}
-static vector signed int __ATTRS_o_ai vec_abs(vector signed int __a) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_abs(vector signed int __a) {
return __builtin_altivec_vmaxsw(__a, -__a);
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_abs(vector signed long long __a) {
return __builtin_altivec_vmaxsd(__a, -__a);
}
#endif
-static vector float __ATTRS_o_ai vec_abs(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_abs(vector float __a) {
+#ifdef __VSX__
+ return __builtin_vsx_xvabssp(__a);
+#else
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)(0x7FFFFFFF);
return (vector float)__res;
+#endif
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector double __ATTRS_o_ai vec_abs(vector double __a) {
- vector unsigned long long __res = { 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF };
- __res &= (vector unsigned int)__a;
- return (vector double)__res;
+static __inline__ vector double __ATTRS_o_ai vec_abs(vector double __a) {
+ return __builtin_vsx_xvabsdp(__a);
}
#endif
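The float and double paths above now prefer the VSX absolute-value builtins
(xvabssp/xvabsdp) and keep the sign-bit mask only as the non-VSX fallback;
note that the removed double variant AND-ed through a mismatched
vector unsigned int cast, which the builtin replacement also sidesteps.
A minimal usage sketch (hypothetical main, assuming a VSX-capable PowerPC
target built with -mvsx):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector float f = {-1.5f, 2.0f, -0.0f, 3.25f};
  vector float a = vec_abs(f);  /* xvabssp under VSX, sign mask otherwise */
  printf("%f %f %f %f\n", a[0], a[1], a[2], a[3]);  /* 1.5 2.0 0.0 3.25 */
  return 0;
}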
@@ -142,138 +145,146 @@ static vector double __ATTRS_o_ai vec_abs(vector double __a) {
#define __builtin_altivec_abss_v8hi vec_abss
#define __builtin_altivec_abss_v4si vec_abss
-static vector signed char __ATTRS_o_ai vec_abss(vector signed char __a) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_abss(vector signed char __a) {
return __builtin_altivec_vmaxsb(
__a, __builtin_altivec_vsubsbs((vector signed char)(0), __a));
}
-static vector signed short __ATTRS_o_ai vec_abss(vector signed short __a) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_abss(vector signed short __a) {
return __builtin_altivec_vmaxsh(
__a, __builtin_altivec_vsubshs((vector signed short)(0), __a));
}
-static vector signed int __ATTRS_o_ai vec_abss(vector signed int __a) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_abss(vector signed int __a) {
return __builtin_altivec_vmaxsw(
__a, __builtin_altivec_vsubsws((vector signed int)(0), __a));
}
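vec_abss leans on the saturating subtract precisely because plain negation
overflows on the most negative lane value: 0 - (-128) saturates to 127 instead
of wrapping back to -128. A small sketch of that edge case (hypothetical main,
PowerPC target with -maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed char v = vec_splats((signed char)-128);
  vector signed char s = vec_abss(v);  /* vsubsbs clamps 0-(-128) to 127 */
  printf("%d\n", (int)s[0]);           /* 127, not a wrapped -128 */
  return 0;
}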
/* vec_add */
-static vector signed char __ATTRS_o_ai vec_add(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_add(vector signed char __a, vector signed char __b) {
return __a + __b;
}
-static vector signed char __ATTRS_o_ai vec_add(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_add(vector bool char __a, vector signed char __b) {
return (vector signed char)__a + __b;
}
-static vector signed char __ATTRS_o_ai vec_add(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_add(vector signed char __a, vector bool char __b) {
return __a + (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_add(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_add(vector unsigned char __a, vector unsigned char __b) {
return __a + __b;
}
-static vector unsigned char __ATTRS_o_ai vec_add(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_add(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a + __b;
}
-static vector unsigned char __ATTRS_o_ai vec_add(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_add(vector unsigned char __a, vector bool char __b) {
return __a + (vector unsigned char)__b;
}
-static vector short __ATTRS_o_ai vec_add(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_add(vector short __a,
+ vector short __b) {
return __a + __b;
}
-static vector short __ATTRS_o_ai vec_add(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_add(vector bool short __a,
+ vector short __b) {
return (vector short)__a + __b;
}
-static vector short __ATTRS_o_ai vec_add(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_add(vector short __a,
+ vector bool short __b) {
return __a + (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_add(vector unsigned short __a, vector unsigned short __b) {
return __a + __b;
}
-static vector unsigned short __ATTRS_o_ai vec_add(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_add(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a + __b;
}
-static vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_add(vector unsigned short __a, vector bool short __b) {
return __a + (vector unsigned short)__b;
}
-static vector int __ATTRS_o_ai vec_add(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_add(vector int __a,
+ vector int __b) {
return __a + __b;
}
-static vector int __ATTRS_o_ai vec_add(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_add(vector bool int __a,
+ vector int __b) {
return (vector int)__a + __b;
}
-static vector int __ATTRS_o_ai vec_add(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_add(vector int __a,
+ vector bool int __b) {
return __a + (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_add(vector unsigned int __a, vector unsigned int __b) {
return __a + __b;
}
-static vector unsigned int __ATTRS_o_ai vec_add(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_add(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a + __b;
}
-static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_add(vector unsigned int __a, vector bool int __b) {
return __a + (vector unsigned int)__b;
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_add(vector signed long long __a, vector signed long long __b) {
return __a + __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_add(vector unsigned long long __a, vector unsigned long long __b) {
return __a + __b;
}
-static vector signed __int128 __ATTRS_o_ai vec_add(vector signed __int128 __a,
- vector signed __int128 __b) {
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_add(vector signed __int128 __a, vector signed __int128 __b) {
return __a + __b;
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_add(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __a + __b;
}
#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector float __ATTRS_o_ai vec_add(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_add(vector float __a,
+ vector float __b) {
return __a + __b;
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_add(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_add(vector double __a,
+ vector double __b) {
return __a + __b;
}
#endif // __VSX__
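The vector bool overloads of vec_add simply cast the mask to the arithmetic
type before adding; since bool lanes are all-ones (-1) or all-zeros, adding a
comparison mask decrements exactly the selected lanes. A sketch of that idiom
(hypothetical main, assuming -maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed int x = {10, 20, 30, 40};
  vector bool int m = vec_cmpgt(x, vec_splats(15)); /* lanes 0,-1,-1,-1 */
  vector signed int y = vec_add(x, m);              /* subtract 1 where true */
  printf("%d %d %d %d\n", y[0], y[1], y[2], y[3]);  /* 10 19 29 39 */
  return 0;
}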
@@ -281,13 +292,13 @@ vec_add(vector double __a, vector double __b) {
/* vec_adde */
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_adde(vector signed __int128 __a, vector signed __int128 __b,
vector signed __int128 __c) {
return __builtin_altivec_vaddeuqm(__a, __b, __c);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_adde(vector unsigned __int128 __a, vector unsigned __int128 __b,
vector unsigned __int128 __c) {
return __builtin_altivec_vaddeuqm(__a, __b, __c);
@@ -297,13 +308,13 @@ vec_adde(vector unsigned __int128 __a, vector unsigned __int128 __b,
/* vec_addec */
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_addec(vector signed __int128 __a, vector signed __int128 __b,
vector signed __int128 __c) {
return __builtin_altivec_vaddecuq(__a, __b, __c);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_addec(vector unsigned __int128 __a, vector unsigned __int128 __b,
vector unsigned __int128 __c) {
return __builtin_altivec_vaddecuq(__a, __b, __c);
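vec_adde is the quadword add with carry-in (the low bit of the third operand)
and vec_addec yields the carry-out of that same extended add, so the pair
chains multi-limb additions. A sketch of a 256-bit add built from two 128-bit
limbs (hypothetical add256 helper, POWER8 target):

#include <altivec.h>

#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
static void add256(vector unsigned __int128 alo, vector unsigned __int128 ahi,
                   vector unsigned __int128 blo, vector unsigned __int128 bhi,
                   vector unsigned __int128 *rlo,
                   vector unsigned __int128 *rhi) {
  vector unsigned __int128 c = vec_addc(alo, blo); /* carry out of low limb */
  *rlo = vec_add(alo, blo);
  *rhi = vec_adde(ahi, bhi, c);                    /* fold the carry in */
}
#endif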
@@ -314,33 +325,33 @@ vec_addec(vector unsigned __int128 __a, vector unsigned __int128 __b,
#define __builtin_altivec_vaddubm vec_vaddubm
-static vector signed char __ATTRS_o_ai vec_vaddubm(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vaddubm(vector signed char __a, vector signed char __b) {
return __a + __b;
}
-static vector signed char __ATTRS_o_ai vec_vaddubm(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vaddubm(vector bool char __a, vector signed char __b) {
return (vector signed char)__a + __b;
}
-static vector signed char __ATTRS_o_ai vec_vaddubm(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vaddubm(vector signed char __a, vector bool char __b) {
return __a + (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vaddubm(vector unsigned char __a, vector unsigned char __b) {
return __a + __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vaddubm(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a + __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vaddubm(vector unsigned char __a, vector bool char __b) {
return __a + (vector unsigned char)__b;
}
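vec_vaddubm is the element-width-specific spelling ("vector add unsigned byte
modulo"); on char-sized lanes it is interchangeable with the generic vec_add
overloads above. Illustrative only, under the same -maltivec assumption:

#include <altivec.h>

static vector unsigned char sum_bytes(vector unsigned char a,
                                      vector unsigned char b) {
  (void)vec_vaddubm(a, b);  /* width-specific spelling */
  return vec_add(a, b);     /* generic overload, same modulo byte add */
}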
@@ -348,33 +359,33 @@ static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a,
#define __builtin_altivec_vadduhm vec_vadduhm
-static vector short __ATTRS_o_ai vec_vadduhm(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vadduhm(vector short __a,
+ vector short __b) {
return __a + __b;
}
-static vector short __ATTRS_o_ai vec_vadduhm(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vadduhm(vector bool short __a,
+ vector short __b) {
return (vector short)__a + __b;
}
-static vector short __ATTRS_o_ai vec_vadduhm(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vadduhm(vector short __a,
+ vector bool short __b) {
return __a + (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vadduhm(vector unsigned short __a, vector unsigned short __b) {
return __a + __b;
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vadduhm(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a + __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vadduhm(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vadduhm(vector unsigned short __a, vector bool short __b) {
return __a + (vector unsigned short)__b;
}
@@ -382,32 +393,33 @@ static vector unsigned short __ATTRS_o_ai vec_vadduhm(vector unsigned short __a,
#define __builtin_altivec_vadduwm vec_vadduwm
-static vector int __ATTRS_o_ai vec_vadduwm(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vadduwm(vector int __a,
+ vector int __b) {
return __a + __b;
}
-static vector int __ATTRS_o_ai vec_vadduwm(vector bool int __a,
- vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vadduwm(vector bool int __a,
+ vector int __b) {
return (vector int)__a + __b;
}
-static vector int __ATTRS_o_ai vec_vadduwm(vector int __a,
- vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vadduwm(vector int __a,
+ vector bool int __b) {
return __a + (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vadduwm(vector unsigned int __a, vector unsigned int __b) {
return __a + __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vadduwm(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a + __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vadduwm(vector unsigned int __a, vector bool int __b) {
return __a + (vector unsigned int)__b;
}
@@ -415,33 +427,32 @@ static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a,
#define __builtin_altivec_vaddfp vec_vaddfp
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vaddfp(vector float __a, vector float __b) {
return __a + __b;
}
/* vec_addc */
-static vector signed int __ATTRS_o_ai vec_addc(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_addc(vector signed int __a, vector signed int __b) {
return (vector signed int)__builtin_altivec_vaddcuw((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_addc(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_addc(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vaddcuw(__a, __b);
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_addc(vector signed __int128 __a, vector signed __int128 __b) {
return (vector signed __int128)__builtin_altivec_vaddcuq(
- (vector unsigned __int128)__a,
- (vector unsigned __int128)__b);
+ (vector unsigned __int128)__a, (vector unsigned __int128)__b);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_addc(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __builtin_altivec_vaddcuq(__a, __b);
}
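vec_addc produces only the carry-out of an unsigned lane-wise add: each 32-bit
lane becomes 1 if a + b wrapped and 0 otherwise, which is the building block
for the wider chained adds above. A quick sketch (hypothetical main,
-maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector unsigned int a = {0xFFFFFFFFu, 1u, 0x80000000u, 0u};
  vector unsigned int b = {1u, 1u, 0x80000000u, 0u};
  vector unsigned int c = vec_addc(a, b);          /* per-lane carry-out */
  printf("%u %u %u %u\n", c[0], c[1], c[2], c[3]); /* 1 0 1 0 */
  return 0;
}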
@@ -449,222 +460,227 @@ vec_addc(vector unsigned __int128 __a, vector unsigned __int128 __b) {
/* vec_vaddcuw */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vaddcuw(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vaddcuw(__a, __b);
}
/* vec_adds */
-static vector signed char __ATTRS_o_ai vec_adds(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_adds(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vaddsbs(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_adds(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_adds(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vaddsbs((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_adds(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_adds(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vaddsbs(__a, (vector signed char)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_adds(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vaddubs(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_adds(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_adds(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vaddubs((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_adds(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b);
}
-static vector short __ATTRS_o_ai vec_adds(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_adds(vector short __a,
+ vector short __b) {
return __builtin_altivec_vaddshs(__a, __b);
}
-static vector short __ATTRS_o_ai vec_adds(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_adds(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vaddshs((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_adds(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_adds(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vaddshs(__a, (vector short)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_adds(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vadduhs(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_adds(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_adds(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vadduhs((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_adds(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b);
}
-static vector int __ATTRS_o_ai vec_adds(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_adds(vector int __a,
+ vector int __b) {
return __builtin_altivec_vaddsws(__a, __b);
}
-static vector int __ATTRS_o_ai vec_adds(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_adds(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vaddsws((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_adds(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_adds(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vaddsws(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_adds(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vadduws(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_adds(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_adds(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vadduws((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_adds(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vadduws(__a, (vector unsigned int)__b);
}
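Unlike the modulo vec_add family, every vec_adds overload clamps at the lane
type's bounds rather than wrapping. A side-by-side sketch (hypothetical main,
-maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector unsigned char a = vec_splats((unsigned char)250);
  vector unsigned char b = vec_splats((unsigned char)10);
  vector unsigned char wrap = vec_add(a, b);   /* modulo: 260 wraps to 4 */
  vector unsigned char sat  = vec_adds(a, b);  /* saturating: clamps to 255 */
  printf("%u %u\n", (unsigned)wrap[0], (unsigned)sat[0]);  /* 4 255 */
  return 0;
}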
/* vec_vaddsbs */
-static vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vaddsbs(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vaddsbs(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vaddsbs(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vaddsbs(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vaddsbs((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vaddsbs(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vaddsbs(__a, (vector signed char)__b);
}
/* vec_vaddubs */
-static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vaddubs(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vaddubs(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vaddubs(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vaddubs((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vaddubs(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b);
}
/* vec_vaddshs */
-static vector short __ATTRS_o_ai vec_vaddshs(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vaddshs(vector short __a,
+ vector short __b) {
return __builtin_altivec_vaddshs(__a, __b);
}
-static vector short __ATTRS_o_ai vec_vaddshs(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vaddshs(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vaddshs((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_vaddshs(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vaddshs(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vaddshs(__a, (vector short)__b);
}
/* vec_vadduhs */
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vadduhs(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vadduhs(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vadduhs(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vadduhs((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vadduhs(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vadduhs(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b);
}
/* vec_vaddsws */
-static vector int __ATTRS_o_ai vec_vaddsws(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vaddsws(vector int __a,
+ vector int __b) {
return __builtin_altivec_vaddsws(__a, __b);
}
-static vector int __ATTRS_o_ai vec_vaddsws(vector bool int __a,
- vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vaddsws(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vaddsws((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_vaddsws(vector int __a,
- vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vaddsws(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vaddsws(__a, (vector int)__b);
}
/* vec_vadduws */
-static vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vadduws(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vadduws(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vadduws(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vadduws(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vadduws((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vadduws(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vadduws(__a, (vector unsigned int)__b);
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
/* vec_vadduqm */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vadduqm(vector signed __int128 __a, vector signed __int128 __b) {
return __a + __b;
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vadduqm(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __a + __b;
}
/* vec_vaddeuqm */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vaddeuqm(vector signed __int128 __a, vector signed __int128 __b,
vector signed __int128 __c) {
return __builtin_altivec_vaddeuqm(__a, __b, __c);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vaddeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b,
vector unsigned __int128 __c) {
return __builtin_altivec_vaddeuqm(__a, __b, __c);
@@ -672,25 +688,25 @@ vec_vaddeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b,
/* vec_vaddcuq */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vaddcuq(vector signed __int128 __a, vector signed __int128 __b) {
return __builtin_altivec_vaddcuq(__a, __b);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vaddcuq(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __builtin_altivec_vaddcuq(__a, __b);
}
/* vec_vaddecuq */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vaddecuq(vector signed __int128 __a, vector signed __int128 __b,
vector signed __int128 __c) {
return __builtin_altivec_vaddecuq(__a, __b, __c);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vaddecuq(vector unsigned __int128 __a, vector unsigned __int128 __b,
vector unsigned __int128 __c) {
return __builtin_altivec_vaddecuq(__a, __b, __c);
@@ -701,338 +717,351 @@ vec_vaddecuq(vector unsigned __int128 __a, vector unsigned __int128 __b,
#define __builtin_altivec_vand vec_and
-static vector signed char __ATTRS_o_ai vec_and(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_and(vector signed char __a, vector signed char __b) {
return __a & __b;
}
-static vector signed char __ATTRS_o_ai vec_and(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_and(vector bool char __a, vector signed char __b) {
return (vector signed char)__a & __b;
}
-static vector signed char __ATTRS_o_ai vec_and(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_and(vector signed char __a, vector bool char __b) {
return __a & (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_and(vector unsigned char __a, vector unsigned char __b) {
return __a & __b;
}
-static vector unsigned char __ATTRS_o_ai vec_and(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_and(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a & __b;
}
-static vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_and(vector unsigned char __a, vector bool char __b) {
return __a & (vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_and(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_and(vector bool char __a,
+ vector bool char __b) {
return __a & __b;
}
-static vector short __ATTRS_o_ai vec_and(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_and(vector short __a,
+ vector short __b) {
return __a & __b;
}
-static vector short __ATTRS_o_ai vec_and(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_and(vector bool short __a,
+ vector short __b) {
return (vector short)__a & __b;
}
-static vector short __ATTRS_o_ai vec_and(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_and(vector short __a,
+ vector bool short __b) {
return __a & (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_and(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_and(vector unsigned short __a, vector unsigned short __b) {
return __a & __b;
}
-static vector unsigned short __ATTRS_o_ai vec_and(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_and(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a & __b;
}
-static vector unsigned short __ATTRS_o_ai vec_and(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_and(vector unsigned short __a, vector bool short __b) {
return __a & (vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_and(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_and(vector bool short __a, vector bool short __b) {
return __a & __b;
}
-static vector int __ATTRS_o_ai vec_and(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_and(vector int __a,
+ vector int __b) {
return __a & __b;
}
-static vector int __ATTRS_o_ai vec_and(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_and(vector bool int __a,
+ vector int __b) {
return (vector int)__a & __b;
}
-static vector int __ATTRS_o_ai vec_and(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_and(vector int __a,
+ vector bool int __b) {
return __a & (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_and(vector unsigned int __a, vector unsigned int __b) {
return __a & __b;
}
-static vector unsigned int __ATTRS_o_ai vec_and(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_and(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a & __b;
}
-static vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_and(vector unsigned int __a, vector bool int __b) {
return __a & (vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_and(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_and(vector bool int __a,
+ vector bool int __b) {
return __a & __b;
}
-static vector float __ATTRS_o_ai vec_and(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_and(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_and(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_and(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_and(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_and(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_and(vector bool long long __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_and(vector bool long long __a,
+ vector double __b) {
vector unsigned long long __res =
(vector unsigned long long)__a & (vector unsigned long long)__b;
return (vector double)__res;
}
-static vector double __ATTRS_o_ai vec_and(vector double __a, vector bool long long __b) {
+static __inline__ vector double __ATTRS_o_ai
+vec_and(vector double __a, vector bool long long __b) {
vector unsigned long long __res =
(vector unsigned long long)__a & (vector unsigned long long)__b;
return (vector double)__res;
}
-static vector double __ATTRS_o_ai vec_and(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_and(vector double __a,
+ vector double __b) {
vector unsigned long long __res =
(vector unsigned long long)__a & (vector unsigned long long)__b;
return (vector double)__res;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_and(vector signed long long __a, vector signed long long __b) {
return __a & __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_and(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a & __b;
}
-static vector signed long long __ATTRS_o_ai vec_and(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_and(vector signed long long __a, vector bool long long __b) {
return __a & (vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_and(vector unsigned long long __a, vector unsigned long long __b) {
return __a & __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_and(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a & __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_and(vector unsigned long long __a, vector bool long long __b) {
return __a & (vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_and(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_and(vector bool long long __a, vector bool long long __b) {
return __a & __b;
}
#endif
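C has no bitwise operators on floating-point vectors, so the float and double
overloads of vec_and round-trip through the same-width unsigned integer
vector, as seen above. The classic use is sign-bit manipulation; a sketch
(hypothetical main, -maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector float f = {-1.0f, -2.5f, 3.0f, -0.0f};
  vector bool int mask = (vector bool int)vec_splats(0x7FFFFFFF);
  vector float r = vec_and(f, mask);  /* clears each lane's IEEE sign bit */
  printf("%f %f %f %f\n", r[0], r[1], r[2], r[3]);  /* 1.0 2.5 3.0 0.0 */
  return 0;
}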
/* vec_vand */
-static vector signed char __ATTRS_o_ai vec_vand(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vand(vector signed char __a, vector signed char __b) {
return __a & __b;
}
-static vector signed char __ATTRS_o_ai vec_vand(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vand(vector bool char __a, vector signed char __b) {
return (vector signed char)__a & __b;
}
-static vector signed char __ATTRS_o_ai vec_vand(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vand(vector signed char __a, vector bool char __b) {
return __a & (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vand(vector unsigned char __a, vector unsigned char __b) {
return __a & __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vand(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vand(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a & __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vand(vector unsigned char __a, vector bool char __b) {
return __a & (vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_vand(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vand(vector bool char __a,
+ vector bool char __b) {
return __a & __b;
}
-static vector short __ATTRS_o_ai vec_vand(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vand(vector short __a,
+ vector short __b) {
return __a & __b;
}
-static vector short __ATTRS_o_ai vec_vand(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vand(vector bool short __a,
+ vector short __b) {
return (vector short)__a & __b;
}
-static vector short __ATTRS_o_ai vec_vand(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vand(vector short __a,
+ vector bool short __b) {
return __a & (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vand(vector unsigned short __a, vector unsigned short __b) {
return __a & __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vand(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vand(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a & __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vand(vector unsigned short __a, vector bool short __b) {
return __a & (vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_vand(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vand(vector bool short __a, vector bool short __b) {
return __a & __b;
}
-static vector int __ATTRS_o_ai vec_vand(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vand(vector int __a,
+ vector int __b) {
return __a & __b;
}
-static vector int __ATTRS_o_ai vec_vand(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vand(vector bool int __a,
+ vector int __b) {
return (vector int)__a & __b;
}
-static vector int __ATTRS_o_ai vec_vand(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vand(vector int __a,
+ vector bool int __b) {
return __a & (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vand(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vand(vector unsigned int __a, vector unsigned int __b) {
return __a & __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vand(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vand(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a & __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vand(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vand(vector unsigned int __a, vector bool int __b) {
return __a & (vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_vand(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vand(vector bool int __a,
+ vector bool int __b) {
return __a & __b;
}
-static vector float __ATTRS_o_ai vec_vand(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vand(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vand(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vand(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vand(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vand(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a & (vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vand(vector signed long long __a, vector signed long long __b) {
return __a & __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vand(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a & __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vand(vector signed long long __a, vector bool long long __b) {
return __a & (vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vand(vector unsigned long long __a, vector unsigned long long __b) {
return __a & __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vand(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a & __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vand(vector unsigned long long __a, vector bool long long __b) {
return __a & (vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_vand(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_vand(vector bool long long __a, vector bool long long __b) {
return __a & __b;
}
#endif
@@ -1041,419 +1070,432 @@ static vector bool long long __ATTRS_o_ai vec_vand(vector bool long long __a,
#define __builtin_altivec_vandc vec_andc
-static vector signed char __ATTRS_o_ai vec_andc(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_andc(vector signed char __a, vector signed char __b) {
return __a & ~__b;
}
-static vector signed char __ATTRS_o_ai vec_andc(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_andc(vector bool char __a, vector signed char __b) {
return (vector signed char)__a & ~__b;
}
-static vector signed char __ATTRS_o_ai vec_andc(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_andc(vector signed char __a, vector bool char __b) {
return __a & ~(vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_andc(vector unsigned char __a, vector unsigned char __b) {
return __a & ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_andc(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_andc(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a & ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_andc(vector unsigned char __a, vector bool char __b) {
return __a & ~(vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_andc(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_andc(vector bool char __a,
+ vector bool char __b) {
return __a & ~__b;
}
-static vector short __ATTRS_o_ai vec_andc(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_andc(vector short __a,
+ vector short __b) {
return __a & ~__b;
}
-static vector short __ATTRS_o_ai vec_andc(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_andc(vector bool short __a,
+ vector short __b) {
return (vector short)__a & ~__b;
}
-static vector short __ATTRS_o_ai vec_andc(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_andc(vector short __a,
+ vector bool short __b) {
return __a & ~(vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_andc(vector unsigned short __a, vector unsigned short __b) {
return __a & ~__b;
}
-static vector unsigned short __ATTRS_o_ai vec_andc(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_andc(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a & ~__b;
}
-static vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_andc(vector unsigned short __a, vector bool short __b) {
return __a & ~(vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_andc(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_andc(vector bool short __a, vector bool short __b) {
return __a & ~__b;
}
-static vector int __ATTRS_o_ai vec_andc(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_andc(vector int __a,
+ vector int __b) {
return __a & ~__b;
}
-static vector int __ATTRS_o_ai vec_andc(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_andc(vector bool int __a,
+ vector int __b) {
return (vector int)__a & ~__b;
}
-static vector int __ATTRS_o_ai vec_andc(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_andc(vector int __a,
+ vector bool int __b) {
return __a & ~(vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_andc(vector unsigned int __a, vector unsigned int __b) {
return __a & ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_andc(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_andc(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a & ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_andc(vector unsigned int __a, vector bool int __b) {
return __a & ~(vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_andc(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_andc(vector bool int __a,
+ vector bool int __b) {
return __a & ~__b;
}
-static vector float __ATTRS_o_ai vec_andc(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_andc(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & ~(vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_andc(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_andc(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & ~(vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_andc(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_andc(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a & ~(vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_andc(vector bool long long __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_andc(vector bool long long __a,
+ vector double __b) {
vector unsigned long long __res =
(vector unsigned long long)__a & ~(vector unsigned long long)__b;
return (vector double)__res;
}
-static vector double __ATTRS_o_ai
+static __inline__ vector double __ATTRS_o_ai
vec_andc(vector double __a, vector bool long long __b) {
vector unsigned long long __res =
(vector unsigned long long)__a & ~(vector unsigned long long)__b;
return (vector double)__res;
}
-static vector double __ATTRS_o_ai vec_andc(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_andc(vector double __a,
+ vector double __b) {
vector unsigned long long __res =
(vector unsigned long long)__a & ~(vector unsigned long long)__b;
return (vector double)__res;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_andc(vector signed long long __a, vector signed long long __b) {
return __a & ~__b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_andc(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a & ~__b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_andc(vector signed long long __a, vector bool long long __b) {
return __a & ~(vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_andc(vector unsigned long long __a, vector unsigned long long __b) {
return __a & ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_andc(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a & ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_andc(vector unsigned long long __a, vector bool long long __b) {
return __a & ~(vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_andc(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_andc(vector bool long long __a, vector bool long long __b) {
return __a & ~__b;
}
#endif
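vec_andc computes a & ~b, so AND-ing against the complement of a comparison
mask zeroes exactly the selected lanes. A sketch (hypothetical main,
-maltivec):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed int x = {1, 2, 3, 4};
  vector bool int m = vec_cmpgt(x, vec_splats(2));  /* selects lanes 2,3 */
  vector signed int r = vec_andc(x, m);             /* x & ~m clears them */
  printf("%d %d %d %d\n", r[0], r[1], r[2], r[3]);  /* 1 2 0 0 */
  return 0;
}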
/* vec_vandc */
-static vector signed char __ATTRS_o_ai vec_vandc(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vandc(vector signed char __a, vector signed char __b) {
return __a & ~__b;
}
-static vector signed char __ATTRS_o_ai vec_vandc(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vandc(vector bool char __a, vector signed char __b) {
return (vector signed char)__a & ~__b;
}
-static vector signed char __ATTRS_o_ai vec_vandc(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vandc(vector signed char __a, vector bool char __b) {
return __a & ~(vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vandc(vector unsigned char __a, vector unsigned char __b) {
return __a & ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vandc(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vandc(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a & ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vandc(vector unsigned char __a, vector bool char __b) {
return __a & ~(vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_vandc(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vandc(vector bool char __a, vector bool char __b) {
return __a & ~__b;
}
-static vector short __ATTRS_o_ai vec_vandc(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vandc(vector short __a,
+ vector short __b) {
return __a & ~__b;
}
-static vector short __ATTRS_o_ai vec_vandc(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vandc(vector bool short __a,
+ vector short __b) {
return (vector short)__a & ~__b;
}
-static vector short __ATTRS_o_ai vec_vandc(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vandc(vector short __a,
+ vector bool short __b) {
return __a & ~(vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vandc(vector unsigned short __a, vector unsigned short __b) {
return __a & ~__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vandc(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vandc(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a & ~__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vandc(vector unsigned short __a, vector bool short __b) {
return __a & ~(vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_vandc(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vandc(vector bool short __a, vector bool short __b) {
return __a & ~__b;
}
-static vector int __ATTRS_o_ai vec_vandc(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vandc(vector int __a,
+ vector int __b) {
return __a & ~__b;
}
-static vector int __ATTRS_o_ai vec_vandc(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vandc(vector bool int __a,
+ vector int __b) {
return (vector int)__a & ~__b;
}
-static vector int __ATTRS_o_ai vec_vandc(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vandc(vector int __a,
+ vector bool int __b) {
return __a & ~(vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vandc(vector unsigned int __a, vector unsigned int __b) {
return __a & ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vandc(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vandc(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a & ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vandc(vector unsigned int __a, vector bool int __b) {
return __a & ~(vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_vandc(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vandc(vector bool int __a,
+ vector bool int __b) {
return __a & ~__b;
}
-static vector float __ATTRS_o_ai vec_vandc(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vandc(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & ~(vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vandc(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vandc(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a & ~(vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vandc(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vandc(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a & ~(vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vandc(vector signed long long __a, vector signed long long __b) {
return __a & ~__b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vandc(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a & ~__b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vandc(vector signed long long __a, vector bool long long __b) {
return __a & ~(vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vandc(vector unsigned long long __a, vector unsigned long long __b) {
return __a & ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vandc(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a & ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vandc(vector unsigned long long __a, vector bool long long __b) {
return __a & ~(vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_vandc(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_vandc(vector bool long long __a, vector bool long long __b) {
return __a & ~__b;
}
#endif
/* vec_avg */
-static vector signed char __ATTRS_o_ai vec_avg(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_avg(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vavgsb(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_avg(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_avg(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vavgub(__a, __b);
}
-static vector short __ATTRS_o_ai vec_avg(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_avg(vector short __a,
+ vector short __b) {
return __builtin_altivec_vavgsh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_avg(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_avg(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vavguh(__a, __b);
}
-static vector int __ATTRS_o_ai vec_avg(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_avg(vector int __a,
+ vector int __b) {
return __builtin_altivec_vavgsw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_avg(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_avg(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vavguw(__a, __b);
}
/* vec_vavgsb */
-static vector signed char __attribute__((__always_inline__))
+static __inline__ vector signed char __attribute__((__always_inline__))
vec_vavgsb(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vavgsb(__a, __b);
}
/* vec_vavgub */
-static vector unsigned char __attribute__((__always_inline__))
+static __inline__ vector unsigned char __attribute__((__always_inline__))
vec_vavgub(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vavgub(__a, __b);
}
/* vec_vavgsh */
-static vector short __attribute__((__always_inline__))
+static __inline__ vector short __attribute__((__always_inline__))
vec_vavgsh(vector short __a, vector short __b) {
return __builtin_altivec_vavgsh(__a, __b);
}
/* vec_vavguh */
-static vector unsigned short __attribute__((__always_inline__))
+static __inline__ vector unsigned short __attribute__((__always_inline__))
vec_vavguh(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vavguh(__a, __b);
}
/* vec_vavgsw */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vavgsw(vector int __a, vector int __b) {
return __builtin_altivec_vavgsw(__a, __b);
}
/* vec_vavguw */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vavguw(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vavguw(__a, __b);
}
/* vec_ceil */
-static vector float __ATTRS_o_ai vec_ceil(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_ceil(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvrspip(__a);
#else
@@ -1462,82 +1504,83 @@ static vector float __ATTRS_o_ai vec_ceil(vector float __a) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_ceil(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_ceil(vector double __a) {
return __builtin_vsx_xvrdpip(__a);
}
#endif
/* vec_vrfip */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vrfip(vector float __a) {
return __builtin_altivec_vrfip(__a);
}
/* vec_cmpb */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_cmpb(vector float __a, vector float __b) {
return __builtin_altivec_vcmpbfp(__a, __b);
}
/* vec_vcmpbfp */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vcmpbfp(vector float __a, vector float __b) {
return __builtin_altivec_vcmpbfp(__a, __b);
}
/* vec_cmpeq */
-static vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmpeq(vector signed char __a, vector signed char __b) {
return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a,
(vector char)__b);
}
-static vector bool char __ATTRS_o_ai vec_cmpeq(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmpeq(vector unsigned char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a,
(vector char)__b);
}
-static vector bool short __ATTRS_o_ai vec_cmpeq(vector short __a,
- vector short __b) {
+static __inline__ vector bool short __ATTRS_o_ai vec_cmpeq(vector short __a,
+ vector short __b) {
return (vector bool short)__builtin_altivec_vcmpequh(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_cmpeq(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmpeq(vector unsigned short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vcmpequh((vector short)__a,
(vector short)__b);
}
-static vector bool int __ATTRS_o_ai vec_cmpeq(vector int __a, vector int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector int __a,
+ vector int __b) {
return (vector bool int)__builtin_altivec_vcmpequw(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_cmpeq(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmpeq(vector unsigned int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vcmpequw((vector int)__a,
(vector int)__b);
}
#ifdef __POWER8_VECTOR__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpeq(vector signed long long __a, vector signed long long __b) {
return (vector bool long long)__builtin_altivec_vcmpequd(__a, __b);
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) {
return (vector bool long long)__builtin_altivec_vcmpequd(
(vector long long)__a, (vector long long)__b);
}
#endif
-static vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a,
- vector float __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return (vector bool int)__builtin_vsx_xvcmpeqsp(__a, __b);
#else
@@ -1546,58 +1589,58 @@ static vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a,
}
#ifdef __VSX__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpeq(vector double __a, vector double __b) {
return (vector bool long long)__builtin_vsx_xvcmpeqdp(__a, __b);
}
#endif
-
/* vec_cmpgt */
-static vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmpgt(vector signed char __a, vector signed char __b) {
return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b);
}
-static vector bool char __ATTRS_o_ai vec_cmpgt(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmpgt(vector unsigned char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_cmpgt(vector short __a,
- vector short __b) {
+static __inline__ vector bool short __ATTRS_o_ai vec_cmpgt(vector short __a,
+ vector short __b) {
return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_cmpgt(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmpgt(vector unsigned short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_cmpgt(vector int __a, vector int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmpgt(vector int __a,
+ vector int __b) {
return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_cmpgt(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmpgt(vector unsigned int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b);
}
#ifdef __POWER8_VECTOR__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpgt(vector signed long long __a, vector signed long long __b) {
return (vector bool long long)__builtin_altivec_vcmpgtsd(__a, __b);
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) {
return (vector bool long long)__builtin_altivec_vcmpgtud(__a, __b);
}
#endif
-static vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a,
- vector float __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return (vector bool int)__builtin_vsx_xvcmpgtsp(__a, __b);
#else
@@ -1606,7 +1649,7 @@ static vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a,
}
#ifdef __VSX__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpgt(vector double __a, vector double __b) {
return (vector bool long long)__builtin_vsx_xvcmpgtdp(__a, __b);
}
@@ -1614,38 +1657,38 @@ vec_cmpgt(vector double __a, vector double __b) {
/* vec_cmpge */
-static vector bool char __ATTRS_o_ai
-vec_cmpge (vector signed char __a, vector signed char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmpge(vector signed char __a, vector signed char __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool char __ATTRS_o_ai
-vec_cmpge (vector unsigned char __a, vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmpge(vector unsigned char __a, vector unsigned char __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool short __ATTRS_o_ai
-vec_cmpge (vector signed short __a, vector signed short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmpge(vector signed short __a, vector signed short __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool short __ATTRS_o_ai
-vec_cmpge (vector unsigned short __a, vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmpge(vector unsigned short __a, vector unsigned short __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool int __ATTRS_o_ai
-vec_cmpge (vector signed int __a, vector signed int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmpge(vector signed int __a, vector signed int __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool int __ATTRS_o_ai
-vec_cmpge (vector unsigned int __a, vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmpge(vector unsigned int __a, vector unsigned int __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool int __ATTRS_o_ai
-vec_cmpge(vector float __a, vector float __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmpge(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return (vector bool int)__builtin_vsx_xvcmpgesp(__a, __b);
#else
@@ -1654,19 +1697,19 @@ vec_cmpge(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpge(vector double __a, vector double __b) {
return (vector bool long long)__builtin_vsx_xvcmpgedp(__a, __b);
}
#endif
#ifdef __POWER8_VECTOR__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpge(vector signed long long __a, vector signed long long __b) {
return ~(vec_cmpgt(__b, __a));
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) {
return ~(vec_cmpgt(__b, __a));
}
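/* Note: vec_cmpge is derived rather than mapped to its own builtin.
   Because the compare intrinsics return element-wise all-ones/all-zeros
   masks, a >= b is exactly the bitwise complement of b > a, hence the
   ~(vec_cmpgt(__b, __a)) pattern used throughout this section. */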
@@ -1674,111 +1717,111 @@ vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) {
/* vec_vcmpgefp */
-static vector bool int __attribute__((__always_inline__))
+static __inline__ vector bool int __attribute__((__always_inline__))
vec_vcmpgefp(vector float __a, vector float __b) {
return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
}
/* vec_vcmpgtsb */
-static vector bool char __attribute__((__always_inline__))
+static __inline__ vector bool char __attribute__((__always_inline__))
vec_vcmpgtsb(vector signed char __a, vector signed char __b) {
return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b);
}
/* vec_vcmpgtub */
-static vector bool char __attribute__((__always_inline__))
+static __inline__ vector bool char __attribute__((__always_inline__))
vec_vcmpgtub(vector unsigned char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b);
}
/* vec_vcmpgtsh */
-static vector bool short __attribute__((__always_inline__))
+static __inline__ vector bool short __attribute__((__always_inline__))
vec_vcmpgtsh(vector short __a, vector short __b) {
return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b);
}
/* vec_vcmpgtuh */
-static vector bool short __attribute__((__always_inline__))
+static __inline__ vector bool short __attribute__((__always_inline__))
vec_vcmpgtuh(vector unsigned short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b);
}
/* vec_vcmpgtsw */
-static vector bool int __attribute__((__always_inline__))
+static __inline__ vector bool int __attribute__((__always_inline__))
vec_vcmpgtsw(vector int __a, vector int __b) {
return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b);
}
/* vec_vcmpgtuw */
-static vector bool int __attribute__((__always_inline__))
+static __inline__ vector bool int __attribute__((__always_inline__))
vec_vcmpgtuw(vector unsigned int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b);
}
/* vec_vcmpgtfp */
-static vector bool int __attribute__((__always_inline__))
+static __inline__ vector bool int __attribute__((__always_inline__))
vec_vcmpgtfp(vector float __a, vector float __b) {
return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b);
}
/* vec_cmple */
-static vector bool char __ATTRS_o_ai
-vec_cmple (vector signed char __a, vector signed char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmple(vector signed char __a, vector signed char __b) {
return vec_cmpge(__b, __a);
}
-static vector bool char __ATTRS_o_ai
-vec_cmple (vector unsigned char __a, vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmple(vector unsigned char __a, vector unsigned char __b) {
return vec_cmpge(__b, __a);
}
-static vector bool short __ATTRS_o_ai
-vec_cmple (vector signed short __a, vector signed short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmple(vector signed short __a, vector signed short __b) {
return vec_cmpge(__b, __a);
}
-static vector bool short __ATTRS_o_ai
-vec_cmple (vector unsigned short __a, vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmple(vector unsigned short __a, vector unsigned short __b) {
return vec_cmpge(__b, __a);
}
-static vector bool int __ATTRS_o_ai
-vec_cmple (vector signed int __a, vector signed int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmple(vector signed int __a, vector signed int __b) {
return vec_cmpge(__b, __a);
}
-static vector bool int __ATTRS_o_ai
-vec_cmple (vector unsigned int __a, vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmple(vector unsigned int __a, vector unsigned int __b) {
return vec_cmpge(__b, __a);
}
-static vector bool int __ATTRS_o_ai
-vec_cmple(vector float __a, vector float __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmple(vector float __a,
+ vector float __b) {
return vec_cmpge(__b, __a);
}
#ifdef __VSX__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmple(vector double __a, vector double __b) {
return vec_cmpge(__b, __a);
}
#endif
#ifdef __POWER8_VECTOR__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmple(vector signed long long __a, vector signed long long __b) {
return vec_cmpge(__b, __a);
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmple(vector unsigned long long __a, vector unsigned long long __b) {
return vec_cmpge(__b, __a);
}
@@ -1786,83 +1829,90 @@ vec_cmple(vector unsigned long long __a, vector unsigned long long __b) {
/* vec_cmplt */
-static vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmplt(vector signed char __a, vector signed char __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool char __ATTRS_o_ai vec_cmplt(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_cmplt(vector unsigned char __a, vector unsigned char __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool short __ATTRS_o_ai vec_cmplt(vector short __a,
- vector short __b) {
+static __inline__ vector bool short __ATTRS_o_ai vec_cmplt(vector short __a,
+ vector short __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool short __ATTRS_o_ai vec_cmplt(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_cmplt(vector unsigned short __a, vector unsigned short __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool int __ATTRS_o_ai vec_cmplt(vector int __a, vector int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmplt(vector int __a,
+ vector int __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool int __ATTRS_o_ai vec_cmplt(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_cmplt(vector unsigned int __a, vector unsigned int __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool int __ATTRS_o_ai vec_cmplt(vector float __a,
- vector float __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_cmplt(vector float __a,
+ vector float __b) {
return vec_cmpgt(__b, __a);
}
#ifdef __VSX__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmplt(vector double __a, vector double __b) {
return vec_cmpgt(__b, __a);
}
#endif
#ifdef __POWER8_VECTOR__
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmplt(vector signed long long __a, vector signed long long __b) {
return vec_cmpgt(__b, __a);
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {
return vec_cmpgt(__b, __a);
}
/* vec_cntlz */
-static vector signed char __ATTRS_o_ai vec_cntlz(vector signed char __a) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_cntlz(vector signed char __a) {
return __builtin_altivec_vclzb(__a);
}
-static vector unsigned char __ATTRS_o_ai vec_cntlz(vector unsigned char __a) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_cntlz(vector unsigned char __a) {
return __builtin_altivec_vclzb(__a);
}
-static vector signed short __ATTRS_o_ai vec_cntlz(vector signed short __a) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_cntlz(vector signed short __a) {
return __builtin_altivec_vclzh(__a);
}
-static vector unsigned short __ATTRS_o_ai vec_cntlz(vector unsigned short __a) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_cntlz(vector unsigned short __a) {
return __builtin_altivec_vclzh(__a);
}
-static vector signed int __ATTRS_o_ai vec_cntlz(vector signed int __a) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_cntlz(vector signed int __a) {
return __builtin_altivec_vclzw(__a);
}
-static vector unsigned int __ATTRS_o_ai vec_cntlz(vector unsigned int __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_cntlz(vector unsigned int __a) {
return __builtin_altivec_vclzw(__a);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_cntlz(vector signed long long __a) {
return __builtin_altivec_vclzd(__a);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_cntlz(vector unsigned long long __a) {
return __builtin_altivec_vclzd(__a);
}
@@ -1871,36 +1921,38 @@ vec_cntlz(vector unsigned long long __a) {
/* vec_cpsgn */
#ifdef __VSX__
-static vector float __ATTRS_o_ai vec_cpsgn(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_cpsgn(vector float __a,
+ vector float __b) {
return __builtin_vsx_xvcpsgnsp(__a, __b);
}
-static vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcpsgndp(__a, __b);
}
#endif
/* vec_ctf */
-static vector float __ATTRS_o_ai vec_ctf(vector int __a, int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_ctf(vector int __a, int __b) {
return __builtin_altivec_vcfsx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_ctf(vector unsigned int __a, int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_ctf(vector unsigned int __a,
+ int __b) {
return __builtin_altivec_vcfux((vector int)__a, __b);
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_ctf(vector unsigned long long __a,
- int __b) {
+static __inline__ vector double __ATTRS_o_ai
+vec_ctf(vector unsigned long long __a, int __b) {
vector double __ret = __builtin_convertvector(__a, vector double);
__ret *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52);
return __ret;
}
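/* The scale factor above is built by bit-packing an IEEE-754 double:
   an exponent field of (0x3ff - __b) in bits 62:52 (bias 1023) with a
   zero mantissa is exactly 2**-__b, so the multiply applies the
   fixed-point scaling in one step.  vec_cts below uses the mirror
   image, (0x3ffULL + __b) << 52, i.e. 2**__b.  For example, with
   __b == 3, (0x3ffULL + 3) << 52 reinterpreted as double is 8.0. */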
-static vector double __ATTRS_o_ai vec_ctf(vector signed long long __a,
- int __b) {
+static __inline__ vector double __ATTRS_o_ai
+vec_ctf(vector signed long long __a, int __b) {
vector double __ret = __builtin_convertvector(__a, vector double);
__ret *= (vector double)(vector unsigned long long)((0x3ffULL - __b) << 52);
return __ret;
@@ -1909,27 +1961,27 @@ static vector double __ATTRS_o_ai vec_ctf(vector signed long long __a,
/* vec_vcfsx */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vcfsx(vector int __a, int __b) {
return __builtin_altivec_vcfsx(__a, __b);
}
/* vec_vcfux */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vcfux(vector unsigned int __a, int __b) {
return __builtin_altivec_vcfux((vector int)__a, __b);
}
/* vec_cts */
-static vector int __ATTRS_o_ai vec_cts(vector float __a, int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_cts(vector float __a, int __b) {
return __builtin_altivec_vctsxs(__a, __b);
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai vec_cts(vector double __a,
- int __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_cts(vector double __a, int __b) {
__a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52);
return __builtin_convertvector(__a, vector signed long long);
}
@@ -1937,20 +1989,21 @@ static vector signed long long __ATTRS_o_ai vec_cts(vector double __a,
/* vec_vctsxs */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vctsxs(vector float __a, int __b) {
return __builtin_altivec_vctsxs(__a, __b);
}
/* vec_ctu */
-static vector unsigned int __ATTRS_o_ai vec_ctu(vector float __a, int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_ctu(vector float __a,
+ int __b) {
return __builtin_altivec_vctuxs(__a, __b);
}
#ifdef __VSX__
-static vector unsigned long long __ATTRS_o_ai vec_ctu(vector double __a,
- int __b) {
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_ctu(vector double __a, int __b) {
__a *= (vector double)(vector unsigned long long)((0x3ffULL + __b) << 52);
return __builtin_convertvector(__a, vector unsigned long long);
}
@@ -1958,7 +2011,7 @@ static vector unsigned long long __ATTRS_o_ai vec_ctu(vector double __a,
/* vec_vctuxs */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vctuxs(vector float __a, int __b) {
return __builtin_altivec_vctuxs(__a, __b);
}
@@ -1966,13 +2019,15 @@ vec_vctuxs(vector float __a, int __b) {
/* vec_double */
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_double (vector signed long long __a) {
- vector double __ret = { __a[0], __a[1] };
+static __inline__ vector double __ATTRS_o_ai
+vec_double(vector signed long long __a) {
+ vector double __ret = {__a[0], __a[1]};
return __ret;
}
-static vector double __ATTRS_o_ai vec_double (vector unsigned long long __a) {
- vector double __ret = { __a[0], __a[1] };
+static __inline__ vector double __ATTRS_o_ai
+vec_double(vector unsigned long long __a) {
+ vector double __ret = {__a[0], __a[1]};
return __ret;
}
#endif
@@ -1982,178 +2037,172 @@ static vector double __ATTRS_o_ai vec_double (vector unsigned long long __a) {
/* Integer vector divides (vectors are scalarized, elements divided
and the vectors reassembled).
*/
-static vector signed char __ATTRS_o_ai vec_div(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_div(vector signed char __a, vector signed char __b) {
return __a / __b;
}
-static vector unsigned char __ATTRS_o_ai vec_div(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_div(vector unsigned char __a, vector unsigned char __b) {
return __a / __b;
}
-static vector signed short __ATTRS_o_ai vec_div(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_div(vector signed short __a, vector signed short __b) {
return __a / __b;
}
-static vector unsigned short __ATTRS_o_ai vec_div(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_div(vector unsigned short __a, vector unsigned short __b) {
return __a / __b;
}
-static vector signed int __ATTRS_o_ai vec_div(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_div(vector signed int __a, vector signed int __b) {
return __a / __b;
}
-static vector unsigned int __ATTRS_o_ai vec_div(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_div(vector unsigned int __a, vector unsigned int __b) {
return __a / __b;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_div(vector signed long long __a, vector signed long long __b) {
return __a / __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_div(vector unsigned long long __a, vector unsigned long long __b) {
return __a / __b;
}
-static vector float __ATTRS_o_ai vec_div(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_div(vector float __a,
+ vector float __b) {
return __a / __b;
}
-static vector double __ATTRS_o_ai vec_div(vector double __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_div(vector double __a,
+ vector double __b) {
return __a / __b;
}
#endif
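/* A minimal usage sketch (hypothetical values): the division is
   performed element by element, so
     vector unsigned int __n = {10, 20, 30, 40};
     vector unsigned int __d = { 2,  4,  5,  8};
     vec_div(__n, __d);   // yields {5, 5, 6, 5}
*/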
/* vec_dss */
-static void __attribute__((__always_inline__)) vec_dss(int __a) {
+static __inline__ void __attribute__((__always_inline__)) vec_dss(int __a) {
__builtin_altivec_dss(__a);
}
/* vec_dssall */
-static void __attribute__((__always_inline__)) vec_dssall(void) {
+static __inline__ void __attribute__((__always_inline__)) vec_dssall(void) {
__builtin_altivec_dssall();
}
/* vec_dst */
-
-static void __attribute__((__always_inline__))
-vec_dst(const void *__a, int __b, int __c) {
- __builtin_altivec_dst(__a, __b, __c);
-}
+#define vec_dst(__PTR, __CW, __STR) \
+ __extension__( \
+ { __builtin_altivec_dst((const void *)(__PTR), (__CW), (__STR)); })
/* vec_dstst */
-
-static void __attribute__((__always_inline__))
-vec_dstst(const void *__a, int __b, int __c) {
- __builtin_altivec_dstst(__a, __b, __c);
-}
+#define vec_dstst(__PTR, __CW, __STR) \
+ __extension__( \
+ { __builtin_altivec_dstst((const void *)(__PTR), (__CW), (__STR)); })
/* vec_dststt */
-
-static void __attribute__((__always_inline__))
-vec_dststt(const void *__a, int __b, int __c) {
- __builtin_altivec_dststt(__a, __b, __c);
-}
+#define vec_dststt(__PTR, __CW, __STR) \
+ __extension__( \
+ { __builtin_altivec_dststt((const void *)(__PTR), (__CW), (__STR)); })
/* vec_dstt */
-
-static void __attribute__((__always_inline__))
-vec_dstt(const void *__a, int __b, int __c) {
- __builtin_altivec_dstt(__a, __b, __c);
-}
+#define vec_dstt(__PTR, __CW, __STR) \
+ __extension__( \
+ { __builtin_altivec_dstt((const void *)(__PTR), (__CW), (__STR)); })
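/* The dst family is now expressed as macros rather than inline
   functions, presumably so the stream tag remains an integer constant
   expression (it is encoded as an immediate field in the underlying
   instructions) and so any pointer type is accepted via the
   (const void *) cast.  A usage sketch with a hypothetical buffer and
   control word __cw (packing block size/count/stride):
     static float __buf[4096];
     vec_dst(__buf, __cw, 0);   // start prefetch on stream 0
     vec_dss(0);                // later, stop stream 0
*/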
/* vec_eqv */
#ifdef __POWER8_VECTOR__
-static vector signed char __ATTRS_o_ai vec_eqv(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_eqv(vector signed char __a, vector signed char __b) {
return (vector signed char)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_eqv(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_eqv(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector bool char __ATTRS_o_ai vec_eqv(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_eqv(vector bool char __a,
+ vector bool char __b) {
return (vector bool char)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector signed short __ATTRS_o_ai vec_eqv(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_eqv(vector signed short __a, vector signed short __b) {
return (vector signed short)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_eqv(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_eqv(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector bool short __ATTRS_o_ai vec_eqv(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_eqv(vector bool short __a, vector bool short __b) {
return (vector bool short)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector signed int __ATTRS_o_ai vec_eqv(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_eqv(vector signed int __a, vector signed int __b) {
return (vector signed int)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_eqv(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_eqv(vector unsigned int __a, vector unsigned int __b) {
return __builtin_vsx_xxleqv(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_eqv(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_eqv(vector bool int __a,
+ vector bool int __b) {
return (vector bool int)__builtin_vsx_xxleqv((vector unsigned int)__a,
- (vector unsigned int)__b);
+ (vector unsigned int)__b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_eqv(vector signed long long __a, vector signed long long __b) {
- return (vector signed long long)
- __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+ return (vector signed long long)__builtin_vsx_xxleqv(
+ (vector unsigned int)__a, (vector unsigned int)__b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_eqv(vector unsigned long long __a, vector unsigned long long __b) {
- return (vector unsigned long long)
- __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+ return (vector unsigned long long)__builtin_vsx_xxleqv(
+ (vector unsigned int)__a, (vector unsigned int)__b);
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_eqv(vector bool long long __a, vector bool long long __b) {
- return (vector bool long long)
- __builtin_vsx_xxleqv((vector unsigned int)__a, (vector unsigned int)__b);
+ return (vector bool long long)__builtin_vsx_xxleqv((vector unsigned int)__a,
+ (vector unsigned int)__b);
}
-static vector float __ATTRS_o_ai vec_eqv(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_eqv(vector float __a,
+ vector float __b) {
return (vector float)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
-static vector double __ATTRS_o_ai vec_eqv(vector double __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_eqv(vector double __a,
+ vector double __b) {
return (vector double)__builtin_vsx_xxleqv((vector unsigned int)__a,
(vector unsigned int)__b);
}
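/* All vec_eqv overloads funnel into xxleqv, the VSX bitwise
   equivalence (XNOR) operation: each result bit is ~(__a ^ __b),
   which is why every variant is just a cast around the same
   vector-unsigned-int builtin. */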
@@ -2161,21 +2210,21 @@ static vector double __ATTRS_o_ai vec_eqv(vector double __a,
/* vec_expte */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_expte(vector float __a) {
return __builtin_altivec_vexptefp(__a);
}
/* vec_vexptefp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vexptefp(vector float __a) {
return __builtin_altivec_vexptefp(__a);
}
/* vec_floor */
-static vector float __ATTRS_o_ai vec_floor(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_floor(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvrspim(__a);
#else
@@ -2184,439 +2233,460 @@ static vector float __ATTRS_o_ai vec_floor(vector float __a) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_floor(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_floor(vector double __a) {
return __builtin_vsx_xvrdpim(__a);
}
#endif
/* vec_vrfim */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vrfim(vector float __a) {
return __builtin_altivec_vrfim(__a);
}
/* vec_ld */
-static vector signed char __ATTRS_o_ai vec_ld(int __a,
- const vector signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_ld(int __a, const vector signed char *__b) {
return (vector signed char)__builtin_altivec_lvx(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_ld(int __a, const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_ld(int __a, const signed char *__b) {
return (vector signed char)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_ld(int __a, const vector unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_ld(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_ld(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
}
-static vector bool char __ATTRS_o_ai vec_ld(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_ld(int __a, const vector bool char *__b) {
return (vector bool char)__builtin_altivec_lvx(__a, __b);
}
-static vector short __ATTRS_o_ai vec_ld(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_ld(int __a,
+ const vector short *__b) {
return (vector short)__builtin_altivec_lvx(__a, __b);
}
-static vector short __ATTRS_o_ai vec_ld(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_ld(int __a, const short *__b) {
return (vector short)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_ld(int __a, const vector unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_ld(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_ld(int __a, const unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_ld(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_ld(int __a, const vector bool short *__b) {
return (vector bool short)__builtin_altivec_lvx(__a, __b);
}
-static vector pixel __ATTRS_o_ai vec_ld(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_ld(int __a,
+ const vector pixel *__b) {
return (vector pixel)__builtin_altivec_lvx(__a, __b);
}
-static vector int __ATTRS_o_ai vec_ld(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_ld(int __a,
+ const vector int *__b) {
return (vector int)__builtin_altivec_lvx(__a, __b);
}
-static vector int __ATTRS_o_ai vec_ld(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_ld(int __a, const int *__b) {
return (vector int)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_ld(int __a,
- const vector unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_ld(int __a, const vector unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_ld(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_ld(int __a, const unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_ld(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_ld(int __a, const vector bool int *__b) {
return (vector bool int)__builtin_altivec_lvx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_ld(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_ld(int __a,
+ const vector float *__b) {
return (vector float)__builtin_altivec_lvx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_ld(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_ld(int __a, const float *__b) {
return (vector float)__builtin_altivec_lvx(__a, __b);
}
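/* Note that vec_ld maps to lvx, an aligned load: the effective
   address (__a + __b), with __a an integer byte offset, has its low
   four bits cleared before the load.  A sketch (hypothetical,
   16-byte-aligned data):
     static int __data[8] __attribute__((aligned(16)));
     vector int __v = vec_ld(0, __data);   // loads __data[0..3]
*/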
/* vec_lvx */
-static vector signed char __ATTRS_o_ai vec_lvx(int __a,
- const vector signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvx(int __a, const vector signed char *__b) {
return (vector signed char)__builtin_altivec_lvx(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_lvx(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvx(int __a, const signed char *__b) {
return (vector signed char)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_lvx(int __a, const vector unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_lvx(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvx(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
}
-static vector bool char __ATTRS_o_ai vec_lvx(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_lvx(int __a, const vector bool char *__b) {
return (vector bool char)__builtin_altivec_lvx(__a, __b);
}
-static vector short __ATTRS_o_ai vec_lvx(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvx(int __a,
+ const vector short *__b) {
return (vector short)__builtin_altivec_lvx(__a, __b);
}
-static vector short __ATTRS_o_ai vec_lvx(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvx(int __a, const short *__b) {
return (vector short)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_lvx(int __a, const vector unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_lvx(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvx(int __a, const unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_lvx(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_lvx(int __a, const vector bool short *__b) {
return (vector bool short)__builtin_altivec_lvx(__a, __b);
}
-static vector pixel __ATTRS_o_ai vec_lvx(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_lvx(int __a,
+ const vector pixel *__b) {
return (vector pixel)__builtin_altivec_lvx(__a, __b);
}
-static vector int __ATTRS_o_ai vec_lvx(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvx(int __a,
+ const vector int *__b) {
return (vector int)__builtin_altivec_lvx(__a, __b);
}
-static vector int __ATTRS_o_ai vec_lvx(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvx(int __a, const int *__b) {
return (vector int)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_lvx(int __a, const vector unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_lvx(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvx(int __a, const unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_lvx(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_lvx(int __a, const vector bool int *__b) {
return (vector bool int)__builtin_altivec_lvx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_lvx(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvx(int __a,
+ const vector float *__b) {
return (vector float)__builtin_altivec_lvx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_lvx(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvx(int __a, const float *__b) {
return (vector float)__builtin_altivec_lvx(__a, __b);
}
/* vec_lde */
-static vector signed char __ATTRS_o_ai vec_lde(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lde(int __a, const signed char *__b) {
return (vector signed char)__builtin_altivec_lvebx(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_lde(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lde(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvebx(__a, __b);
}
-static vector short __ATTRS_o_ai vec_lde(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lde(int __a, const short *__b) {
return (vector short)__builtin_altivec_lvehx(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_lde(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lde(int __a, const unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvehx(__a, __b);
}
-static vector int __ATTRS_o_ai vec_lde(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lde(int __a, const int *__b) {
return (vector int)__builtin_altivec_lvewx(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_lde(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lde(int __a, const unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvewx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_lde(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lde(int __a, const float *__b) {
return (vector float)__builtin_altivec_lvewx(__a, __b);
}
/* vec_lvebx */
-static vector signed char __ATTRS_o_ai vec_lvebx(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvebx(int __a, const signed char *__b) {
return (vector signed char)__builtin_altivec_lvebx(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_lvebx(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvebx(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvebx(__a, __b);
}
/* vec_lvehx */
-static vector short __ATTRS_o_ai vec_lvehx(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvehx(int __a,
+ const short *__b) {
return (vector short)__builtin_altivec_lvehx(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_lvehx(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvehx(int __a, const unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvehx(__a, __b);
}
/* vec_lvewx */
-static vector int __ATTRS_o_ai vec_lvewx(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvewx(int __a, const int *__b) {
return (vector int)__builtin_altivec_lvewx(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_lvewx(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvewx(int __a, const unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvewx(__a, __b);
}
-static vector float __ATTRS_o_ai vec_lvewx(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvewx(int __a,
+ const float *__b) {
return (vector float)__builtin_altivec_lvewx(__a, __b);
}
/* vec_ldl */
-static vector signed char __ATTRS_o_ai vec_ldl(int __a,
- const vector signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_ldl(int __a, const vector signed char *__b) {
return (vector signed char)__builtin_altivec_lvxl(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_ldl(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_ldl(int __a, const signed char *__b) {
return (vector signed char)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_ldl(int __a, const vector unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_ldl(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_ldl(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
}
-static vector bool char __ATTRS_o_ai vec_ldl(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_ldl(int __a, const vector bool char *__b) {
return (vector bool char)__builtin_altivec_lvxl(__a, __b);
}
-static vector short __ATTRS_o_ai vec_ldl(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_ldl(int __a,
+ const vector short *__b) {
return (vector short)__builtin_altivec_lvxl(__a, __b);
}
-static vector short __ATTRS_o_ai vec_ldl(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_ldl(int __a, const short *__b) {
return (vector short)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_ldl(int __a, const vector unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_ldl(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_ldl(int __a, const unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_ldl(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_ldl(int __a, const vector bool short *__b) {
return (vector bool short)__builtin_altivec_lvxl(__a, __b);
}
-static vector pixel __ATTRS_o_ai vec_ldl(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_ldl(int __a,
+ const vector pixel *__b) {
  return (vector pixel)__builtin_altivec_lvxl(__a, __b);
}
-static vector int __ATTRS_o_ai vec_ldl(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_ldl(int __a,
+ const vector int *__b) {
return (vector int)__builtin_altivec_lvxl(__a, __b);
}
-static vector int __ATTRS_o_ai vec_ldl(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_ldl(int __a, const int *__b) {
return (vector int)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_ldl(int __a, const vector unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_ldl(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_ldl(int __a, const unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_ldl(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_ldl(int __a, const vector bool int *__b) {
return (vector bool int)__builtin_altivec_lvxl(__a, __b);
}
-static vector float __ATTRS_o_ai vec_ldl(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_ldl(int __a,
+ const vector float *__b) {
return (vector float)__builtin_altivec_lvxl(__a, __b);
}
-static vector float __ATTRS_o_ai vec_ldl(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_ldl(int __a, const float *__b) {
return (vector float)__builtin_altivec_lvxl(__a, __b);
}
/* vec_lvxl */
-static vector signed char __ATTRS_o_ai vec_lvxl(int __a,
- const vector signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvxl(int __a, const vector signed char *__b) {
return (vector signed char)__builtin_altivec_lvxl(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_lvxl(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvxl(int __a, const signed char *__b) {
return (vector signed char)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_lvxl(int __a, const vector unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_lvxl(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvxl(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
}
-static vector bool char __ATTRS_o_ai vec_lvxl(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_lvxl(int __a, const vector bool char *__b) {
return (vector bool char)__builtin_altivec_lvxl(__a, __b);
}
-static vector short __ATTRS_o_ai vec_lvxl(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvxl(int __a,
+ const vector short *__b) {
return (vector short)__builtin_altivec_lvxl(__a, __b);
}
-static vector short __ATTRS_o_ai vec_lvxl(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvxl(int __a,
+ const short *__b) {
return (vector short)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_lvxl(int __a, const vector unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_lvxl(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvxl(int __a, const unsigned short *__b) {
return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
}
-static vector bool short __ATTRS_o_ai vec_lvxl(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_lvxl(int __a, const vector bool short *__b) {
return (vector bool short)__builtin_altivec_lvxl(__a, __b);
}
-static vector pixel __ATTRS_o_ai vec_lvxl(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_lvxl(int __a,
+ const vector pixel *__b) {
return (vector pixel)__builtin_altivec_lvxl(__a, __b);
}
-static vector int __ATTRS_o_ai vec_lvxl(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvxl(int __a,
+ const vector int *__b) {
return (vector int)__builtin_altivec_lvxl(__a, __b);
}
-static vector int __ATTRS_o_ai vec_lvxl(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvxl(int __a, const int *__b) {
return (vector int)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_lvxl(int __a, const vector unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_lvxl(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvxl(int __a, const unsigned int *__b) {
return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
}
-static vector bool int __ATTRS_o_ai vec_lvxl(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_lvxl(int __a, const vector bool int *__b) {
return (vector bool int)__builtin_altivec_lvxl(__a, __b);
}
-static vector float __ATTRS_o_ai vec_lvxl(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvxl(int __a,
+ const vector float *__b) {
return (vector float)__builtin_altivec_lvxl(__a, __b);
}
-static vector float __ATTRS_o_ai vec_lvxl(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvxl(int __a,
+ const float *__b) {
return (vector float)__builtin_altivec_lvxl(__a, __b);
}
/* vec_loge */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_loge(vector float __a) {
return __builtin_altivec_vlogefp(__a);
}
/* vec_vlogefp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vlogefp(vector float __a) {
return __builtin_altivec_vlogefp(__a);
}
@@ -2624,7 +2694,7 @@ vec_vlogefp(vector float __a) {
/* vec_lvsl */
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const signed char *__b) {
vector unsigned char mask =
@@ -2634,14 +2704,14 @@ loads/stores"))) vec_lvsl(int __a, const signed char *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
- const signed char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsl(int __a, const signed char *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const unsigned char *__b) {
vector unsigned char mask =
@@ -2651,14 +2721,14 @@ loads/stores"))) vec_lvsl(int __a, const unsigned char *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsl(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const short *__b) {
vector unsigned char mask =
@@ -2668,13 +2738,14 @@ loads/stores"))) vec_lvsl(int __a, const short *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const short *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+ const short *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const unsigned short *__b) {
vector unsigned char mask =
@@ -2684,14 +2755,14 @@ loads/stores"))) vec_lvsl(int __a, const unsigned short *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsl(int __a, const unsigned short *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const int *__b) {
vector unsigned char mask =
@@ -2701,13 +2772,14 @@ loads/stores"))) vec_lvsl(int __a, const int *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const int *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+ const int *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const unsigned int *__b) {
vector unsigned char mask =
@@ -2717,14 +2789,14 @@ loads/stores"))) vec_lvsl(int __a, const unsigned int *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsl(int __a, const unsigned int *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsl(int __a, const float *__b) {
vector unsigned char mask =
@@ -2734,7 +2806,8 @@ loads/stores"))) vec_lvsl(int __a, const float *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const float *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+ const float *__b) {
return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
}
#endif
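The little-endian variants above are deprecated because the lvsl mask encodes big-endian element order; on little-endian targets a plain unaligned dereference is the supported idiom and the compiler emits correct code for it. On big-endian targets the classic two-loads-plus-permute sequence still applies; a sketch, with load_misaligned as a hypothetical helper:

#include <altivec.h>

/* Big-endian unaligned load: splice the two aligned quadwords that
   enclose p using the shift-left permute mask from lvsl. */
static vector unsigned char load_misaligned(const unsigned char *p) {
  vector unsigned char lo  = vec_ld(0, p);    /* quadword at or below p */
  vector unsigned char hi  = vec_ld(15, p);   /* next aligned quadword  */
  vector unsigned char sel = vec_lvsl(0, p);  /* alignment permute mask */
  return vec_perm(lo, hi, sel);
}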
@@ -2742,7 +2815,7 @@ static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const float *__b) {
/* vec_lvsr */
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const signed char *__b) {
vector unsigned char mask =
@@ -2752,14 +2825,14 @@ loads/stores"))) vec_lvsr(int __a, const signed char *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
- const signed char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsr(int __a, const signed char *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const unsigned char *__b) {
vector unsigned char mask =
@@ -2769,14 +2842,14 @@ loads/stores"))) vec_lvsr(int __a, const unsigned char *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsr(int __a, const unsigned char *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const short *__b) {
vector unsigned char mask =
@@ -2786,13 +2859,14 @@ loads/stores"))) vec_lvsr(int __a, const short *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const short *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+ const short *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const unsigned short *__b) {
vector unsigned char mask =
@@ -2802,14 +2876,14 @@ loads/stores"))) vec_lvsr(int __a, const unsigned short *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsr(int __a, const unsigned short *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const int *__b) {
vector unsigned char mask =
@@ -2819,13 +2893,14 @@ loads/stores"))) vec_lvsr(int __a, const int *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const int *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+ const int *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const unsigned int *__b) {
vector unsigned char mask =
@@ -2835,14 +2910,14 @@ loads/stores"))) vec_lvsr(int __a, const unsigned int *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvsr(int __a, const unsigned int *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
#ifdef __LITTLE_ENDIAN__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores"))) vec_lvsr(int __a, const float *__b) {
vector unsigned char mask =
@@ -2852,47 +2927,48 @@ loads/stores"))) vec_lvsr(int __a, const float *__b) {
return vec_perm(mask, mask, reverse);
}
#else
-static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+ const float *__b) {
return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
}
#endif
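vec_lvsr produces the mirror-image mask for the store side of the same idiom: rotate the data right by the misalignment, then blend it into the two enclosing aligned quadwords. A big-endian sketch (store_misaligned is a hypothetical helper; it assumes p is not already 16-byte aligned, in which case a single vec_st suffices):

#include <altivec.h>

/* Big-endian read-modify-write of one unaligned quadword. */
static void store_misaligned(vector unsigned char v, unsigned char *p) {
  vector unsigned char perm = vec_lvsr(0, p);
  vector unsigned char zero = vec_splat_u8(0);
  vector unsigned char ones = (vector unsigned char)vec_splat_s8(-1);
  vector unsigned char mask = vec_perm(zero, ones, perm); /* 0s then FFs  */
  vector unsigned char rot  = vec_perm(v, v, perm);       /* rotate right */
  vector unsigned char lo   = vec_ld(0, p);
  vector unsigned char hi   = vec_ld(15, p);
  vec_st(vec_sel(lo, rot, mask), 0, p);  /* tail of lo takes head of v */
  vec_st(vec_sel(rot, hi, mask), 15, p); /* head of hi takes tail of v */
}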
/* vec_madd */
-static vector signed short __ATTRS_o_ai
+static __inline__ vector signed short __ATTRS_o_ai
vec_mladd(vector signed short, vector signed short, vector signed short);
-static vector signed short __ATTRS_o_ai
+static __inline__ vector signed short __ATTRS_o_ai
vec_mladd(vector signed short, vector unsigned short, vector unsigned short);
-static vector signed short __ATTRS_o_ai
+static __inline__ vector signed short __ATTRS_o_ai
vec_mladd(vector unsigned short, vector signed short, vector signed short);
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_mladd(vector unsigned short, vector unsigned short, vector unsigned short);
-static vector signed short __ATTRS_o_ai
-vec_madd(vector signed short __a, vector signed short __b,
- vector signed short __c) {
- return vec_mladd(__a, __b, __c);
+static __inline__ vector signed short __ATTRS_o_ai vec_madd(
+ vector signed short __a, vector signed short __b, vector signed short __c) {
+ return vec_mladd(__a, __b, __c);
}
-static vector signed short __ATTRS_o_ai
+static __inline__ vector signed short __ATTRS_o_ai
vec_madd(vector signed short __a, vector unsigned short __b,
vector unsigned short __c) {
return vec_mladd(__a, __b, __c);
}
-static vector signed short __ATTRS_o_ai
+static __inline__ vector signed short __ATTRS_o_ai
vec_madd(vector unsigned short __a, vector signed short __b,
vector signed short __c) {
return vec_mladd(__a, __b, __c);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_madd(vector unsigned short __a, vector unsigned short __b,
vector unsigned short __c) {
return vec_mladd(__a, __b, __c);
}
-static vector float __ATTRS_o_ai
-vec_madd(vector float __a, vector float __b, vector float __c) {
+static __inline__ vector float __ATTRS_o_ai vec_madd(vector float __a,
+ vector float __b,
+ vector float __c) {
#ifdef __VSX__
return __builtin_vsx_xvmaddasp(__a, __b, __c);
#else
@@ -2901,29 +2977,30 @@ vec_madd(vector float __a, vector float __b, vector float __c) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_madd(vector double __a, vector double __b, vector double __c) {
+static __inline__ vector double __ATTRS_o_ai vec_madd(vector double __a,
+ vector double __b,
+ vector double __c) {
return __builtin_vsx_xvmaddadp(__a, __b, __c);
}
#endif
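Note the asymmetry in vec_madd above: the halfword forms forward to vec_mladd, a modular multiply-low-and-add, while the float and double forms are true fused multiply-adds (vmaddfp, or xvmaddasp/xvmaddadp under VSX). A sketch of the usual FMA pattern, Horner evaluation of a small polynomial (poly2 is a hypothetical helper):

#include <altivec.h>

/* c0 + c1*x + c2*x^2 per lane, one rounding per vec_madd. */
static vector float poly2(vector float x, vector float c0, vector float c1,
                          vector float c2) {
  vector float r = vec_madd(c2, x, c1); /* c2*x + c1           */
  return vec_madd(r, x, c0);            /* (c2*x + c1)*x + c0  */
}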
/* vec_vmaddfp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vmaddfp(vector float __a, vector float __b, vector float __c) {
return __builtin_altivec_vmaddfp(__a, __b, __c);
}
/* vec_madds */
-static vector signed short __attribute__((__always_inline__))
+static __inline__ vector signed short __attribute__((__always_inline__))
vec_madds(vector signed short __a, vector signed short __b,
vector signed short __c) {
return __builtin_altivec_vmhaddshs(__a, __b, __c);
}
/* vec_vmhaddshs */
-static vector signed short __attribute__((__always_inline__))
+static __inline__ vector signed short __attribute__((__always_inline__))
vec_vmhaddshs(vector signed short __a, vector signed short __b,
vector signed short __c) {
return __builtin_altivec_vmhaddshs(__a, __b, __c);
@@ -2932,138 +3009,145 @@ vec_vmhaddshs(vector signed short __a, vector signed short __b,
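vec_madds (vmhaddshs) is the Q15 fixed-point primitive: per lane it computes saturate(((a * b) >> 15) + c). A sketch of a saturating Q15 multiply-accumulate (q15_mac is a hypothetical helper):

#include <altivec.h>

/* acc + sample*gain in Q15: the >>15 rescale and the saturation are both
   done in hardware by vmhaddshs. */
static vector signed short q15_mac(vector signed short acc,
                                   vector signed short sample,
                                   vector signed short gain) {
  return vec_madds(sample, gain, acc);
}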
/* vec_msub */
#ifdef __VSX__
-static vector float __ATTRS_o_ai
-vec_msub(vector float __a, vector float __b, vector float __c) {
+static __inline__ vector float __ATTRS_o_ai vec_msub(vector float __a,
+ vector float __b,
+ vector float __c) {
return __builtin_vsx_xvmsubasp(__a, __b, __c);
}
-static vector double __ATTRS_o_ai
-vec_msub(vector double __a, vector double __b, vector double __c) {
+static __inline__ vector double __ATTRS_o_ai vec_msub(vector double __a,
+ vector double __b,
+ vector double __c) {
return __builtin_vsx_xvmsubadp(__a, __b, __c);
}
#endif
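Because the VSX vec_msub is fused (a*b - c with a single rounding), it supports error-free transforms. A sketch of the classic two-product split (two_prod is a hypothetical helper):

#include <altivec.h>

#ifdef __VSX__
/* p = fl(a*b); e = a*b - p computed fused, i.e. the exact rounding error. */
static void two_prod(vector double a, vector double b,
                     vector double *p, vector double *e) {
  *p = a * b;
  *e = vec_msub(a, b, *p);
}
#endif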
/* vec_max */
-static vector signed char __ATTRS_o_ai vec_max(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_max(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vmaxsb(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_max(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_max(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vmaxsb((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_max(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_max(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vmaxsb(__a, (vector signed char)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_max(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_max(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vmaxub(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_max(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_max(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vmaxub((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_max(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_max(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b);
}
-static vector short __ATTRS_o_ai vec_max(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_max(vector short __a,
+ vector short __b) {
return __builtin_altivec_vmaxsh(__a, __b);
}
-static vector short __ATTRS_o_ai vec_max(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_max(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vmaxsh((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_max(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_max(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vmaxsh(__a, (vector short)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_max(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vmaxuh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_max(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_max(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_max(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b);
}
-static vector int __ATTRS_o_ai vec_max(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_max(vector int __a,
+ vector int __b) {
return __builtin_altivec_vmaxsw(__a, __b);
}
-static vector int __ATTRS_o_ai vec_max(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_max(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vmaxsw((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_max(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_max(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vmaxsw(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_max(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vmaxuw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_max(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_max(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_max(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b);
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_max(vector signed long long __a, vector signed long long __b) {
return __builtin_altivec_vmaxsd(__a, __b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_max(vector bool long long __a, vector signed long long __b) {
return __builtin_altivec_vmaxsd((vector signed long long)__a, __b);
}
-static vector signed long long __ATTRS_o_ai vec_max(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_max(vector signed long long __a, vector bool long long __b) {
return __builtin_altivec_vmaxsd(__a, (vector signed long long)__b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_max(vector unsigned long long __a, vector unsigned long long __b) {
return __builtin_altivec_vmaxud(__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_max(vector bool long long __a, vector unsigned long long __b) {
return __builtin_altivec_vmaxud((vector unsigned long long)__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_max(vector unsigned long long __a, vector bool long long __b) {
return __builtin_altivec_vmaxud(__a, (vector unsigned long long)__b);
}
#endif
-static vector float __ATTRS_o_ai vec_max(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_max(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvmaxsp(__a, __b);
#else
@@ -3072,114 +3156,117 @@ static vector float __ATTRS_o_ai vec_max(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_max(vector double __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_max(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvmaxdp(__a, __b);
}
#endif
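A common use of the vec_max overloads, paired with vec_min further below, is a branch-free per-lane clamp; clampf is a hypothetical helper:

#include <altivec.h>

/* Clamp each lane of x into [lo, hi]. */
static vector float clampf(vector float x, vector float lo, vector float hi) {
  return vec_max(vec_min(x, hi), lo);
}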
/* vec_vmaxsb */
-static vector signed char __ATTRS_o_ai vec_vmaxsb(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vmaxsb(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vmaxsb(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vmaxsb(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vmaxsb(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vmaxsb((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vmaxsb(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vmaxsb(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vmaxsb(__a, (vector signed char)__b);
}
/* vec_vmaxub */
-static vector unsigned char __ATTRS_o_ai vec_vmaxub(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vmaxub(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vmaxub(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vmaxub(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vmaxub(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vmaxub((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vmaxub(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vmaxub(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b);
}
/* vec_vmaxsh */
-static vector short __ATTRS_o_ai vec_vmaxsh(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vmaxsh(vector short __a,
+ vector short __b) {
return __builtin_altivec_vmaxsh(__a, __b);
}
-static vector short __ATTRS_o_ai vec_vmaxsh(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vmaxsh(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vmaxsh((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_vmaxsh(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vmaxsh(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vmaxsh(__a, (vector short)__b);
}
/* vec_vmaxuh */
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vmaxuh(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vmaxuh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vmaxuh(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vmaxuh(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vmaxuh(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b);
}
/* vec_vmaxsw */
-static vector int __ATTRS_o_ai vec_vmaxsw(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vmaxsw(vector int __a,
+ vector int __b) {
return __builtin_altivec_vmaxsw(__a, __b);
}
-static vector int __ATTRS_o_ai vec_vmaxsw(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vmaxsw(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vmaxsw((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_vmaxsw(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vmaxsw(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vmaxsw(__a, (vector int)__b);
}
/* vec_vmaxuw */
-static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vmaxuw(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vmaxuw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vmaxuw(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vmaxuw(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b);
}
/* vec_vmaxfp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vmaxfp(vector float __a, vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvmaxsp(__a, __b);
@@ -3190,39 +3277,39 @@ vec_vmaxfp(vector float __a, vector float __b) {
/* vec_mergeh */
-static vector signed char __ATTRS_o_ai vec_mergeh(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_mergeh(vector signed char __a, vector signed char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
0x06, 0x16, 0x07, 0x17));
}
-static vector unsigned char __ATTRS_o_ai vec_mergeh(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_mergeh(vector unsigned char __a, vector unsigned char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
0x06, 0x16, 0x07, 0x17));
}
-static vector bool char __ATTRS_o_ai vec_mergeh(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_mergeh(vector bool char __a, vector bool char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
0x06, 0x16, 0x07, 0x17));
}
-static vector short __ATTRS_o_ai vec_mergeh(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_mergeh(vector short __a,
+ vector short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
0x06, 0x07, 0x16, 0x17));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_mergeh(vector unsigned short __a, vector unsigned short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
@@ -3230,47 +3317,48 @@ vec_mergeh(vector unsigned short __a, vector unsigned short __b) {
0x06, 0x07, 0x16, 0x17));
}
-static vector bool short __ATTRS_o_ai vec_mergeh(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_mergeh(vector bool short __a, vector bool short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
0x06, 0x07, 0x16, 0x17));
}
-static vector pixel __ATTRS_o_ai vec_mergeh(vector pixel __a,
- vector pixel __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_mergeh(vector pixel __a,
+ vector pixel __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
0x06, 0x07, 0x16, 0x17));
}
-static vector int __ATTRS_o_ai vec_mergeh(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_mergeh(vector int __a,
+ vector int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
0x14, 0x15, 0x16, 0x17));
}
-static vector unsigned int __ATTRS_o_ai vec_mergeh(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mergeh(vector unsigned int __a, vector unsigned int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
0x14, 0x15, 0x16, 0x17));
}
-static vector bool int __ATTRS_o_ai vec_mergeh(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_mergeh(vector bool int __a,
+ vector bool int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
0x14, 0x15, 0x16, 0x17));
}
-static vector float __ATTRS_o_ai vec_mergeh(vector float __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_mergeh(vector float __a,
+ vector float __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
@@ -3278,91 +3366,81 @@ static vector float __ATTRS_o_ai vec_mergeh(vector float __a,
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mergeh(vector signed long long __a, vector signed long long __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mergeh(vector signed long long __a, vector bool long long __b) {
return vec_perm(__a, (vector signed long long)__b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mergeh(vector bool long long __a, vector signed long long __b) {
return vec_perm((vector signed long long)__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mergeh(vector unsigned long long __a, vector unsigned long long __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mergeh(vector unsigned long long __a, vector bool long long __b) {
return vec_perm(__a, (vector unsigned long long)__b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mergeh(vector bool long long __a, vector unsigned long long __b) {
return vec_perm((vector unsigned long long)__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_mergeh(vector bool long long __a, vector bool long long __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
- 0x14, 0x15, 0x16, 0x17));
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17));
}
-static vector double __ATTRS_o_ai vec_mergeh(vector double __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_mergeh(vector double __a,
+ vector double __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector double __ATTRS_o_ai vec_mergeh(vector double __a,
- vector bool long long __b) {
+static __inline__ vector double __ATTRS_o_ai
+vec_mergeh(vector double __a, vector bool long long __b) {
return vec_perm(__a, (vector double)__b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
-static vector double __ATTRS_o_ai vec_mergeh(vector bool long long __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai
+vec_mergeh(vector bool long long __a, vector double __b) {
return vec_perm((vector double)__a, __b,
- (vector unsigned char)(0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x10, 0x11, 0x12, 0x13,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+ 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17));
}
#endif
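The vec_mergeh byte form interleaves the high halves of its operands, which makes zero-extension a one-instruction unpack on big-endian targets: merging a zero vector in as the first operand places 0x00 in every high byte. A sketch (widen_hi_u8 is a hypothetical helper; on little-endian the operand roles swap):

#include <altivec.h>

/* Big-endian: zero-extend the high eight bytes of v to halfwords. */
static vector unsigned short widen_hi_u8(vector unsigned char v) {
  vector unsigned char zero = vec_splat_u8(0);
  return (vector unsigned short)vec_mergeh(zero, v);
}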
@@ -3371,24 +3449,24 @@ static vector double __ATTRS_o_ai vec_mergeh(vector bool long long __a,
#define __builtin_altivec_vmrghb vec_vmrghb
-static vector signed char __ATTRS_o_ai vec_vmrghb(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vmrghb(vector signed char __a, vector signed char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
0x06, 0x16, 0x07, 0x17));
}
-static vector unsigned char __ATTRS_o_ai vec_vmrghb(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vmrghb(vector unsigned char __a, vector unsigned char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
0x06, 0x16, 0x07, 0x17));
}
-static vector bool char __ATTRS_o_ai vec_vmrghb(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vmrghb(vector bool char __a, vector bool char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
@@ -3399,15 +3477,15 @@ static vector bool char __ATTRS_o_ai vec_vmrghb(vector bool char __a,
#define __builtin_altivec_vmrghh vec_vmrghh
-static vector short __ATTRS_o_ai vec_vmrghh(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vmrghh(vector short __a,
+ vector short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
0x06, 0x07, 0x16, 0x17));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vmrghh(vector unsigned short __a, vector unsigned short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
@@ -3415,16 +3493,16 @@ vec_vmrghh(vector unsigned short __a, vector unsigned short __b) {
0x06, 0x07, 0x16, 0x17));
}
-static vector bool short __ATTRS_o_ai vec_vmrghh(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vmrghh(vector bool short __a, vector bool short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
0x06, 0x07, 0x16, 0x17));
}
-static vector pixel __ATTRS_o_ai vec_vmrghh(vector pixel __a,
- vector pixel __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vmrghh(vector pixel __a,
+ vector pixel __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
@@ -3435,31 +3513,32 @@ static vector pixel __ATTRS_o_ai vec_vmrghh(vector pixel __a,
#define __builtin_altivec_vmrghw vec_vmrghw
-static vector int __ATTRS_o_ai vec_vmrghw(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vmrghw(vector int __a,
+ vector int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
0x14, 0x15, 0x16, 0x17));
}
-static vector unsigned int __ATTRS_o_ai vec_vmrghw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vmrghw(vector unsigned int __a, vector unsigned int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
0x14, 0x15, 0x16, 0x17));
}
-static vector bool int __ATTRS_o_ai vec_vmrghw(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vmrghw(vector bool int __a,
+ vector bool int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
0x14, 0x15, 0x16, 0x17));
}
-static vector float __ATTRS_o_ai vec_vmrghw(vector float __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vmrghw(vector float __a,
+ vector float __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
@@ -3468,39 +3547,39 @@ static vector float __ATTRS_o_ai vec_vmrghw(vector float __a,
/* vec_mergel */
-static vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_mergel(vector signed char __a, vector signed char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
0x0E, 0x1E, 0x0F, 0x1F));
}
-static vector unsigned char __ATTRS_o_ai vec_mergel(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_mergel(vector unsigned char __a, vector unsigned char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
0x0E, 0x1E, 0x0F, 0x1F));
}
-static vector bool char __ATTRS_o_ai vec_mergel(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_mergel(vector bool char __a, vector bool char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
0x0E, 0x1E, 0x0F, 0x1F));
}
-static vector short __ATTRS_o_ai vec_mergel(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_mergel(vector short __a,
+ vector short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_mergel(vector unsigned short __a, vector unsigned short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
@@ -3508,47 +3587,48 @@ vec_mergel(vector unsigned short __a, vector unsigned short __b) {
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector bool short __ATTRS_o_ai vec_mergel(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_mergel(vector bool short __a, vector bool short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector pixel __ATTRS_o_ai vec_mergel(vector pixel __a,
- vector pixel __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_mergel(vector pixel __a,
+ vector pixel __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector int __ATTRS_o_ai vec_mergel(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_mergel(vector int __a,
+ vector int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector unsigned int __ATTRS_o_ai vec_mergel(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mergel(vector unsigned int __a, vector unsigned int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector bool int __ATTRS_o_ai vec_mergel(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_mergel(vector bool int __a,
+ vector bool int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector float __ATTRS_o_ai vec_mergel(vector float __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_mergel(vector float __a,
+ vector float __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
@@ -3556,84 +3636,74 @@ static vector float __ATTRS_o_ai vec_mergel(vector float __a,
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mergel(vector signed long long __a, vector signed long long __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mergel(vector signed long long __a, vector bool long long __b) {
return vec_perm(__a, (vector signed long long)__b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mergel(vector bool long long __a, vector signed long long __b) {
return vec_perm((vector signed long long)__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mergel(vector unsigned long long __a, vector unsigned long long __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mergel(vector unsigned long long __a, vector bool long long __b) {
return vec_perm(__a, (vector unsigned long long)__b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mergel(vector bool long long __a, vector unsigned long long __b) {
return vec_perm((vector unsigned long long)__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_mergel(vector bool long long __a, vector bool long long __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector double __ATTRS_o_ai
-vec_mergel(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_mergel(vector double __a,
+ vector double __b) {
return vec_perm(__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector double __ATTRS_o_ai
+static __inline__ vector double __ATTRS_o_ai
vec_mergel(vector double __a, vector bool long long __b) {
return vec_perm(__a, (vector double)__b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector double __ATTRS_o_ai
+static __inline__ vector double __ATTRS_o_ai
vec_mergel(vector bool long long __a, vector double __b) {
return vec_perm((vector double)__a, __b,
- (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x18, 0X19, 0x1A, 0x1B,
+ (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+                                        0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F));
}
#endif
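vec_mergeh and vec_mergel together implement a full zip: for word elements, mergeh produces {a0, b0, a1, b1} and mergel {a2, b2, a3, b3}, e.g. for interleaving separate real and imaginary planes into complex pairs (zip_complex is a hypothetical helper):

#include <altivec.h>

/* Interleave planar re/im data into {re, im} pairs. */
static void zip_complex(vector float re, vector float im,
                        vector float *lo_pairs, vector float *hi_pairs) {
  *lo_pairs = vec_mergeh(re, im); /* { re0, im0, re1, im1 } */
  *hi_pairs = vec_mergel(re, im); /* { re2, im2, re3, im3 } */
}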
@@ -3642,24 +3712,24 @@ vec_mergel(vector bool long long __a, vector double __b) {
#define __builtin_altivec_vmrglb vec_vmrglb
-static vector signed char __ATTRS_o_ai vec_vmrglb(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vmrglb(vector signed char __a, vector signed char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
0x0E, 0x1E, 0x0F, 0x1F));
}
-static vector unsigned char __ATTRS_o_ai vec_vmrglb(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vmrglb(vector unsigned char __a, vector unsigned char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
0x0E, 0x1E, 0x0F, 0x1F));
}
-static vector bool char __ATTRS_o_ai vec_vmrglb(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vmrglb(vector bool char __a, vector bool char __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
@@ -3670,15 +3740,15 @@ static vector bool char __ATTRS_o_ai vec_vmrglb(vector bool char __a,
#define __builtin_altivec_vmrglh vec_vmrglh
-static vector short __ATTRS_o_ai vec_vmrglh(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vmrglh(vector short __a,
+ vector short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vmrglh(vector unsigned short __a, vector unsigned short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
@@ -3686,16 +3756,16 @@ vec_vmrglh(vector unsigned short __a, vector unsigned short __b) {
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector bool short __ATTRS_o_ai vec_vmrglh(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vmrglh(vector bool short __a, vector bool short __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
0x0E, 0x0F, 0x1E, 0x1F));
}
-static vector pixel __ATTRS_o_ai vec_vmrglh(vector pixel __a,
- vector pixel __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vmrglh(vector pixel __a,
+ vector pixel __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
@@ -3706,215 +3776,226 @@ static vector pixel __ATTRS_o_ai vec_vmrglh(vector pixel __a,
#define __builtin_altivec_vmrglw vec_vmrglw
-static vector int __ATTRS_o_ai vec_vmrglw(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vmrglw(vector int __a,
+ vector int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector unsigned int __ATTRS_o_ai vec_vmrglw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vmrglw(vector unsigned int __a, vector unsigned int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector bool int __ATTRS_o_ai vec_vmrglw(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vmrglw(vector bool int __a,
+ vector bool int __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector float __ATTRS_o_ai vec_vmrglw(vector float __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vmrglw(vector float __a,
+ vector float __b) {
return vec_perm(__a, __b,
(vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
0x1C, 0x1D, 0x1E, 0x1F));
}
-
#ifdef __POWER8_VECTOR__
/* vec_mergee */
-static vector bool int __ATTRS_o_ai
-vec_mergee(vector bool int __a, vector bool int __b) {
- return vec_perm(__a, __b, (vector unsigned char)
- (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
- 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
+static __inline__ vector bool int __ATTRS_o_ai vec_mergee(vector bool int __a,
+ vector bool int __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B));
}
-static vector signed int __ATTRS_o_ai
+static __inline__ vector signed int __ATTRS_o_ai
vec_mergee(vector signed int __a, vector signed int __b) {
- return vec_perm(__a, __b, (vector unsigned char)
- (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
- 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B));
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_mergee(vector unsigned int __a, vector unsigned int __b) {
- return vec_perm(__a, __b, (vector unsigned char)
- (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
- 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x08, 0x09, 0x0A, 0x0B,
+ 0x18, 0x19, 0x1A, 0x1B));
}
/* vec_mergeo */
-static vector bool int __ATTRS_o_ai
-vec_mergeo(vector bool int __a, vector bool int __b) {
- return vec_perm(__a, __b, (vector unsigned char)
- (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
- 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static __inline__ vector bool int __ATTRS_o_ai vec_mergeo(vector bool int __a,
+ vector bool int __b) {
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector signed int __ATTRS_o_ai
+static __inline__ vector signed int __ATTRS_o_ai
vec_mergeo(vector signed int __a, vector signed int __b) {
- return vec_perm(__a, __b, (vector unsigned char)
- (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
- 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F));
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_mergeo(vector unsigned int __a, vector unsigned int __b) {
- return vec_perm(__a, __b, (vector unsigned char)
- (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
- 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+ return vec_perm(__a, __b,
+ (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x1C, 0x1D, 0x1E, 0x1F));
}
#endif
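Per the permute masks above, vec_mergee picks the even-numbered word of each operand in turn and vec_mergeo the odd-numbered one: mergee(a, b) = {a0, b0, a2, b2} and mergeo(a, b) = {a1, b1, a3, b3}. A minimal POWER8 sketch (split_even_odd is a hypothetical helper):

#include <altivec.h>

#ifdef __POWER8_VECTOR__
static void split_even_odd(vector signed int a, vector signed int b,
                           vector signed int *even, vector signed int *odd) {
  *even = vec_mergee(a, b); /* { a0, b0, a2, b2 } */
  *odd  = vec_mergeo(a, b); /* { a1, b1, a3, b3 } */
}
#endif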
/* vec_mfvscr */
-static vector unsigned short __attribute__((__always_inline__))
+static __inline__ vector unsigned short __attribute__((__always_inline__))
vec_mfvscr(void) {
return __builtin_altivec_mfvscr();
}
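vec_mfvscr reads the 32-bit VSCR, whose least-significant bit is the sticky saturation (SAT) flag set by saturating instructions such as vmhaddshs. The 32 bits land in the low-order doubleword of the result, so which halfword holds SAT depends on element order; the index below assumes big-endian (saturation_occurred is a hypothetical helper):

#include <altivec.h>

/* Poll the sticky SAT bit after a run of saturating operations. */
static int saturation_occurred(void) {
  vector unsigned short vscr = vec_mfvscr();
  return vscr[7] & 1; /* big-endian: element 7 is the low halfword */
}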
/* vec_min */
-static vector signed char __ATTRS_o_ai vec_min(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_min(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vminsb(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_min(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_min(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vminsb((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_min(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_min(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vminsb(__a, (vector signed char)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_min(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_min(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vminub(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_min(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_min(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vminub((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_min(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_min(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vminub(__a, (vector unsigned char)__b);
}
-static vector short __ATTRS_o_ai vec_min(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_min(vector short __a,
+ vector short __b) {
return __builtin_altivec_vminsh(__a, __b);
}
-static vector short __ATTRS_o_ai vec_min(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_min(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vminsh((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_min(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_min(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vminsh(__a, (vector short)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_min(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vminuh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_min(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_min(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vminuh((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_min(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vminuh(__a, (vector unsigned short)__b);
}
-static vector int __ATTRS_o_ai vec_min(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_min(vector int __a,
+ vector int __b) {
return __builtin_altivec_vminsw(__a, __b);
}
-static vector int __ATTRS_o_ai vec_min(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_min(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vminsw((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_min(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_min(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vminsw(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_min(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vminuw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_min(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_min(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vminuw((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_min(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vminuw(__a, (vector unsigned int)__b);
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_min(vector signed long long __a, vector signed long long __b) {
return __builtin_altivec_vminsd(__a, __b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_min(vector bool long long __a, vector signed long long __b) {
return __builtin_altivec_vminsd((vector signed long long)__a, __b);
}
-static vector signed long long __ATTRS_o_ai vec_min(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_min(vector signed long long __a, vector bool long long __b) {
return __builtin_altivec_vminsd(__a, (vector signed long long)__b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_min(vector unsigned long long __a, vector unsigned long long __b) {
return __builtin_altivec_vminud(__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_min(vector bool long long __a, vector unsigned long long __b) {
return __builtin_altivec_vminud((vector unsigned long long)__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_min(vector unsigned long long __a, vector bool long long __b) {
return __builtin_altivec_vminud(__a, (vector unsigned long long)__b);
}
#endif
-static vector float __ATTRS_o_ai vec_min(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_min(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvminsp(__a, __b);
#else
@@ -3923,114 +4004,117 @@ static vector float __ATTRS_o_ai vec_min(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_min(vector double __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_min(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvmindp(__a, __b);
}
#endif
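
For review context, every vec_min overload above is a plain elementwise minimum; the bool-typed overloads just cast through. A minimal sketch under the same -maltivec assumption (demo name is made up):

#include <altivec.h>

vector signed int min_demo(void) {
  vector signed int a = {1, -5, 7, 0};
  vector signed int b = {2, -9, 3, 0};
  return vec_min(a, b); /* expected: {1, -9, 3, 0} */
}
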
/* vec_vminsb */
-static vector signed char __ATTRS_o_ai vec_vminsb(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vminsb(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vminsb(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vminsb(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vminsb(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vminsb((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vminsb(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vminsb(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vminsb(__a, (vector signed char)__b);
}
/* vec_vminub */
-static vector unsigned char __ATTRS_o_ai vec_vminub(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vminub(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vminub(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vminub(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vminub(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vminub((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vminub(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vminub(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vminub(__a, (vector unsigned char)__b);
}
/* vec_vminsh */
-static vector short __ATTRS_o_ai vec_vminsh(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vminsh(vector short __a,
+ vector short __b) {
return __builtin_altivec_vminsh(__a, __b);
}
-static vector short __ATTRS_o_ai vec_vminsh(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vminsh(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vminsh((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_vminsh(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vminsh(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vminsh(__a, (vector short)__b);
}
/* vec_vminuh */
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vminuh(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vminuh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vminuh(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vminuh((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vminuh(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vminuh(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vminuh(__a, (vector unsigned short)__b);
}
/* vec_vminsw */
-static vector int __ATTRS_o_ai vec_vminsw(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vminsw(vector int __a,
+ vector int __b) {
return __builtin_altivec_vminsw(__a, __b);
}
-static vector int __ATTRS_o_ai vec_vminsw(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vminsw(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vminsw((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_vminsw(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vminsw(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vminsw(__a, (vector int)__b);
}
/* vec_vminuw */
-static vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vminuw(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vminuw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vminuw(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vminuw(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vminuw((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vminuw(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vminuw(__a, (vector unsigned int)__b);
}
/* vec_vminfp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vminfp(vector float __a, vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvminsp(__a, __b);
@@ -4043,49 +4127,48 @@ vec_vminfp(vector float __a, vector float __b) {
#define __builtin_altivec_vmladduhm vec_mladd
-static vector short __ATTRS_o_ai vec_mladd(vector short __a, vector short __b,
- vector short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_mladd(vector short __a,
+ vector short __b,
+ vector short __c) {
return __a * __b + __c;
}
-static vector short __ATTRS_o_ai vec_mladd(vector short __a,
- vector unsigned short __b,
- vector unsigned short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_mladd(
+ vector short __a, vector unsigned short __b, vector unsigned short __c) {
return __a * (vector short)__b + (vector short)__c;
}
-static vector short __ATTRS_o_ai vec_mladd(vector unsigned short __a,
- vector short __b, vector short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_mladd(vector unsigned short __a,
+ vector short __b,
+ vector short __c) {
return (vector short)__a * __b + __c;
}
-static vector unsigned short __ATTRS_o_ai vec_mladd(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned short __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_mladd(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned short __c) {
return __a * __b + __c;
}
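
The vec_mladd overloads compute a modular (low 16 bits) multiply-add per halfword, exactly as the `__a * __b + __c` bodies show. A small sketch, same assumptions as above:

#include <altivec.h>

vector short mladd_demo(void) {
  vector short a = {2, 3, 4, 5, 6, 7, 8, 9};
  vector short b = {10, 10, 10, 10, 10, 10, 10, 10};
  vector short c = {1, 1, 1, 1, 1, 1, 1, 1};
  return vec_mladd(a, b, c); /* a*b + c: {21, 31, 41, 51, 61, 71, 81, 91} */
}
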
/* vec_vmladduhm */
-static vector short __ATTRS_o_ai vec_vmladduhm(vector short __a,
- vector short __b,
- vector short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_vmladduhm(vector short __a,
+ vector short __b,
+ vector short __c) {
return __a * __b + __c;
}
-static vector short __ATTRS_o_ai vec_vmladduhm(vector short __a,
- vector unsigned short __b,
- vector unsigned short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_vmladduhm(
+ vector short __a, vector unsigned short __b, vector unsigned short __c) {
return __a * (vector short)__b + (vector short)__c;
}
-static vector short __ATTRS_o_ai vec_vmladduhm(vector unsigned short __a,
- vector short __b,
- vector short __c) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vmladduhm(vector unsigned short __a, vector short __b, vector short __c) {
return (vector short)__a * __b + __c;
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vmladduhm(vector unsigned short __a, vector unsigned short __b,
vector unsigned short __c) {
return __a * __b + __c;
@@ -4093,53 +4176,54 @@ vec_vmladduhm(vector unsigned short __a, vector unsigned short __b,
/* vec_mradds */
-static vector short __attribute__((__always_inline__))
+static __inline__ vector short __attribute__((__always_inline__))
vec_mradds(vector short __a, vector short __b, vector short __c) {
return __builtin_altivec_vmhraddshs(__a, __b, __c);
}
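
vec_mradds maps to vmhraddshs: roughly ((a*b + 0x4000) >> 15) + c with signed saturation, i.e. a Q15 fractional multiply-round-add. A hedged sketch (my reading of the instruction, not text from the patch):

#include <altivec.h>

vector short mradds_demo(void) {
  vector short half = {0x4000, 0x4000, 0x4000, 0x4000,
                       0x4000, 0x4000, 0x4000, 0x4000}; /* 0.5 in Q15 */
  vector short zero = {0, 0, 0, 0, 0, 0, 0, 0};
  return vec_mradds(half, half, zero); /* expected: 0x2000 (0.25) per lane */
}
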
/* vec_vmhraddshs */
-static vector short __attribute__((__always_inline__))
+static __inline__ vector short __attribute__((__always_inline__))
vec_vmhraddshs(vector short __a, vector short __b, vector short __c) {
return __builtin_altivec_vmhraddshs(__a, __b, __c);
}
/* vec_msum */
-static vector int __ATTRS_o_ai vec_msum(vector signed char __a,
- vector unsigned char __b,
- vector int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_msum(vector signed char __a,
+ vector unsigned char __b,
+ vector int __c) {
return __builtin_altivec_vmsummbm(__a, __b, __c);
}
-static vector unsigned int __ATTRS_o_ai vec_msum(vector unsigned char __a,
- vector unsigned char __b,
- vector unsigned int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_msum(vector unsigned char __a, vector unsigned char __b,
+ vector unsigned int __c) {
return __builtin_altivec_vmsumubm(__a, __b, __c);
}
-static vector int __ATTRS_o_ai vec_msum(vector short __a, vector short __b,
- vector int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_msum(vector short __a,
+ vector short __b,
+ vector int __c) {
return __builtin_altivec_vmsumshm(__a, __b, __c);
}
-static vector unsigned int __ATTRS_o_ai vec_msum(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_msum(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned int __c) {
return __builtin_altivec_vmsumuhm(__a, __b, __c);
}
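
Each vec_msum overload folds the widening products (four byte products, or two halfword products) that fall in a 32-bit lane into that lane, then adds the accumulator __c. Sketch, same assumptions as above:

#include <altivec.h>

vector int msum_demo(void) {
  vector signed char a = {1, 1, 1, 1, 2, 2, 2, 2,
                          3, 3, 3, 3, 4, 4, 4, 4};
  vector unsigned char b = {1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1};
  vector int c = {0, 0, 0, 0};
  return vec_msum(a, b, c); /* four byte products per lane: {4, 8, 12, 16} */
}
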
/* vec_vmsummbm */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vmsummbm(vector signed char __a, vector unsigned char __b, vector int __c) {
return __builtin_altivec_vmsummbm(__a, __b, __c);
}
/* vec_vmsumubm */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vmsumubm(vector unsigned char __a, vector unsigned char __b,
vector unsigned int __c) {
return __builtin_altivec_vmsumubm(__a, __b, __c);
@@ -4147,14 +4231,14 @@ vec_vmsumubm(vector unsigned char __a, vector unsigned char __b,
/* vec_vmsumshm */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vmsumshm(vector short __a, vector short __b, vector int __c) {
return __builtin_altivec_vmsumshm(__a, __b, __c);
}
/* vec_vmsumuhm */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vmsumuhm(vector unsigned short __a, vector unsigned short __b,
vector unsigned int __c) {
return __builtin_altivec_vmsumuhm(__a, __b, __c);
@@ -4162,27 +4246,28 @@ vec_vmsumuhm(vector unsigned short __a, vector unsigned short __b,
/* vec_msums */
-static vector int __ATTRS_o_ai vec_msums(vector short __a, vector short __b,
- vector int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_msums(vector short __a,
+ vector short __b,
+ vector int __c) {
return __builtin_altivec_vmsumshs(__a, __b, __c);
}
-static vector unsigned int __ATTRS_o_ai vec_msums(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_msums(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned int __c) {
return __builtin_altivec_vmsumuhs(__a, __b, __c);
}
/* vec_vmsumshs */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vmsumshs(vector short __a, vector short __b, vector int __c) {
return __builtin_altivec_vmsumshs(__a, __b, __c);
}
/* vec_vmsumuhs */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vmsumuhs(vector unsigned short __a, vector unsigned short __b,
vector unsigned int __c) {
return __builtin_altivec_vmsumuhs(__a, __b, __c);
@@ -4190,47 +4275,47 @@ vec_vmsumuhs(vector unsigned short __a, vector unsigned short __b,
/* vec_mtvscr */
-static void __ATTRS_o_ai vec_mtvscr(vector signed char __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector signed char __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector unsigned char __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector unsigned char __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector bool char __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector bool char __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector short __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector short __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector unsigned short __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector unsigned short __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector bool short __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector bool short __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector pixel __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector pixel __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector int __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector int __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector unsigned int __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector unsigned int __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector bool int __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector bool int __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
-static void __ATTRS_o_ai vec_mtvscr(vector float __a) {
+static __inline__ void __ATTRS_o_ai vec_mtvscr(vector float __a) {
__builtin_altivec_mtvscr((vector int)__a);
}
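
All vec_mtvscr overloads funnel into the same builtin after a cast, overwriting the whole VSCR. One plausible use, sketched (illustrative, not from the patch): writing zero bits clears the sticky SAT bit:

#include <altivec.h>

void clear_vscr(void) {
  vector unsigned int zero = {0, 0, 0, 0};
  vec_mtvscr(zero); /* overwrites VSCR, clearing SAT (and NJ) */
}
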
@@ -4240,55 +4325,56 @@ static void __ATTRS_o_ai vec_mtvscr(vector float __a) {
elements separately, then truncating the results and moving to the
result vector.
*/
-static vector signed char __ATTRS_o_ai vec_mul(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_mul(vector signed char __a, vector signed char __b) {
return __a * __b;
}
-static vector unsigned char __ATTRS_o_ai vec_mul(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_mul(vector unsigned char __a, vector unsigned char __b) {
return __a * __b;
}
-static vector signed short __ATTRS_o_ai vec_mul(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_mul(vector signed short __a, vector signed short __b) {
return __a * __b;
}
-static vector unsigned short __ATTRS_o_ai vec_mul(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_mul(vector unsigned short __a, vector unsigned short __b) {
return __a * __b;
}
-static vector signed int __ATTRS_o_ai vec_mul(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_mul(vector signed int __a, vector signed int __b) {
return __a * __b;
}
-static vector unsigned int __ATTRS_o_ai vec_mul(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mul(vector unsigned int __a, vector unsigned int __b) {
return __a * __b;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_mul(vector signed long long __a, vector signed long long __b) {
return __a * __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mul(vector unsigned long long __a, vector unsigned long long __b) {
return __a * __b;
}
#endif
-static vector float __ATTRS_o_ai vec_mul(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_mul(vector float __a,
+ vector float __b) {
return __a * __b;
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_mul(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_mul(vector double __a,
+ vector double __b) {
return __a * __b;
}
#endif
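
As the comment fragment above says, vec_mul is a plain truncating elementwise multiply, expressed directly as `__a * __b`. Sketch:

#include <altivec.h>

vector signed int mul_demo(void) {
  vector signed int a = {1, 2, 3, 4};
  vector signed int b = {5, 6, 7, 8};
  return vec_mul(a, b); /* expected: {5, 12, 21, 32} */
}
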
@@ -4298,8 +4384,8 @@ vec_mul(vector double __a, vector double __b) {
/* vec_mule */
-static vector short __ATTRS_o_ai vec_mule(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_mule(vector signed char __a,
+ vector signed char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulosb(__a, __b);
#else
@@ -4307,8 +4393,8 @@ static vector short __ATTRS_o_ai vec_mule(vector signed char __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_mule(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_mule(vector unsigned char __a, vector unsigned char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuloub(__a, __b);
#else
@@ -4316,7 +4402,8 @@ static vector unsigned short __ATTRS_o_ai vec_mule(vector unsigned char __a,
#endif
}
-static vector int __ATTRS_o_ai vec_mule(vector short __a, vector short __b) {
+static __inline__ vector int __ATTRS_o_ai vec_mule(vector short __a,
+ vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulosh(__a, __b);
#else
@@ -4324,8 +4411,8 @@ static vector int __ATTRS_o_ai vec_mule(vector short __a, vector short __b) {
#endif
}
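
vec_mule is a widening multiply of the even-indexed element pairs; the __LITTLE_ENDIAN__ branches above swap to the odd-form builtins so that "even" keeps meaning elements 0, 2, 4, ... on both endiannesses. Sketch:

#include <altivec.h>

vector int mule_demo(void) {
  vector short a = {1, 2, 3, 4, 5, 6, 7, 8};
  vector short b = {10, 10, 10, 10, 10, 10, 10, 10};
  return vec_mule(a, b); /* even pairs widened: {10, 30, 50, 70} */
}
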
-static vector unsigned int __ATTRS_o_ai vec_mule(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mule(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulouh(__a, __b);
#else
@@ -4334,8 +4421,8 @@ static vector unsigned int __ATTRS_o_ai vec_mule(vector unsigned short __a,
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai vec_mule(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mule(vector signed int __a, vector signed int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulosw(__a, __b);
#else
@@ -4343,7 +4430,7 @@ static vector signed long long __ATTRS_o_ai vec_mule(vector signed int __a,
#endif
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mule(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulouw(__a, __b);
@@ -4355,7 +4442,7 @@ vec_mule(vector unsigned int __a, vector unsigned int __b) {
/* vec_vmulesb */
-static vector short __attribute__((__always_inline__))
+static __inline__ vector short __attribute__((__always_inline__))
vec_vmulesb(vector signed char __a, vector signed char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulosb(__a, __b);
@@ -4366,7 +4453,7 @@ vec_vmulesb(vector signed char __a, vector signed char __b) {
/* vec_vmuleub */
-static vector unsigned short __attribute__((__always_inline__))
+static __inline__ vector unsigned short __attribute__((__always_inline__))
vec_vmuleub(vector unsigned char __a, vector unsigned char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuloub(__a, __b);
@@ -4377,7 +4464,7 @@ vec_vmuleub(vector unsigned char __a, vector unsigned char __b) {
/* vec_vmulesh */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vmulesh(vector short __a, vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulosh(__a, __b);
@@ -4388,7 +4475,7 @@ vec_vmulesh(vector short __a, vector short __b) {
/* vec_vmuleuh */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulouh(__a, __b);
@@ -4399,8 +4486,8 @@ vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) {
/* vec_mulo */
-static vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
+ vector signed char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulesb(__a, __b);
#else
@@ -4408,8 +4495,8 @@ static vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_mulo(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_mulo(vector unsigned char __a, vector unsigned char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuleub(__a, __b);
#else
@@ -4417,7 +4504,8 @@ static vector unsigned short __ATTRS_o_ai vec_mulo(vector unsigned char __a,
#endif
}
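
vec_mulo is the odd-indexed counterpart, with the same endian correction applied in the opposite direction. Sketch:

#include <altivec.h>

vector int mulo_demo(void) {
  vector short a = {1, 2, 3, 4, 5, 6, 7, 8};
  vector short b = {10, 10, 10, 10, 10, 10, 10, 10};
  return vec_mulo(a, b); /* odd pairs widened: {20, 40, 60, 80} */
}
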
-static vector int __ATTRS_o_ai vec_mulo(vector short __a, vector short __b) {
+static __inline__ vector int __ATTRS_o_ai vec_mulo(vector short __a,
+ vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulesh(__a, __b);
#else
@@ -4425,8 +4513,8 @@ static vector int __ATTRS_o_ai vec_mulo(vector short __a, vector short __b) {
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_mulo(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mulo(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuleuh(__a, __b);
#else
@@ -4435,8 +4523,8 @@ static vector unsigned int __ATTRS_o_ai vec_mulo(vector unsigned short __a,
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai vec_mulo(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mulo(vector signed int __a, vector signed int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulesw(__a, __b);
#else
@@ -4444,7 +4532,7 @@ static vector signed long long __ATTRS_o_ai vec_mulo(vector signed int __a,
#endif
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_mulo(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuleuw(__a, __b);
@@ -4456,7 +4544,7 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) {
/* vec_vmulosb */
-static vector short __attribute__((__always_inline__))
+static __inline__ vector short __attribute__((__always_inline__))
vec_vmulosb(vector signed char __a, vector signed char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulesb(__a, __b);
@@ -4467,7 +4555,7 @@ vec_vmulosb(vector signed char __a, vector signed char __b) {
/* vec_vmuloub */
-static vector unsigned short __attribute__((__always_inline__))
+static __inline__ vector unsigned short __attribute__((__always_inline__))
vec_vmuloub(vector unsigned char __a, vector unsigned char __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuleub(__a, __b);
@@ -4478,7 +4566,7 @@ vec_vmuloub(vector unsigned char __a, vector unsigned char __b) {
/* vec_vmulosh */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vmulosh(vector short __a, vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmulesh(__a, __b);
@@ -4489,7 +4577,7 @@ vec_vmulosh(vector short __a, vector short __b) {
/* vec_vmulouh */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vmulouh(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vmuleuh(__a, __b);
@@ -4501,140 +4589,137 @@ vec_vmulouh(vector unsigned short __a, vector unsigned short __b) {
/* vec_nand */
#ifdef __POWER8_VECTOR__
-static vector signed char __ATTRS_o_ai vec_nand(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_nand(vector signed char __a, vector signed char __b) {
return ~(__a & __b);
}
-static vector signed char __ATTRS_o_ai vec_nand(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_nand(vector signed char __a, vector bool char __b) {
return ~(__a & __b);
}
-static vector signed char __ATTRS_o_ai vec_nand(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_nand(vector bool char __a, vector signed char __b) {
return ~(__a & __b);
}
-static vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_nand(vector unsigned char __a, vector unsigned char __b) {
return ~(__a & __b);
}
-static vector unsigned char __ATTRS_o_ai vec_nand(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_nand(vector unsigned char __a, vector bool char __b) {
return ~(__a & __b);
-
}
-static vector unsigned char __ATTRS_o_ai vec_nand(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_nand(vector bool char __a, vector unsigned char __b) {
return ~(__a & __b);
}
-static vector bool char __ATTRS_o_ai vec_nand(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_nand(vector bool char __a,
+ vector bool char __b) {
return ~(__a & __b);
}
-static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_nand(vector signed short __a, vector signed short __b) {
return ~(__a & __b);
}
-static vector signed short __ATTRS_o_ai vec_nand(vector signed short __a,
- vector bool short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_nand(vector signed short __a, vector bool short __b) {
return ~(__a & __b);
}
-static vector signed short __ATTRS_o_ai vec_nand(vector bool short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_nand(vector bool short __a, vector signed short __b) {
return ~(__a & __b);
}
-static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_nand(vector unsigned short __a, vector unsigned short __b) {
return ~(__a & __b);
}
-static vector unsigned short __ATTRS_o_ai vec_nand(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_nand(vector unsigned short __a, vector bool short __b) {
return ~(__a & __b);
-
}
-static vector bool short __ATTRS_o_ai vec_nand(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_nand(vector bool short __a, vector bool short __b) {
return ~(__a & __b);
-
}
-static vector signed int __ATTRS_o_ai vec_nand(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_nand(vector signed int __a, vector signed int __b) {
return ~(__a & __b);
}
-static vector signed int __ATTRS_o_ai vec_nand(vector signed int __a,
- vector bool int __b) {
+static __inline__ vector signed int __ATTRS_o_ai vec_nand(vector signed int __a,
+ vector bool int __b) {
return ~(__a & __b);
}
-static vector signed int __ATTRS_o_ai vec_nand(vector bool int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_nand(vector bool int __a, vector signed int __b) {
return ~(__a & __b);
}
-static vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_nand(vector unsigned int __a, vector unsigned int __b) {
return ~(__a & __b);
}
-static vector unsigned int __ATTRS_o_ai vec_nand(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_nand(vector unsigned int __a, vector bool int __b) {
return ~(__a & __b);
}
-static vector unsigned int __ATTRS_o_ai vec_nand(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_nand(vector bool int __a, vector unsigned int __b) {
return ~(__a & __b);
}
-static vector bool int __ATTRS_o_ai vec_nand(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_nand(vector bool int __a,
+ vector bool int __b) {
return ~(__a & __b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_nand(vector signed long long __a, vector signed long long __b) {
return ~(__a & __b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_nand(vector signed long long __a, vector bool long long __b) {
return ~(__a & __b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_nand(vector bool long long __a, vector signed long long __b) {
return ~(__a & __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_nand(vector unsigned long long __a, vector unsigned long long __b) {
return ~(__a & __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_nand(vector unsigned long long __a, vector bool long long __b) {
return ~(__a & __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_nand(vector bool long long __a, vector unsigned long long __b) {
return ~(__a & __b);
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_nand(vector bool long long __a, vector bool long long __b) {
return ~(__a & __b);
}
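
All vec_nand overloads are the literal `~(__a & __b)`, available on POWER8 and up per the surrounding #ifdef. Sketch, additionally assuming -mcpu=power8:

#include <altivec.h>

vector unsigned int nand_demo(void) {
  vector unsigned int a = {0xFF00FF00u, 0u, ~0u, 1u};
  vector unsigned int b = {0xFFFF0000u, 0u, ~0u, 1u};
  return vec_nand(a, b); /* ~(a & b): {0x00FFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFE} */
}
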
@@ -4644,21 +4729,24 @@ vec_nand(vector bool long long __a, vector bool long long __b) {
/* vec_nmadd */
#ifdef __VSX__
-static vector float __ATTRS_o_ai
-vec_nmadd(vector float __a, vector float __b, vector float __c) {
+static __inline__ vector float __ATTRS_o_ai vec_nmadd(vector float __a,
+ vector float __b,
+ vector float __c) {
return __builtin_vsx_xvnmaddasp(__a, __b, __c);
}
-static vector double __ATTRS_o_ai
-vec_nmadd(vector double __a, vector double __b, vector double __c) {
+static __inline__ vector double __ATTRS_o_ai vec_nmadd(vector double __a,
+ vector double __b,
+ vector double __c) {
return __builtin_vsx_xvnmaddadp(__a, __b, __c);
}
#endif
/* vec_nmsub */
-static vector float __ATTRS_o_ai
-vec_nmsub(vector float __a, vector float __b, vector float __c) {
+static __inline__ vector float __ATTRS_o_ai vec_nmsub(vector float __a,
+ vector float __b,
+ vector float __c) {
#ifdef __VSX__
return __builtin_vsx_xvnmsubasp(__a, __b, __c);
#else
@@ -4667,15 +4755,16 @@ vec_nmsub(vector float __a, vector float __b, vector float __c) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_nmsub(vector double __a, vector double __b, vector double __c) {
+static __inline__ vector double __ATTRS_o_ai vec_nmsub(vector double __a,
+ vector double __b,
+ vector double __c) {
return __builtin_vsx_xvnmsubadp(__a, __b, __c);
}
#endif
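
vec_nmadd is -(a*b + c) and vec_nmsub is -(a*b - c); note that only the VSX paths exist for vec_nmadd, while vec_nmsub falls back to vnmsubfp without VSX. Sketch:

#include <altivec.h>

vector float nmsub_demo(void) {
  vector float a = {1.0f, 2.0f, 3.0f, 4.0f};
  vector float b = {2.0f, 2.0f, 2.0f, 2.0f};
  vector float c = {1.0f, 1.0f, 1.0f, 1.0f};
  return vec_nmsub(a, b, c); /* -(a*b - c): {-1.0f, -3.0f, -5.0f, -7.0f} */
}
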
/* vec_vnmsubfp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vnmsubfp(vector float __a, vector float __b, vector float __c) {
return __builtin_altivec_vnmsubfp(__a, __b, __c);
}
@@ -4684,58 +4773,61 @@ vec_vnmsubfp(vector float __a, vector float __b, vector float __c) {
#define __builtin_altivec_vnor vec_nor
-static vector signed char __ATTRS_o_ai vec_nor(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_nor(vector signed char __a, vector signed char __b) {
return ~(__a | __b);
}
-static vector unsigned char __ATTRS_o_ai vec_nor(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_nor(vector unsigned char __a, vector unsigned char __b) {
return ~(__a | __b);
}
-static vector bool char __ATTRS_o_ai vec_nor(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_nor(vector bool char __a,
+ vector bool char __b) {
return ~(__a | __b);
}
-static vector short __ATTRS_o_ai vec_nor(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_nor(vector short __a,
+ vector short __b) {
return ~(__a | __b);
}
-static vector unsigned short __ATTRS_o_ai vec_nor(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_nor(vector unsigned short __a, vector unsigned short __b) {
return ~(__a | __b);
}
-static vector bool short __ATTRS_o_ai vec_nor(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_nor(vector bool short __a, vector bool short __b) {
return ~(__a | __b);
}
-static vector int __ATTRS_o_ai vec_nor(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_nor(vector int __a,
+ vector int __b) {
return ~(__a | __b);
}
-static vector unsigned int __ATTRS_o_ai vec_nor(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_nor(vector unsigned int __a, vector unsigned int __b) {
return ~(__a | __b);
}
-static vector bool int __ATTRS_o_ai vec_nor(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_nor(vector bool int __a,
+ vector bool int __b) {
return ~(__a | __b);
}
-static vector float __ATTRS_o_ai vec_nor(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_nor(vector float __a,
+ vector float __b) {
vector unsigned int __res =
~((vector unsigned int)__a | (vector unsigned int)__b);
return (vector float)__res;
}
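
vec_nor is `~(__a | __b)`, with the float overload routed through unsigned-int bit casts as shown above. Sketch:

#include <altivec.h>

vector unsigned int nor_demo(void) {
  vector unsigned int a = {0xF0F0F0F0u, 0u, 0u, ~0u};
  vector unsigned int b = {0x0F0F0F0Fu, 0u, ~0u, ~0u};
  return vec_nor(a, b); /* ~(a | b): {0, 0xFFFFFFFF, 0, 0} */
}
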
#ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_nor(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_nor(vector double __a,
+ vector double __b) {
vector unsigned long long __res =
~((vector unsigned long long)__a | (vector unsigned long long)__b);
return (vector double)__res;
@@ -4744,68 +4836,71 @@ vec_nor(vector double __a, vector double __b) {
/* vec_vnor */
-static vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vnor(vector signed char __a, vector signed char __b) {
return ~(__a | __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vnor(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vnor(vector unsigned char __a, vector unsigned char __b) {
return ~(__a | __b);
}
-static vector bool char __ATTRS_o_ai vec_vnor(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vnor(vector bool char __a,
+ vector bool char __b) {
return ~(__a | __b);
}
-static vector short __ATTRS_o_ai vec_vnor(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vnor(vector short __a,
+ vector short __b) {
return ~(__a | __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vnor(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vnor(vector unsigned short __a, vector unsigned short __b) {
return ~(__a | __b);
}
-static vector bool short __ATTRS_o_ai vec_vnor(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vnor(vector bool short __a, vector bool short __b) {
return ~(__a | __b);
}
-static vector int __ATTRS_o_ai vec_vnor(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vnor(vector int __a,
+ vector int __b) {
return ~(__a | __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vnor(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vnor(vector unsigned int __a, vector unsigned int __b) {
return ~(__a | __b);
}
-static vector bool int __ATTRS_o_ai vec_vnor(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vnor(vector bool int __a,
+ vector bool int __b) {
return ~(__a | __b);
}
-static vector float __ATTRS_o_ai vec_vnor(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vnor(vector float __a,
+ vector float __b) {
vector unsigned int __res =
~((vector unsigned int)__a | (vector unsigned int)__b);
return (vector float)__res;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_nor(vector signed long long __a, vector signed long long __b) {
return ~(__a | __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_nor(vector unsigned long long __a, vector unsigned long long __b) {
return ~(__a | __b);
}
-static vector bool long long __ATTRS_o_ai vec_nor(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_nor(vector bool long long __a, vector bool long long __b) {
return ~(__a | __b);
}
#endif
@@ -4814,315 +4909,323 @@ static vector bool long long __ATTRS_o_ai vec_nor(vector bool long long __a,
#define __builtin_altivec_vor vec_or
-static vector signed char __ATTRS_o_ai vec_or(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_or(vector signed char __a, vector signed char __b) {
return __a | __b;
}
-static vector signed char __ATTRS_o_ai vec_or(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_or(vector bool char __a, vector signed char __b) {
return (vector signed char)__a | __b;
}
-static vector signed char __ATTRS_o_ai vec_or(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a,
+ vector bool char __b) {
return __a | (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_or(vector unsigned char __a, vector unsigned char __b) {
return __a | __b;
}
-static vector unsigned char __ATTRS_o_ai vec_or(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_or(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a | __b;
}
-static vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_or(vector unsigned char __a, vector bool char __b) {
return __a | (vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_or(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_or(vector bool char __a,
+ vector bool char __b) {
return __a | __b;
}
-static vector short __ATTRS_o_ai vec_or(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_or(vector short __a,
+ vector short __b) {
return __a | __b;
}
-static vector short __ATTRS_o_ai vec_or(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_or(vector bool short __a,
+ vector short __b) {
return (vector short)__a | __b;
}
-static vector short __ATTRS_o_ai vec_or(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_or(vector short __a,
+ vector bool short __b) {
return __a | (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_or(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_or(vector unsigned short __a, vector unsigned short __b) {
return __a | __b;
}
-static vector unsigned short __ATTRS_o_ai vec_or(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_or(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a | __b;
}
-static vector unsigned short __ATTRS_o_ai vec_or(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_or(vector unsigned short __a, vector bool short __b) {
return __a | (vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_or(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai vec_or(vector bool short __a,
+ vector bool short __b) {
return __a | __b;
}
-static vector int __ATTRS_o_ai vec_or(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_or(vector int __a,
+ vector int __b) {
return __a | __b;
}
-static vector int __ATTRS_o_ai vec_or(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_or(vector bool int __a,
+ vector int __b) {
return (vector int)__a | __b;
}
-static vector int __ATTRS_o_ai vec_or(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_or(vector int __a,
+ vector bool int __b) {
return __a | (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_or(vector unsigned int __a, vector unsigned int __b) {
return __a | __b;
}
-static vector unsigned int __ATTRS_o_ai vec_or(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_or(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a | __b;
}
-static vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_or(vector unsigned int __a, vector bool int __b) {
return __a | (vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_or(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_or(vector bool int __a,
+ vector bool int __b) {
return __a | __b;
}
-static vector float __ATTRS_o_ai vec_or(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_or(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a | (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_or(vector bool int __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_or(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a | (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_or(vector float __a, vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_or(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a | (vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_or(vector bool long long __a,
- vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_or(vector bool long long __a,
+ vector double __b) {
return (vector unsigned long long)__a | (vector unsigned long long)__b;
}
-static vector double __ATTRS_o_ai vec_or(vector double __a,
- vector bool long long __b) {
+static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a,
+ vector bool long long __b) {
return (vector unsigned long long)__a | (vector unsigned long long)__b;
}
-static vector double __ATTRS_o_ai vec_or(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_or(vector double __a,
+ vector double __b) {
vector unsigned long long __res =
(vector unsigned long long)__a | (vector unsigned long long)__b;
return (vector double)__res;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_or(vector signed long long __a, vector signed long long __b) {
return __a | __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_or(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a | __b;
}
-static vector signed long long __ATTRS_o_ai vec_or(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_or(vector signed long long __a, vector bool long long __b) {
return __a | (vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_or(vector unsigned long long __a, vector unsigned long long __b) {
return __a | __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_or(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a | __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_or(vector unsigned long long __a, vector bool long long __b) {
return __a | (vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_or(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_or(vector bool long long __a, vector bool long long __b) {
return __a | __b;
}
#endif
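
The vec_or overloads are plain bitwise OR, with the bool- and float-typed variants casting to an unsigned element type first. Sketch:

#include <altivec.h>

vector unsigned int or_demo(void) {
  vector unsigned int a = {0xF0F0F0F0u, 1u, 0u, 0u};
  vector unsigned int b = {0x0F0F0F0Fu, 2u, 0u, ~0u};
  return vec_or(a, b); /* a | b: {0xFFFFFFFF, 3, 0, 0xFFFFFFFF} */
}
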
#ifdef __POWER8_VECTOR__
-static vector signed char __ATTRS_o_ai vec_orc(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_orc(vector signed char __a, vector signed char __b) {
return __a | ~__b;
}
-static vector signed char __ATTRS_o_ai vec_orc(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_orc(vector signed char __a, vector bool char __b) {
return __a | ~__b;
}
-static vector signed char __ATTRS_o_ai vec_orc(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_orc(vector bool char __a, vector signed char __b) {
return __a | ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_orc(vector unsigned char __a, vector unsigned char __b) {
return __a | ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_orc(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_orc(vector unsigned char __a, vector bool char __b) {
return __a | ~__b;
}
-static vector unsigned char __ATTRS_o_ai vec_orc(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_orc(vector bool char __a, vector unsigned char __b) {
return __a | ~__b;
}
-static vector bool char __ATTRS_o_ai vec_orc(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_orc(vector bool char __a,
+ vector bool char __b) {
return __a | ~__b;
}
-static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_orc(vector signed short __a, vector signed short __b) {
return __a | ~__b;
}
-static vector signed short __ATTRS_o_ai vec_orc(vector signed short __a,
- vector bool short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_orc(vector signed short __a, vector bool short __b) {
return __a | ~__b;
}
-static vector signed short __ATTRS_o_ai vec_orc(vector bool short __a,
- vector signed short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_orc(vector bool short __a, vector signed short __b) {
return __a | ~__b;
}
-static vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_orc(vector unsigned short __a, vector unsigned short __b) {
return __a | ~__b;
}
-static vector unsigned short __ATTRS_o_ai vec_orc(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_orc(vector unsigned short __a, vector bool short __b) {
return __a | ~__b;
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_orc(vector bool short __a, vector unsigned short __b) {
return __a | ~__b;
}
-static vector bool short __ATTRS_o_ai vec_orc(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_orc(vector bool short __a, vector bool short __b) {
return __a | ~__b;
}
-static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_orc(vector signed int __a, vector signed int __b) {
return __a | ~__b;
}
-static vector signed int __ATTRS_o_ai vec_orc(vector signed int __a,
- vector bool int __b) {
+static __inline__ vector signed int __ATTRS_o_ai vec_orc(vector signed int __a,
+ vector bool int __b) {
return __a | ~__b;
}
-static vector signed int __ATTRS_o_ai vec_orc(vector bool int __a,
- vector signed int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_orc(vector bool int __a, vector signed int __b) {
return __a | ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_orc(vector unsigned int __a, vector unsigned int __b) {
return __a | ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_orc(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_orc(vector unsigned int __a, vector bool int __b) {
return __a | ~__b;
}
-static vector unsigned int __ATTRS_o_ai vec_orc(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_orc(vector bool int __a, vector unsigned int __b) {
return __a | ~__b;
}
-static vector bool int __ATTRS_o_ai vec_orc(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_orc(vector bool int __a,
+ vector bool int __b) {
return __a | ~__b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_orc(vector signed long long __a, vector signed long long __b) {
return __a | ~__b;
}
-static vector signed long long __ATTRS_o_ai vec_orc(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_orc(vector signed long long __a, vector bool long long __b) {
return __a | ~__b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_orc(vector bool long long __a, vector signed long long __b) {
return __a | ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_orc(vector unsigned long long __a, vector unsigned long long __b) {
return __a | ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_orc(vector unsigned long long __a, vector bool long long __b) {
return __a | ~__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_orc(vector bool long long __a, vector unsigned long long __b) {
return __a | ~__b;
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_orc(vector bool long long __a, vector bool long long __b) {
return __a | ~__b;
}
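
vec_orc ("OR with complement") is `__a | ~__b`, again POWER8-only. Sketch, assuming -mcpu=power8:

#include <altivec.h>

vector unsigned int orc_demo(void) {
  vector unsigned int a = {0u, 0u, 1u, 0u};
  vector unsigned int b = {~0u, 0u, ~0u, 0xFFFF0000u};
  return vec_orc(a, b); /* a | ~b: {0, 0xFFFFFFFF, 1, 0x0000FFFF} */
}
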
@@ -5130,160 +5233,165 @@ vec_orc(vector bool long long __a, vector bool long long __b) {
/* vec_vor */
-static vector signed char __ATTRS_o_ai vec_vor(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vor(vector signed char __a, vector signed char __b) {
return __a | __b;
}
-static vector signed char __ATTRS_o_ai vec_vor(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vor(vector bool char __a, vector signed char __b) {
return (vector signed char)__a | __b;
}
-static vector signed char __ATTRS_o_ai vec_vor(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vor(vector signed char __a, vector bool char __b) {
return __a | (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vor(vector unsigned char __a, vector unsigned char __b) {
return __a | __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vor(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vor(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a | __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vor(vector unsigned char __a, vector bool char __b) {
return __a | (vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_vor(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vor(vector bool char __a,
+ vector bool char __b) {
return __a | __b;
}
-static vector short __ATTRS_o_ai vec_vor(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vor(vector short __a,
+ vector short __b) {
return __a | __b;
}
-static vector short __ATTRS_o_ai vec_vor(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vor(vector bool short __a,
+ vector short __b) {
return (vector short)__a | __b;
}
-static vector short __ATTRS_o_ai vec_vor(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vor(vector short __a,
+ vector bool short __b) {
return __a | (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vor(vector unsigned short __a, vector unsigned short __b) {
return __a | __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vor(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vor(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a | __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vor(vector unsigned short __a, vector bool short __b) {
return __a | (vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_vor(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vor(vector bool short __a, vector bool short __b) {
return __a | __b;
}
-static vector int __ATTRS_o_ai vec_vor(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vor(vector int __a,
+ vector int __b) {
return __a | __b;
}
-static vector int __ATTRS_o_ai vec_vor(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vor(vector bool int __a,
+ vector int __b) {
return (vector int)__a | __b;
}
-static vector int __ATTRS_o_ai vec_vor(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vor(vector int __a,
+ vector bool int __b) {
return __a | (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vor(vector unsigned int __a, vector unsigned int __b) {
return __a | __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vor(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vor(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a | __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vor(vector unsigned int __a, vector bool int __b) {
return __a | (vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_vor(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vor(vector bool int __a,
+ vector bool int __b) {
return __a | __b;
}
-static vector float __ATTRS_o_ai vec_vor(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vor(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a | (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vor(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vor(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a | (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vor(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vor(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a | (vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vor(vector signed long long __a, vector signed long long __b) {
return __a | __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vor(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a | __b;
}
-static vector signed long long __ATTRS_o_ai vec_vor(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_vor(vector signed long long __a, vector bool long long __b) {
return __a | (vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vor(vector unsigned long long __a, vector unsigned long long __b) {
return __a | __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vor(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a | __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vor(vector unsigned long long __a, vector bool long long __b) {
return __a | (vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_vor(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_vor(vector bool long long __a, vector bool long long __b) {
return __a | __b;
}
#endif
@@ -5293,8 +5401,8 @@ static vector bool long long __ATTRS_o_ai vec_vor(vector bool long long __a,
/* The various vector pack instructions have a big-endian bias, so for
little endian we must handle reversed element numbering. */
-static vector signed char __ATTRS_o_ai vec_pack(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_pack(vector signed short __a, vector signed short __b) {
#ifdef __LITTLE_ENDIAN__
return (vector signed char)vec_perm(
__a, __b,
@@ -5308,8 +5416,8 @@ static vector signed char __ATTRS_o_ai vec_pack(vector signed short __a,
#endif
}
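
A short sketch of the ordering the comment above is preserving (assumes -maltivec; the helper name is illustrative): elements 0..7 of the result are the truncated elements of __a and elements 8..15 those of __b, on either endianness.

#include <altivec.h>

static vector signed char pack_demo(vector signed short __hi,
                                    vector signed short __lo) {
  /* Each 16-bit lane is truncated to its low byte; the vec_perm-based
     little-endian path above exists to keep this ordering stable. */
  return vec_pack(__hi, __lo);
}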
-static vector unsigned char __ATTRS_o_ai vec_pack(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_pack(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned char)vec_perm(
__a, __b,
@@ -5323,8 +5431,8 @@ static vector unsigned char __ATTRS_o_ai vec_pack(vector unsigned short __a,
#endif
}
-static vector bool char __ATTRS_o_ai vec_pack(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_pack(vector bool short __a, vector bool short __b) {
#ifdef __LITTLE_ENDIAN__
return (vector bool char)vec_perm(
__a, __b,
@@ -5338,7 +5446,8 @@ static vector bool char __ATTRS_o_ai vec_pack(vector bool short __a,
#endif
}
-static vector short __ATTRS_o_ai vec_pack(vector int __a, vector int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_pack(vector int __a,
+ vector int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector short)vec_perm(
__a, __b,
@@ -5352,8 +5461,8 @@ static vector short __ATTRS_o_ai vec_pack(vector int __a, vector int __b) {
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_pack(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_pack(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned short)vec_perm(
__a, __b,
@@ -5367,8 +5476,8 @@ static vector unsigned short __ATTRS_o_ai vec_pack(vector unsigned int __a,
#endif
}
-static vector bool short __ATTRS_o_ai vec_pack(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool short __ATTRS_o_ai vec_pack(vector bool int __a,
+ vector bool int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector bool short)vec_perm(
__a, __b,
@@ -5383,8 +5492,8 @@ static vector bool short __ATTRS_o_ai vec_pack(vector bool int __a,
}
#ifdef __VSX__
-static vector signed int __ATTRS_o_ai vec_pack(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_pack(vector signed long long __a, vector signed long long __b) {
#ifdef __LITTLE_ENDIAN__
return (vector signed int)vec_perm(
__a, __b,
@@ -5397,7 +5506,7 @@ static vector signed int __ATTRS_o_ai vec_pack(vector signed long long __a,
0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
#endif
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_pack(vector unsigned long long __a, vector unsigned long long __b) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned int)vec_perm(
@@ -5412,8 +5521,8 @@ vec_pack(vector unsigned long long __a, vector unsigned long long __b) {
#endif
}
-static vector bool int __ATTRS_o_ai vec_pack(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_pack(vector bool long long __a, vector bool long long __b) {
#ifdef __LITTLE_ENDIAN__
return (vector bool int)vec_perm(
__a, __b,
@@ -5433,8 +5542,8 @@ static vector bool int __ATTRS_o_ai vec_pack(vector bool long long __a,
#define __builtin_altivec_vpkuhum vec_vpkuhum
-static vector signed char __ATTRS_o_ai vec_vpkuhum(vector signed short __a,
- vector signed short __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vpkuhum(vector signed short __a, vector signed short __b) {
#ifdef __LITTLE_ENDIAN__
return (vector signed char)vec_perm(
__a, __b,
@@ -5448,7 +5557,7 @@ static vector signed char __ATTRS_o_ai vec_vpkuhum(vector signed short __a,
#endif
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_vpkuhum(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned char)vec_perm(
@@ -5463,8 +5572,8 @@ vec_vpkuhum(vector unsigned short __a, vector unsigned short __b) {
#endif
}
-static vector bool char __ATTRS_o_ai vec_vpkuhum(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vpkuhum(vector bool short __a, vector bool short __b) {
#ifdef __LITTLE_ENDIAN__
return (vector bool char)vec_perm(
__a, __b,
@@ -5482,7 +5591,8 @@ static vector bool char __ATTRS_o_ai vec_vpkuhum(vector bool short __a,
#define __builtin_altivec_vpkuwum vec_vpkuwum
-static vector short __ATTRS_o_ai vec_vpkuwum(vector int __a, vector int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vpkuwum(vector int __a,
+ vector int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector short)vec_perm(
__a, __b,
@@ -5496,8 +5606,8 @@ static vector short __ATTRS_o_ai vec_vpkuwum(vector int __a, vector int __b) {
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_vpkuwum(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vpkuwum(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned short)vec_perm(
__a, __b,
@@ -5511,8 +5621,8 @@ static vector unsigned short __ATTRS_o_ai vec_vpkuwum(vector unsigned int __a,
#endif
}
-static vector bool short __ATTRS_o_ai vec_vpkuwum(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vpkuwum(vector bool int __a, vector bool int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector bool short)vec_perm(
__a, __b,
@@ -5531,8 +5641,8 @@ static vector bool short __ATTRS_o_ai vec_vpkuwum(vector bool int __a,
#ifdef __POWER8_VECTOR__
#define __builtin_altivec_vpkudum vec_vpkudum
-static vector int __ATTRS_o_ai vec_vpkudum(vector long long __a,
- vector long long __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vpkudum(vector long long __a,
+ vector long long __b) {
#ifdef __LITTLE_ENDIAN__
return (vector int)vec_perm(
__a, __b,
@@ -5546,7 +5656,7 @@ static vector int __ATTRS_o_ai vec_vpkudum(vector long long __a,
#endif
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_vpkudum(vector unsigned long long __a, vector unsigned long long __b) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned int)vec_perm(
@@ -5561,8 +5671,8 @@ vec_vpkudum(vector unsigned long long __a, vector unsigned long long __b) {
#endif
}
-static vector bool int __ATTRS_o_ai vec_vpkudum(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vpkudum(vector bool long long __a, vector bool long long __b) {
#ifdef __LITTLE_ENDIAN__
return (vector bool int)vec_perm(
(vector long long)__a, (vector long long)__b,
@@ -5579,7 +5689,7 @@ static vector bool int __ATTRS_o_ai vec_vpkudum(vector bool long long __a,
/* vec_packpx */
-static vector pixel __attribute__((__always_inline__))
+static __inline__ vector pixel __attribute__((__always_inline__))
vec_packpx(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector pixel)__builtin_altivec_vpkpx(__b, __a);
@@ -5590,7 +5700,7 @@ vec_packpx(vector unsigned int __a, vector unsigned int __b) {
/* vec_vpkpx */
-static vector pixel __attribute__((__always_inline__))
+static __inline__ vector pixel __attribute__((__always_inline__))
vec_vpkpx(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return (vector pixel)__builtin_altivec_vpkpx(__b, __a);
@@ -5601,8 +5711,8 @@ vec_vpkpx(vector unsigned int __a, vector unsigned int __b) {
/* vec_packs */
-static vector signed char __ATTRS_o_ai vec_packs(vector short __a,
- vector short __b) {
+static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a,
+ vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkshss(__b, __a);
#else
@@ -5610,8 +5720,8 @@ static vector signed char __ATTRS_o_ai vec_packs(vector short __a,
#endif
}
-static vector unsigned char __ATTRS_o_ai vec_packs(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_packs(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuhus(__b, __a);
#else
@@ -5619,8 +5729,8 @@ static vector unsigned char __ATTRS_o_ai vec_packs(vector unsigned short __a,
#endif
}
-static vector signed short __ATTRS_o_ai vec_packs(vector int __a,
- vector int __b) {
+static __inline__ vector signed short __ATTRS_o_ai vec_packs(vector int __a,
+ vector int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkswss(__b, __a);
#else
@@ -5628,8 +5738,8 @@ static vector signed short __ATTRS_o_ai vec_packs(vector int __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_packs(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_packs(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuwus(__b, __a);
#else
@@ -5638,8 +5748,8 @@ static vector unsigned short __ATTRS_o_ai vec_packs(vector unsigned int __a,
}
#ifdef __POWER8_VECTOR__
-static vector int __ATTRS_o_ai vec_packs(vector long long __a,
- vector long long __b) {
+static __inline__ vector int __ATTRS_o_ai vec_packs(vector long long __a,
+ vector long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpksdss(__b, __a);
#else
@@ -5647,7 +5757,7 @@ static vector int __ATTRS_o_ai vec_packs(vector long long __a,
#endif
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_packs(vector unsigned long long __a, vector unsigned long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkudus(__b, __a);
@@ -5659,7 +5769,7 @@ vec_packs(vector unsigned long long __a, vector unsigned long long __b) {
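
A sketch contrasting vec_packs with vec_pack (assumes -maltivec; names are illustrative): the saturating forms clamp out-of-range lanes rather than truncating them.

#include <altivec.h>

static vector signed char packs_demo(vector signed short __a,
                                     vector signed short __b) {
  /* A lane holding 300 becomes 127 here (saturation); plain vec_pack
     would keep only the low byte, 300 & 0xFF == 44. */
  return vec_packs(__a, __b);
}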
/* vec_vpkshss */
-static vector signed char __attribute__((__always_inline__))
+static __inline__ vector signed char __attribute__((__always_inline__))
vec_vpkshss(vector short __a, vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkshss(__b, __a);
@@ -5671,8 +5781,8 @@ vec_vpkshss(vector short __a, vector short __b) {
/* vec_vpksdss */
#ifdef __POWER8_VECTOR__
-static vector int __ATTRS_o_ai vec_vpksdss(vector long long __a,
- vector long long __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vpksdss(vector long long __a,
+ vector long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpksdss(__b, __a);
#else
@@ -5683,7 +5793,7 @@ static vector int __ATTRS_o_ai vec_vpksdss(vector long long __a,
/* vec_vpkuhus */
-static vector unsigned char __attribute__((__always_inline__))
+static __inline__ vector unsigned char __attribute__((__always_inline__))
vec_vpkuhus(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuhus(__b, __a);
@@ -5695,7 +5805,7 @@ vec_vpkuhus(vector unsigned short __a, vector unsigned short __b) {
/* vec_vpkudus */
#ifdef __POWER8_VECTOR__
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vpkudus(vector unsigned long long __a, vector unsigned long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkudus(__b, __a);
@@ -5707,7 +5817,7 @@ vec_vpkudus(vector unsigned long long __a, vector unsigned long long __b) {
/* vec_vpkswss */
-static vector signed short __attribute__((__always_inline__))
+static __inline__ vector signed short __attribute__((__always_inline__))
vec_vpkswss(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkswss(__b, __a);
@@ -5718,7 +5828,7 @@ vec_vpkswss(vector int __a, vector int __b) {
/* vec_vpkuwus */
-static vector unsigned short __attribute__((__always_inline__))
+static __inline__ vector unsigned short __attribute__((__always_inline__))
vec_vpkuwus(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuwus(__b, __a);
@@ -5729,8 +5839,8 @@ vec_vpkuwus(vector unsigned int __a, vector unsigned int __b) {
/* vec_packsu */
-static vector unsigned char __ATTRS_o_ai vec_packsu(vector short __a,
- vector short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_packsu(vector short __a, vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkshus(__b, __a);
#else
@@ -5738,8 +5848,8 @@ static vector unsigned char __ATTRS_o_ai vec_packsu(vector short __a,
#endif
}
-static vector unsigned char __ATTRS_o_ai vec_packsu(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_packsu(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuhus(__b, __a);
#else
@@ -5747,8 +5857,8 @@ static vector unsigned char __ATTRS_o_ai vec_packsu(vector unsigned short __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_packsu(vector int __a,
- vector int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_packsu(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkswus(__b, __a);
#else
@@ -5756,8 +5866,8 @@ static vector unsigned short __ATTRS_o_ai vec_packsu(vector int __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_packsu(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_packsu(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuwus(__b, __a);
#else
@@ -5766,8 +5876,8 @@ static vector unsigned short __ATTRS_o_ai vec_packsu(vector unsigned int __a,
}
#ifdef __POWER8_VECTOR__
-static vector unsigned int __ATTRS_o_ai vec_packsu(vector long long __a,
- vector long long __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_packsu(vector long long __a, vector long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpksdus(__b, __a);
#else
@@ -5775,7 +5885,7 @@ static vector unsigned int __ATTRS_o_ai vec_packsu(vector long long __a,
#endif
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_packsu(vector unsigned long long __a, vector unsigned long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkudus(__b, __a);
@@ -5787,8 +5897,8 @@ vec_packsu(vector unsigned long long __a, vector unsigned long long __b) {
/* vec_vpkshus */
-static vector unsigned char __ATTRS_o_ai vec_vpkshus(vector short __a,
- vector short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vpkshus(vector short __a, vector short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkshus(__b, __a);
#else
@@ -5796,7 +5906,7 @@ static vector unsigned char __ATTRS_o_ai vec_vpkshus(vector short __a,
#endif
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_vpkshus(vector unsigned short __a, vector unsigned short __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuhus(__b, __a);
@@ -5807,8 +5917,8 @@ vec_vpkshus(vector unsigned short __a, vector unsigned short __b) {
/* vec_vpkswus */
-static vector unsigned short __ATTRS_o_ai vec_vpkswus(vector int __a,
- vector int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vpkswus(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkswus(__b, __a);
#else
@@ -5816,8 +5926,8 @@ static vector unsigned short __ATTRS_o_ai vec_vpkswus(vector int __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_vpkswus(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vpkswus(vector unsigned int __a, vector unsigned int __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpkuwus(__b, __a);
#else
@@ -5828,8 +5938,8 @@ static vector unsigned short __ATTRS_o_ai vec_vpkswus(vector unsigned int __a,
/* vec_vpksdus */
#ifdef __POWER8_VECTOR__
-static vector unsigned int __ATTRS_o_ai vec_vpksdus(vector long long __a,
- vector long long __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vpksdus(vector long long __a, vector long long __b) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vpksdus(__b, __a);
#else
@@ -5848,9 +5958,8 @@ static vector unsigned int __ATTRS_o_ai vec_vpksdus(vector long long __a,
// in that the vec_xor can be recognized as a vec_nor (and for P8 and
// later, possibly a vec_nand).
-static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a,
- vector signed char __b,
- vector unsigned char __c) {
+static __inline__ vector signed char __ATTRS_o_ai vec_perm(
+ vector signed char __a, vector signed char __b, vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5863,9 +5972,9 @@ static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a,
#endif
}
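
A sketch of a common vec_perm use, assuming -maltivec (the helper name is illustrative): with a constant control vector it expresses arbitrary byte shuffles, and the swap-and-complement little-endian path above is what keeps the element numbering of the concatenated (__a, __b) pair consistent across endiannesses.

#include <altivec.h>

static vector unsigned char reverse_bytes(vector unsigned char __v) {
  vector unsigned char __rev = {15, 14, 13, 12, 11, 10, 9, 8,
                                7,  6,  5,  4,  3,  2,  1, 0};
  /* Each control byte selects one byte of the 32-byte pair (__v, __v),
     so indices 15..0 reverse the vector. */
  return vec_perm(__v, __v, __rev);
}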
-static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a,
- vector unsigned char __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_perm(vector unsigned char __a, vector unsigned char __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5878,9 +5987,8 @@ static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a,
#endif
}
-static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
- vector bool char __b,
- vector unsigned char __c) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5893,9 +6001,9 @@ static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
#endif
}
-static vector short __ATTRS_o_ai vec_perm(vector signed short __a,
- vector signed short __b,
- vector unsigned char __c) {
+static __inline__ vector short __ATTRS_o_ai vec_perm(vector signed short __a,
+ vector signed short __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5908,9 +6016,9 @@ static vector short __ATTRS_o_ai vec_perm(vector signed short __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_perm(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5923,9 +6031,8 @@ static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a,
#endif
}
-static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a,
- vector bool short __b,
- vector unsigned char __c) {
+static __inline__ vector bool short __ATTRS_o_ai vec_perm(
+ vector bool short __a, vector bool short __b, vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5938,8 +6045,9 @@ static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a,
#endif
}
-static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
- vector unsigned char __c) {
+static __inline__ vector pixel __ATTRS_o_ai vec_perm(vector pixel __a,
+ vector pixel __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5952,9 +6060,9 @@ static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
#endif
}
-static vector int __ATTRS_o_ai vec_perm(vector signed int __a,
- vector signed int __b,
- vector unsigned char __c) {
+static __inline__ vector int __ATTRS_o_ai vec_perm(vector signed int __a,
+ vector signed int __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5965,9 +6073,9 @@ static vector int __ATTRS_o_ai vec_perm(vector signed int __a,
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a,
- vector unsigned int __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_perm(vector unsigned int __a, vector unsigned int __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5980,9 +6088,8 @@ static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a,
#endif
}
-static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a,
- vector bool int __b,
- vector unsigned char __c) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -5995,8 +6102,9 @@ static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a,
#endif
}
-static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
- vector unsigned char __c) {
+static __inline__ vector float __ATTRS_o_ai vec_perm(vector float __a,
+ vector float __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -6010,9 +6118,9 @@ static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
}
#ifdef __VSX__
-static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a,
- vector signed long long __b,
- vector unsigned char __c) {
+static __inline__ vector long long __ATTRS_o_ai
+vec_perm(vector signed long long __a, vector signed long long __b,
+ vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -6025,7 +6133,7 @@ static vector long long __ATTRS_o_ai vec_perm(vector signed long long __a,
#endif
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_perm(vector unsigned long long __a, vector unsigned long long __b,
vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
@@ -6040,7 +6148,7 @@ vec_perm(vector unsigned long long __a, vector unsigned long long __b,
#endif
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_perm(vector bool long long __a, vector bool long long __b,
vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
@@ -6055,8 +6163,8 @@ vec_perm(vector bool long long __a, vector bool long long __b,
#endif
}
-static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
- vector unsigned char __c) {
+static __inline__ vector double __ATTRS_o_ai
+vec_perm(vector double __a, vector double __b, vector unsigned char __c) {
#ifdef __LITTLE_ENDIAN__
vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255};
@@ -6072,92 +6180,86 @@ static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
/* vec_vperm */
-static vector signed char __ATTRS_o_ai vec_vperm(vector signed char __a,
- vector signed char __b,
- vector unsigned char __c) {
+static __inline__ vector signed char __ATTRS_o_ai vec_vperm(
+ vector signed char __a, vector signed char __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector unsigned char __ATTRS_o_ai vec_vperm(vector unsigned char __a,
- vector unsigned char __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vperm(vector unsigned char __a, vector unsigned char __b,
+ vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector bool char __ATTRS_o_ai vec_vperm(vector bool char __a,
- vector bool char __b,
- vector unsigned char __c) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vperm(
+ vector bool char __a, vector bool char __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector short __ATTRS_o_ai vec_vperm(vector short __a, vector short __b,
- vector unsigned char __c) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vperm(vector short __a, vector short __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector unsigned short __ATTRS_o_ai vec_vperm(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vperm(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector bool short __ATTRS_o_ai vec_vperm(vector bool short __a,
- vector bool short __b,
- vector unsigned char __c) {
+static __inline__ vector bool short __ATTRS_o_ai vec_vperm(
+ vector bool short __a, vector bool short __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector pixel __ATTRS_o_ai vec_vperm(vector pixel __a, vector pixel __b,
- vector unsigned char __c) {
+static __inline__ vector pixel __ATTRS_o_ai
+vec_vperm(vector pixel __a, vector pixel __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector int __ATTRS_o_ai vec_vperm(vector int __a, vector int __b,
- vector unsigned char __c) {
+static __inline__ vector int __ATTRS_o_ai vec_vperm(vector int __a,
+ vector int __b,
+ vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector unsigned int __ATTRS_o_ai vec_vperm(vector unsigned int __a,
- vector unsigned int __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vperm(vector unsigned int __a, vector unsigned int __b,
+ vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector bool int __ATTRS_o_ai vec_vperm(vector bool int __a,
- vector bool int __b,
- vector unsigned char __c) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vperm(vector bool int __a, vector bool int __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector float __ATTRS_o_ai vec_vperm(vector float __a, vector float __b,
- vector unsigned char __c) {
+static __inline__ vector float __ATTRS_o_ai
+vec_vperm(vector float __a, vector float __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
#ifdef __VSX__
-static vector long long __ATTRS_o_ai vec_vperm(vector long long __a,
- vector long long __b,
- vector unsigned char __c) {
+static __inline__ vector long long __ATTRS_o_ai vec_vperm(
+ vector long long __a, vector long long __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vperm(vector unsigned long long __a, vector unsigned long long __b,
vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
-static vector double __ATTRS_o_ai vec_vperm(vector double __a,
- vector double __b,
- vector unsigned char __c) {
+static __inline__ vector double __ATTRS_o_ai
+vec_vperm(vector double __a, vector double __b, vector unsigned char __c) {
return vec_perm(__a, __b, __c);
}
#endif
/* vec_re */
-static vector float __ATTRS_o_ai
-vec_re(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_re(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvresp(__a);
#else
@@ -6166,56 +6268,57 @@ vec_re(vector float __a) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_re(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_re(vector double __a) {
return __builtin_vsx_xvredp(__a);
}
#endif
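
vec_re only delivers an estimate (roughly 12 bits of accuracy), so the usual idiom is one Newton-Raphson step; a minimal sketch, assuming -maltivec and with an illustrative helper name:

#include <altivec.h>

static vector float recip_refined(vector float __a) {
  vector float __one = {1.0f, 1.0f, 1.0f, 1.0f};
  vector float __e = vec_re(__a);            /* initial estimate */
  /* e' = e + e*(1 - a*e); vec_nmsub(a, e, 1) computes 1 - a*e. */
  return vec_madd(__e, vec_nmsub(__a, __e, __one), __e);
}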
/* vec_vrefp */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vrefp(vector float __a) {
return __builtin_altivec_vrefp(__a);
}
/* vec_rl */
-static vector signed char __ATTRS_o_ai vec_rl(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_rl(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_rl(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_rl(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b);
}
-static vector short __ATTRS_o_ai vec_rl(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_rl(vector short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vrlh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_rl(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_rl(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b);
}
-static vector int __ATTRS_o_ai vec_rl(vector int __a, vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_rl(vector int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vrlw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_rl(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_rl(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b);
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_rl(vector signed long long __a, vector unsigned long long __b) {
return __builtin_altivec_vrld(__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
return __builtin_altivec_vrld(__a, __b);
}
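
A small usage sketch (assumes -maltivec; the helper name is illustrative): rotating each 16-bit lane left by 8 swaps its two bytes.

#include <altivec.h>

static vector unsigned short bswap16_demo(vector unsigned short __v) {
  return vec_rl(__v, vec_splat_u16(8)); /* per-lane rotate left by 8 */
}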
@@ -6223,43 +6326,43 @@ vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
/* vec_vrlb */
-static vector signed char __ATTRS_o_ai vec_vrlb(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vrlb(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vrlb(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vrlb(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b);
}
/* vec_vrlh */
-static vector short __ATTRS_o_ai vec_vrlh(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vrlh(vector short __a, vector unsigned short __b) {
return __builtin_altivec_vrlh(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vrlh(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vrlh(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b);
}
/* vec_vrlw */
-static vector int __ATTRS_o_ai vec_vrlw(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vrlw(vector int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vrlw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vrlw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vrlw(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b);
}
/* vec_round */
-static vector float __ATTRS_o_ai vec_round(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_round(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvrspi(__a);
#else
@@ -6268,36 +6371,34 @@ static vector float __ATTRS_o_ai vec_round(vector float __a) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_round(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
return __builtin_vsx_xvrdpi(__a);
}
/* vec_rint */
-static vector float __ATTRS_o_ai
-vec_rint(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_rint(vector float __a) {
return __builtin_vsx_xvrspic(__a);
}
-static vector double __ATTRS_o_ai
-vec_rint(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a) {
return __builtin_vsx_xvrdpic(__a);
}
/* vec_nearbyint */
-static vector float __ATTRS_o_ai vec_nearbyint(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_nearbyint(vector float __a) {
return __builtin_vsx_xvrspi(__a);
}
-static vector double __ATTRS_o_ai vec_nearbyint(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_nearbyint(vector double __a) {
return __builtin_vsx_xvrdpi(__a);
}
#endif
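
A sketch of the difference between these rounders (assumptions: VSX available, default ties-to-even runtime rounding mode; helper name illustrative): vec_rint follows the current rounding mode, while vec_round rounds to nearest with ties away from zero, so the two can disagree on exact halfway values.

#include <altivec.h>

static vector float ties_demo(void) {
  vector float __v = {0.5f, 1.5f, 2.5f, -2.5f};
  /* vec_round(__v) gives {1, 2, 3, -3}; vec_rint under ties-to-even
     gives {0, 2, 2, -2}. */
  return vec_rint(__v);
}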
/* vec_vrfin */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vrfin(vector float __a) {
return __builtin_altivec_vrfin(__a);
}
@@ -6305,19 +6406,18 @@ vec_vrfin(vector float __a) {
/* vec_sqrt */
#ifdef __VSX__
-static vector float __ATTRS_o_ai vec_sqrt(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_sqrt(vector float __a) {
return __builtin_vsx_xvsqrtsp(__a);
}
-static vector double __ATTRS_o_ai vec_sqrt(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_sqrt(vector double __a) {
return __builtin_vsx_xvsqrtdp(__a);
}
#endif
/* vec_rsqrte */
-static vector float __ATTRS_o_ai
-vec_rsqrte(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_rsqrte(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvrsqrtesp(__a);
#else
@@ -6326,14 +6426,14 @@ vec_rsqrte(vector float __a) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_rsqrte(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_rsqrte(vector double __a) {
return __builtin_vsx_xvrsqrtedp(__a);
}
#endif
/* vec_vrsqrtefp */
-static __vector float __attribute__((__always_inline__))
+static __inline__ __vector float __attribute__((__always_inline__))
vec_vrsqrtefp(vector float __a) {
return __builtin_altivec_vrsqrtefp(__a);
}
@@ -6342,257 +6442,250 @@ vec_vrsqrtefp(vector float __a) {
#define __builtin_altivec_vsel_4si vec_sel
-static vector signed char __ATTRS_o_ai vec_sel(vector signed char __a,
- vector signed char __b,
- vector unsigned char __c) {
+static __inline__ vector signed char __ATTRS_o_ai vec_sel(
+ vector signed char __a, vector signed char __b, vector unsigned char __c) {
return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
}
-static vector signed char __ATTRS_o_ai vec_sel(vector signed char __a,
- vector signed char __b,
- vector bool char __c) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sel(vector signed char __a, vector signed char __b, vector bool char __c) {
return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
}
-static vector unsigned char __ATTRS_o_ai vec_sel(vector unsigned char __a,
- vector unsigned char __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sel(vector unsigned char __a, vector unsigned char __b,
+ vector unsigned char __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector unsigned char __ATTRS_o_ai vec_sel(vector unsigned char __a,
- vector unsigned char __b,
- vector bool char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_sel(
+ vector unsigned char __a, vector unsigned char __b, vector bool char __c) {
return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c);
}
-static vector bool char __ATTRS_o_ai vec_sel(vector bool char __a,
- vector bool char __b,
- vector unsigned char __c) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_sel(vector bool char __a, vector bool char __b, vector unsigned char __c) {
return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c);
}
-static vector bool char __ATTRS_o_ai vec_sel(vector bool char __a,
- vector bool char __b,
- vector bool char __c) {
+static __inline__ vector bool char __ATTRS_o_ai vec_sel(vector bool char __a,
+ vector bool char __b,
+ vector bool char __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector short __ATTRS_o_ai vec_sel(vector short __a, vector short __b,
- vector unsigned short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_sel(vector short __a,
+ vector short __b,
+ vector unsigned short __c) {
return (__a & ~(vector short)__c) | (__b & (vector short)__c);
}
-static vector short __ATTRS_o_ai vec_sel(vector short __a, vector short __b,
- vector bool short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_sel(vector short __a,
+ vector short __b,
+ vector bool short __c) {
return (__a & ~(vector short)__c) | (__b & (vector short)__c);
}
-static vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned short __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sel(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned short __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short __a,
- vector unsigned short __b,
- vector bool short __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sel(vector unsigned short __a, vector unsigned short __b,
+ vector bool short __c) {
return (__a & ~(vector unsigned short)__c) |
(__b & (vector unsigned short)__c);
}
-static vector bool short __ATTRS_o_ai vec_sel(vector bool short __a,
- vector bool short __b,
- vector unsigned short __c) {
+static __inline__ vector bool short __ATTRS_o_ai vec_sel(
+ vector bool short __a, vector bool short __b, vector unsigned short __c) {
return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c);
}
-static vector bool short __ATTRS_o_ai vec_sel(vector bool short __a,
- vector bool short __b,
- vector bool short __c) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_sel(vector bool short __a, vector bool short __b, vector bool short __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector int __ATTRS_o_ai vec_sel(vector int __a, vector int __b,
- vector unsigned int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_sel(vector int __a,
+ vector int __b,
+ vector unsigned int __c) {
return (__a & ~(vector int)__c) | (__b & (vector int)__c);
}
-static vector int __ATTRS_o_ai vec_sel(vector int __a, vector int __b,
- vector bool int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_sel(vector int __a,
+ vector int __b,
+ vector bool int __c) {
return (__a & ~(vector int)__c) | (__b & (vector int)__c);
}
-static vector unsigned int __ATTRS_o_ai vec_sel(vector unsigned int __a,
- vector unsigned int __b,
- vector unsigned int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_sel(
+ vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector unsigned int __ATTRS_o_ai vec_sel(vector unsigned int __a,
- vector unsigned int __b,
- vector bool int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sel(vector unsigned int __a, vector unsigned int __b, vector bool int __c) {
return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c);
}
-static vector bool int __ATTRS_o_ai vec_sel(vector bool int __a,
- vector bool int __b,
- vector unsigned int __c) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_sel(vector bool int __a, vector bool int __b, vector unsigned int __c) {
return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c);
}
-static vector bool int __ATTRS_o_ai vec_sel(vector bool int __a,
- vector bool int __b,
- vector bool int __c) {
+static __inline__ vector bool int __ATTRS_o_ai vec_sel(vector bool int __a,
+ vector bool int __b,
+ vector bool int __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b,
- vector unsigned int __c) {
+static __inline__ vector float __ATTRS_o_ai vec_sel(vector float __a,
+ vector float __b,
+ vector unsigned int __c) {
vector int __res = ((vector int)__a & ~(vector int)__c) |
((vector int)__b & (vector int)__c);
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b,
- vector bool int __c) {
+static __inline__ vector float __ATTRS_o_ai vec_sel(vector float __a,
+ vector float __b,
+ vector bool int __c) {
vector int __res = ((vector int)__a & ~(vector int)__c) |
((vector int)__b & (vector int)__c);
return (vector float)__res;
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b,
- vector bool long long __c) {
+static __inline__ vector double __ATTRS_o_ai
+vec_sel(vector double __a, vector double __b, vector bool long long __c) {
vector long long __res = ((vector long long)__a & ~(vector long long)__c) |
- ((vector long long)__b & (vector long long)__c);
+ ((vector long long)__b & (vector long long)__c);
return (vector double)__res;
}
-static vector double __ATTRS_o_ai vec_sel(vector double __a, vector double __b,
- vector unsigned long long __c) {
+static __inline__ vector double __ATTRS_o_ai
+vec_sel(vector double __a, vector double __b, vector unsigned long long __c) {
vector long long __res = ((vector long long)__a & ~(vector long long)__c) |
- ((vector long long)__b & (vector long long)__c);
+ ((vector long long)__b & (vector long long)__c);
return (vector double)__res;
}
#endif
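
vec_sel is a bitwise select, (__a & ~__c) | (__b & __c), so a compare result can drive branchless lane-wise choices; a minimal sketch with an illustrative helper name, assuming -maltivec:

#include <altivec.h>

static vector signed int max_demo(vector signed int __x,
                                  vector signed int __y) {
  vector bool int __gt = vec_cmpgt(__y, __x); /* all-ones where y > x */
  return vec_sel(__x, __y, __gt);             /* per-lane maximum */
}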
/* vec_vsel */
-static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a,
- vector signed char __b,
- vector unsigned char __c) {
+static __inline__ vector signed char __ATTRS_o_ai vec_vsel(
+ vector signed char __a, vector signed char __b, vector unsigned char __c) {
return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
}
-static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a,
- vector signed char __b,
- vector bool char __c) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsel(vector signed char __a, vector signed char __b, vector bool char __c) {
return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
}
-static vector unsigned char __ATTRS_o_ai vec_vsel(vector unsigned char __a,
- vector unsigned char __b,
- vector unsigned char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsel(vector unsigned char __a, vector unsigned char __b,
+ vector unsigned char __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector unsigned char __ATTRS_o_ai vec_vsel(vector unsigned char __a,
- vector unsigned char __b,
- vector bool char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_vsel(
+ vector unsigned char __a, vector unsigned char __b, vector bool char __c) {
return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c);
}
-static vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a,
- vector bool char __b,
- vector unsigned char __c) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsel(vector bool char __a, vector bool char __b, vector unsigned char __c) {
return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c);
}
-static vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a,
- vector bool char __b,
- vector bool char __c) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a,
+ vector bool char __b,
+ vector bool char __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector short __ATTRS_o_ai vec_vsel(vector short __a, vector short __b,
- vector unsigned short __c) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vsel(vector short __a, vector short __b, vector unsigned short __c) {
return (__a & ~(vector short)__c) | (__b & (vector short)__c);
}
-static vector short __ATTRS_o_ai vec_vsel(vector short __a, vector short __b,
- vector bool short __c) {
+static __inline__ vector short __ATTRS_o_ai vec_vsel(vector short __a,
+ vector short __b,
+ vector bool short __c) {
return (__a & ~(vector short)__c) | (__b & (vector short)__c);
}
-static vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short __a,
- vector unsigned short __b,
- vector unsigned short __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsel(vector unsigned short __a, vector unsigned short __b,
+ vector unsigned short __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short __a,
- vector unsigned short __b,
- vector bool short __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsel(vector unsigned short __a, vector unsigned short __b,
+ vector bool short __c) {
return (__a & ~(vector unsigned short)__c) |
(__b & (vector unsigned short)__c);
}
-static vector bool short __ATTRS_o_ai vec_vsel(vector bool short __a,
- vector bool short __b,
- vector unsigned short __c) {
+static __inline__ vector bool short __ATTRS_o_ai vec_vsel(
+ vector bool short __a, vector bool short __b, vector unsigned short __c) {
return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c);
}
-static vector bool short __ATTRS_o_ai vec_vsel(vector bool short __a,
- vector bool short __b,
- vector bool short __c) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsel(vector bool short __a, vector bool short __b, vector bool short __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector int __ATTRS_o_ai vec_vsel(vector int __a, vector int __b,
- vector unsigned int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_vsel(vector int __a,
+ vector int __b,
+ vector unsigned int __c) {
return (__a & ~(vector int)__c) | (__b & (vector int)__c);
}
-static vector int __ATTRS_o_ai vec_vsel(vector int __a, vector int __b,
- vector bool int __c) {
+static __inline__ vector int __ATTRS_o_ai vec_vsel(vector int __a,
+ vector int __b,
+ vector bool int __c) {
return (__a & ~(vector int)__c) | (__b & (vector int)__c);
}
-static vector unsigned int __ATTRS_o_ai vec_vsel(vector unsigned int __a,
- vector unsigned int __b,
- vector unsigned int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_vsel(
+ vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector unsigned int __ATTRS_o_ai vec_vsel(vector unsigned int __a,
- vector unsigned int __b,
- vector bool int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_vsel(
+ vector unsigned int __a, vector unsigned int __b, vector bool int __c) {
return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c);
}
-static vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a,
- vector bool int __b,
- vector unsigned int __c) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsel(vector bool int __a, vector bool int __b, vector unsigned int __c) {
return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c);
}
-static vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a,
- vector bool int __b,
- vector bool int __c) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a,
+ vector bool int __b,
+ vector bool int __c) {
return (__a & ~__c) | (__b & __c);
}
-static vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b,
- vector unsigned int __c) {
+static __inline__ vector float __ATTRS_o_ai vec_vsel(vector float __a,
+ vector float __b,
+ vector unsigned int __c) {
vector int __res = ((vector int)__a & ~(vector int)__c) |
((vector int)__b & (vector int)__c);
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b,
- vector bool int __c) {
+static __inline__ vector float __ATTRS_o_ai vec_vsel(vector float __a,
+ vector float __b,
+ vector bool int __c) {
vector int __res = ((vector int)__a & ~(vector int)__c) |
((vector int)__b & (vector int)__c);
return (vector float)__res;
@@ -6600,42 +6693,43 @@ static vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b,
/* vec_sl */
-static vector signed char __ATTRS_o_ai vec_sl(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sl(vector signed char __a, vector unsigned char __b) {
return __a << (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sl(vector unsigned char __a, vector unsigned char __b) {
return __a << __b;
}
-static vector short __ATTRS_o_ai vec_sl(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
+ vector unsigned short __b) {
return __a << (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_sl(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sl(vector unsigned short __a, vector unsigned short __b) {
return __a << __b;
}
-static vector int __ATTRS_o_ai vec_sl(vector int __a, vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
+ vector unsigned int __b) {
return __a << (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_sl(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sl(vector unsigned int __a, vector unsigned int __b) {
return __a << __b;
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_sl(vector signed long long __a, vector unsigned long long __b) {
return __a << (vector long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
return __a << __b;
}
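
vec_sl shifts each element left by the count in the corresponding element of the second operand; the underlying vslb/vslh/vslw instructions take the count modulo the element width. A sketch under the same -maltivec assumption:

    #include <altivec.h>

    static vector unsigned int times_eight(vector unsigned int a) {
      vector unsigned int n = {3, 3, 3, 3}; /* per-element shift counts */
      return vec_sl(a, n);                  /* {a0<<3, a1<<3, a2<<3, a3<<3} */
    }
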
@@ -6645,13 +6739,13 @@ vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
#define __builtin_altivec_vslb vec_vslb
-static vector signed char __ATTRS_o_ai vec_vslb(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vslb(vector signed char __a, vector unsigned char __b) {
return vec_sl(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vslb(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vslb(vector unsigned char __a, vector unsigned char __b) {
return vec_sl(__a, __b);
}
@@ -6659,13 +6753,13 @@ static vector unsigned char __ATTRS_o_ai vec_vslb(vector unsigned char __a,
#define __builtin_altivec_vslh vec_vslh
-static vector short __ATTRS_o_ai vec_vslh(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vslh(vector short __a, vector unsigned short __b) {
return vec_sl(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vslh(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vslh(vector unsigned short __a, vector unsigned short __b) {
return vec_sl(__a, __b);
}
@@ -6673,13 +6767,13 @@ static vector unsigned short __ATTRS_o_ai vec_vslh(vector unsigned short __a,
#define __builtin_altivec_vslw vec_vslw
-static vector int __ATTRS_o_ai vec_vslw(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vslw(vector int __a,
+ vector unsigned int __b) {
return vec_sl(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vslw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vslw(vector unsigned int __a, vector unsigned int __b) {
return vec_sl(__a, __b);
}
@@ -6687,17 +6781,15 @@ static vector unsigned int __ATTRS_o_ai vec_vslw(vector unsigned int __a,
#define __builtin_altivec_vsldoi_4si vec_sld
-static vector signed char __ATTRS_o_ai vec_sld(vector signed char __a,
- vector signed char __b,
- unsigned const int __c) {
+static __inline__ vector signed char __ATTRS_o_ai vec_sld(
+ vector signed char __a, vector signed char __b, unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6707,17 +6799,16 @@ static vector signed char __ATTRS_o_ai vec_sld(vector signed char __a,
#endif
}
-static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a,
- vector unsigned char __b,
- unsigned const int __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sld(vector unsigned char __a, vector unsigned char __b,
+ unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6727,17 +6818,15 @@ static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a,
#endif
}
-static vector bool char __ATTRS_o_ai vec_sld(vector bool char __a,
- vector bool char __b,
- unsigned const int __c) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_sld(vector bool char __a, vector bool char __b, unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6747,17 +6836,15 @@ static vector bool char __ATTRS_o_ai vec_sld(vector bool char __a,
#endif
}
-static vector signed short __ATTRS_o_ai vec_sld(vector signed short __a,
- vector signed short __b,
- unsigned const int __c) {
+static __inline__ vector signed short __ATTRS_o_ai vec_sld(
+ vector signed short __a, vector signed short __b, unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6767,17 +6854,16 @@ static vector signed short __ATTRS_o_ai vec_sld(vector signed short __a,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a,
- vector unsigned short __b,
- unsigned const int __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sld(vector unsigned short __a, vector unsigned short __b,
+ unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6787,17 +6873,15 @@ static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a,
#endif
}
-static vector bool short __ATTRS_o_ai vec_sld(vector bool short __a,
- vector bool short __b,
- unsigned const int __c) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_sld(vector bool short __a, vector bool short __b, unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6807,16 +6891,16 @@ static vector bool short __ATTRS_o_ai vec_sld(vector bool short __a,
#endif
}
-static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b,
- unsigned const int __c) {
+static __inline__ vector pixel __ATTRS_o_ai vec_sld(vector pixel __a,
+ vector pixel __b,
+ unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6826,17 +6910,15 @@ static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b,
#endif
}
-static vector signed int __ATTRS_o_ai vec_sld(vector signed int __a,
- vector signed int __b,
- unsigned const int __c) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_sld(vector signed int __a, vector signed int __b, unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6846,17 +6928,15 @@ static vector signed int __ATTRS_o_ai vec_sld(vector signed int __a,
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int __a,
- vector unsigned int __b,
- unsigned const int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_sld(
+ vector unsigned int __a, vector unsigned int __b, unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6866,17 +6946,16 @@ static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int __a,
#endif
}
-static vector bool int __ATTRS_o_ai vec_sld(vector bool int __a,
- vector bool int __b,
- unsigned const int __c) {
+static __inline__ vector bool int __ATTRS_o_ai vec_sld(vector bool int __a,
+ vector bool int __b,
+ unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6886,16 +6965,16 @@ static vector bool int __ATTRS_o_ai vec_sld(vector bool int __a,
#endif
}
-static vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b,
- unsigned const int __c) {
+static __inline__ vector float __ATTRS_o_ai vec_sld(vector float __a,
+ vector float __b,
+ unsigned const int __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6907,17 +6986,15 @@ static vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b,
/* vec_vsldoi */
-static vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char __a,
- vector signed char __b,
- unsigned char __c) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6927,17 +7004,15 @@ static vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char __a,
#endif
}
-static vector unsigned char __ATTRS_o_ai vec_vsldoi(vector unsigned char __a,
- vector unsigned char __b,
- unsigned char __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai vec_vsldoi(
+ vector unsigned char __a, vector unsigned char __b, unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6947,16 +7022,16 @@ static vector unsigned char __ATTRS_o_ai vec_vsldoi(vector unsigned char __a,
#endif
}
-static vector short __ATTRS_o_ai vec_vsldoi(vector short __a, vector short __b,
- unsigned char __c) {
+static __inline__ vector short __ATTRS_o_ai vec_vsldoi(vector short __a,
+ vector short __b,
+ unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6966,17 +7041,15 @@ static vector short __ATTRS_o_ai vec_vsldoi(vector short __a, vector short __b,
#endif
}
-static vector unsigned short __ATTRS_o_ai vec_vsldoi(vector unsigned short __a,
- vector unsigned short __b,
- unsigned char __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai vec_vsldoi(
+ vector unsigned short __a, vector unsigned short __b, unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -6986,16 +7059,16 @@ static vector unsigned short __ATTRS_o_ai vec_vsldoi(vector unsigned short __a,
#endif
}
-static vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a, vector pixel __b,
- unsigned char __c) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a,
+ vector pixel __b,
+ unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -7005,16 +7078,16 @@ static vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a, vector pixel __b,
#endif
}
-static vector int __ATTRS_o_ai vec_vsldoi(vector int __a, vector int __b,
- unsigned char __c) {
+static __inline__ vector int __ATTRS_o_ai vec_vsldoi(vector int __a,
+ vector int __b,
+ unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -7024,17 +7097,15 @@ static vector int __ATTRS_o_ai vec_vsldoi(vector int __a, vector int __b,
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_vsldoi(vector unsigned int __a,
- vector unsigned int __b,
- unsigned char __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_vsldoi(
+ vector unsigned int __a, vector unsigned int __b, unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -7044,16 +7115,16 @@ static vector unsigned int __ATTRS_o_ai vec_vsldoi(vector unsigned int __a,
#endif
}
-static vector float __ATTRS_o_ai vec_vsldoi(vector float __a, vector float __b,
- unsigned char __c) {
+static __inline__ vector float __ATTRS_o_ai vec_vsldoi(vector float __a,
+ vector float __b,
+ unsigned char __c) {
unsigned char __d = __c & 0x0F;
#ifdef __LITTLE_ENDIAN__
return vec_perm(
- __b, __a,
- (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d, 20 - __d,
- 21 - __d, 22 - __d, 23 - __d, 24 - __d, 25 - __d,
- 26 - __d, 27 - __d, 28 - __d, 29 - __d, 30 - __d,
- 31 - __d));
+ __b, __a, (vector unsigned char)(16 - __d, 17 - __d, 18 - __d, 19 - __d,
+ 20 - __d, 21 - __d, 22 - __d, 23 - __d,
+ 24 - __d, 25 - __d, 26 - __d, 27 - __d,
+ 28 - __d, 29 - __d, 30 - __d, 31 - __d));
#else
return vec_perm(
__a, __b,
@@ -7065,654 +7136,655 @@ static vector float __ATTRS_o_ai vec_vsldoi(vector float __a, vector float __b,
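
vec_sld and vec_vsldoi above concatenate the two inputs (first operand in the high-order bytes, big-endian element order) and return the 16 bytes starting __c bytes in; the little-endian branch reproduces those semantics with the vec_perm shown. __c must be a compile-time constant, and only its low four bits are used (the & 0x0F mask). A usage sketch, same assumptions as above:

    #include <altivec.h>

    static vector unsigned char take4(vector unsigned char a,
                                      vector unsigned char b) {
      /* Bytes 4..19 of the 32-byte value a:b, in big-endian element order:
         a[4..15] followed by b[0..3]. */
      return vec_sld(a, b, 4);
    }
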
/* vec_sll */
-static vector signed char __ATTRS_o_ai vec_sll(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sll(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_sll(vector signed char __a,
- vector unsigned short __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sll(vector signed char __a, vector unsigned short __b) {
return (vector signed char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_sll(vector signed char __a,
- vector unsigned int __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sll(vector signed char __a, vector unsigned int __b) {
return (vector signed char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sll(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sll(vector unsigned char __a, vector unsigned short __b) {
return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sll(vector unsigned char __a, vector unsigned int __b) {
return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_sll(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_sll(vector bool char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_sll(vector bool char __a,
- vector unsigned short __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_sll(vector bool char __a, vector unsigned short __b) {
return (vector bool char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_sll(vector bool char __a,
- vector unsigned int __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_sll(vector bool char __a, vector unsigned int __b) {
return (vector bool char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_sll(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sll(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_sll(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sll(vector short __a,
+ vector unsigned short __b) {
return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_sll(vector short __a,
- vector unsigned int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sll(vector short __a,
+ vector unsigned int __b) {
return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sll(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sll(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sll(vector unsigned short __a, vector unsigned int __b) {
return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_sll(vector bool short __a,
- vector unsigned char __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_sll(vector bool short __a, vector unsigned char __b) {
return (vector bool short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_sll(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_sll(vector bool short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_sll(vector bool short __a,
- vector unsigned int __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_sll(vector bool short __a, vector unsigned int __b) {
return (vector bool short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
- vector unsigned short __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
+ vector unsigned short __b) {
return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
- vector unsigned int __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
+ vector unsigned int __b) {
return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_sll(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sll(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_sll(vector int __a,
- vector unsigned short __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sll(vector int __a,
+ vector unsigned short __b) {
return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_sll(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sll(vector int __a,
+ vector unsigned int __b) {
return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sll(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sll(vector unsigned int __a, vector unsigned short __b) {
return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sll(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_sll(vector bool int __a,
- vector unsigned char __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_sll(vector bool int __a, vector unsigned char __b) {
return (vector bool int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_sll(vector bool int __a,
- vector unsigned short __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_sll(vector bool int __a, vector unsigned short __b) {
return (vector bool int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_sll(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_sll(vector bool int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
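
vec_sll shifts the entire 128-bit register left by a bit count of 0-7; the ISA expects the same 3-bit count replicated in every byte of the second operand, which vec_splat_u8 provides. Sketch, same assumptions:

    #include <altivec.h>

    static vector unsigned char shl3bits(vector unsigned char a) {
      return vec_sll(a, vec_splat_u8(3)); /* whole register << 3 bits */
    }
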
/* vec_vsl */
-static vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsl(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a,
- vector unsigned short __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsl(vector signed char __a, vector unsigned short __b) {
return (vector signed char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a,
- vector unsigned int __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsl(vector signed char __a, vector unsigned int __b) {
return (vector signed char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsl(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsl(vector unsigned char __a, vector unsigned short __b) {
return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsl(vector unsigned char __a, vector unsigned int __b) {
return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsl(vector bool char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a,
- vector unsigned short __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsl(vector bool char __a, vector unsigned short __b) {
return (vector bool char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a,
- vector unsigned int __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsl(vector bool char __a, vector unsigned int __b) {
return (vector bool char)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsl(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsl(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsl(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsl(vector short __a,
+ vector unsigned short __b) {
return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsl(vector short __a,
- vector unsigned int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsl(vector short __a,
+ vector unsigned int __b) {
return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsl(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsl(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsl(vector unsigned short __a, vector unsigned int __b) {
return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a,
- vector unsigned char __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsl(vector bool short __a, vector unsigned char __b) {
return (vector bool short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsl(vector bool short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a,
- vector unsigned int __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsl(vector bool short __a, vector unsigned int __b) {
return (vector bool short)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
- vector unsigned short __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
+ vector unsigned short __b) {
return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
- vector unsigned int __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
+ vector unsigned int __b) {
return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsl(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsl(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsl(vector int __a,
- vector unsigned short __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsl(vector int __a,
+ vector unsigned short __b) {
return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsl(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsl(vector int __a,
+ vector unsigned int __b) {
return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsl(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsl(vector unsigned int __a, vector unsigned short __b) {
return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsl(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a,
- vector unsigned char __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsl(vector bool int __a, vector unsigned char __b) {
return (vector bool int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a,
- vector unsigned short __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsl(vector bool int __a, vector unsigned short __b) {
return (vector bool int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsl(vector bool int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vsl((vector int)__a,
(vector int)__b);
}
/* vec_slo */
-static vector signed char __ATTRS_o_ai vec_slo(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_slo(vector signed char __a, vector signed char __b) {
return (vector signed char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_slo(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_slo(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_slo(vector unsigned char __a,
- vector signed char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_slo(vector unsigned char __a, vector signed char __b) {
return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_slo(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_slo(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_slo(vector short __a,
- vector signed char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_slo(vector short __a,
+ vector signed char __b) {
return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_slo(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_slo(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_slo(vector unsigned short __a,
- vector signed char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_slo(vector unsigned short __a, vector signed char __b) {
return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_slo(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_slo(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_slo(vector pixel __a,
- vector signed char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_slo(vector pixel __a,
+ vector signed char __b) {
return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_slo(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_slo(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_slo(vector int __a, vector signed char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_slo(vector int __a,
+ vector signed char __b) {
return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_slo(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_slo(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_slo(vector unsigned int __a,
- vector signed char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_slo(vector unsigned int __a, vector signed char __b) {
return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_slo(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_slo(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector float __ATTRS_o_ai vec_slo(vector float __a,
- vector signed char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_slo(vector float __a,
+ vector signed char __b) {
return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector float __ATTRS_o_ai vec_slo(vector float __a,
- vector unsigned char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_slo(vector float __a,
+ vector unsigned char __b) {
return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
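
vec_slo is the byte-granularity counterpart: it shifts the register left by 0-15 bytes, with the count taken from bits 1:4 of the shift operand's byte, so a byte count of n is passed as n << 3. Splatting the same value into every byte also lets the two combine into an arbitrary bit shift, since vslo consumes the high bits of the count and vsl the low three. Sketch, same assumptions; the literals stay within vec_splat_u8's 5-bit immediate range noted in the FIXMEs below:

    #include <altivec.h>

    static vector unsigned char shl1byte(vector unsigned char a) {
      return vec_slo(a, vec_splat_u8(1 << 3)); /* register << 1 byte */
    }

    /* 11 bits total: vec_slo consumes 11>>3 = 1 byte, vec_sll 11&7 = 3 bits. */
    static vector unsigned char shl11bits(vector unsigned char a) {
      return vec_sll(vec_slo(a, vec_splat_u8(11)), vec_splat_u8(11));
    }
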
/* vec_vslo */
-static vector signed char __ATTRS_o_ai vec_vslo(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vslo(vector signed char __a, vector signed char __b) {
return (vector signed char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_vslo(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vslo(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vslo(vector unsigned char __a,
- vector signed char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vslo(vector unsigned char __a, vector signed char __b) {
return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vslo(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vslo(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vslo(vector short __a,
- vector signed char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vslo(vector short __a,
+ vector signed char __b) {
return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vslo(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vslo(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vslo(vector unsigned short __a,
- vector signed char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vslo(vector unsigned short __a, vector signed char __b) {
return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vslo(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vslo(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a,
- vector signed char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a,
+ vector signed char __b) {
return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vslo(vector int __a,
- vector signed char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vslo(vector int __a,
+ vector signed char __b) {
return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vslo(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vslo(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vslo(vector unsigned int __a,
- vector signed char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vslo(vector unsigned int __a, vector signed char __b) {
return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vslo(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vslo(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
(vector int)__b);
}
-static vector float __ATTRS_o_ai vec_vslo(vector float __a,
- vector signed char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vslo(vector float __a,
+ vector signed char __b) {
return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
-static vector float __ATTRS_o_ai vec_vslo(vector float __a,
- vector unsigned char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vslo(vector float __a,
+ vector unsigned char __b) {
return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
}
/* vec_splat */
-static vector signed char __ATTRS_o_ai vec_splat(vector signed char __a,
- unsigned const int __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_splat(vector signed char __a, unsigned const int __b) {
return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F));
}
-static vector unsigned char __ATTRS_o_ai vec_splat(vector unsigned char __a,
- unsigned const int __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_splat(vector unsigned char __a, unsigned const int __b) {
return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F));
}
-static vector bool char __ATTRS_o_ai vec_splat(vector bool char __a,
- unsigned const int __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_splat(vector bool char __a, unsigned const int __b) {
return vec_perm(__a, __a, (vector unsigned char)(__b & 0x0F));
}
-static vector signed short __ATTRS_o_ai vec_splat(vector signed short __a,
- unsigned const int __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_splat(vector signed short __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x07) * 2;
unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
- b0, b1, b0, b1, b0, b1, b0, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1));
}
-static vector unsigned short __ATTRS_o_ai vec_splat(vector unsigned short __a,
- unsigned const int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_splat(vector unsigned short __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x07) * 2;
unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
- b0, b1, b0, b1, b0, b1, b0, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1));
}
-static vector bool short __ATTRS_o_ai vec_splat(vector bool short __a,
- unsigned const int __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_splat(vector bool short __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x07) * 2;
unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
- b0, b1, b0, b1, b0, b1, b0, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1));
}
-static vector pixel __ATTRS_o_ai vec_splat(vector pixel __a,
- unsigned const int __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_splat(vector pixel __a,
+ unsigned const int __b) {
unsigned char b0 = (__b & 0x07) * 2;
unsigned char b1 = b0 + 1;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1,
- b0, b1, b0, b1, b0, b1, b0, b1));
+ (vector unsigned char)(b0, b1, b0, b1, b0, b1, b0, b1, b0, b1,
+ b0, b1, b0, b1, b0, b1));
}
-static vector signed int __ATTRS_o_ai vec_splat(vector signed int __a,
- unsigned const int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_splat(vector signed int __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x03) * 4;
unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
- b1, b2, b3, b0, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1,
+ b2, b3, b0, b1, b2, b3));
}
-static vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int __a,
- unsigned const int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_splat(vector unsigned int __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x03) * 4;
unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
- b1, b2, b3, b0, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1,
+ b2, b3, b0, b1, b2, b3));
}
-static vector bool int __ATTRS_o_ai vec_splat(vector bool int __a,
- unsigned const int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_splat(vector bool int __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x03) * 4;
unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
- b1, b2, b3, b0, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1,
+ b2, b3, b0, b1, b2, b3));
}
-static vector float __ATTRS_o_ai vec_splat(vector float __a,
- unsigned const int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_splat(vector float __a,
+ unsigned const int __b) {
unsigned char b0 = (__b & 0x03) * 4;
unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0,
- b1, b2, b3, b0, b1, b2, b3));
+ (vector unsigned char)(b0, b1, b2, b3, b0, b1, b2, b3, b0, b1,
+ b2, b3, b0, b1, b2, b3));
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_splat(vector double __a,
- unsigned const int __b) {
+static __inline__ vector double __ATTRS_o_ai vec_splat(vector double __a,
+ unsigned const int __b) {
unsigned char b0 = (__b & 0x01) * 8;
- unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
- b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5,
+ b6 = b0 + 6, b7 = b0 + 7;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
- b0, b1, b2, b3, b4, b5, b6, b7));
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1,
+ b2, b3, b4, b5, b6, b7));
}
-static vector bool long long __ATTRS_o_ai vec_splat(vector bool long long __a,
- unsigned const int __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_splat(vector bool long long __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x01) * 8;
- unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
- b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5,
+ b6 = b0 + 6, b7 = b0 + 7;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
- b0, b1, b2, b3, b4, b5, b6, b7));
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1,
+ b2, b3, b4, b5, b6, b7));
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_splat(vector signed long long __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x01) * 8;
- unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
- b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5,
+ b6 = b0 + 6, b7 = b0 + 7;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
- b0, b1, b2, b3, b4, b5, b6, b7));
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1,
+ b2, b3, b4, b5, b6, b7));
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_splat(vector unsigned long long __a, unsigned const int __b) {
unsigned char b0 = (__b & 0x01) * 8;
- unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4,
- b5 = b0 + 5, b6 = b0 + 6, b7 = b0 + 7;
+ unsigned char b1 = b0 + 1, b2 = b0 + 2, b3 = b0 + 3, b4 = b0 + 4, b5 = b0 + 5,
+ b6 = b0 + 6, b7 = b0 + 7;
return vec_perm(__a, __a,
- (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7,
- b0, b1, b2, b3, b4, b5, b6, b7));
+ (vector unsigned char)(b0, b1, b2, b3, b4, b5, b6, b7, b0, b1,
+ b2, b3, b4, b5, b6, b7));
}
#endif
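
vec_splat replicates one element across the vector; as the overloads above show, the element index is masked to the valid range (0x0F for bytes, 0x07 for halfwords, 0x03 for words, 0x01 for doublewords). Sketch, same assumptions:

    #include <altivec.h>

    static vector float broadcast_third(vector float v) {
      return vec_splat(v, 2); /* {v[2], v[2], v[2], v[2]} */
    }
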
@@ -7720,18 +7792,18 @@ vec_splat(vector unsigned long long __a, unsigned const int __b) {
#define __builtin_altivec_vspltb vec_vspltb
-static vector signed char __ATTRS_o_ai vec_vspltb(vector signed char __a,
- unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vspltb(vector signed char __a, unsigned char __b) {
return vec_perm(__a, __a, (vector unsigned char)(__b));
}
-static vector unsigned char __ATTRS_o_ai vec_vspltb(vector unsigned char __a,
- unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vspltb(vector unsigned char __a, unsigned char __b) {
return vec_perm(__a, __a, (vector unsigned char)(__b));
}
-static vector bool char __ATTRS_o_ai vec_vspltb(vector bool char __a,
- unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vspltb(vector bool char __a,
+ unsigned char __b) {
return vec_perm(__a, __a, (vector unsigned char)(__b));
}
@@ -7739,8 +7811,8 @@ static vector bool char __ATTRS_o_ai vec_vspltb(vector bool char __a,
#define __builtin_altivec_vsplth vec_vsplth
-static vector short __ATTRS_o_ai vec_vsplth(vector short __a,
- unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsplth(vector short __a,
+ unsigned char __b) {
__b *= 2;
unsigned char b1 = __b + 1;
return vec_perm(__a, __a,
@@ -7748,8 +7820,8 @@ static vector short __ATTRS_o_ai vec_vsplth(vector short __a,
__b, b1, __b, b1, __b, b1, __b, b1));
}
-static vector unsigned short __ATTRS_o_ai vec_vsplth(vector unsigned short __a,
- unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsplth(vector unsigned short __a, unsigned char __b) {
__b *= 2;
unsigned char b1 = __b + 1;
return vec_perm(__a, __a,
@@ -7757,8 +7829,8 @@ static vector unsigned short __ATTRS_o_ai vec_vsplth(vector unsigned short __a,
__b, b1, __b, b1, __b, b1, __b, b1));
}
-static vector bool short __ATTRS_o_ai vec_vsplth(vector bool short __a,
- unsigned char __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsplth(vector bool short __a, unsigned char __b) {
__b *= 2;
unsigned char b1 = __b + 1;
return vec_perm(__a, __a,
@@ -7766,8 +7838,8 @@ static vector bool short __ATTRS_o_ai vec_vsplth(vector bool short __a,
__b, b1, __b, b1, __b, b1, __b, b1));
}
-static vector pixel __ATTRS_o_ai vec_vsplth(vector pixel __a,
- unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsplth(vector pixel __a,
+ unsigned char __b) {
__b *= 2;
unsigned char b1 = __b + 1;
return vec_perm(__a, __a,
@@ -7779,7 +7851,8 @@ static vector pixel __ATTRS_o_ai vec_vsplth(vector pixel __a,
#define __builtin_altivec_vspltw vec_vspltw
-static vector int __ATTRS_o_ai vec_vspltw(vector int __a, unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vspltw(vector int __a,
+ unsigned char __b) {
__b *= 4;
unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
return vec_perm(__a, __a,
@@ -7787,8 +7860,8 @@ static vector int __ATTRS_o_ai vec_vspltw(vector int __a, unsigned char __b) {
b1, b2, b3, __b, b1, b2, b3));
}
-static vector unsigned int __ATTRS_o_ai vec_vspltw(vector unsigned int __a,
- unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vspltw(vector unsigned int __a, unsigned char __b) {
__b *= 4;
unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
return vec_perm(__a, __a,
@@ -7796,8 +7869,8 @@ static vector unsigned int __ATTRS_o_ai vec_vspltw(vector unsigned int __a,
b1, b2, b3, __b, b1, b2, b3));
}
-static vector bool int __ATTRS_o_ai vec_vspltw(vector bool int __a,
- unsigned char __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vspltw(vector bool int __a,
+ unsigned char __b) {
__b *= 4;
unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
return vec_perm(__a, __a,
@@ -7805,8 +7878,8 @@ static vector bool int __ATTRS_o_ai vec_vspltw(vector bool int __a,
b1, b2, b3, __b, b1, b2, b3));
}
-static vector float __ATTRS_o_ai vec_vspltw(vector float __a,
- unsigned char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vspltw(vector float __a,
+ unsigned char __b) {
__b *= 4;
unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
return vec_perm(__a, __a,
@@ -7819,14 +7892,16 @@ static vector float __ATTRS_o_ai vec_vspltw(vector float __a,
#define __builtin_altivec_vspltisb vec_splat_s8
// FIXME: parameter should be treated as 5-bit signed literal
-static vector signed char __ATTRS_o_ai vec_splat_s8(signed char __a) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_splat_s8(signed char __a) {
return (vector signed char)(__a);
}
/* vec_vspltisb */
// FIXME: parameter should be treated as 5-bit signed literal
-static vector signed char __ATTRS_o_ai vec_vspltisb(signed char __a) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vspltisb(signed char __a) {
return (vector signed char)(__a);
}
@@ -7835,14 +7910,14 @@ static vector signed char __ATTRS_o_ai vec_vspltisb(signed char __a) {
#define __builtin_altivec_vspltish vec_splat_s16
// FIXME: parameter should be treated as 5-bit signed literal
-static vector short __ATTRS_o_ai vec_splat_s16(signed char __a) {
+static __inline__ vector short __ATTRS_o_ai vec_splat_s16(signed char __a) {
return (vector short)(__a);
}
/* vec_vspltish */
// FIXME: parameter should be treated as 5-bit signed literal
-static vector short __ATTRS_o_ai vec_vspltish(signed char __a) {
+static __inline__ vector short __ATTRS_o_ai vec_vspltish(signed char __a) {
return (vector short)(__a);
}
@@ -7851,81 +7926,84 @@ static vector short __ATTRS_o_ai vec_vspltish(signed char __a) {
#define __builtin_altivec_vspltisw vec_splat_s32
// FIXME: parameter should be treated as 5-bit signed literal
-static vector int __ATTRS_o_ai vec_splat_s32(signed char __a) {
+static __inline__ vector int __ATTRS_o_ai vec_splat_s32(signed char __a) {
return (vector int)(__a);
}
/* vec_vspltisw */
// FIXME: parameter should be treated as 5-bit signed literal
-static vector int __ATTRS_o_ai vec_vspltisw(signed char __a) {
+static __inline__ vector int __ATTRS_o_ai vec_vspltisw(signed char __a) {
return (vector int)(__a);
}
/* vec_splat_u8 */
// FIXME: parameter should be treated as 5-bit signed literal
-static vector unsigned char __ATTRS_o_ai vec_splat_u8(unsigned char __a) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_splat_u8(unsigned char __a) {
return (vector unsigned char)(__a);
}
/* vec_splat_u16 */
// FIXME: parameter should be treated as 5-bit signed literal
-static vector unsigned short __ATTRS_o_ai vec_splat_u16(signed char __a) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_splat_u16(signed char __a) {
return (vector unsigned short)(__a);
}
/* vec_splat_u32 */
// FIXME: parameter should be treated as 5-bit signed literal
-static vector unsigned int __ATTRS_o_ai vec_splat_u32(signed char __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_splat_u32(signed char __a) {
return (vector unsigned int)(__a);
}
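/* Illustrative note, added editorially: the vspltis{b,h,w} instructions
   behind vec_splat_s* / vec_splat_u* encode a 5-bit signed immediate,
   hence the FIXMEs above; only values in [-16, 15] are representable:
     vector signed char m = vec_splat_s8(-1);   // sixteen 0xFF bytes
   For the unsigned variants the same bit pattern is reinterpreted. */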
/* vec_sr */
-static vector signed char __ATTRS_o_ai vec_sr(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sr(vector signed char __a, vector unsigned char __b) {
vector unsigned char __res = (vector unsigned char)__a >> __b;
return (vector signed char)__res;
}
-static vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sr(vector unsigned char __a, vector unsigned char __b) {
return __a >> __b;
}
-static vector signed short __ATTRS_o_ai vec_sr(vector signed short __a,
- vector unsigned short __b) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_sr(vector signed short __a, vector unsigned short __b) {
vector unsigned short __res = (vector unsigned short)__a >> __b;
return (vector signed short)__res;
}
-static vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sr(vector unsigned short __a, vector unsigned short __b) {
return __a >> __b;
}
-static vector signed int __ATTRS_o_ai vec_sr(vector signed int __a,
- vector unsigned int __b) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_sr(vector signed int __a, vector unsigned int __b) {
vector unsigned int __res = (vector unsigned int)__a >> __b;
return (vector signed int)__res;
}
-static vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sr(vector unsigned int __a, vector unsigned int __b) {
return __a >> __b;
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_sr(vector signed long long __a, vector unsigned long long __b) {
vector unsigned long long __res = (vector unsigned long long)__a >> __b;
return (vector signed long long)__res;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
return __a >> __b;
}
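/* Illustrative sketch, added editorially: vec_sr is a logical,
   zero-filling shift applied independently to each element; the
   underlying vsr{b,h,w} instructions take the count modulo the element
   width. For example:
     vector unsigned int a = {16, 16, 16, 16};
     vector unsigned int n = {2, 2, 2, 2};
     vector unsigned int r = vec_sr(a, n);   // r == {4, 4, 4, 4}
*/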
@@ -7935,13 +8013,13 @@ vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
#define __builtin_altivec_vsrb vec_vsrb
-static vector signed char __ATTRS_o_ai vec_vsrb(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsrb(vector signed char __a, vector unsigned char __b) {
return __a >> (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vsrb(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsrb(vector unsigned char __a, vector unsigned char __b) {
return __a >> __b;
}
@@ -7949,13 +8027,13 @@ static vector unsigned char __ATTRS_o_ai vec_vsrb(vector unsigned char __a,
#define __builtin_altivec_vsrh vec_vsrh
-static vector short __ATTRS_o_ai vec_vsrh(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vsrh(vector short __a, vector unsigned short __b) {
return __a >> (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vsrh(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsrh(vector unsigned short __a, vector unsigned short __b) {
return __a >> __b;
}
@@ -7963,55 +8041,55 @@ static vector unsigned short __ATTRS_o_ai vec_vsrh(vector unsigned short __a,
#define __builtin_altivec_vsrw vec_vsrw
-static vector int __ATTRS_o_ai vec_vsrw(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsrw(vector int __a,
+ vector unsigned int __b) {
return __a >> (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vsrw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsrw(vector unsigned int __a, vector unsigned int __b) {
return __a >> __b;
}
/* vec_sra */
-static vector signed char __ATTRS_o_ai vec_sra(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sra(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_sra(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sra(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b);
}
-static vector short __ATTRS_o_ai vec_sra(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sra(vector short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vsrah(__a, (vector unsigned short)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_sra(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sra(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b);
}
-static vector int __ATTRS_o_ai vec_sra(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sra(vector int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vsraw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_sra(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sra(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b);
}
#ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_sra(vector signed long long __a, vector unsigned long long __b) {
return __a >> __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
return (vector unsigned long long)((vector signed long long)__a >> __b);
}
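/* Illustrative sketch, added editorially: vec_sra is the algebraic
   variant, shifting each element right while propagating its sign bit:
     vector signed int a = {-8, -8, -8, -8};
     vector unsigned int n = {1, 1, 1, 1};
     vector signed int r = vec_sra(a, n);   // r == {-4, -4, -4, -4}
*/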
@@ -8019,1324 +8097,1373 @@ vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
/* vec_vsrab */
-static vector signed char __ATTRS_o_ai vec_vsrab(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsrab(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsrab(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsrab(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b);
}
/* vec_vsrah */
-static vector short __ATTRS_o_ai vec_vsrah(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vsrah(vector short __a, vector unsigned short __b) {
return __builtin_altivec_vsrah(__a, (vector unsigned short)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsrah(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsrah(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b);
}
/* vec_vsraw */
-static vector int __ATTRS_o_ai vec_vsraw(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsraw(vector int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vsraw(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsraw(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsraw(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b);
}
/* vec_srl */
-static vector signed char __ATTRS_o_ai vec_srl(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_srl(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_srl(vector signed char __a,
- vector unsigned short __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_srl(vector signed char __a, vector unsigned short __b) {
return (vector signed char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_srl(vector signed char __a,
- vector unsigned int __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_srl(vector signed char __a, vector unsigned int __b) {
return (vector signed char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_srl(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_srl(vector unsigned char __a, vector unsigned short __b) {
return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_srl(vector unsigned char __a, vector unsigned int __b) {
return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_srl(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_srl(vector bool char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_srl(vector bool char __a,
- vector unsigned short __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_srl(vector bool char __a, vector unsigned short __b) {
return (vector bool char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_srl(vector bool char __a,
- vector unsigned int __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_srl(vector bool char __a, vector unsigned int __b) {
return (vector bool char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_srl(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_srl(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_srl(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_srl(vector short __a,
+ vector unsigned short __b) {
return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_srl(vector short __a,
- vector unsigned int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_srl(vector short __a,
+ vector unsigned int __b) {
return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_srl(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_srl(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_srl(vector unsigned short __a, vector unsigned int __b) {
return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_srl(vector bool short __a,
- vector unsigned char __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_srl(vector bool short __a, vector unsigned char __b) {
return (vector bool short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_srl(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_srl(vector bool short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_srl(vector bool short __a,
- vector unsigned int __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_srl(vector bool short __a, vector unsigned int __b) {
return (vector bool short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
- vector unsigned short __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
+ vector unsigned short __b) {
return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
- vector unsigned int __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
+ vector unsigned int __b) {
return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_srl(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_srl(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_srl(vector int __a,
- vector unsigned short __b) {
+static __inline__ vector int __ATTRS_o_ai vec_srl(vector int __a,
+ vector unsigned short __b) {
return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_srl(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_srl(vector int __a,
+ vector unsigned int __b) {
return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_srl(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_srl(vector unsigned int __a, vector unsigned short __b) {
return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_srl(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_srl(vector bool int __a,
- vector unsigned char __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_srl(vector bool int __a, vector unsigned char __b) {
return (vector bool int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_srl(vector bool int __a,
- vector unsigned short __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_srl(vector bool int __a, vector unsigned short __b) {
return (vector bool int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_srl(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_srl(vector bool int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
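/* Illustrative note, added editorially (semantics per the AltiVec PIM):
   unlike the per-element vec_sr, vec_srl shifts the entire 128-bit
   register right by 0-7 bits, the count coming from the low three bits
   of the last byte of __b (the PIM expects every byte of __b to hold the
   same value). For example:
     vector unsigned int a = {1, 2, 3, 4};
     vector unsigned char sh = vec_splat_u8(5);
     vector unsigned int r = vec_srl(a, sh);   // whole register >> 5 bits
*/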
/* vec_vsr */
-static vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsr(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a,
- vector unsigned short __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsr(vector signed char __a, vector unsigned short __b) {
return (vector signed char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a,
- vector unsigned int __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsr(vector signed char __a, vector unsigned int __b) {
return (vector signed char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsr(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsr(vector unsigned char __a, vector unsigned short __b) {
return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsr(vector unsigned char __a, vector unsigned int __b) {
return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsr(vector bool char __a, vector unsigned char __b) {
return (vector bool char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a,
- vector unsigned short __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsr(vector bool char __a, vector unsigned short __b) {
return (vector bool char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a,
- vector unsigned int __b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsr(vector bool char __a, vector unsigned int __b) {
return (vector bool char)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsr(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsr(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsr(vector short __a,
- vector unsigned short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsr(vector short __a,
+ vector unsigned short __b) {
return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsr(vector short __a,
- vector unsigned int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsr(vector short __a,
+ vector unsigned int __b) {
return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsr(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsr(vector unsigned short __a, vector unsigned short __b) {
return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsr(vector unsigned short __a, vector unsigned int __b) {
return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a,
- vector unsigned char __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsr(vector bool short __a, vector unsigned char __b) {
return (vector bool short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsr(vector bool short __a, vector unsigned short __b) {
return (vector bool short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a,
- vector unsigned int __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsr(vector bool short __a, vector unsigned int __b) {
return (vector bool short)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
- vector unsigned short __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
+ vector unsigned short __b) {
return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
- vector unsigned int __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
+ vector unsigned int __b) {
return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsr(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsr(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsr(vector int __a,
- vector unsigned short __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsr(vector int __a,
+ vector unsigned short __b) {
return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsr(vector int __a,
- vector unsigned int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsr(vector int __a,
+ vector unsigned int __b) {
return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsr(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsr(vector unsigned int __a, vector unsigned short __b) {
return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsr(vector unsigned int __a, vector unsigned int __b) {
return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a,
- vector unsigned char __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsr(vector bool int __a, vector unsigned char __b) {
return (vector bool int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a,
- vector unsigned short __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsr(vector bool int __a, vector unsigned short __b) {
return (vector bool int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
-static vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsr(vector bool int __a, vector unsigned int __b) {
return (vector bool int)__builtin_altivec_vsr((vector int)__a,
(vector int)__b);
}
/* vec_sro */
-static vector signed char __ATTRS_o_ai vec_sro(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sro(vector signed char __a, vector signed char __b) {
return (vector signed char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_sro(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sro(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_sro(vector unsigned char __a,
- vector signed char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sro(vector unsigned char __a, vector signed char __b) {
return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_sro(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sro(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_sro(vector short __a,
- vector signed char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sro(vector short __a,
+ vector signed char __b) {
return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_sro(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sro(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_sro(vector unsigned short __a,
- vector signed char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sro(vector unsigned short __a, vector signed char __b) {
return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_sro(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sro(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_sro(vector pixel __a,
- vector signed char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_sro(vector pixel __a,
+ vector signed char __b) {
return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_sro(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_sro(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_sro(vector int __a, vector signed char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sro(vector int __a,
+ vector signed char __b) {
return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_sro(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sro(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_sro(vector unsigned int __a,
- vector signed char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sro(vector unsigned int __a, vector signed char __b) {
return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_sro(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sro(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector float __ATTRS_o_ai vec_sro(vector float __a,
- vector signed char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_sro(vector float __a,
+ vector signed char __b) {
return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector float __ATTRS_o_ai vec_sro(vector float __a,
- vector unsigned char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_sro(vector float __a,
+ vector unsigned char __b) {
return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
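/* Illustrative idiom, added editorially (semantics per the AltiVec PIM):
   vec_sro shifts the whole register right by octets, taking the byte
   count from bits 121:124 of __b. Splatting a small literal bit count n
   into every byte therefore lets sro and srl compose into a full-register
   shift by n bits, bytes first, then the remaining bits:
     vector unsigned int a = {0x80000000, 0, 0, 0};
     vector unsigned char sh = vec_splat_u8(13);            // n = 13 bits
     vector unsigned int r = vec_srl(vec_sro(a, sh), sh);
*/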
/* vec_vsro */
-static vector signed char __ATTRS_o_ai vec_vsro(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsro(vector signed char __a, vector signed char __b) {
return (vector signed char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector signed char __ATTRS_o_ai vec_vsro(vector signed char __a,
- vector unsigned char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsro(vector signed char __a, vector unsigned char __b) {
return (vector signed char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsro(vector unsigned char __a,
- vector signed char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsro(vector unsigned char __a, vector signed char __b) {
return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsro(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsro(vector unsigned char __a, vector unsigned char __b) {
return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsro(vector short __a,
- vector signed char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsro(vector short __a,
+ vector signed char __b) {
return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector short __ATTRS_o_ai vec_vsro(vector short __a,
- vector unsigned char __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsro(vector short __a,
+ vector unsigned char __b) {
return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsro(vector unsigned short __a,
- vector signed char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsro(vector unsigned short __a, vector signed char __b) {
return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsro(vector unsigned short __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsro(vector unsigned short __a, vector unsigned char __b) {
return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a,
- vector signed char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a,
+ vector signed char __b) {
return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a,
- vector unsigned char __b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a,
+ vector unsigned char __b) {
return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsro(vector int __a,
- vector signed char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsro(vector int __a,
+ vector signed char __b) {
return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
}
-static vector int __ATTRS_o_ai vec_vsro(vector int __a,
- vector unsigned char __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsro(vector int __a,
+ vector unsigned char __b) {
return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsro(vector unsigned int __a,
- vector signed char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsro(vector unsigned int __a, vector signed char __b) {
return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsro(vector unsigned int __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsro(vector unsigned int __a, vector unsigned char __b) {
return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
(vector int)__b);
}
-static vector float __ATTRS_o_ai vec_vsro(vector float __a,
- vector signed char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vsro(vector float __a,
+ vector signed char __b) {
return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
-static vector float __ATTRS_o_ai vec_vsro(vector float __a,
- vector unsigned char __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vsro(vector float __a,
+ vector unsigned char __b) {
return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
}
/* vec_st */
-static void __ATTRS_o_ai vec_st(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, int __b,
+ vector signed char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector unsigned char __a, int __b,
+ vector unsigned char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector unsigned char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
+ vector bool char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector short __a, int __b, vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector short __a, int __b,
+ vector short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector short __a, int __b,
+ short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector unsigned short __a, int __b,
+ vector unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector unsigned short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
+ short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
+ vector bool short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector pixel __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector pixel __a, int __b,
+ short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector pixel __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector pixel __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector pixel __a, int __b, vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector pixel __a, int __b,
+ vector pixel *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector int __a, int __b,
+ vector int *__c) {
__builtin_altivec_stvx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector int __a, int __b, int *__c) {
__builtin_altivec_stvx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
+ int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
+ vector bool int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector float __a, int __b, vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector float __a, int __b,
+ vector float *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_st(vector float __a, int __b, float *__c) {
+static __inline__ void __ATTRS_o_ai vec_st(vector float __a, int __b,
+ float *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
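/* Illustrative note, added editorially (semantics per the AltiVec PIM):
   vec_st maps to stvx, which stores all 16 bytes at the effective
   address __b + __c with the low four address bits ignored, i.e. into
   the containing 16-byte-aligned quadword:
     float buf[4] __attribute__((aligned(16)));
     vector float v = {1.0f, 2.0f, 3.0f, 4.0f};
     vec_st(v, 0, buf);   // stores all 16 bytes of v into buf
*/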
/* vec_stvx */
-static void __ATTRS_o_ai vec_stvx(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector signed char __a, int __b,
+ vector signed char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector signed char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned char __a, int __b,
+ vector unsigned char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
+ vector bool char *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector short __a, int __b,
- vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector short __a, int __b,
+ vector short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector short __a, int __b,
+ short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned short __a, int __b,
+ vector unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
+ short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
+ vector bool short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
+ short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
- vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
+ vector pixel *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector int __a, int __b,
+ vector int *__c) {
__builtin_altivec_stvx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector int __a, int __b,
+ int *__c) {
__builtin_altivec_stvx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
+ int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
+ vector bool int *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector float __a, int __b,
+ vector float *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvx(vector float __a, int __b, float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvx(vector float __a, int __b,
+ float *__c) {
__builtin_altivec_stvx((vector int)__a, __b, __c);
}
/* vec_ste */
-static void __ATTRS_o_ai vec_ste(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector signed char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector unsigned char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector bool char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector bool char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector bool char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector bool char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector short __a, int __b,
+ short *__c) {
__builtin_altivec_stvehx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector unsigned short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector bool short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector bool short __a, int __b,
+ short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector bool short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector bool short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector pixel __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector pixel __a, int __b,
+ short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector pixel __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector pixel __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector int __a, int __b, int *__c) {
__builtin_altivec_stvewx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector bool int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector bool int __a, int __b,
+ int *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector bool int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector bool int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_ste(vector float __a, int __b, float *__c) {
+static __inline__ void __ATTRS_o_ai vec_ste(vector float __a, int __b,
+ float *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
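/* Illustrative note, added editorially (big-endian semantics per the
   AltiVec PIM): vec_ste stores a single element; stve{b,h,w}x write only
   the element of __a whose position matches the byte offset of the
   effective address within its quadword:
     int buf[4] __attribute__((aligned(16)));
     vector int v = {1, 2, 3, 4};
     vec_ste(v, 8, buf);   // stores element 2 of v into buf[2]
*/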
/* vec_stvebx */
-static void __ATTRS_o_ai vec_stvebx(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvebx(vector signed char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvebx(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvebx(vector unsigned char __a,
+ int __b, unsigned char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvebx(vector bool char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvebx(vector bool char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvebx(vector bool char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvebx(vector bool char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvebx((vector char)__a, __b, __c);
}
/* vec_stvehx */
-static void __ATTRS_o_ai vec_stvehx(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvehx(vector short __a, int __b,
+ short *__c) {
__builtin_altivec_stvehx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvehx(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvehx(vector unsigned short __a,
+ int __b, unsigned short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvehx(vector bool short __a, int __b,
- short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvehx(vector bool short __a, int __b,
+ short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvehx(vector bool short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvehx(vector bool short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvehx(vector pixel __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvehx(vector pixel __a, int __b,
+ short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvehx(vector pixel __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvehx(vector pixel __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvehx((vector short)__a, __b, __c);
}
/* vec_stvewx */
-static void __ATTRS_o_ai vec_stvewx(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvewx(vector int __a, int __b,
+ int *__c) {
__builtin_altivec_stvewx(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvewx(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvewx(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvewx(vector bool int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvewx(vector bool int __a, int __b,
+ int *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvewx(vector bool int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvewx(vector bool int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvewx(vector float __a, int __b, float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvewx(vector float __a, int __b,
+ float *__c) {
__builtin_altivec_stvewx((vector int)__a, __b, __c);
}
/* vec_stl */
-static void __ATTRS_o_ai vec_stl(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector signed char __a, int __b,
+ vector signed char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector signed char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b,
+ vector unsigned char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
+ vector bool char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector short __a, int __b, vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector short __a, int __b,
+ vector short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector short __a, int __b,
+ short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b,
+ vector unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
+ short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
+ vector bool short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector pixel __a, int __b,
+ short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector pixel __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector pixel __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector pixel __a, int __b,
+ vector pixel *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector int __a, int __b,
+ vector int *__c) {
__builtin_altivec_stvxl(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector int __a, int __b, int *__c) {
__builtin_altivec_stvxl(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
+ int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
+ vector bool int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector float __a, int __b, vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector float __a, int __b,
+ vector float *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stl(vector float __a, int __b, float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stl(vector float __a, int __b,
+ float *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
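A usage sketch for the vec_stl overloads above (assumes -maltivec; demo_stl is a hypothetical name). vec_stl lowers to stvxl, a full 16-byte store that also marks the touched cache line least-recently-used, a transient-data hint.

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

void demo_stl(vector int v, vector int *p) {
  vec_stl(v, 0, p); /* 16-byte store via stvxl */
}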
/* vec_stvxl */
-static void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b,
+ vector signed char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b,
+ vector unsigned char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
+ signed char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
+ unsigned char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
+ vector bool char *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector short __a, int __b,
- vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector short __a, int __b,
+ vector short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector short __a, int __b,
+ short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned short __a,
+ int __b,
+ vector unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned short __a,
+ int __b, unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
+ short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
+ vector bool short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
+ short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
+ unsigned short *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
- vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
+ vector pixel *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector int __a, int __b,
+ vector int *__c) {
__builtin_altivec_stvxl(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector int __a, int __b,
+ int *__c) {
__builtin_altivec_stvxl(__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
+ int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
+ unsigned int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
+ vector bool int *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector float __a, int __b,
+ vector float *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_stvxl(vector float __a, int __b, float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvxl(vector float __a, int __b,
+ float *__c) {
__builtin_altivec_stvxl((vector int)__a, __b, __c);
}
/* vec_sub */
-static vector signed char __ATTRS_o_ai vec_sub(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sub(vector signed char __a, vector signed char __b) {
return __a - __b;
}
-static vector signed char __ATTRS_o_ai vec_sub(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sub(vector bool char __a, vector signed char __b) {
return (vector signed char)__a - __b;
}
-static vector signed char __ATTRS_o_ai vec_sub(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sub(vector signed char __a, vector bool char __b) {
return __a - (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sub(vector unsigned char __a, vector unsigned char __b) {
return __a - __b;
}
-static vector unsigned char __ATTRS_o_ai vec_sub(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sub(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a - __b;
}
-static vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_sub(vector unsigned char __a, vector bool char __b) {
return __a - (vector unsigned char)__b;
}
-static vector short __ATTRS_o_ai vec_sub(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sub(vector short __a,
+ vector short __b) {
return __a - __b;
}
-static vector short __ATTRS_o_ai vec_sub(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sub(vector bool short __a,
+ vector short __b) {
return (vector short)__a - __b;
}
-static vector short __ATTRS_o_ai vec_sub(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_sub(vector short __a,
+ vector bool short __b) {
return __a - (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sub(vector unsigned short __a, vector unsigned short __b) {
return __a - __b;
}
-static vector unsigned short __ATTRS_o_ai vec_sub(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sub(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a - __b;
}
-static vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_sub(vector unsigned short __a, vector bool short __b) {
return __a - (vector unsigned short)__b;
}
-static vector int __ATTRS_o_ai vec_sub(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sub(vector int __a,
+ vector int __b) {
return __a - __b;
}
-static vector int __ATTRS_o_ai vec_sub(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sub(vector bool int __a,
+ vector int __b) {
return (vector int)__a - __b;
}
-static vector int __ATTRS_o_ai vec_sub(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sub(vector int __a,
+ vector bool int __b) {
return __a - (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sub(vector unsigned int __a, vector unsigned int __b) {
return __a - __b;
}
-static vector unsigned int __ATTRS_o_ai vec_sub(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sub(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a - __b;
}
-static vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sub(vector unsigned int __a, vector bool int __b) {
return __a - (vector unsigned int)__b;
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed __int128 __ATTRS_o_ai vec_sub(vector signed __int128 __a,
- vector signed __int128 __b) {
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_sub(vector signed __int128 __a, vector signed __int128 __b) {
return __a - __b;
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_sub(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __a - __b;
}
#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_sub(vector signed long long __a, vector signed long long __b) {
return __a - __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sub(vector unsigned long long __a, vector unsigned long long __b) {
return __a - __b;
}
-static vector double __ATTRS_o_ai
-vec_sub(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_sub(vector double __a,
+ vector double __b) {
return __a - __b;
}
#endif
-static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_sub(vector float __a,
+ vector float __b) {
return __a - __b;
}
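The vec_sub overload set above covers every element width plus mixed bool/element operands; a minimal sketch of the mixed case (assumes -maltivec; demo_sub is a hypothetical name):

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

vector signed int demo_sub(vector signed int x, vector bool int m) {
  return vec_sub(x, m); /* m is first converted to vector signed int */
}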
@@ -9344,33 +9471,33 @@ static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) {
#define __builtin_altivec_vsububm vec_vsububm
-static vector signed char __ATTRS_o_ai vec_vsububm(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsububm(vector signed char __a, vector signed char __b) {
return __a - __b;
}
-static vector signed char __ATTRS_o_ai vec_vsububm(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsububm(vector bool char __a, vector signed char __b) {
return (vector signed char)__a - __b;
}
-static vector signed char __ATTRS_o_ai vec_vsububm(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsububm(vector signed char __a, vector bool char __b) {
return __a - (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsububm(vector unsigned char __a, vector unsigned char __b) {
return __a - __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vsububm(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsububm(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a - __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsububm(vector unsigned char __a, vector bool char __b) {
return __a - (vector unsigned char)__b;
}
@@ -9378,33 +9505,33 @@ static vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a,
#define __builtin_altivec_vsubuhm vec_vsubuhm
-static vector short __ATTRS_o_ai vec_vsubuhm(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsubuhm(vector short __a,
+ vector short __b) {
return __a - __b;
}
-static vector short __ATTRS_o_ai vec_vsubuhm(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsubuhm(vector bool short __a,
+ vector short __b) {
return (vector short)__a - __b;
}
-static vector short __ATTRS_o_ai vec_vsubuhm(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsubuhm(vector short __a,
+ vector bool short __b) {
return __a - (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vsubuhm(vector unsigned short __a, vector unsigned short __b) {
return __a - __b;
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vsubuhm(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a - __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vsubuhm(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsubuhm(vector unsigned short __a, vector bool short __b) {
return __a - (vector unsigned short)__b;
}
@@ -9412,32 +9539,33 @@ static vector unsigned short __ATTRS_o_ai vec_vsubuhm(vector unsigned short __a,
#define __builtin_altivec_vsubuwm vec_vsubuwm
-static vector int __ATTRS_o_ai vec_vsubuwm(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsubuwm(vector int __a,
+ vector int __b) {
return __a - __b;
}
-static vector int __ATTRS_o_ai vec_vsubuwm(vector bool int __a,
- vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsubuwm(vector bool int __a,
+ vector int __b) {
return (vector int)__a - __b;
}
-static vector int __ATTRS_o_ai vec_vsubuwm(vector int __a,
- vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsubuwm(vector int __a,
+ vector bool int __b) {
return __a - (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsubuwm(vector unsigned int __a, vector unsigned int __b) {
return __a - __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsubuwm(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a - __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsubuwm(vector unsigned int __a, vector bool int __b) {
return __a - (vector unsigned int)__b;
}
@@ -9445,25 +9573,25 @@ static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a,
#define __builtin_altivec_vsubfp vec_vsubfp
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vsubfp(vector float __a, vector float __b) {
return __a - __b;
}
/* vec_subc */
-static vector unsigned int __ATTRS_o_ai vec_subc(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_subc(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vsubcuw(__a, __b);
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_subc(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __builtin_altivec_vsubcuq(__a, __b);
}
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_subc(vector signed __int128 __a, vector signed __int128 __b) {
return __builtin_altivec_vsubcuq(__a, __b);
}
@@ -9471,222 +9599,227 @@ vec_subc(vector signed __int128 __a, vector signed __int128 __b) {
/* vec_vsubcuw */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vsubcuw(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vsubcuw(__a, __b);
}
/* vec_subs */
-static vector signed char __ATTRS_o_ai vec_subs(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_subs(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vsubsbs(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_subs(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_subs(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vsubsbs((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_subs(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_subs(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vsubsbs(__a, (vector signed char)__b);
}
-static vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_subs(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vsububs(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_subs(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_subs(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vsububs((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_subs(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vsububs(__a, (vector unsigned char)__b);
}
-static vector short __ATTRS_o_ai vec_subs(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_subs(vector short __a,
+ vector short __b) {
return __builtin_altivec_vsubshs(__a, __b);
}
-static vector short __ATTRS_o_ai vec_subs(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_subs(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vsubshs((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_subs(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_subs(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vsubshs(__a, (vector short)__b);
}
-static vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_subs(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vsubuhs(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_subs(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_subs(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_subs(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b);
}
-static vector int __ATTRS_o_ai vec_subs(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_subs(vector int __a,
+ vector int __b) {
return __builtin_altivec_vsubsws(__a, __b);
}
-static vector int __ATTRS_o_ai vec_subs(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_subs(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vsubsws((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_subs(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_subs(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vsubsws(__a, (vector int)__b);
}
-static vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_subs(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vsubuws(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_subs(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_subs(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vsubuws((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_subs(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b);
}
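Unlike vec_sub's modular arithmetic, vec_subs saturates, dispatching to vsubsbs/vsubshs/vsubsws for signed and vsububs/vsubuhs/vsubuws for unsigned element types, as the builtins above show. A sketch (assumes -maltivec; demo_subs is a hypothetical name):

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

vector unsigned char demo_subs(vector unsigned char x, vector unsigned char y) {
  return vec_subs(x, y); /* clamps at 0 instead of wrapping */
}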
/* vec_vsubsbs */
-static vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsubsbs(vector signed char __a, vector signed char __b) {
return __builtin_altivec_vsubsbs(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vsubsbs(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsubsbs(vector bool char __a, vector signed char __b) {
return __builtin_altivec_vsubsbs((vector signed char)__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsubsbs(vector signed char __a, vector bool char __b) {
return __builtin_altivec_vsubsbs(__a, (vector signed char)__b);
}
/* vec_vsububs */
-static vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsububs(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vsububs(__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsububs(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsububs(vector bool char __a, vector unsigned char __b) {
return __builtin_altivec_vsububs((vector unsigned char)__a, __b);
}
-static vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsububs(vector unsigned char __a, vector bool char __b) {
return __builtin_altivec_vsububs(__a, (vector unsigned char)__b);
}
/* vec_vsubshs */
-static vector short __ATTRS_o_ai vec_vsubshs(vector short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsubshs(vector short __a,
+ vector short __b) {
return __builtin_altivec_vsubshs(__a, __b);
}
-static vector short __ATTRS_o_ai vec_vsubshs(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsubshs(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vsubshs((vector short)__a, __b);
}
-static vector short __ATTRS_o_ai vec_vsubshs(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vsubshs(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vsubshs(__a, (vector short)__b);
}
/* vec_vsubuhs */
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vsubuhs(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_vsubuhs(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_vsubuhs(vector bool short __a, vector unsigned short __b) {
return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b);
}
-static vector unsigned short __ATTRS_o_ai vec_vsubuhs(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsubuhs(vector unsigned short __a, vector bool short __b) {
return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b);
}
/* vec_vsubsws */
-static vector int __ATTRS_o_ai vec_vsubsws(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsubsws(vector int __a,
+ vector int __b) {
return __builtin_altivec_vsubsws(__a, __b);
}
-static vector int __ATTRS_o_ai vec_vsubsws(vector bool int __a,
- vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsubsws(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vsubsws((vector int)__a, __b);
}
-static vector int __ATTRS_o_ai vec_vsubsws(vector int __a,
- vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vsubsws(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vsubsws(__a, (vector int)__b);
}
/* vec_vsubuws */
-static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsubuws(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_vsubuws(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsubuws(vector bool int __a, vector unsigned int __b) {
return __builtin_altivec_vsubuws((vector unsigned int)__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsubuws(vector unsigned int __a, vector bool int __b) {
return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b);
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
/* vec_vsubuqm */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vsubuqm(vector signed __int128 __a, vector signed __int128 __b) {
return __a - __b;
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vsubuqm(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __a - __b;
}
/* vec_vsubeuqm */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vsubeuqm(vector signed __int128 __a, vector signed __int128 __b,
vector signed __int128 __c) {
return __builtin_altivec_vsubeuqm(__a, __b, __c);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vsubeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b,
vector unsigned __int128 __c) {
return __builtin_altivec_vsubeuqm(__a, __b, __c);
@@ -9694,25 +9827,25 @@ vec_vsubeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b,
/* vec_vsubcuq */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vsubcuq(vector signed __int128 __a, vector signed __int128 __b) {
return __builtin_altivec_vsubcuq(__a, __b);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vsubcuq(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return __builtin_altivec_vsubcuq(__a, __b);
}
/* vec_vsubecuq */
-static vector signed __int128 __ATTRS_o_ai
+static __inline__ vector signed __int128 __ATTRS_o_ai
vec_vsubecuq(vector signed __int128 __a, vector signed __int128 __b,
vector signed __int128 __c) {
return __builtin_altivec_vsubecuq(__a, __b, __c);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_vsubecuq(vector unsigned __int128 __a, vector unsigned __int128 __b,
vector unsigned __int128 __c) {
return __builtin_altivec_vsubecuq(__a, __b, __c);
@@ -9721,38 +9854,38 @@ vec_vsubecuq(vector unsigned __int128 __a, vector unsigned __int128 __b,
/* vec_sum4s */
-static vector int __ATTRS_o_ai vec_sum4s(vector signed char __a,
- vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a,
+ vector int __b) {
return __builtin_altivec_vsum4sbs(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai vec_sum4s(vector unsigned char __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_sum4s(vector unsigned char __a, vector unsigned int __b) {
return __builtin_altivec_vsum4ubs(__a, __b);
}
-static vector int __ATTRS_o_ai vec_sum4s(vector signed short __a,
- vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed short __a,
+ vector int __b) {
return __builtin_altivec_vsum4shs(__a, __b);
}
/* vec_vsum4sbs */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vsum4sbs(vector signed char __a, vector int __b) {
return __builtin_altivec_vsum4sbs(__a, __b);
}
/* vec_vsum4ubs */
-static vector unsigned int __attribute__((__always_inline__))
+static __inline__ vector unsigned int __attribute__((__always_inline__))
vec_vsum4ubs(vector unsigned char __a, vector unsigned int __b) {
return __builtin_altivec_vsum4ubs(__a, __b);
}
/* vec_vsum4shs */
-static vector int __attribute__((__always_inline__))
+static __inline__ vector int __attribute__((__always_inline__))
vec_vsum4shs(vector signed short __a, vector int __b) {
return __builtin_altivec_vsum4shs(__a, __b);
}
@@ -9765,7 +9898,7 @@ vec_vsum4shs(vector signed short __a, vector int __b) {
programmer wants elements 1 and 3 in both cases, so for little
endian we must perform some permutes. */
-static vector signed int __attribute__((__always_inline__))
+static __inline__ vector signed int __attribute__((__always_inline__))
vec_sum2s(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
vector int __c = (vector signed int)vec_perm(
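As the comment above notes, vsum2sws leaves its pairwise results in elements 1 and 3 on both endiannesses, so the little-endian path permutes around the builtin. A caller sketch (demo_sum2s is a hypothetical name):

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

vector signed int demo_sum2s(vector signed int a, vector signed int b) {
  return vec_sum2s(a, b); /* r[1] = a[0]+a[1]+b[1], r[3] = a[2]+a[3]+b[3] */
}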
@@ -9782,7 +9915,7 @@ vec_sum2s(vector int __a, vector int __b) {
/* vec_vsum2sws */
-static vector signed int __attribute__((__always_inline__))
+static __inline__ vector signed int __attribute__((__always_inline__))
vec_vsum2sws(vector int __a, vector int __b) {
#ifdef __LITTLE_ENDIAN__
vector int __c = (vector signed int)vec_perm(
@@ -9805,7 +9938,7 @@ vec_vsum2sws(vector int __a, vector int __b) {
wants element 3 in both cases, so for little endian we must perform
some permutes. */
-static vector signed int __attribute__((__always_inline__))
+static __inline__ vector signed int __attribute__((__always_inline__))
vec_sums(vector signed int __a, vector signed int __b) {
#ifdef __LITTLE_ENDIAN__
__b = (vector signed int)vec_splat(__b, 3);
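Likewise for the full sum-across: vsumsws deposits its single saturated result in element 3, which is why __b is splatted from element 3 on little endian. A sketch (demo_sums is a hypothetical name):

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

vector signed int demo_sums(vector signed int a, vector signed int b) {
  return vec_sums(a, b); /* r[3] = a[0]+a[1]+a[2]+a[3]+b[3], saturated */
}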
@@ -9818,7 +9951,7 @@ vec_sums(vector signed int __a, vector signed int __b) {
/* vec_vsumsws */
-static vector signed int __attribute__((__always_inline__))
+static __inline__ vector signed int __attribute__((__always_inline__))
vec_vsumsws(vector signed int __a, vector signed int __b) {
#ifdef __LITTLE_ENDIAN__
__b = (vector signed int)vec_splat(__b, 3);
@@ -9831,8 +9964,7 @@ vec_vsumsws(vector signed int __a, vector signed int __b) {
/* vec_trunc */
-static vector float __ATTRS_o_ai
-vec_trunc(vector float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_trunc(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvrspiz(__a);
#else
@@ -9841,14 +9973,14 @@ vec_trunc(vector float __a) {
}
#ifdef __VSX__
-static vector double __ATTRS_o_ai vec_trunc(vector double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_trunc(vector double __a) {
return __builtin_vsx_xvrdpiz(__a);
}
#endif
/* vec_vrfiz */
-static vector float __attribute__((__always_inline__))
+static __inline__ vector float __attribute__((__always_inline__))
vec_vrfiz(vector float __a) {
return __builtin_altivec_vrfiz(__a);
}
@@ -9858,7 +9990,8 @@ vec_vrfiz(vector float __a) {
/* The vector unpack instructions all have a big-endian bias, so for
little endian we must reverse the meanings of "high" and "low." */
-static vector short __ATTRS_o_ai vec_unpackh(vector signed char __a) {
+static __inline__ vector short __ATTRS_o_ai
+vec_unpackh(vector signed char __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupklsb((vector char)__a);
#else
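A sketch of the endian swap the comment above describes: on little endian vec_unpackh calls the "low" builtin (vupklsb) so the caller still receives the high half, sign-extended (demo_unpackh is a hypothetical name):

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

vector short demo_unpackh(vector signed char c) {
  return vec_unpackh(c); /* high eight chars, sign-extended to shorts */
}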
@@ -9866,7 +9999,8 @@ static vector short __ATTRS_o_ai vec_unpackh(vector signed char __a) {
#endif
}
-static vector bool short __ATTRS_o_ai vec_unpackh(vector bool char __a) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_unpackh(vector bool char __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool short)__builtin_altivec_vupklsb((vector char)__a);
#else
@@ -9874,7 +10008,7 @@ static vector bool short __ATTRS_o_ai vec_unpackh(vector bool char __a) {
#endif
}
-static vector int __ATTRS_o_ai vec_unpackh(vector short __a) {
+static __inline__ vector int __ATTRS_o_ai vec_unpackh(vector short __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupklsh(__a);
#else
@@ -9882,7 +10016,8 @@ static vector int __ATTRS_o_ai vec_unpackh(vector short __a) {
#endif
}
-static vector bool int __ATTRS_o_ai vec_unpackh(vector bool short __a) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_unpackh(vector bool short __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool int)__builtin_altivec_vupklsh((vector short)__a);
#else
@@ -9890,7 +10025,8 @@ static vector bool int __ATTRS_o_ai vec_unpackh(vector bool short __a) {
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_unpackh(vector pixel __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_unpackh(vector pixel __a) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a);
#else
@@ -9899,7 +10035,7 @@ static vector unsigned int __ATTRS_o_ai vec_unpackh(vector pixel __a) {
}
#ifdef __POWER8_VECTOR__
-static vector long long __ATTRS_o_ai vec_unpackh(vector int __a) {
+static __inline__ vector long long __ATTRS_o_ai vec_unpackh(vector int __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupklsw(__a);
#else
@@ -9907,7 +10043,8 @@ static vector long long __ATTRS_o_ai vec_unpackh(vector int __a) {
#endif
}
-static vector bool long long __ATTRS_o_ai vec_unpackh(vector bool int __a) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_unpackh(vector bool int __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
#else
@@ -9918,7 +10055,8 @@ static vector bool long long __ATTRS_o_ai vec_unpackh(vector bool int __a) {
/* vec_vupkhsb */
-static vector short __ATTRS_o_ai vec_vupkhsb(vector signed char __a) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vupkhsb(vector signed char __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupklsb((vector char)__a);
#else
@@ -9926,7 +10064,8 @@ static vector short __ATTRS_o_ai vec_vupkhsb(vector signed char __a) {
#endif
}
-static vector bool short __ATTRS_o_ai vec_vupkhsb(vector bool char __a) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vupkhsb(vector bool char __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool short)__builtin_altivec_vupklsb((vector char)__a);
#else
@@ -9936,7 +10075,7 @@ static vector bool short __ATTRS_o_ai vec_vupkhsb(vector bool char __a) {
/* vec_vupkhsh */
-static vector int __ATTRS_o_ai vec_vupkhsh(vector short __a) {
+static __inline__ vector int __ATTRS_o_ai vec_vupkhsh(vector short __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupklsh(__a);
#else
@@ -9944,7 +10083,8 @@ static vector int __ATTRS_o_ai vec_vupkhsh(vector short __a) {
#endif
}
-static vector bool int __ATTRS_o_ai vec_vupkhsh(vector bool short __a) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vupkhsh(vector bool short __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool int)__builtin_altivec_vupklsh((vector short)__a);
#else
@@ -9952,7 +10092,8 @@ static vector bool int __ATTRS_o_ai vec_vupkhsh(vector bool short __a) {
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_vupkhsh(vector pixel __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vupkhsh(vector pixel __a) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a);
#else
@@ -9963,7 +10104,7 @@ static vector unsigned int __ATTRS_o_ai vec_vupkhsh(vector pixel __a) {
/* vec_vupkhsw */
#ifdef __POWER8_VECTOR__
-static vector long long __ATTRS_o_ai vec_vupkhsw(vector int __a) {
+static __inline__ vector long long __ATTRS_o_ai vec_vupkhsw(vector int __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupklsw(__a);
#else
@@ -9971,7 +10112,8 @@ static vector long long __ATTRS_o_ai vec_vupkhsw(vector int __a) {
#endif
}
-static vector bool long long __ATTRS_o_ai vec_vupkhsw(vector bool int __a) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_vupkhsw(vector bool int __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
#else
@@ -9982,7 +10124,8 @@ static vector bool long long __ATTRS_o_ai vec_vupkhsw(vector bool int __a) {
/* vec_unpackl */
-static vector short __ATTRS_o_ai vec_unpackl(vector signed char __a) {
+static __inline__ vector short __ATTRS_o_ai
+vec_unpackl(vector signed char __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupkhsb((vector char)__a);
#else
@@ -9990,7 +10133,8 @@ static vector short __ATTRS_o_ai vec_unpackl(vector signed char __a) {
#endif
}
-static vector bool short __ATTRS_o_ai vec_unpackl(vector bool char __a) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_unpackl(vector bool char __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a);
#else
@@ -9998,7 +10142,7 @@ static vector bool short __ATTRS_o_ai vec_unpackl(vector bool char __a) {
#endif
}
-static vector int __ATTRS_o_ai vec_unpackl(vector short __a) {
+static __inline__ vector int __ATTRS_o_ai vec_unpackl(vector short __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupkhsh(__a);
#else
@@ -10006,7 +10150,8 @@ static vector int __ATTRS_o_ai vec_unpackl(vector short __a) {
#endif
}
-static vector bool int __ATTRS_o_ai vec_unpackl(vector bool short __a) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_unpackl(vector bool short __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a);
#else
@@ -10014,7 +10159,8 @@ static vector bool int __ATTRS_o_ai vec_unpackl(vector bool short __a) {
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_unpackl(vector pixel __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_unpackl(vector pixel __a) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a);
#else
@@ -10023,7 +10169,7 @@ static vector unsigned int __ATTRS_o_ai vec_unpackl(vector pixel __a) {
}
#ifdef __POWER8_VECTOR__
-static vector long long __ATTRS_o_ai vec_unpackl(vector int __a) {
+static __inline__ vector long long __ATTRS_o_ai vec_unpackl(vector int __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupkhsw(__a);
#else
@@ -10031,7 +10177,8 @@ static vector long long __ATTRS_o_ai vec_unpackl(vector int __a) {
#endif
}
-static vector bool long long __ATTRS_o_ai vec_unpackl(vector bool int __a) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_unpackl(vector bool int __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
#else
@@ -10042,7 +10189,8 @@ static vector bool long long __ATTRS_o_ai vec_unpackl(vector bool int __a) {
/* vec_vupklsb */
-static vector short __ATTRS_o_ai vec_vupklsb(vector signed char __a) {
+static __inline__ vector short __ATTRS_o_ai
+vec_vupklsb(vector signed char __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupkhsb((vector char)__a);
#else
@@ -10050,7 +10198,8 @@ static vector short __ATTRS_o_ai vec_vupklsb(vector signed char __a) {
#endif
}
-static vector bool short __ATTRS_o_ai vec_vupklsb(vector bool char __a) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vupklsb(vector bool char __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a);
#else
@@ -10060,7 +10209,7 @@ static vector bool short __ATTRS_o_ai vec_vupklsb(vector bool char __a) {
/* vec_vupklsh */
-static vector int __ATTRS_o_ai vec_vupklsh(vector short __a) {
+static __inline__ vector int __ATTRS_o_ai vec_vupklsh(vector short __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupkhsh(__a);
#else
@@ -10068,7 +10217,8 @@ static vector int __ATTRS_o_ai vec_vupklsh(vector short __a) {
#endif
}
-static vector bool int __ATTRS_o_ai vec_vupklsh(vector bool short __a) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vupklsh(vector bool short __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a);
#else
@@ -10076,7 +10226,8 @@ static vector bool int __ATTRS_o_ai vec_vupklsh(vector bool short __a) {
#endif
}
-static vector unsigned int __ATTRS_o_ai vec_vupklsh(vector pixel __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vupklsh(vector pixel __a) {
#ifdef __LITTLE_ENDIAN__
return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a);
#else
@@ -10087,7 +10238,7 @@ static vector unsigned int __ATTRS_o_ai vec_vupklsh(vector pixel __a) {
/* vec_vupklsw */
#ifdef __POWER8_VECTOR__
-static vector long long __ATTRS_o_ai vec_vupklsw(vector int __a) {
+static __inline__ vector long long __ATTRS_o_ai vec_vupklsw(vector int __a) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vupkhsw(__a);
#else
@@ -10095,7 +10246,8 @@ static vector long long __ATTRS_o_ai vec_vupklsw(vector int __a) {
#endif
}
-static vector bool long long __ATTRS_o_ai vec_vupklsw(vector bool int __a) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_vupklsw(vector bool int __a) {
#ifdef __LITTLE_ENDIAN__
return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
#else
@@ -10108,248 +10260,437 @@ static vector bool long long __ATTRS_o_ai vec_vupklsw(vector bool int __a) {
#ifdef __VSX__
-static vector signed int __ATTRS_o_ai vec_vsx_ld(int __a,
- const vector signed int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector bool int *__b) {
+ return (vector bool int)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector signed int *__b) {
return (vector signed int)__builtin_vsx_lxvw4x(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector signed int __ATTRS_o_ai
+vec_vsx_ld(int __a, const signed int *__b) {
+ return (vector signed int)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_vsx_ld(int __a, const vector unsigned int *__b) {
return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b);
}
-static vector float __ATTRS_o_ai vec_vsx_ld(int __a, const vector float *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vsx_ld(int __a, const unsigned int *__b) {
+ return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector float *__b) {
+ return (vector float)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector float __ATTRS_o_ai vec_vsx_ld(int __a,
+ const float *__b) {
return (vector float)__builtin_vsx_lxvw4x(__a, __b);
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vsx_ld(int __a, const vector signed long long *__b) {
return (vector signed long long)__builtin_vsx_lxvd2x(__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vsx_ld(int __a, const vector unsigned long long *__b) {
return (vector unsigned long long)__builtin_vsx_lxvd2x(__a, __b);
}
-static vector double __ATTRS_o_ai vec_vsx_ld(int __a,
- const vector double *__b) {
+static __inline__ vector double __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector double *__b) {
return (vector double)__builtin_vsx_lxvd2x(__a, __b);
}
+static __inline__ vector double __ATTRS_o_ai
+vec_vsx_ld(int __a, const double *__b) {
+ return (vector double)__builtin_vsx_lxvd2x(__a, __b);
+}
+
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector bool short *__b) {
+ return (vector bool short)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector signed short *__b) {
+ return (vector signed short)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_vsx_ld(int __a, const signed short *__b) {
+ return (vector signed short)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector unsigned short *__b) {
+ return (vector unsigned short)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vsx_ld(int __a, const unsigned short *__b) {
+ return (vector unsigned short)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector bool char __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector bool char *__b) {
+ return (vector bool char)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector signed char *__b) {
+ return (vector signed char)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vsx_ld(int __a, const signed char *__b) {
+ return (vector signed char)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector unsigned char *__b) {
+ return (vector unsigned char)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vsx_ld(int __a, const unsigned char *__b) {
+ return (vector unsigned char)__builtin_vsx_lxvw4x(__a, __b);
+}
+
#endif
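The hunk above adds scalar-pointer overloads (const signed char *, const float *, and so on) beside the existing vector-pointer ones. A caller sketch for one of them (assumes -mvsx; demo_vsx_ld is a hypothetical name):

/* Hypothetical example, not part of the patch. */
#include <altivec.h>

vector float demo_vsx_ld(const float *buf) {
  return vec_vsx_ld(0, buf); /* lxvw4x load of four floats at buf + 0 */
}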
/* vec_vsx_st */
#ifdef __VSX__
-static void __ATTRS_o_ai vec_vsx_st(vector signed int __a, int __b,
- vector signed int *__c) {
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool int __a, int __b,
+ vector bool int *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool int __a, int __b,
+ signed int *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool int __a, int __b,
+ unsigned int *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed int __a, int __b,
+ vector signed int *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed int __a, int __b,
+ signed int *__c) {
__builtin_vsx_stxvw4x((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_vsx_st(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
__builtin_vsx_stxvw4x((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_vsx_st(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned int __a, int __b,
+ unsigned int *__c) {
__builtin_vsx_stxvw4x((vector int)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_vsx_st(vector signed long long __a, int __b,
- vector signed long long *__c) {
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector float __a, int __b,
+ vector float *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector float __a, int __b,
+ float *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed long long __a,
+ int __b,
+ vector signed long long *__c) {
+ __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned long long __a,
+ int __b,
+ vector unsigned long long *__c) {
__builtin_vsx_stxvd2x((vector double)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_vsx_st(vector unsigned long long __a, int __b,
- vector unsigned long long *__c) {
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector double __a, int __b,
+ vector double *__c) {
__builtin_vsx_stxvd2x((vector double)__a, __b, __c);
}
-static void __ATTRS_o_ai vec_vsx_st(vector double __a, int __b,
- vector double *__c) {
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector double __a, int __b,
+ double *__c) {
__builtin_vsx_stxvd2x((vector double)__a, __b, __c);
}
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool short __a, int __b,
+ vector bool short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool short __a, int __b,
+ signed short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool short __a, int __b,
+ unsigned short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed short __a, int __b,
+ vector signed short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed short __a, int __b,
+ signed short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned short __a,
+ int __b,
+ vector unsigned short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned short __a,
+ int __b, unsigned short *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool char __a, int __b,
+ vector bool char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool char __a, int __b,
+ signed char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector bool char __a, int __b,
+ unsigned char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed char __a, int __b,
+ vector signed char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector signed char __a, int __b,
+ signed char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a,
+ int __b,
+ vector unsigned char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a,
+ int __b, unsigned char *__c) {
+ __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
#endif
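/* Usage sketch (illustrative): the store-side counterpart, lowering to
   stxvw4x / stxvd2x; it pairs with vec_vsx_ld for unaligned round trips.
   Helper name is hypothetical. */
static inline void store_ints(vector signed int v, signed int *p) {
  vec_vsx_st(v, 0, p); /* 16-byte store, no alignment requirement */
}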
/* vec_xor */
#define __builtin_altivec_vxor vec_xor
-static vector signed char __ATTRS_o_ai vec_xor(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_xor(vector signed char __a, vector signed char __b) {
return __a ^ __b;
}
-static vector signed char __ATTRS_o_ai vec_xor(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_xor(vector bool char __a, vector signed char __b) {
return (vector signed char)__a ^ __b;
}
-static vector signed char __ATTRS_o_ai vec_xor(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_xor(vector signed char __a, vector bool char __b) {
return __a ^ (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_xor(vector unsigned char __a, vector unsigned char __b) {
return __a ^ __b;
}
-static vector unsigned char __ATTRS_o_ai vec_xor(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_xor(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a ^ __b;
}
-static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_xor(vector unsigned char __a, vector bool char __b) {
return __a ^ (vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_xor(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_xor(vector bool char __a,
+ vector bool char __b) {
return __a ^ __b;
}
-static vector short __ATTRS_o_ai vec_xor(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_xor(vector short __a,
+ vector short __b) {
return __a ^ __b;
}
-static vector short __ATTRS_o_ai vec_xor(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_xor(vector bool short __a,
+ vector short __b) {
return (vector short)__a ^ __b;
}
-static vector short __ATTRS_o_ai vec_xor(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_xor(vector short __a,
+ vector bool short __b) {
return __a ^ (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_xor(vector unsigned short __a, vector unsigned short __b) {
return __a ^ __b;
}
-static vector unsigned short __ATTRS_o_ai vec_xor(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_xor(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a ^ __b;
}
-static vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_xor(vector unsigned short __a, vector bool short __b) {
return __a ^ (vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_xor(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_xor(vector bool short __a, vector bool short __b) {
return __a ^ __b;
}
-static vector int __ATTRS_o_ai vec_xor(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_xor(vector int __a,
+ vector int __b) {
return __a ^ __b;
}
-static vector int __ATTRS_o_ai vec_xor(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_xor(vector bool int __a,
+ vector int __b) {
return (vector int)__a ^ __b;
}
-static vector int __ATTRS_o_ai vec_xor(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_xor(vector int __a,
+ vector bool int __b) {
return __a ^ (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_xor(vector unsigned int __a, vector unsigned int __b) {
return __a ^ __b;
}
-static vector unsigned int __ATTRS_o_ai vec_xor(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_xor(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a ^ __b;
}
-static vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_xor(vector unsigned int __a, vector bool int __b) {
return __a ^ (vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_xor(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_xor(vector bool int __a,
+ vector bool int __b) {
return __a ^ __b;
}
-static vector float __ATTRS_o_ai vec_xor(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_xor(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a ^ (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_xor(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_xor(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a ^ (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_xor(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_xor(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a ^ (vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_xor(vector signed long long __a, vector signed long long __b) {
return __a ^ __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_xor(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a ^ __b;
}
-static vector signed long long __ATTRS_o_ai vec_xor(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_xor(vector signed long long __a, vector bool long long __b) {
return __a ^ (vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_xor(vector unsigned long long __a, vector unsigned long long __b) {
return __a ^ __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_xor(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a ^ __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_xor(vector unsigned long long __a, vector bool long long __b) {
return __a ^ (vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_xor(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_xor(vector bool long long __a, vector bool long long __b) {
return __a ^ __b;
}
-static vector double __ATTRS_o_ai
-vec_xor(vector double __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_xor(vector double __a,
+ vector double __b) {
return (vector double)((vector unsigned long long)__a ^
- (vector unsigned long long)__b);
+ (vector unsigned long long)__b);
}
-static vector double __ATTRS_o_ai
+static __inline__ vector double __ATTRS_o_ai
vec_xor(vector double __a, vector bool long long __b) {
return (vector double)((vector unsigned long long)__a ^
- (vector unsigned long long) __b);
+ (vector unsigned long long)__b);
}
-static vector double __ATTRS_o_ai
-vec_xor(vector bool long long __a, vector double __b) {
+static __inline__ vector double __ATTRS_o_ai vec_xor(vector bool long long __a,
+ vector double __b) {
return (vector double)((vector unsigned long long)__a ^
(vector unsigned long long)__b);
}
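/* Usage sketch (illustrative): the float overloads above XOR the raw bit
   pattern through vector unsigned int, so XORing with a splatted sign-bit
   mask negates every lane without an arithmetic instruction.  Uses the
   AltiVec parenthesized-splat form the header itself uses. */
static inline vector float negate_lanes(vector float v) {
  const vector float sign = (vector float)(vector unsigned int)(0x80000000);
  return vec_xor(v, sign);
}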
@@ -10357,160 +10698,165 @@ vec_xor(vector bool long long __a, vector double __b) {
/* vec_vxor */
-static vector signed char __ATTRS_o_ai vec_vxor(vector signed char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vxor(vector signed char __a, vector signed char __b) {
return __a ^ __b;
}
-static vector signed char __ATTRS_o_ai vec_vxor(vector bool char __a,
- vector signed char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vxor(vector bool char __a, vector signed char __b) {
return (vector signed char)__a ^ __b;
}
-static vector signed char __ATTRS_o_ai vec_vxor(vector signed char __a,
- vector bool char __b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vxor(vector signed char __a, vector bool char __b) {
return __a ^ (vector signed char)__b;
}
-static vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vxor(vector unsigned char __a, vector unsigned char __b) {
return __a ^ __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vxor(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vxor(vector bool char __a, vector unsigned char __b) {
return (vector unsigned char)__a ^ __b;
}
-static vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vxor(vector unsigned char __a, vector bool char __b) {
return __a ^ (vector unsigned char)__b;
}
-static vector bool char __ATTRS_o_ai vec_vxor(vector bool char __a,
- vector bool char __b) {
+static __inline__ vector bool char __ATTRS_o_ai vec_vxor(vector bool char __a,
+ vector bool char __b) {
return __a ^ __b;
}
-static vector short __ATTRS_o_ai vec_vxor(vector short __a, vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vxor(vector short __a,
+ vector short __b) {
return __a ^ __b;
}
-static vector short __ATTRS_o_ai vec_vxor(vector bool short __a,
- vector short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vxor(vector bool short __a,
+ vector short __b) {
return (vector short)__a ^ __b;
}
-static vector short __ATTRS_o_ai vec_vxor(vector short __a,
- vector bool short __b) {
+static __inline__ vector short __ATTRS_o_ai vec_vxor(vector short __a,
+ vector bool short __b) {
return __a ^ (vector short)__b;
}
-static vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vxor(vector unsigned short __a, vector unsigned short __b) {
return __a ^ __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vxor(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vxor(vector bool short __a, vector unsigned short __b) {
return (vector unsigned short)__a ^ __b;
}
-static vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_vxor(vector unsigned short __a, vector bool short __b) {
return __a ^ (vector unsigned short)__b;
}
-static vector bool short __ATTRS_o_ai vec_vxor(vector bool short __a,
- vector bool short __b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_vxor(vector bool short __a, vector bool short __b) {
return __a ^ __b;
}
-static vector int __ATTRS_o_ai vec_vxor(vector int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vxor(vector int __a,
+ vector int __b) {
return __a ^ __b;
}
-static vector int __ATTRS_o_ai vec_vxor(vector bool int __a, vector int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vxor(vector bool int __a,
+ vector int __b) {
return (vector int)__a ^ __b;
}
-static vector int __ATTRS_o_ai vec_vxor(vector int __a, vector bool int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_vxor(vector int __a,
+ vector bool int __b) {
return __a ^ (vector int)__b;
}
-static vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vxor(vector unsigned int __a, vector unsigned int __b) {
return __a ^ __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vxor(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vxor(vector bool int __a, vector unsigned int __b) {
return (vector unsigned int)__a ^ __b;
}
-static vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_vxor(vector unsigned int __a, vector bool int __b) {
return __a ^ (vector unsigned int)__b;
}
-static vector bool int __ATTRS_o_ai vec_vxor(vector bool int __a,
- vector bool int __b) {
+static __inline__ vector bool int __ATTRS_o_ai vec_vxor(vector bool int __a,
+ vector bool int __b) {
return __a ^ __b;
}
-static vector float __ATTRS_o_ai vec_vxor(vector float __a, vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vxor(vector float __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a ^ (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vxor(vector bool int __a,
- vector float __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vxor(vector bool int __a,
+ vector float __b) {
vector unsigned int __res =
(vector unsigned int)__a ^ (vector unsigned int)__b;
return (vector float)__res;
}
-static vector float __ATTRS_o_ai vec_vxor(vector float __a,
- vector bool int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_vxor(vector float __a,
+ vector bool int __b) {
vector unsigned int __res =
(vector unsigned int)__a ^ (vector unsigned int)__b;
return (vector float)__res;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vxor(vector signed long long __a, vector signed long long __b) {
return __a ^ __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vxor(vector bool long long __a, vector signed long long __b) {
return (vector signed long long)__a ^ __b;
}
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_vxor(vector signed long long __a, vector bool long long __b) {
return __a ^ (vector signed long long)__b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vxor(vector unsigned long long __a, vector unsigned long long __b) {
return __a ^ __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vxor(vector bool long long __a, vector unsigned long long __b) {
return (vector unsigned long long)__a ^ __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_vxor(vector unsigned long long __a, vector bool long long __b) {
return __a ^ (vector unsigned long long)__b;
}
-static vector bool long long __ATTRS_o_ai vec_vxor(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ vector bool long long __ATTRS_o_ai
+vec_vxor(vector bool long long __a, vector bool long long __b) {
return __a ^ __b;
}
#endif
@@ -10519,674 +10865,702 @@ static vector bool long long __ATTRS_o_ai vec_vxor(vector bool long long __a,
/* vec_extract */
-static signed char __ATTRS_o_ai vec_extract(vector signed char __a, int __b) {
+static __inline__ signed char __ATTRS_o_ai vec_extract(vector signed char __a,
+ int __b) {
return __a[__b];
}
-static unsigned char __ATTRS_o_ai vec_extract(vector unsigned char __a,
- int __b) {
+static __inline__ unsigned char __ATTRS_o_ai
+vec_extract(vector unsigned char __a, int __b) {
return __a[__b];
}
-static unsigned char __ATTRS_o_ai vec_extract(vector bool char __a,
- int __b) {
+static __inline__ unsigned char __ATTRS_o_ai vec_extract(vector bool char __a,
+ int __b) {
return __a[__b];
}
-static signed short __ATTRS_o_ai vec_extract(vector signed short __a, int __b) {
+static __inline__ signed short __ATTRS_o_ai vec_extract(vector signed short __a,
+ int __b) {
return __a[__b];
}
-static unsigned short __ATTRS_o_ai vec_extract(vector unsigned short __a,
- int __b) {
+static __inline__ unsigned short __ATTRS_o_ai
+vec_extract(vector unsigned short __a, int __b) {
return __a[__b];
}
-static unsigned short __ATTRS_o_ai vec_extract(vector bool short __a,
- int __b) {
+static __inline__ unsigned short __ATTRS_o_ai vec_extract(vector bool short __a,
+ int __b) {
return __a[__b];
}
-static signed int __ATTRS_o_ai vec_extract(vector signed int __a, int __b) {
+static __inline__ signed int __ATTRS_o_ai vec_extract(vector signed int __a,
+ int __b) {
return __a[__b];
}
-static unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a, int __b) {
+static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a,
+ int __b) {
return __a[__b];
}
-static unsigned int __ATTRS_o_ai vec_extract(vector bool int __a, int __b) {
+static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector bool int __a,
+ int __b) {
return __a[__b];
}
#ifdef __VSX__
-static signed long long __ATTRS_o_ai vec_extract(vector signed long long __a,
- int __b) {
+static __inline__ signed long long __ATTRS_o_ai
+vec_extract(vector signed long long __a, int __b) {
return __a[__b];
}
-static unsigned long long __ATTRS_o_ai
+static __inline__ unsigned long long __ATTRS_o_ai
vec_extract(vector unsigned long long __a, int __b) {
return __a[__b];
}
-static unsigned long long __ATTRS_o_ai vec_extract(vector bool long long __a,
- int __b) {
+static __inline__ unsigned long long __ATTRS_o_ai
+vec_extract(vector bool long long __a, int __b) {
return __a[__b];
}
-static double __ATTRS_o_ai vec_extract(vector double __a, int __b) {
+static __inline__ double __ATTRS_o_ai vec_extract(vector double __a, int __b) {
return __a[__b];
}
#endif
-static float __ATTRS_o_ai vec_extract(vector float __a, int __b) {
+static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, int __b) {
return __a[__b];
}
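/* Usage sketch (illustrative): vec_extract is plain lane subscripting
   (__a[__b]); the index must be within the element count of the type. */
static inline float first_lane(vector float v) {
  return vec_extract(v, 0); /* element 0 */
}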
/* vec_insert */
-static vector signed char __ATTRS_o_ai vec_insert(signed char __a,
- vector signed char __b,
- int __c) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_insert(signed char __a, vector signed char __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector unsigned char __ATTRS_o_ai vec_insert(unsigned char __a,
- vector unsigned char __b,
- int __c) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_insert(unsigned char __a, vector unsigned char __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector bool char __ATTRS_o_ai vec_insert(unsigned char __a,
- vector bool char __b,
- int __c) {
+static __inline__ vector bool char __ATTRS_o_ai vec_insert(unsigned char __a,
+ vector bool char __b,
+ int __c) {
__b[__c] = __a;
return __b;
}
-static vector signed short __ATTRS_o_ai vec_insert(signed short __a,
- vector signed short __b,
- int __c) {
+static __inline__ vector signed short __ATTRS_o_ai
+vec_insert(signed short __a, vector signed short __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector unsigned short __ATTRS_o_ai vec_insert(unsigned short __a,
- vector unsigned short __b,
- int __c) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_insert(unsigned short __a, vector unsigned short __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector bool short __ATTRS_o_ai vec_insert(unsigned short __a,
- vector bool short __b,
- int __c) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_insert(unsigned short __a, vector bool short __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector signed int __ATTRS_o_ai vec_insert(signed int __a,
- vector signed int __b,
- int __c) {
+static __inline__ vector signed int __ATTRS_o_ai
+vec_insert(signed int __a, vector signed int __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector unsigned int __ATTRS_o_ai vec_insert(unsigned int __a,
- vector unsigned int __b,
- int __c) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_insert(unsigned int __a, vector unsigned int __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector bool int __ATTRS_o_ai vec_insert(unsigned int __a,
- vector bool int __b,
- int __c) {
+static __inline__ vector bool int __ATTRS_o_ai vec_insert(unsigned int __a,
+ vector bool int __b,
+ int __c) {
__b[__c] = __a;
return __b;
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai
+static __inline__ vector signed long long __ATTRS_o_ai
vec_insert(signed long long __a, vector signed long long __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_insert(unsigned long long __a, vector unsigned long long __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector bool long long __ATTRS_o_ai
+static __inline__ vector bool long long __ATTRS_o_ai
vec_insert(unsigned long long __a, vector bool long long __b, int __c) {
__b[__c] = __a;
return __b;
}
-static vector double __ATTRS_o_ai vec_insert(double __a, vector double __b,
- int __c) {
+static __inline__ vector double __ATTRS_o_ai vec_insert(double __a,
+ vector double __b,
+ int __c) {
__b[__c] = __a;
return __b;
}
#endif
-static vector float __ATTRS_o_ai vec_insert(float __a, vector float __b,
- int __c) {
+static __inline__ vector float __ATTRS_o_ai vec_insert(float __a,
+ vector float __b,
+ int __c) {
__b[__c] = __a;
return __b;
}
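/* Usage sketch (illustrative): vec_insert writes one lane into a copy and
   returns the updated vector by value; the vector argument is unchanged. */
static inline vector float with_lane0(vector float v, float x) {
  return vec_insert(x, v, 0);
}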
/* vec_lvlx */
-static vector signed char __ATTRS_o_ai vec_lvlx(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvlx(int __a, const signed char *__b) {
return vec_perm(vec_ld(__a, __b), (vector signed char)(0),
vec_lvsl(__a, __b));
}
-static vector signed char __ATTRS_o_ai vec_lvlx(int __a,
- const vector signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvlx(int __a, const vector signed char *__b) {
return vec_perm(vec_ld(__a, __b), (vector signed char)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned char __ATTRS_o_ai vec_lvlx(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvlx(int __a, const unsigned char *__b) {
return vec_perm(vec_ld(__a, __b), (vector unsigned char)(0),
vec_lvsl(__a, __b));
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_lvlx(int __a, const vector unsigned char *__b) {
return vec_perm(vec_ld(__a, __b), (vector unsigned char)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool char __ATTRS_o_ai vec_lvlx(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_lvlx(int __a, const vector bool char *__b) {
return vec_perm(vec_ld(__a, __b), (vector bool char)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector short __ATTRS_o_ai vec_lvlx(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvlx(int __a,
+ const short *__b) {
return vec_perm(vec_ld(__a, __b), (vector short)(0), vec_lvsl(__a, __b));
}
-static vector short __ATTRS_o_ai vec_lvlx(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvlx(int __a,
+ const vector short *__b) {
return vec_perm(vec_ld(__a, __b), (vector short)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned short __ATTRS_o_ai vec_lvlx(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvlx(int __a, const unsigned short *__b) {
return vec_perm(vec_ld(__a, __b), (vector unsigned short)(0),
vec_lvsl(__a, __b));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_lvlx(int __a, const vector unsigned short *__b) {
return vec_perm(vec_ld(__a, __b), (vector unsigned short)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool short __ATTRS_o_ai vec_lvlx(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_lvlx(int __a, const vector bool short *__b) {
return vec_perm(vec_ld(__a, __b), (vector bool short)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector pixel __ATTRS_o_ai vec_lvlx(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_lvlx(int __a,
+ const vector pixel *__b) {
return vec_perm(vec_ld(__a, __b), (vector pixel)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector int __ATTRS_o_ai vec_lvlx(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvlx(int __a, const int *__b) {
return vec_perm(vec_ld(__a, __b), (vector int)(0), vec_lvsl(__a, __b));
}
-static vector int __ATTRS_o_ai vec_lvlx(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvlx(int __a,
+ const vector int *__b) {
return vec_perm(vec_ld(__a, __b), (vector int)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned int __ATTRS_o_ai vec_lvlx(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvlx(int __a, const unsigned int *__b) {
return vec_perm(vec_ld(__a, __b), (vector unsigned int)(0),
vec_lvsl(__a, __b));
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_lvlx(int __a, const vector unsigned int *__b) {
return vec_perm(vec_ld(__a, __b), (vector unsigned int)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool int __ATTRS_o_ai vec_lvlx(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_lvlx(int __a, const vector bool int *__b) {
return vec_perm(vec_ld(__a, __b), (vector bool int)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector float __ATTRS_o_ai vec_lvlx(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvlx(int __a,
+ const float *__b) {
return vec_perm(vec_ld(__a, __b), (vector float)(0), vec_lvsl(__a, __b));
}
-static vector float __ATTRS_o_ai vec_lvlx(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvlx(int __a,
+ const vector float *__b) {
return vec_perm(vec_ld(__a, __b), (vector float)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
/* vec_lvlxl */
-static vector signed char __ATTRS_o_ai vec_lvlxl(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvlxl(int __a, const signed char *__b) {
return vec_perm(vec_ldl(__a, __b), (vector signed char)(0),
vec_lvsl(__a, __b));
}
-static vector signed char __ATTRS_o_ai
+static __inline__ vector signed char __ATTRS_o_ai
vec_lvlxl(int __a, const vector signed char *__b) {
return vec_perm(vec_ldl(__a, __b), (vector signed char)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned char __ATTRS_o_ai vec_lvlxl(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvlxl(int __a, const unsigned char *__b) {
return vec_perm(vec_ldl(__a, __b), (vector unsigned char)(0),
vec_lvsl(__a, __b));
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_lvlxl(int __a, const vector unsigned char *__b) {
return vec_perm(vec_ldl(__a, __b), (vector unsigned char)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool char __ATTRS_o_ai vec_lvlxl(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_lvlxl(int __a, const vector bool char *__b) {
return vec_perm(vec_ldl(__a, __b), (vector bool char)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector short __ATTRS_o_ai vec_lvlxl(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvlxl(int __a,
+ const short *__b) {
return vec_perm(vec_ldl(__a, __b), (vector short)(0), vec_lvsl(__a, __b));
}
-static vector short __ATTRS_o_ai vec_lvlxl(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvlxl(int __a,
+ const vector short *__b) {
return vec_perm(vec_ldl(__a, __b), (vector short)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned short __ATTRS_o_ai vec_lvlxl(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvlxl(int __a, const unsigned short *__b) {
return vec_perm(vec_ldl(__a, __b), (vector unsigned short)(0),
vec_lvsl(__a, __b));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_lvlxl(int __a, const vector unsigned short *__b) {
return vec_perm(vec_ldl(__a, __b), (vector unsigned short)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool short __ATTRS_o_ai vec_lvlxl(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_lvlxl(int __a, const vector bool short *__b) {
return vec_perm(vec_ldl(__a, __b), (vector bool short)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector pixel __ATTRS_o_ai vec_lvlxl(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_lvlxl(int __a,
+ const vector pixel *__b) {
return vec_perm(vec_ldl(__a, __b), (vector pixel)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector int __ATTRS_o_ai vec_lvlxl(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvlxl(int __a, const int *__b) {
return vec_perm(vec_ldl(__a, __b), (vector int)(0), vec_lvsl(__a, __b));
}
-static vector int __ATTRS_o_ai vec_lvlxl(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvlxl(int __a,
+ const vector int *__b) {
return vec_perm(vec_ldl(__a, __b), (vector int)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned int __ATTRS_o_ai vec_lvlxl(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvlxl(int __a, const unsigned int *__b) {
return vec_perm(vec_ldl(__a, __b), (vector unsigned int)(0),
vec_lvsl(__a, __b));
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_lvlxl(int __a, const vector unsigned int *__b) {
return vec_perm(vec_ldl(__a, __b), (vector unsigned int)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool int __ATTRS_o_ai vec_lvlxl(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_lvlxl(int __a, const vector bool int *__b) {
return vec_perm(vec_ldl(__a, __b), (vector bool int)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector float __ATTRS_o_ai vec_lvlxl(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvlxl(int __a,
+ const float *__b) {
return vec_perm(vec_ldl(__a, __b), (vector float)(0), vec_lvsl(__a, __b));
}
-static vector float __ATTRS_o_ai vec_lvlxl(int __a, vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvlxl(int __a,
+ vector float *__b) {
return vec_perm(vec_ldl(__a, __b), (vector float)(0),
vec_lvsl(__a, (unsigned char *)__b));
}
/* vec_lvrx */
-static vector signed char __ATTRS_o_ai vec_lvrx(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvrx(int __a, const signed char *__b) {
return vec_perm((vector signed char)(0), vec_ld(__a, __b),
vec_lvsl(__a, __b));
}
-static vector signed char __ATTRS_o_ai vec_lvrx(int __a,
- const vector signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvrx(int __a, const vector signed char *__b) {
return vec_perm((vector signed char)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned char __ATTRS_o_ai vec_lvrx(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvrx(int __a, const unsigned char *__b) {
return vec_perm((vector unsigned char)(0), vec_ld(__a, __b),
vec_lvsl(__a, __b));
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_lvrx(int __a, const vector unsigned char *__b) {
return vec_perm((vector unsigned char)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool char __ATTRS_o_ai vec_lvrx(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_lvrx(int __a, const vector bool char *__b) {
return vec_perm((vector bool char)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector short __ATTRS_o_ai vec_lvrx(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvrx(int __a,
+ const short *__b) {
return vec_perm((vector short)(0), vec_ld(__a, __b), vec_lvsl(__a, __b));
}
-static vector short __ATTRS_o_ai vec_lvrx(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvrx(int __a,
+ const vector short *__b) {
return vec_perm((vector short)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned short __ATTRS_o_ai vec_lvrx(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvrx(int __a, const unsigned short *__b) {
return vec_perm((vector unsigned short)(0), vec_ld(__a, __b),
vec_lvsl(__a, __b));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_lvrx(int __a, const vector unsigned short *__b) {
return vec_perm((vector unsigned short)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool short __ATTRS_o_ai vec_lvrx(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_lvrx(int __a, const vector bool short *__b) {
return vec_perm((vector bool short)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector pixel __ATTRS_o_ai vec_lvrx(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_lvrx(int __a,
+ const vector pixel *__b) {
return vec_perm((vector pixel)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector int __ATTRS_o_ai vec_lvrx(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvrx(int __a, const int *__b) {
return vec_perm((vector int)(0), vec_ld(__a, __b), vec_lvsl(__a, __b));
}
-static vector int __ATTRS_o_ai vec_lvrx(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvrx(int __a,
+ const vector int *__b) {
return vec_perm((vector int)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned int __ATTRS_o_ai vec_lvrx(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvrx(int __a, const unsigned int *__b) {
return vec_perm((vector unsigned int)(0), vec_ld(__a, __b),
vec_lvsl(__a, __b));
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_lvrx(int __a, const vector unsigned int *__b) {
return vec_perm((vector unsigned int)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool int __ATTRS_o_ai vec_lvrx(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_lvrx(int __a, const vector bool int *__b) {
return vec_perm((vector bool int)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector float __ATTRS_o_ai vec_lvrx(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvrx(int __a,
+ const float *__b) {
return vec_perm((vector float)(0), vec_ld(__a, __b), vec_lvsl(__a, __b));
}
-static vector float __ATTRS_o_ai vec_lvrx(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvrx(int __a,
+ const vector float *__b) {
return vec_perm((vector float)(0), vec_ld(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
/* vec_lvrxl */
-static vector signed char __ATTRS_o_ai vec_lvrxl(int __a,
- const signed char *__b) {
+static __inline__ vector signed char __ATTRS_o_ai
+vec_lvrxl(int __a, const signed char *__b) {
return vec_perm((vector signed char)(0), vec_ldl(__a, __b),
vec_lvsl(__a, __b));
}
-static vector signed char __ATTRS_o_ai
+static __inline__ vector signed char __ATTRS_o_ai
vec_lvrxl(int __a, const vector signed char *__b) {
return vec_perm((vector signed char)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned char __ATTRS_o_ai vec_lvrxl(int __a,
- const unsigned char *__b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_lvrxl(int __a, const unsigned char *__b) {
return vec_perm((vector unsigned char)(0), vec_ldl(__a, __b),
vec_lvsl(__a, __b));
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_lvrxl(int __a, const vector unsigned char *__b) {
return vec_perm((vector unsigned char)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool char __ATTRS_o_ai vec_lvrxl(int __a,
- const vector bool char *__b) {
+static __inline__ vector bool char __ATTRS_o_ai
+vec_lvrxl(int __a, const vector bool char *__b) {
return vec_perm((vector bool char)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector short __ATTRS_o_ai vec_lvrxl(int __a, const short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvrxl(int __a,
+ const short *__b) {
return vec_perm((vector short)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b));
}
-static vector short __ATTRS_o_ai vec_lvrxl(int __a, const vector short *__b) {
+static __inline__ vector short __ATTRS_o_ai vec_lvrxl(int __a,
+ const vector short *__b) {
return vec_perm((vector short)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned short __ATTRS_o_ai vec_lvrxl(int __a,
- const unsigned short *__b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_lvrxl(int __a, const unsigned short *__b) {
return vec_perm((vector unsigned short)(0), vec_ldl(__a, __b),
vec_lvsl(__a, __b));
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_lvrxl(int __a, const vector unsigned short *__b) {
return vec_perm((vector unsigned short)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool short __ATTRS_o_ai vec_lvrxl(int __a,
- const vector bool short *__b) {
+static __inline__ vector bool short __ATTRS_o_ai
+vec_lvrxl(int __a, const vector bool short *__b) {
return vec_perm((vector bool short)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector pixel __ATTRS_o_ai vec_lvrxl(int __a, const vector pixel *__b) {
+static __inline__ vector pixel __ATTRS_o_ai vec_lvrxl(int __a,
+ const vector pixel *__b) {
return vec_perm((vector pixel)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector int __ATTRS_o_ai vec_lvrxl(int __a, const int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvrxl(int __a, const int *__b) {
return vec_perm((vector int)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b));
}
-static vector int __ATTRS_o_ai vec_lvrxl(int __a, const vector int *__b) {
+static __inline__ vector int __ATTRS_o_ai vec_lvrxl(int __a,
+ const vector int *__b) {
return vec_perm((vector int)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector unsigned int __ATTRS_o_ai vec_lvrxl(int __a,
- const unsigned int *__b) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_lvrxl(int __a, const unsigned int *__b) {
return vec_perm((vector unsigned int)(0), vec_ldl(__a, __b),
vec_lvsl(__a, __b));
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_lvrxl(int __a, const vector unsigned int *__b) {
return vec_perm((vector unsigned int)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector bool int __ATTRS_o_ai vec_lvrxl(int __a,
- const vector bool int *__b) {
+static __inline__ vector bool int __ATTRS_o_ai
+vec_lvrxl(int __a, const vector bool int *__b) {
return vec_perm((vector bool int)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
-static vector float __ATTRS_o_ai vec_lvrxl(int __a, const float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvrxl(int __a,
+ const float *__b) {
return vec_perm((vector float)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b));
}
-static vector float __ATTRS_o_ai vec_lvrxl(int __a, const vector float *__b) {
+static __inline__ vector float __ATTRS_o_ai vec_lvrxl(int __a,
+ const vector float *__b) {
return vec_perm((vector float)(0), vec_ldl(__a, __b),
vec_lvsl(__a, (unsigned char *)__b));
}
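/* Usage sketch (illustrative): lvlx/lvrx implement the classic split
   unaligned load.  vec_lvlx(0, p) returns the bytes from p up to the next
   16-byte boundary, left-aligned and zero-filled; vec_lvrx(16, p) returns
   the remaining bytes from that boundary, right-aligned and zero-filled;
   OR-ing the two halves reassembles the full unaligned quadword.  The *l
   variants (vec_lvlxl / vec_lvrxl) are identical but load with the
   LRU-hinted lvxl.  Helper name is hypothetical. */
static inline vector unsigned char load_misaligned(const unsigned char *p) {
  return vec_or(vec_lvlx(0, p), vec_lvrx(16, p));
}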
/* vec_stvlx */
-static void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b,
+ signed char *__c) {
return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b,
+ vector signed char *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b,
+ unsigned char *__c) {
return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b,
+ vector unsigned char *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector bool char __a, int __b,
+ vector bool char *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector short __a, int __b,
+ short *__c) {
return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlx(vector short __a, int __b,
- vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector short __a, int __b,
+ vector short *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned short __a,
+ int __b, unsigned short *__c) {
return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlx(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned short __a,
+ int __b,
+ vector unsigned short *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector bool short __a, int __b,
+ vector bool short *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector pixel __a, int __b,
- vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector pixel __a, int __b,
+ vector pixel *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector int __a, int __b,
+ int *__c) {
return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlx(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector int __a, int __b,
+ vector int *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b,
+ unsigned int *__c) {
return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector bool int __a, int __b,
+ vector bool int *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlx(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlx(vector float __a, int __b,
+ vector float *__c) {
return vec_st(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
@@ -11194,111 +11568,116 @@ static void __ATTRS_o_ai vec_stvlx(vector float __a, int __b,
/* vec_stvlxl */
-static void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b,
+ signed char *__c) {
return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b,
+ vector signed char *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a,
+ int __b, unsigned char *__c) {
return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a,
+ int __b,
+ vector unsigned char *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector bool char __a, int __b,
+ vector bool char *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b,
+ short *__c) {
return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b,
- vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b,
+ vector short *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a,
+ int __b, unsigned short *__c) {
return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a,
+ int __b,
+ vector unsigned short *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector bool short __a, int __b,
+ vector bool short *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector pixel __a, int __b,
- vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector pixel __a, int __b,
+ vector pixel *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b,
+ int *__c) {
return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b,
+ vector int *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b,
+ unsigned int *__c) {
return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector bool int __a, int __b,
+ vector bool int *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvlxl(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvlxl(vector float __a, int __b,
+ vector float *__c) {
return vec_stl(
vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
@@ -11306,111 +11685,115 @@ static void __ATTRS_o_ai vec_stvlxl(vector float __a, int __b,
/* vec_stvrx */
-static void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b,
+ signed char *__c) {
return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b,
+ vector signed char *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b,
+ unsigned char *__c) {
return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b,
+ vector unsigned char *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector bool char __a, int __b,
+ vector bool char *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector short __a, int __b,
+ short *__c) {
return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrx(vector short __a, int __b,
- vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector short __a, int __b,
+ vector short *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned short __a,
+ int __b, unsigned short *__c) {
return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrx(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned short __a,
+ int __b,
+ vector unsigned short *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector bool short __a, int __b,
+ vector bool short *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector pixel __a, int __b,
- vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector pixel __a, int __b,
+ vector pixel *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector int __a, int __b,
+ int *__c) {
return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrx(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector int __a, int __b,
+ vector int *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b,
+ unsigned int *__c) {
return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector bool int __a, int __b,
+ vector bool int *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrx(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrx(vector float __a, int __b,
+ vector float *__c) {
return vec_st(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
@@ -11418,111 +11801,116 @@ static void __ATTRS_o_ai vec_stvrx(vector float __a, int __b,
/* vec_stvrxl */
-static void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b,
- signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b,
+ signed char *__c) {
return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b,
- vector signed char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b,
+ vector signed char *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a, int __b,
- unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a,
+ int __b, unsigned char *__c) {
return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a, int __b,
- vector unsigned char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a,
+ int __b,
+ vector unsigned char *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector bool char __a, int __b,
- vector bool char *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector bool char __a, int __b,
+ vector bool char *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b, short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b,
+ short *__c) {
return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b,
- vector short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b,
+ vector short *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a, int __b,
- unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a,
+ int __b, unsigned short *__c) {
return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a, int __b,
- vector unsigned short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a,
+ int __b,
+ vector unsigned short *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector bool short __a, int __b,
- vector bool short *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector bool short __a, int __b,
+ vector bool short *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector pixel __a, int __b,
- vector pixel *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector pixel __a, int __b,
+ vector pixel *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b, int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b,
+ int *__c) {
return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b, vector int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b,
+ vector int *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b,
- unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b,
+ unsigned int *__c) {
return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
__c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b,
- vector unsigned int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b,
+ vector unsigned int *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector bool int __a, int __b,
- vector bool int *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector bool int __a, int __b,
+ vector bool int *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
}
-static void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
- vector float *__c) {
+static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
+ vector float *__c) {
return vec_stl(
vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
__b, __c);
@@ -11530,45 +11918,47 @@ static void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
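/* A minimal usage sketch, not from the header: vec_stvrx stores the
   low ("right") part of a vector at the unaligned effective address
   __b + __c, emulated above by a read-merge-write of the containing
   aligned quadword (vec_lvlx + vec_perm + vec_st); vec_stvrxl is the
   same store with the LRU cache hint of vec_stl. The conventional
   pairing for a full unaligned 16-byte store, assuming <altivec.h>
   and the hypothetical names store16/v/p: */
#include <altivec.h>

static void store16(vector unsigned char v, unsigned char *p) {
  vec_stvlx(v, 0, p);  /* bytes from p up to the next 16-byte boundary */
  vec_stvrx(v, 16, p); /* the remaining bytes beyond that boundary */
}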
/* vec_promote */
-static vector signed char __ATTRS_o_ai vec_promote(signed char __a, int __b) {
+static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a,
+ int __b) {
vector signed char __res = (vector signed char)(0);
__res[__b] = __a;
return __res;
}
-static vector unsigned char __ATTRS_o_ai vec_promote(unsigned char __a,
- int __b) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_promote(unsigned char __a, int __b) {
vector unsigned char __res = (vector unsigned char)(0);
__res[__b] = __a;
return __res;
}
-static vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
+static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
vector short __res = (vector short)(0);
__res[__b] = __a;
return __res;
}
-static vector unsigned short __ATTRS_o_ai vec_promote(unsigned short __a,
- int __b) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_promote(unsigned short __a, int __b) {
vector unsigned short __res = (vector unsigned short)(0);
__res[__b] = __a;
return __res;
}
-static vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
+static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
vector int __res = (vector int)(0);
__res[__b] = __a;
return __res;
}
-static vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a, int __b) {
+static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a,
+ int __b) {
vector unsigned int __res = (vector unsigned int)(0);
__res[__b] = __a;
return __res;
}
-static vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
+static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
vector float __res = (vector float)(0);
__res[__b] = __a;
return __res;
@@ -11576,56 +11966,63 @@ static vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
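/* Sketch: vec_promote places a scalar into element __b of a fresh
   vector. This implementation zero-fills the other elements, but the
   interface leaves them unspecified, so portable code should not rely
   on the zeros. Hypothetical usage, assuming <altivec.h>: */
#include <altivec.h>

static vector int lane2(int x) {
  return vec_promote(x, 2); /* element 2 holds x */
}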
/* vec_splats */
-static vector signed char __ATTRS_o_ai vec_splats(signed char __a) {
+static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a) {
return (vector signed char)(__a);
}
-static vector unsigned char __ATTRS_o_ai vec_splats(unsigned char __a) {
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_splats(unsigned char __a) {
return (vector unsigned char)(__a);
}
-static vector short __ATTRS_o_ai vec_splats(short __a) {
+static __inline__ vector short __ATTRS_o_ai vec_splats(short __a) {
return (vector short)(__a);
}
-static vector unsigned short __ATTRS_o_ai vec_splats(unsigned short __a) {
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_splats(unsigned short __a) {
return (vector unsigned short)(__a);
}
-static vector int __ATTRS_o_ai vec_splats(int __a) { return (vector int)(__a); }
+static __inline__ vector int __ATTRS_o_ai vec_splats(int __a) {
+ return (vector int)(__a);
+}
-static vector unsigned int __ATTRS_o_ai vec_splats(unsigned int __a) {
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_splats(unsigned int __a) {
return (vector unsigned int)(__a);
}
#ifdef __VSX__
-static vector signed long long __ATTRS_o_ai vec_splats(signed long long __a) {
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_splats(signed long long __a) {
return (vector signed long long)(__a);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_splats(unsigned long long __a) {
return (vector unsigned long long)(__a);
}
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
-static vector signed __int128 __ATTRS_o_ai vec_splats(signed __int128 __a) {
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_splats(signed __int128 __a) {
return (vector signed __int128)(__a);
}
-static vector unsigned __int128 __ATTRS_o_ai
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_splats(unsigned __int128 __a) {
return (vector unsigned __int128)(__a);
}
#endif
-static vector double __ATTRS_o_ai vec_splats(double __a) {
+static __inline__ vector double __ATTRS_o_ai vec_splats(double __a) {
return (vector double)(__a);
}
#endif
-static vector float __ATTRS_o_ai vec_splats(float __a) {
+static __inline__ vector float __ATTRS_o_ai vec_splats(float __a) {
return (vector float)(__a);
}
@@ -11633,168 +12030,177 @@ static vector float __ATTRS_o_ai vec_splats(float __a) {
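/* Sketch: vec_splats broadcasts one scalar into every element, with
   the result type picked by overload resolution on the argument.
   Assuming <altivec.h>: */
#include <altivec.h>

static vector float splat4(float x) {
  return vec_splats(x); /* { x, x, x, x } */
}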
/* vec_all_eq */
-static int __ATTRS_o_ai vec_all_eq(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_eq(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector pixel __a, vector pixel __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector pixel __a,
+ vector pixel __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector int __a, vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_eq(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
(vector int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_all_eq(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_eq(vector long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, (vector long long)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
- vector long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
+ vector long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
(vector long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_all_eq(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpeqsp_p(__CR6_LT, __a, __b);
#else
@@ -11803,160 +12209,169 @@ static int __ATTRS_o_ai vec_all_eq(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_eq(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpeqdp_p(__CR6_LT, __a, __b);
}
#endif
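/* Sketch: unlike vec_cmpeq, the vec_all_* predicates return a plain
   int. vec_all_eq answers 1 exactly when every element pair compares
   equal (the __CR6_LT condition of the dot-form compare above).
   Assuming <altivec.h>: */
#include <altivec.h>

static int blocks_equal(vector unsigned char a, vector unsigned char b) {
  return vec_all_eq(a, b); /* 1 iff all 16 byte lanes match */
}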
/* vec_all_ge */
-static int __ATTRS_o_ai vec_all_ge(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, (vector signed char)__b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, (vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, (vector short)__b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b,
__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, (vector int)__b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b,
(vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, (vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b,
(vector unsigned int)__a);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_all_ge(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, (vector signed long long)__b,
__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __b, __a);
}
-static int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b,
__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b,
(vector unsigned long long)__a);
}
#endif
-static int __ATTRS_o_ai vec_all_ge(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgesp_p(__CR6_LT, __a, __b);
#else
@@ -11965,160 +12380,169 @@ static int __ATTRS_o_ai vec_all_ge(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_ge(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ge(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgedp_p(__CR6_LT, __a, __b);
}
#endif
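/* Sketch: there is no >= compare instruction, so vec_all_ge(a, b) is
   phrased above as "no lane of b is greater than a": vcmpgt with the
   operands swapped, tested against __CR6_EQ. Assuming <altivec.h>: */
#include <altivec.h>

static int all_nonnegative(vector int v) {
  return vec_all_ge(v, vec_splats(0)); /* every lane >= 0 */
}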
/* vec_all_gt */
-static int __ATTRS_o_ai vec_all_gt(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, (vector signed char)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, (vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a,
__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, (vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a,
(vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a,
(vector unsigned int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_all_gt(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a, __b);
}
-static int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a,
__b);
}
-static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a,
(vector unsigned long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_all_gt(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgtsp_p(__CR6_LT, __a, __b);
#else
@@ -12127,168 +12551,177 @@ static int __ATTRS_o_ai vec_all_gt(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_gt(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_gt(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgtdp_p(__CR6_LT, __a, __b);
}
#endif
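/* Sketch: every predicate also has vector bool overloads; as the code
   above shows, the bool operand is bit-cast to the matching element
   width before the compare. A plain unsigned use, assuming
   <altivec.h>: */
#include <altivec.h>

static int strictly_above(vector unsigned short a, vector unsigned short b) {
  return vec_all_gt(a, b); /* 1 iff a[i] > b[i] in all 8 lanes */
}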
/* vec_all_in */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_all_in(vector float __a, vector float __b) {
return __builtin_altivec_vcmpbfp_p(__CR6_EQ, __a, __b);
}
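/* Sketch: vec_all_in wraps vcmpbfp, the float "bounds" compare, and
   answers whether every a[i] lies in [-b[i], +b[i]]; b is expected to
   hold non-negative values. Assuming <altivec.h>: */
#include <altivec.h>

static int all_within_unit(vector float a) {
  return vec_all_in(a, vec_splats(1.0f)); /* all |a[i]| <= 1.0f */
}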
/* vec_all_le */
-static int __ATTRS_o_ai vec_all_le(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, (vector signed char)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, (vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a,
__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, (vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a,
(vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a,
(vector unsigned int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_all_le(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_le(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a,
__b);
}
-static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a,
(vector unsigned long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_all_le(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgesp_p(__CR6_LT, __b, __a);
#else
@@ -12297,161 +12730,170 @@ static int __ATTRS_o_ai vec_all_le(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_le(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_le(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgedp_p(__CR6_LT, __b, __a);
}
#endif
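/* Sketch: <= is likewise derived from the greater-than compare:
   vec_all_le(a, b) requires vcmpgt(a, b) to come up empty (__CR6_EQ),
   i.e. no lane of a exceeds b. Assuming <altivec.h>: */
#include <altivec.h>

static int all_at_most(vector unsigned char v, unsigned char limit) {
  return vec_all_le(v, vec_splats(limit)); /* every byte <= limit */
}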
/* vec_all_lt */
-static int __ATTRS_o_ai vec_all_lt(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT, (vector signed char)__b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, (vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT, (vector short)__b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b,
__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT, (vector int)__b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b,
(vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, (vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b,
(vector unsigned int)__a);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_all_lt(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, __b, __a);
}
-static int __ATTRS_o_ai vec_all_lt(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT, (vector signed long long)__b,
__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b,
__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, __b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b,
(vector unsigned long long)__a);
}
#endif
-static int __ATTRS_o_ai vec_all_lt(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgtsp_p(__CR6_LT, __b, __a);
#else
@@ -12460,14 +12902,15 @@ static int __ATTRS_o_ai vec_all_lt(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_lt(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_lt(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgtdp_p(__CR6_LT, __b, __a);
}
#endif
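/* Sketch: vec_all_lt(a, b) swaps the operands instead: all a[i] < b[i]
   exactly when all b[i] > a[i], so the same vcmpgt builtin is reused
   under __CR6_LT. Assuming <altivec.h>: */
#include <altivec.h>

static int all_below(vector short v, short bound) {
  return vec_all_lt(v, vec_splats(bound)); /* every lane < bound */
}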
/* vec_all_nan */
-static int __ATTRS_o_ai vec_all_nan(vector float __a) {
+static __inline__ int __ATTRS_o_ai vec_all_nan(vector float __a) {
#ifdef __VSX__
return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ, __a, __a);
#else
@@ -12476,176 +12919,185 @@ static int __ATTRS_o_ai vec_all_nan(vector float __a) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_nan(vector double __a) {
+static __inline__ int __ATTRS_o_ai vec_all_nan(vector double __a) {
return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __a);
}
#endif
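/* Sketch: vec_all_nan leans on NaN != NaN: comparing __a against
   itself and requiring "no lane compared equal" (__CR6_EQ) is true
   exactly when every lane is a NaN. Assuming <altivec.h>: */
#include <altivec.h>

static int every_lane_nan(vector float v) {
  return vec_all_nan(v); /* 1 iff all four floats are NaN */
}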
/* vec_all_ne */
-static int __ATTRS_o_ai vec_all_ne(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_ne(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector pixel __a, vector pixel __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector pixel __a,
+ vector pixel __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector int __a, vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_ne(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
(vector int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_all_ne(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, __a, __b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
(vector signed long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_all_ne(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b);
#else
@@ -12654,15 +13106,16 @@ static int __ATTRS_o_ai vec_all_ne(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_all_ne(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ, __a, __b);
}
#endif
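For context, not part of the diff itself: the change in this hunk is purely mechanical. Every static overload gains an explicit __inline__ on top of its existing attributes (__ATTRS_o_ai carries overloadable/always_inline), which appears intended to make these header-only definitions behave as proper inline functions even in modes where the attribute alone did not suffice (e.g. -O0 or GNU89 inline handling). Semantically, each vec_all_ne overload forwards to a vcmpequ*_p predicate builtin with __CR6_EQ, which tests the CR6 "all compares false" bit: the predicate holds only when no lane pair compares equal. A minimal usage sketch, assuming a PowerPC target compiled with clang -maltivec (the variables are illustrative):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed char a = {0, 1, 2,  3,  4,  5,  6,  7,
                          8, 9, 10, 11, 12, 13, 14, 15};
  vector signed char b = vec_add(a, vec_splat_s8(1)); /* every lane differs */
  /* true only when no lane of a equals the corresponding lane of b */
  printf("all_ne: %d\n", vec_all_ne(a, b)); /* prints 1 */
  return 0;
}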
/* vec_all_nge */
-static int __ATTRS_o_ai
-vec_all_nge(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_nge(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgesp_p(__CR6_EQ, __a, __b);
#else
@@ -12671,16 +13124,16 @@ vec_all_nge(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai
-vec_all_nge(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_nge(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgedp_p(__CR6_EQ, __a, __b);
}
#endif
/* vec_all_ngt */
-static int __ATTRS_o_ai
-vec_all_ngt(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ngt(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ, __a, __b);
#else
@@ -12689,198 +13142,207 @@ vec_all_ngt(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai
-vec_all_ngt(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_all_ngt(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ, __a, __b);
}
#endif
/* vec_all_nle */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_all_nle(vector float __a, vector float __b) {
return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __b, __a);
}
/* vec_all_nlt */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_all_nlt(vector float __a, vector float __b) {
return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __b, __a);
}
/* vec_all_numeric */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_all_numeric(vector float __a) {
return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __a);
}
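The vec_all_* family above all ask "does this hold for every lane". vec_all_numeric is the interesting special case: it compares __a against itself with __CR6_LT (the "all compares true" bit), and since x == x fails only for NaN, the predicate is true exactly when no lane is NaN. A small sketch, again assuming -maltivec; NAN comes from <math.h>:

#include <altivec.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  vector float ok  = {1.0f, 2.0f, 3.0f, 4.0f};
  vector float bad = {1.0f, NAN,  3.0f, 4.0f};
  /* x == x fails only on NaN lanes, so this is a "no NaN anywhere" test */
  printf("%d %d\n", vec_all_numeric(ok), vec_all_numeric(bad)); /* 1 0 */
  return 0;
}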
/* vec_any_eq */
-static int __ATTRS_o_ai vec_any_eq(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_eq(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector pixel __a, vector pixel __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector pixel __a,
+ vector pixel __b) {
return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector int __a, vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_eq(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
(vector int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_any_eq(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_any_eq(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpeqsp_p(__CR6_EQ_REV, __a, __b);
#else
@@ -12889,168 +13351,177 @@ static int __ATTRS_o_ai vec_any_eq(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_any_eq(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpeqdp_p(__CR6_EQ_REV, __a, __b);
}
#endif
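The vec_any_* predicates that begin here mirror the vec_all_* ones but use the _REV CR6 codes, i.e. the complement of the corresponding "all" bit: vec_any_eq asks whether at least one lane pair compares equal. A sketch of the distinction (illustrative values, -maltivec assumed):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector int a = {1, 2, 3, 4};
  vector int b = {9, 2, 9, 9};              /* only lane 1 matches */
  printf("any_eq: %d\n", vec_any_eq(a, b)); /* prints 1 */
  printf("all_eq: %d\n", vec_all_eq(a, b)); /* prints 0 */
  return 0;
}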
/* vec_any_ge */
-static int __ATTRS_o_ai vec_any_ge(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, (vector short)__b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, (vector int)__b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b,
(vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b,
(vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b,
(vector unsigned int)__a);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_any_ge(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV,
(vector signed long long)__b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
(vector unsigned long long)__b, __a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
(vector unsigned long long)__b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
(vector unsigned long long)__b,
(vector unsigned long long)__a);
}
#endif
-static int __ATTRS_o_ai vec_any_ge(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgesp_p(__CR6_EQ_REV, __a, __b);
#else
@@ -13059,168 +13530,177 @@ static int __ATTRS_o_ai vec_any_ge(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_any_ge(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ge(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgedp_p(__CR6_EQ_REV, __a, __b);
}
#endif
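Note how the overloads taking vector bool operands cast through the unsigned builtins: bool lanes hold only 0 or all-ones, so comparing them as unsigned integers gives a sensible "mask set vs. mask clear" ordering. A sketch of feeding comparison masks back into a predicate (illustrative, -maltivec assumed):

#include <altivec.h>
#include <stdio.h>

int main(void) {
  vector signed char a = vec_splat_s8(2);
  vector signed char b = vec_splat_s8(1);
  vector bool char gt = vec_cmpgt(a, b); /* all lanes 0xFF */
  vector bool char lt = vec_cmplt(a, b); /* all lanes 0x00 */
  /* bool lanes compare as unsigned 0x00 / 0xFF values */
  printf("%d\n", vec_any_ge(gt, lt)); /* prints 1 */
  return 0;
}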
/* vec_any_gt */
-static int __ATTRS_o_ai vec_any_gt(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a,
(vector signed char)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a,
__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a,
__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a,
(vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a,
(vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a,
__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a,
(vector unsigned int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_any_gt(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
(vector unsigned long long)__a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
(vector unsigned long long)__a, __b);
}
-static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
(vector unsigned long long)__a,
(vector unsigned long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_any_gt(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ_REV, __a, __b);
#else
@@ -13229,168 +13709,177 @@ static int __ATTRS_o_ai vec_any_gt(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_any_gt(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_any_gt(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ_REV, __a, __b);
}
#endif
/* vec_any_le */
-static int __ATTRS_o_ai vec_any_le(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a,
(vector signed char)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a,
__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a,
(vector unsigned char)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a,
__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a,
(vector unsigned short)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a,
(vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a,
(vector unsigned int)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a,
__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a,
(vector unsigned int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_any_le(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
(vector unsigned long long)__a,
(vector unsigned long long)__b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
(vector unsigned long long)__a, __b);
}
-static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
(vector unsigned long long)__a,
(vector unsigned long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_any_le(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgesp_p(__CR6_EQ_REV, __b, __a);
#else
@@ -13399,168 +13888,177 @@ static int __ATTRS_o_ai vec_any_le(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_any_le(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_any_le(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgedp_p(__CR6_EQ_REV, __b, __a);
}
#endif
/* vec_any_lt */
-static int __ATTRS_o_ai vec_any_lt(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b,
(vector unsigned char)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, (vector short)__b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b,
(vector unsigned short)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector int __a, vector int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, (vector int)__b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b,
__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b,
(vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b,
(vector unsigned int)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b,
(vector unsigned int)__a);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_any_lt(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV,
(vector signed long long)__b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
(vector unsigned long long)__b, __a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
(vector unsigned long long)__b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b,
(vector unsigned long long)__a);
}
-static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
(vector unsigned long long)__b,
(vector unsigned long long)__a);
}
#endif
-static int __ATTRS_o_ai vec_any_lt(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpgtsp_p(__CR6_EQ_REV, __b, __a);
#else
@@ -13569,182 +14067,193 @@ static int __ATTRS_o_ai vec_any_lt(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_any_lt(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_any_lt(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpgtdp_p(__CR6_EQ_REV, __b, __a);
}
#endif
/* vec_any_nan */
-static int __attribute__((__always_inline__)) vec_any_nan(vector float __a) {
+static __inline__ int __attribute__((__always_inline__))
+vec_any_nan(vector float __a) {
return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __a);
}
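vec_any_nan is the dual of vec_all_numeric above: the same x == x self-comparison, but with __CR6_LT_REV, so it reports true as soon as one lane fails the self-equality test. Sketch (NAN from <math.h>, -maltivec assumed):

#include <altivec.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  vector float v = {0.0f, NAN, 2.0f, 3.0f};
  /* true when at least one lane fails v == v, i.e. is NaN */
  printf("any_nan: %d\n", vec_any_nan(v)); /* prints 1 */
  return 0;
}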
/* vec_any_ne */
-static int __ATTRS_o_ai vec_any_ne(vector signed char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector signed char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned char __a,
- vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool char __a,
- vector signed char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool char __a,
+ vector signed char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool char __a,
- vector unsigned char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool char __a,
+ vector unsigned char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool char __a, vector bool char __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool char __a,
+ vector bool char __b) {
return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
(vector char)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_ne(vector short __a, vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, (vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool short __a, vector short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool short __a,
+ vector short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool short __a,
- vector unsigned short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool short __a,
+ vector unsigned short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool short __a,
- vector bool short __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool short __a,
+ vector bool short __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector pixel __a, vector pixel __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector pixel __a,
+ vector pixel __b) {
return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
(vector short)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector int __a, vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_ne(vector int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, (vector int)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned int __a,
- vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a,
+ vector int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool int __a,
- vector unsigned int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a,
+ vector unsigned int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
(vector int)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector bool int __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a,
+ vector bool int __b) {
return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
(vector int)__b);
}
#ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a,
(vector long long)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a,
(vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
- vector signed long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
+ vector signed long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
- vector unsigned long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
+ vector unsigned long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
}
-static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
- vector bool long long __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
+ vector bool long long __b) {
return __builtin_altivec_vcmpequd_p(
__CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
}
#endif
-static int __ATTRS_o_ai vec_any_ne(vector float __a, vector float __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector float __a,
+ vector float __b) {
#ifdef __VSX__
return __builtin_vsx_xvcmpeqsp_p(__CR6_LT_REV, __a, __b);
#else
@@ -13753,49 +14262,50 @@ static int __ATTRS_o_ai vec_any_ne(vector float __a, vector float __b) {
}
#ifdef __VSX__
-static int __ATTRS_o_ai vec_any_ne(vector double __a, vector double __b) {
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector double __a,
+ vector double __b) {
return __builtin_vsx_xvcmpeqdp_p(__CR6_LT_REV, __a, __b);
}
#endif
/* vec_any_nge */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_any_nge(vector float __a, vector float __b) {
return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __a, __b);
}
/* vec_any_ngt */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_any_ngt(vector float __a, vector float __b) {
return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __a, __b);
}
/* vec_any_nle */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_any_nle(vector float __a, vector float __b) {
return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __b, __a);
}
/* vec_any_nlt */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_any_nlt(vector float __a, vector float __b) {
return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __b, __a);
}
/* vec_any_numeric */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_any_numeric(vector float __a) {
return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __a);
}
/* vec_any_out */
-static int __attribute__((__always_inline__))
+static __inline__ int __attribute__((__always_inline__))
vec_any_out(vector float __a, vector float __b) {
return __builtin_altivec_vcmpbfp_p(__CR6_EQ_REV, __a, __b);
}
@@ -13820,30 +14330,30 @@ provided.
#define vec_ncipher_be __builtin_altivec_crypto_vncipher
#define vec_ncipherlast_be __builtin_altivec_crypto_vncipherlast
-static vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vsbox(vector unsigned long long __a) {
return __builtin_altivec_crypto_vsbox(__a);
}
-static vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vcipher(vector unsigned long long __a,
vector unsigned long long __b) {
return __builtin_altivec_crypto_vcipher(__a, __b);
}
-static vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vcipherlast(vector unsigned long long __a,
vector unsigned long long __b) {
return __builtin_altivec_crypto_vcipherlast(__a, __b);
}
-static vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vncipher(vector unsigned long long __a,
vector unsigned long long __b) {
return __builtin_altivec_crypto_vncipher(__a, __b);
}
-static vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vncipherlast(vector unsigned long long __a,
vector unsigned long long __b) {
return __builtin_altivec_crypto_vncipherlast(__a, __b);
@@ -13852,20 +14362,20 @@ __builtin_crypto_vncipherlast(vector unsigned long long __a,
#define __builtin_crypto_vshasigmad __builtin_altivec_crypto_vshasigmad
#define __builtin_crypto_vshasigmaw __builtin_altivec_crypto_vshasigmaw
-#define vec_shasigma_be(X, Y, Z) \
- _Generic((X), vector unsigned int: __builtin_crypto_vshasigmaw, \
- vector unsigned long long: __builtin_crypto_vshasigmad) \
-((X), (Y), (Z))
+#define vec_shasigma_be(X, Y, Z) \
+ _Generic((X), vector unsigned int \
+ : __builtin_crypto_vshasigmaw, vector unsigned long long \
+ : __builtin_crypto_vshasigmad)((X), (Y), (Z))
#endif
#ifdef __POWER8_VECTOR__
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__builtin_crypto_vpermxor(vector unsigned char __a, vector unsigned char __b,
vector unsigned char __c) {
return __builtin_altivec_crypto_vpermxor(__a, __b, __c);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
__builtin_crypto_vpermxor(vector unsigned short __a, vector unsigned short __b,
vector unsigned short __c) {
return (vector unsigned short)__builtin_altivec_crypto_vpermxor(
@@ -13873,73 +14383,72 @@ __builtin_crypto_vpermxor(vector unsigned short __a, vector unsigned short __b,
(vector unsigned char)__c);
}
-static vector unsigned int __ATTRS_o_ai __builtin_crypto_vpermxor(
+static __inline__ vector unsigned int __ATTRS_o_ai __builtin_crypto_vpermxor(
vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) {
return (vector unsigned int)__builtin_altivec_crypto_vpermxor(
(vector unsigned char)__a, (vector unsigned char)__b,
(vector unsigned char)__c);
}
-static vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpermxor(
- vector unsigned long long __a, vector unsigned long long __b,
- vector unsigned long long __c) {
+static __inline__ vector unsigned long long __ATTRS_o_ai
+__builtin_crypto_vpermxor(vector unsigned long long __a,
+ vector unsigned long long __b,
+ vector unsigned long long __c) {
return (vector unsigned long long)__builtin_altivec_crypto_vpermxor(
(vector unsigned char)__a, (vector unsigned char)__b,
(vector unsigned char)__c);
}
-static vector unsigned char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
__builtin_crypto_vpmsumb(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_crypto_vpmsumb(__a, __b);
}
-static vector unsigned short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
__builtin_crypto_vpmsumb(vector unsigned short __a, vector unsigned short __b) {
return __builtin_altivec_crypto_vpmsumh(__a, __b);
}
-static vector unsigned int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
__builtin_crypto_vpmsumb(vector unsigned int __a, vector unsigned int __b) {
return __builtin_altivec_crypto_vpmsumw(__a, __b);
}
-static vector unsigned long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
__builtin_crypto_vpmsumb(vector unsigned long long __a,
vector unsigned long long __b) {
return __builtin_altivec_crypto_vpmsumd(__a, __b);
}
-static vector signed char __ATTRS_o_ai vec_vgbbd (vector signed char __a)
-{
- return __builtin_altivec_vgbbd((vector unsigned char) __a);
+static __inline__ vector signed char __ATTRS_o_ai
+vec_vgbbd(vector signed char __a) {
+ return __builtin_altivec_vgbbd((vector unsigned char)__a);
}
#define vec_pmsum_be __builtin_crypto_vpmsumb
#define vec_gb __builtin_altivec_vgbbd
-static vector unsigned char __ATTRS_o_ai vec_vgbbd (vector unsigned char __a)
-{
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_vgbbd(vector unsigned char __a) {
return __builtin_altivec_vgbbd(__a);
}
-static vector long long __ATTRS_o_ai
-vec_vbpermq (vector signed char __a, vector signed char __b)
-{
- return __builtin_altivec_vbpermq((vector unsigned char) __a,
- (vector unsigned char) __b);
+static __inline__ vector long long __ATTRS_o_ai
+vec_vbpermq(vector signed char __a, vector signed char __b) {
+ return __builtin_altivec_vbpermq((vector unsigned char)__a,
+ (vector unsigned char)__b);
}
-static vector long long __ATTRS_o_ai
-vec_vbpermq (vector unsigned char __a, vector unsigned char __b)
-{
+static __inline__ vector long long __ATTRS_o_ai
+vec_vbpermq(vector unsigned char __a, vector unsigned char __b) {
return __builtin_altivec_vbpermq(__a, __b);
}
#ifdef __powerpc64__
-static vector unsigned long long __attribute__((__always_inline__))
-vec_bperm (vector unsigned __int128 __a, vector unsigned char __b) {
- return __builtin_altivec_vbpermq((vector unsigned char) __a,
- (vector unsigned char) __b);
+static __inline__ vector unsigned long long __attribute__((__always_inline__))
+vec_bperm(vector unsigned __int128 __a, vector unsigned char __b) {
+ return __builtin_altivec_vbpermq((vector unsigned char)__a,
+ (vector unsigned char)__b);
}
#endif
#endif
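
Note on the altivec.h churn above: every static helper gains an explicit __inline__. The diff does not state the motivation, but the usual one is that a static non-inline function defined in a header can trigger -Wunused-function in translation units that never call it, and may be emitted out of line at -O0. A minimal sketch of the resulting pattern (the helper name __vec_demo is hypothetical, not part of altivec.h):

static __inline__ int __attribute__((__always_inline__))
__vec_demo(int __x) {
  /* Inlined at every call site; no out-of-line copy, no unused-function
     warning in TUs that include the header but never call it. */
  return __x + 1;
}
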
diff --git a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
index 4880fd7ebad1..8985bb404f47 100644
--- a/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/ammintrin.h
@@ -38,9 +38,7 @@
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
-/// \code
/// This intrinsic corresponds to the \c EXTRQ instruction.
-/// \endcode
///
/// \param x
/// The value from which bits are extracted.
@@ -49,10 +47,10 @@
/// are zero, the length is interpreted as 64.
/// \param idx
/// Bits [5:0] specify the index of the least significant bit; the other
-/// bits are ignored. If the sum of the index and length is greater than
-/// 64, the result is undefined. If the length and index are both zero,
-/// bits [63:0] of parameter x are extracted. If the length is zero
-/// but the index is non-zero, the result is undefined.
+/// bits are ignored. If the sum of the index and length is greater than 64,
+/// the result is undefined. If the length and index are both zero, bits
+/// [63:0] of parameter x are extracted. If the length is zero but the index
+/// is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
/// extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
@@ -64,20 +62,17 @@
///
/// \headerfile <x86intrin.h>
///
-/// \code
/// This intrinsic corresponds to the \c EXTRQ instruction.
-/// \endcode
///
/// \param __x
/// The value from which bits are extracted.
/// \param __y
-/// Specifies the index of the least significant bit at [13:8]
-/// and the length at [5:0]; all other bits are ignored.
-/// If bits [5:0] are zero, the length is interpreted as 64.
-/// If the sum of the index and length is greater than 64, the result is
-/// undefined. If the length and index are both zero, bits [63:0] of
-/// parameter __x are extracted. If the length is zero but the index is
-/// non-zero, the result is undefined.
+/// Specifies the index of the least significant bit at [13:8] and the
+/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
+/// length is interpreted as 64. If the sum of the index and length is
+/// greater than 64, the result is undefined. If the length and index are
+/// both zero, bits [63:0] of parameter __x are extracted. If the length is
+/// zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
/// from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -86,9 +81,9 @@ _mm_extract_si64(__m128i __x, __m128i __y)
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}
-/// \brief Inserts bits of a specified length from the source integer vector
-/// y into the lower 64 bits of the destination integer vector x at the
-/// index idx and of the length len.
+/// \brief Inserts bits of a specified length from the source integer vector y
+/// into the lower 64 bits of the destination integer vector x at the index
+/// idx and of the length len.
///
/// \headerfile <x86intrin.h>
///
@@ -97,9 +92,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
/// const int idx);
/// \endcode
///
-/// \code
/// This intrinsic corresponds to the \c INSERTQ instruction.
-/// \endcode
///
/// \param x
/// The destination operand where bits will be inserted. The inserted bits
@@ -113,14 +106,14 @@ _mm_extract_si64(__m128i __x, __m128i __y)
/// are zero, the length is interpreted as 64.
/// \param idx
/// Bits [5:0] specify the index of the least significant bit; the other
-/// bits are ignored. If the sum of the index and length is greater than
-/// 64, the result is undefined. If the length and index are both zero,
-/// bits [63:0] of parameter y are inserted into parameter x. If the
-/// length is zero but the index is non-zero, the result is undefined.
-/// \returns A 128-bit integer vector containing the original lower 64-bits
-/// of destination operand x with the specified bitfields replaced by the
-/// lower bits of source operand y. The upper 64 bits of the return value
-/// are undefined.
+/// bits are ignored. If the sum of the index and length is greater than 64,
+/// the result is undefined. If the length and index are both zero, bits
+/// [63:0] of parameter y are inserted into parameter x. If the length is
+/// zero but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector containing the original lower 64 bits of
+/// destination operand x with the specified bitfields replaced by the lower
+/// bits of source operand y. The upper 64 bits of the return value are
+/// undefined.
#define _mm_inserti_si64(x, y, len, idx) \
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
@@ -128,14 +121,12 @@ _mm_extract_si64(__m128i __x, __m128i __y)
(char)(len), (char)(idx)))
/// \brief Inserts bits of a specified length from the source integer vector
-/// __y into the lower 64 bits of the destination integer vector __x at
-/// the index and of the length specified by __y.
+/// __y into the lower 64 bits of the destination integer vector __x at the
+/// index and of the length specified by __y.
///
/// \headerfile <x86intrin.h>
///
-/// \code
/// This intrinsic corresponds to the \c INSERTQ instruction.
-/// \endcode
///
/// \param __x
/// The destination operand where bits will be inserted. The inserted bits
@@ -145,14 +136,14 @@ _mm_extract_si64(__m128i __x, __m128i __y)
/// The source operand containing the bits to be extracted. The extracted
/// bits are the least significant bits of operand __y with length specified
/// by bits [69:64]. These are inserted into the destination at the index
-/// specified by bits [77:72]; all other bits are ignored.
-/// If bits [69:64] are zero, the length is interpreted as 64.
-/// If the sum of the index and length is greater than 64, the result is
-/// undefined. If the length and index are both zero, bits [63:0] of
-/// parameter __y are inserted into parameter __x. If the length
-/// is zero but the index is non-zero, the result is undefined.
-/// \returns A 128-bit integer vector containing the original lower 64-bits
-/// of destination operand __x with the specified bitfields replaced by the
+/// specified by bits [77:72]; all other bits are ignored. If bits [69:64]
+/// are zero, the length is interpreted as 64. If the sum of the index and
+/// length is greater than 64, the result is undefined. If the length and
+/// index are both zero, bits [63:0] of parameter __y are inserted into
+/// parameter __x. If the length is zero but the index is non-zero, the
+/// result is undefined.
+/// \returns A 128-bit integer vector containing the original lower 64 bits of
+/// destination operand __x with the specified bitfields replaced by the
/// lower bits of source operand __y. The upper 64 bits of the return value
/// are undefined.
@@ -168,15 +159,12 @@ _mm_insert_si64(__m128i __x, __m128i __y)
///
/// \headerfile <x86intrin.h>
///
-/// \code
/// This intrinsic corresponds to the \c MOVNTSD instruction.
-/// \endcode
///
/// \param __p
/// The 64-bit memory location used to store the register value.
/// \param __a
-/// The 64-bit double-precision floating-point register value to
-/// be stored.
+/// The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
{
@@ -189,15 +177,12 @@ _mm_stream_sd(double *__p, __m128d __a)
///
/// \headerfile <x86intrin.h>
///
-/// \code
/// This intrinsic corresponds to the \c MOVNTSS instruction.
-/// \endcode
///
/// \param __p
/// The 32-bit memory location used to store the register value.
/// \param __a
-/// The 32-bit single-precision floating-point register value to
-/// be stored.
+/// The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
{
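
A worked reading of the EXTRQ field encoding documented above, as a usage sketch (assumes an SSE4a target; compile with -msse4a on x86-64):

#include <ammintrin.h>
#include <stdio.h>

int main(void) {
  __m128i x = _mm_set_epi64x(0, 0x1122334455667788LL);
  /* len = 8, idx = 8: extract bits [15:8] of the low qword -> 0x77,
     zero-extended into the lower 64 bits of the result. */
  __m128i r = _mm_extracti_si64(x, 8, 8);
  printf("%#llx\n", (unsigned long long)_mm_cvtsi128_si64(r));
  return 0;
}
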
diff --git a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
index 4be1d097dc5e..8423e62a381b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
+++ b/contrib/llvm/tools/clang/lib/Headers/arm_acle.h
@@ -72,9 +72,11 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
/* 8.5 Swap */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __swp(uint32_t x, volatile uint32_t *p) {
+__swp(uint32_t __x, volatile uint32_t *__p) {
uint32_t v;
- do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p));
+ do
+ v = __builtin_arm_ldrex(__p);
+ while (__builtin_arm_strex(__x, __p));
return v;
}
@@ -110,113 +112,115 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(voi
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __ror(uint32_t x, uint32_t y) {
- y %= 32;
- if (y == 0) return x;
- return (x >> y) | (x << (32 - y));
+__ror(uint32_t __x, uint32_t __y) {
+ __y %= 32;
+ if (__y == 0)
+ return __x;
+ return (__x >> __y) | (__x << (32 - __y));
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
- __rorll(uint64_t x, uint32_t y) {
- y %= 64;
- if (y == 0) return x;
- return (x >> y) | (x << (64 - y));
+__rorll(uint64_t __x, uint32_t __y) {
+ __y %= 64;
+ if (__y == 0)
+ return __x;
+ return (__x >> __y) | (__x << (64 - __y));
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
- __rorl(unsigned long x, uint32_t y) {
+__rorl(unsigned long __x, uint32_t __y) {
#if __SIZEOF_LONG__ == 4
- return __ror(x, y);
+ return __ror(__x, __y);
#else
- return __rorll(x, y);
+ return __rorll(__x, __y);
#endif
}
/* CLZ */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __clz(uint32_t t) {
- return __builtin_clz(t);
+__clz(uint32_t __t) {
+ return __builtin_clz(__t);
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
- __clzl(unsigned long t) {
- return __builtin_clzl(t);
+__clzl(unsigned long __t) {
+ return __builtin_clzl(__t);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
- __clzll(uint64_t t) {
- return __builtin_clzll(t);
+__clzll(uint64_t __t) {
+ return __builtin_clzll(__t);
}
/* REV */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __rev(uint32_t t) {
- return __builtin_bswap32(t);
+__rev(uint32_t __t) {
+ return __builtin_bswap32(__t);
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
- __revl(unsigned long t) {
+__revl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
- return __builtin_bswap32(t);
+ return __builtin_bswap32(__t);
#else
- return __builtin_bswap64(t);
+ return __builtin_bswap64(__t);
#endif
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
- __revll(uint64_t t) {
- return __builtin_bswap64(t);
+__revll(uint64_t __t) {
+ return __builtin_bswap64(__t);
}
/* REV16 */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __rev16(uint32_t t) {
- return __ror(__rev(t), 16);
+__rev16(uint32_t __t) {
+ return __ror(__rev(__t), 16);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
- __rev16ll(uint64_t t) {
- return (((uint64_t)__rev16(t >> 32)) << 32) | __rev16(t);
+__rev16ll(uint64_t __t) {
+ return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
- __rev16l(unsigned long t) {
+__rev16l(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
- return __rev16(t);
+ return __rev16(__t);
#else
- return __rev16ll(t);
+ return __rev16ll(__t);
#endif
}
/* REVSH */
static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
- __revsh(int16_t t) {
- return __builtin_bswap16(t);
+__revsh(int16_t __t) {
+ return __builtin_bswap16(__t);
}
/* RBIT */
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __rbit(uint32_t t) {
- return __builtin_arm_rbit(t);
+__rbit(uint32_t __t) {
+ return __builtin_arm_rbit(__t);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
- __rbitll(uint64_t t) {
+__rbitll(uint64_t __t) {
#if __ARM_32BIT_STATE
- return (((uint64_t) __builtin_arm_rbit(t)) << 32) |
- __builtin_arm_rbit(t >> 32);
+ return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |
+ __builtin_arm_rbit(__t >> 32);
#else
- return __builtin_arm_rbit64(t);
+ return __builtin_arm_rbit64(__t);
#endif
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
- __rbitl(unsigned long t) {
+__rbitl(unsigned long __t) {
#if __SIZEOF_LONG__ == 4
- return __rbit(t);
+ return __rbit(__t);
#else
- return __rbitll(t);
+ return __rbitll(__t);
#endif
}
@@ -235,61 +239,61 @@ static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_32BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
- __qadd(int32_t t, int32_t v) {
- return __builtin_arm_qadd(t, v);
+__qadd(int32_t __t, int32_t __v) {
+ return __builtin_arm_qadd(__t, __v);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
- __qsub(int32_t t, int32_t v) {
- return __builtin_arm_qsub(t, v);
+__qsub(int32_t __t, int32_t __v) {
+ return __builtin_arm_qsub(__t, __v);
}
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
-__qdbl(int32_t t) {
- return __builtin_arm_qadd(t, t);
+__qdbl(int32_t __t) {
+ return __builtin_arm_qadd(__t, __t);
}
#endif
/* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32b(uint32_t a, uint8_t b) {
- return __builtin_arm_crc32b(a, b);
+__crc32b(uint32_t __a, uint8_t __b) {
+ return __builtin_arm_crc32b(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32h(uint32_t a, uint16_t b) {
- return __builtin_arm_crc32h(a, b);
+__crc32h(uint32_t __a, uint16_t __b) {
+ return __builtin_arm_crc32h(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32w(uint32_t a, uint32_t b) {
- return __builtin_arm_crc32w(a, b);
+__crc32w(uint32_t __a, uint32_t __b) {
+ return __builtin_arm_crc32w(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32d(uint32_t a, uint64_t b) {
- return __builtin_arm_crc32d(a, b);
+__crc32d(uint32_t __a, uint64_t __b) {
+ return __builtin_arm_crc32d(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32cb(uint32_t a, uint8_t b) {
- return __builtin_arm_crc32cb(a, b);
+__crc32cb(uint32_t __a, uint8_t __b) {
+ return __builtin_arm_crc32cb(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32ch(uint32_t a, uint16_t b) {
- return __builtin_arm_crc32ch(a, b);
+__crc32ch(uint32_t __a, uint16_t __b) {
+ return __builtin_arm_crc32ch(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32cw(uint32_t a, uint32_t b) {
- return __builtin_arm_crc32cw(a, b);
+__crc32cw(uint32_t __a, uint32_t __b) {
+ return __builtin_arm_crc32cw(__a, __b);
}
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
- __crc32cd(uint32_t a, uint64_t b) {
- return __builtin_arm_crc32cd(a, b);
+__crc32cd(uint32_t __a, uint64_t __b) {
+ return __builtin_arm_crc32cd(__a, __b);
}
#endif
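
The arm_acle.h hunks above are a pure rename of parameter names (x, y, t, v, a, b) to the reserved double-underscore spellings, plus reformatting. The likely reason, though the diff does not say so: plain names live in the user's namespace, so an object-like macro defined before the include could otherwise break the header. For example:

#define y 1u              /* perfectly legal user code                    */
#include <arm_acle.h>     /* __ror(uint32_t __x, uint32_t __y) is immune;
                             the old __ror(uint32_t x, uint32_t y) would
                             expand to `uint32_t 1u` and fail to parse    */
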
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
index f786572dae7d..13bcbef4dbbe 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx2intrin.h
@@ -32,7 +32,9 @@
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
/* SSE4 Multiple Packed Sums of Absolute Difference. */
-#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
+#define _mm256_mpsadbw_epu8(X, Y, M) \
+ (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(M))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_abs_epi8(__m256i __a)
@@ -79,25 +81,25 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)((__v32qi)__a + (__v32qi)__b);
+ return (__m256i)((__v32qu)__a + (__v32qu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)((__v16hi)__a + (__v16hi)__b);
+ return (__m256i)((__v16hu)__a + (__v16hu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi32(__m256i __a, __m256i __b)
{
- return (__m256i)((__v8si)__a + (__v8si)__b);
+ return (__m256i)((__v8su)__a + (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_add_epi64(__m256i __a, __m256i __b)
{
- return __a + __b;
+ return (__m256i)((__v4du)__a + (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -131,13 +133,13 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_and_si256(__m256i __a, __m256i __b)
{
- return __a & __b;
+ return (__m256i)((__v4du)__a & (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_andnot_si256(__m256i __a, __m256i __b)
{
- return ~__a & __b;
+ return (__m256i)(~(__v4du)__a & (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -200,7 +202,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmpeq_epi64(__m256i __a, __m256i __b)
{
- return (__m256i)(__a == __b);
+ return (__m256i)((__v4di)__a == (__v4di)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -226,7 +228,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmpgt_epi64(__m256i __a, __m256i __b)
{
- return (__m256i)(__a > __b);
+ return (__m256i)((__v4di)__a > (__v4di)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -358,73 +360,79 @@ _mm256_movemask_epi8(__m256i __a)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepi8_epi16(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepi8_epi32(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepi8_epi64(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
+ /* This function always performs a signed extension, but __v16qi is a char
+ which may be signed or unsigned, so use __v16qs. */
+ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepi16_epi32(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
+ return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepi16_epi64(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
+ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepi32_epi64(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
+ return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepu8_epi16(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
+ return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepu8_epi32(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
+ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepu8_epi64(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
+ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepu16_epi32(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
+ return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepu16_epi64(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
+ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cvtepu32_epi64(__m128i __V)
{
- return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
+ return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
}
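
The sign- and zero-extension intrinsics above replace the opaque __builtin_ia32_pmovsx*/pmovzx* builtins with the generic __builtin_convertvector (composed with __builtin_shufflevector to select the low input lanes). Plausibly this lets the optimizer fold and combine the extensions like ordinary IR instead of treating them as black boxes. A standalone sketch of the idiom, with hypothetical typedefs:

typedef short v8hi __attribute__((__vector_size__(16)));
typedef int   v8si __attribute__((__vector_size__(32)));

/* Element-wise sign extension of eight shorts to eight ints,
   the same shape as the _mm256_cvtepi16_epi32 body above. */
static inline v8si widen8(v8hi __v) {
  return __builtin_convertvector(__v, v8si);
}
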
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -454,13 +462,13 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)((__v16hi)__a * (__v16hi)__b);
+ return (__m256i)((__v16hu)__a * (__v16hu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi32 (__m256i __a, __m256i __b)
{
- return (__m256i)((__v8si)__a * (__v8si)__b);
+ return (__m256i)((__v8su)__a * (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -472,7 +480,7 @@ _mm256_mul_epu32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_or_si256(__m256i __a, __m256i __b)
{
- return __a | __b;
+ return (__m256i)((__v4du)__a | (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -489,38 +497,42 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
(__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \
- (__v8si)_mm256_setzero_si256(), \
- (imm) & 0x3, ((imm) & 0xc) >> 2, \
- ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
- 4 + (((imm) & 0x03) >> 0), \
- 4 + (((imm) & 0x0c) >> 2), \
- 4 + (((imm) & 0x30) >> 4), \
- 4 + (((imm) & 0xc0) >> 6)); })
+ (__v8si)_mm256_undefined_si256(), \
+ 0 + (((imm) >> 0) & 0x3), \
+ 0 + (((imm) >> 2) & 0x3), \
+ 0 + (((imm) >> 4) & 0x3), \
+ 0 + (((imm) >> 6) & 0x3), \
+ 4 + (((imm) >> 0) & 0x3), \
+ 4 + (((imm) >> 2) & 0x3), \
+ 4 + (((imm) >> 4) & 0x3), \
+ 4 + (((imm) >> 6) & 0x3)); })
#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \
(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
- (__v16hi)_mm256_setzero_si256(), \
+ (__v16hi)_mm256_undefined_si256(), \
0, 1, 2, 3, \
- 4 + (((imm) & 0x03) >> 0), \
- 4 + (((imm) & 0x0c) >> 2), \
- 4 + (((imm) & 0x30) >> 4), \
- 4 + (((imm) & 0xc0) >> 6), \
+ 4 + (((imm) >> 0) & 0x3), \
+ 4 + (((imm) >> 2) & 0x3), \
+ 4 + (((imm) >> 4) & 0x3), \
+ 4 + (((imm) >> 6) & 0x3), \
8, 9, 10, 11, \
- 12 + (((imm) & 0x03) >> 0), \
- 12 + (((imm) & 0x0c) >> 2), \
- 12 + (((imm) & 0x30) >> 4), \
- 12 + (((imm) & 0xc0) >> 6)); })
+ 12 + (((imm) >> 0) & 0x3), \
+ 12 + (((imm) >> 2) & 0x3), \
+ 12 + (((imm) >> 4) & 0x3), \
+ 12 + (((imm) >> 6) & 0x3)); })
#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \
(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
- (__v16hi)_mm256_setzero_si256(), \
- (imm) & 0x3,((imm) & 0xc) >> 2, \
- ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
+ (__v16hi)_mm256_undefined_si256(), \
+ 0 + (((imm) >> 0) & 0x3), \
+ 0 + (((imm) >> 2) & 0x3), \
+ 0 + (((imm) >> 4) & 0x3), \
+ 0 + (((imm) >> 6) & 0x3), \
4, 5, 6, 7, \
- 8 + (((imm) & 0x03) >> 0), \
- 8 + (((imm) & 0x0c) >> 2), \
- 8 + (((imm) & 0x30) >> 4), \
- 8 + (((imm) & 0xc0) >> 6), \
+ 8 + (((imm) >> 0) & 0x3), \
+ 8 + (((imm) >> 2) & 0x3), \
+ 8 + (((imm) >> 4) & 0x3), \
+ 8 + (((imm) >> 6) & 0x3), \
12, 13, 14, 15); })
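
In the three shuffle macros above, the immediate decoding flips from mask-then-shift (((imm) & 0xc) >> 2) to shift-then-mask (((imm) >> 2) & 0x3); both forms extract the same 2-bit field, so behavior is unchanged and only the selector expressions become uniform. (The second shuffle operand also changes from _mm256_setzero_si256 to _mm256_undefined_si256, which is never selected anyway.) A spot check for imm = 0xB4, i.e. binary 10 11 01 00:

_Static_assert(((0xB4 >> 0) & 0x3) == 0, "field 0");
_Static_assert(((0xB4 >> 2) & 0x3) == 1, "field 1");
_Static_assert(((0xB4 >> 4) & 0x3) == 3, "field 2");
_Static_assert(((0xB4 >> 6) & 0x3) == 2, "field 3");
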
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -541,8 +553,42 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
}
-#define _mm256_slli_si256(a, count) __extension__ ({ \
- (__m256i)__builtin_ia32_pslldqi256((__m256i)(a), (count)*8); })
+#define _mm256_slli_si256(a, imm) __extension__ ({ \
+ (__m256i)__builtin_shufflevector( \
+ (__v32qi)_mm256_setzero_si256(), \
+ (__v32qi)(__m256i)(a), \
+ ((char)(imm)&0xF0) ? 0 : ((char)(imm)>0x0 ? 16 : 32) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 1 : ((char)(imm)>0x1 ? 17 : 33) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 2 : ((char)(imm)>0x2 ? 18 : 34) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 3 : ((char)(imm)>0x3 ? 19 : 35) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 4 : ((char)(imm)>0x4 ? 20 : 36) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 5 : ((char)(imm)>0x5 ? 21 : 37) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 6 : ((char)(imm)>0x6 ? 22 : 38) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 7 : ((char)(imm)>0x7 ? 23 : 39) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 8 : ((char)(imm)>0x8 ? 24 : 40) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 9 : ((char)(imm)>0x9 ? 25 : 41) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 10 : ((char)(imm)>0xA ? 26 : 42) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 11 : ((char)(imm)>0xB ? 27 : 43) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 12 : ((char)(imm)>0xC ? 28 : 44) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 13 : ((char)(imm)>0xD ? 29 : 45) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 14 : ((char)(imm)>0xE ? 30 : 46) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 15 : ((char)(imm)>0xF ? 31 : 47) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 16 : ((char)(imm)>0x0 ? 32 : 48) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 17 : ((char)(imm)>0x1 ? 33 : 49) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 18 : ((char)(imm)>0x2 ? 34 : 50) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 19 : ((char)(imm)>0x3 ? 35 : 51) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 20 : ((char)(imm)>0x4 ? 36 : 52) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 21 : ((char)(imm)>0x5 ? 37 : 53) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 22 : ((char)(imm)>0x6 ? 38 : 54) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 23 : ((char)(imm)>0x7 ? 39 : 55) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 24 : ((char)(imm)>0x8 ? 40 : 56) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 25 : ((char)(imm)>0x9 ? 41 : 57) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 26 : ((char)(imm)>0xA ? 42 : 58) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 27 : ((char)(imm)>0xB ? 43 : 59) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 28 : ((char)(imm)>0xC ? 44 : 60) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 29 : ((char)(imm)>0xD ? 45 : 61) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 30 : ((char)(imm)>0xE ? 46 : 62) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 47 : 63) - (char)(imm)); })
#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
@@ -573,13 +619,13 @@ _mm256_sll_epi32(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_slli_epi64(__m256i __a, int __count)
{
- return __builtin_ia32_psllqi256(__a, __count);
+ return __builtin_ia32_psllqi256((__v4di)__a, __count);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sll_epi64(__m256i __a, __m128i __count)
{
- return __builtin_ia32_psllq256(__a, __count);
+ return __builtin_ia32_psllq256((__v4di)__a, __count);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -606,8 +652,42 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
}
-#define _mm256_srli_si256(a, count) __extension__ ({ \
- (__m256i)__builtin_ia32_psrldqi256((__m256i)(a), (count)*8); })
+#define _mm256_srli_si256(a, imm) __extension__ ({ \
+ (__m256i)__builtin_shufflevector( \
+ (__v32qi)(__m256i)(a), \
+ (__v32qi)_mm256_setzero_si256(), \
+ ((char)(imm)&0xF0) ? 32 : (char)(imm) + ((char)(imm)>0xF ? 16 : 0), \
+ ((char)(imm)&0xF0) ? 33 : (char)(imm) + ((char)(imm)>0xE ? 17 : 1), \
+ ((char)(imm)&0xF0) ? 34 : (char)(imm) + ((char)(imm)>0xD ? 18 : 2), \
+ ((char)(imm)&0xF0) ? 35 : (char)(imm) + ((char)(imm)>0xC ? 19 : 3), \
+ ((char)(imm)&0xF0) ? 36 : (char)(imm) + ((char)(imm)>0xB ? 20 : 4), \
+ ((char)(imm)&0xF0) ? 37 : (char)(imm) + ((char)(imm)>0xA ? 21 : 5), \
+ ((char)(imm)&0xF0) ? 38 : (char)(imm) + ((char)(imm)>0x9 ? 22 : 6), \
+ ((char)(imm)&0xF0) ? 39 : (char)(imm) + ((char)(imm)>0x8 ? 23 : 7), \
+ ((char)(imm)&0xF0) ? 40 : (char)(imm) + ((char)(imm)>0x7 ? 24 : 8), \
+ ((char)(imm)&0xF0) ? 41 : (char)(imm) + ((char)(imm)>0x6 ? 25 : 9), \
+ ((char)(imm)&0xF0) ? 42 : (char)(imm) + ((char)(imm)>0x5 ? 26 : 10), \
+ ((char)(imm)&0xF0) ? 43 : (char)(imm) + ((char)(imm)>0x4 ? 27 : 11), \
+ ((char)(imm)&0xF0) ? 44 : (char)(imm) + ((char)(imm)>0x3 ? 28 : 12), \
+ ((char)(imm)&0xF0) ? 45 : (char)(imm) + ((char)(imm)>0x2 ? 29 : 13), \
+ ((char)(imm)&0xF0) ? 46 : (char)(imm) + ((char)(imm)>0x1 ? 30 : 14), \
+ ((char)(imm)&0xF0) ? 47 : (char)(imm) + ((char)(imm)>0x0 ? 31 : 15), \
+ ((char)(imm)&0xF0) ? 48 : (char)(imm) + ((char)(imm)>0xF ? 32 : 16), \
+ ((char)(imm)&0xF0) ? 49 : (char)(imm) + ((char)(imm)>0xE ? 33 : 17), \
+ ((char)(imm)&0xF0) ? 50 : (char)(imm) + ((char)(imm)>0xD ? 34 : 18), \
+ ((char)(imm)&0xF0) ? 51 : (char)(imm) + ((char)(imm)>0xC ? 35 : 19), \
+ ((char)(imm)&0xF0) ? 52 : (char)(imm) + ((char)(imm)>0xB ? 36 : 20), \
+ ((char)(imm)&0xF0) ? 53 : (char)(imm) + ((char)(imm)>0xA ? 37 : 21), \
+ ((char)(imm)&0xF0) ? 54 : (char)(imm) + ((char)(imm)>0x9 ? 38 : 22), \
+ ((char)(imm)&0xF0) ? 55 : (char)(imm) + ((char)(imm)>0x8 ? 39 : 23), \
+ ((char)(imm)&0xF0) ? 56 : (char)(imm) + ((char)(imm)>0x7 ? 40 : 24), \
+ ((char)(imm)&0xF0) ? 57 : (char)(imm) + ((char)(imm)>0x6 ? 41 : 25), \
+ ((char)(imm)&0xF0) ? 58 : (char)(imm) + ((char)(imm)>0x5 ? 42 : 26), \
+ ((char)(imm)&0xF0) ? 59 : (char)(imm) + ((char)(imm)>0x4 ? 43 : 27), \
+ ((char)(imm)&0xF0) ? 60 : (char)(imm) + ((char)(imm)>0x3 ? 44 : 28), \
+ ((char)(imm)&0xF0) ? 61 : (char)(imm) + ((char)(imm)>0x2 ? 45 : 29), \
+ ((char)(imm)&0xF0) ? 62 : (char)(imm) + ((char)(imm)>0x1 ? 46 : 30), \
+ ((char)(imm)&0xF0) ? 63 : (char)(imm) + ((char)(imm)>0x0 ? 47 : 31)); })
#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
@@ -638,37 +718,37 @@ _mm256_srl_epi32(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_srli_epi64(__m256i __a, int __count)
{
- return __builtin_ia32_psrlqi256(__a, __count);
+ return __builtin_ia32_psrlqi256((__v4di)__a, __count);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_srl_epi64(__m256i __a, __m128i __count)
{
- return __builtin_ia32_psrlq256(__a, __count);
+ return __builtin_ia32_psrlq256((__v4di)__a, __count);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi8(__m256i __a, __m256i __b)
{
- return (__m256i)((__v32qi)__a - (__v32qi)__b);
+ return (__m256i)((__v32qu)__a - (__v32qu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi16(__m256i __a, __m256i __b)
{
- return (__m256i)((__v16hi)__a - (__v16hi)__b);
+ return (__m256i)((__v16hu)__a - (__v16hu)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi32(__m256i __a, __m256i __b)
{
- return (__m256i)((__v8si)__a - (__v8si)__b);
+ return (__m256i)((__v8su)__a - (__v8su)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sub_epi64(__m256i __a, __m256i __b)
{
- return __a - __b;
+ return (__m256i)((__v4du)__a - (__v4du)__b);
}
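
The element-wise +, -, * intrinsics above now cast through explicitly unsigned lane types (__v32qu, __v16hu, __v8su, __v4du) instead of the signed ones. The hardware instructions wrap on overflow, but signed overflow is undefined behavior in C, so computing in unsigned lanes makes the intended two's-complement wraparound well defined. The scalar analogue of the same defensive cast:

#include <stdint.h>

/* Wrapping 32-bit add: well defined even when a + b overflows INT32_MAX. */
static inline int32_t add_wrap32(int32_t a, int32_t b) {
  return (int32_t)((uint32_t)a + (uint32_t)b);
}
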
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -716,7 +796,7 @@ _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_unpackhi_epi64(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
+ return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -740,13 +820,13 @@ _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_unpacklo_epi64(__m256i __a, __m256i __b)
{
- return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
+ return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_xor_si256(__m256i __a, __m256i __b)
{
- return __a ^ __b;
+ return (__m256i)((__v4du)__a ^ (__v4du)__b);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -764,7 +844,7 @@ _mm_broadcastss_ps(__m128 __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_broadcastsd_pd(__m128d __a)
{
- return __builtin_shufflevector(__a, __a, 0, 0);
+ return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
@@ -782,7 +862,7 @@ _mm256_broadcastsd_pd(__m128d __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastsi128_si256(__m128i __X)
{
- return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1);
+ return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);
}
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
@@ -826,7 +906,7 @@ _mm256_broadcastd_epi32(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastq_epi64(__m128i __X)
{
- return (__m256i)__builtin_shufflevector(__X, __X, 0, 0, 0, 0);
+ return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -851,7 +931,7 @@ _mm_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_broadcastq_epi64(__m128i __X)
{
- return (__m128i)__builtin_shufflevector(__X, __X, 0, 0);
+ return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -862,9 +942,11 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \
- (__v4df)_mm256_setzero_pd(), \
- (M) & 0x3, ((M) & 0xc) >> 2, \
- ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
+ (__v4df)_mm256_undefined_pd(), \
+ ((M) >> 0) & 0x3, \
+ ((M) >> 2) & 0x3, \
+ ((M) >> 4) & 0x3, \
+ ((M) >> 6) & 0x3); })
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
@@ -874,16 +956,18 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
(__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \
- (__v4di)_mm256_setzero_si256(), \
- (M) & 0x3, ((M) & 0xc) >> 2, \
- ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
+ (__v4di)_mm256_undefined_si256(), \
+ ((M) >> 0) & 0x3, \
+ ((M) >> 2) & 0x3, \
+ ((M) >> 4) & 0x3, \
+ ((M) >> 6) & 0x3); })
#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \
(__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })
#define _mm256_extracti128_si256(V, M) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \
- (__v4di)_mm256_setzero_si256(), \
+ (__v4di)_mm256_undefined_si256(), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) ); })
@@ -904,7 +988,7 @@ _mm256_maskload_epi32(int const *__X, __m256i __M)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskload_epi64(long long const *__X, __m256i __M)
{
- return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M);
+ return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -928,7 +1012,7 @@ _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS
_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
{
- __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y);
+ __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
}
static __inline__ void __DEFAULT_FN_ATTRS
@@ -940,7 +1024,7 @@ _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS
_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
{
- __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y);
+ __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -958,13 +1042,13 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sllv_epi64(__m256i __X, __m256i __Y)
{
- return (__m256i)__builtin_ia32_psllv4di(__X, __Y);
+ return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sllv_epi64(__m128i __X, __m128i __Y)
{
- return (__m128i)__builtin_ia32_psllv2di(__X, __Y);
+ return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -994,13 +1078,13 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_srlv_epi64(__m256i __X, __m256i __Y)
{
- return (__m256i)__builtin_ia32_psrlv4di(__X, __Y);
+ return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srlv_epi64(__m128i __X, __m128i __Y)
{
- return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
+ return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
}
#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
index f289ed71a332..d3c5a6c96446 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512bwintrin.h
@@ -30,30 +30,28 @@
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
-typedef char __v64qi __attribute__ ((__vector_size__ (64)));
-typedef short __v32hi __attribute__ ((__vector_size__ (64)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
-static __inline __v64qi __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_qi(void) {
- return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0 };
+ return (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
}
-static __inline __v32hi __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_hi(void) {
- return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0 };
+ return (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
}
/* Integer compare */
@@ -348,7 +346,7 @@ _mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi8 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v64qi) __A + (__v64qi) __B);
+ return (__m512i) ((__v64qu) __A + (__v64qu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -369,7 +367,7 @@ _mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi8 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v64qi) __A - (__v64qi) __B);
+ return (__m512i) ((__v64qu) __A - (__v64qu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -390,7 +388,7 @@ _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v32hi) __A + (__v32hi) __B);
+ return (__m512i) ((__v32hu) __A + (__v32hu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -411,7 +409,7 @@ _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v32hi) __A - (__v32hi) __B);
+ return (__m512i) ((__v32hu) __A - (__v32hu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -432,7 +430,7 @@ _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v32hi) __A * (__v32hi) __B);
+ return (__m512i) ((__v32hu) __A * (__v32hu) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -454,17 +452,17 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
{
- return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A,
+ return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
(__v64qi) __W,
- (__mmask64) __U);
+ (__v64qi) __A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
{
- return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A,
+ return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
(__v32hi) __W,
- (__mmask32) __U);
+ (__v32hi) __A);
}
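
The blend intrinsics above switch from __builtin_ia32_blendmb/blendmw_512_mask to the generic __builtin_ia32_selectb/selectw_512 form, which takes the mask first and yields, per element, the second operand where the mask bit is 1 and the third where it is 0; the operand order swaps accordingly, so _mm512_mask_blend_epi8(U, A, W) still returns W where U is set and A elsewhere. A usage sketch (assumes -mavx512bw):

#include <immintrin.h>

/* Even-indexed bytes taken from w, odd-indexed bytes from a. */
__m512i interleave_bytes(__m512i a, __m512i w) {
  return _mm512_mask_blend_epi8(0x5555555555555555ULL, a, w);
}
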
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -1396,145 +1394,1015 @@ _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
__M);
}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi8 (__m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) _mm512_setzero_qi(),
- (__mmask64) -1);
+_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
+ 8, 64+8, 9, 64+9,
+ 10, 64+10, 11, 64+11,
+ 12, 64+12, 13, 64+13,
+ 14, 64+14, 15, 64+15,
+ 24, 64+24, 25, 64+25,
+ 26, 64+26, 27, 64+27,
+ 28, 64+28, 29, 64+29,
+ 30, 64+30, 31, 64+31,
+ 40, 64+40, 41, 64+41,
+ 42, 64+42, 43, 64+43,
+ 44, 64+44, 45, 64+45,
+ 46, 64+46, 47, 64+47,
+ 56, 64+56, 57, 64+57,
+ 58, 64+58, 59, 64+59,
+ 60, 64+60, 61, 64+61,
+ 62, 64+62, 63, 64+63);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
- __m512i __B) {
- return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) __W,
- (__mmask64) __U);
+_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_unpackhi_epi8(__A, __B),
+ (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) _mm512_setzero_qi(),
- (__mmask64) __U);
+_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_unpackhi_epi8(__A, __B),
+ (__v64qi)_mm512_setzero_qi());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpackhi_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) _mm512_setzero_hi(),
- (__mmask32) -1);
+_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
+ 4, 32+4, 5, 32+5,
+ 6, 32+6, 7, 32+7,
+ 12, 32+12, 13, 32+13,
+ 14, 32+14, 15, 32+15,
+ 20, 32+20, 21, 32+21,
+ 22, 32+22, 23, 32+23,
+ 28, 32+28, 29, 32+29,
+ 30, 32+30, 31, 32+31);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpackhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
- __m512i __B) {
- return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) __W,
- (__mmask32) __U);
+_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_unpackhi_epi16(__A, __B),
+ (__v32hi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpackhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) _mm512_setzero_hi(),
- (__mmask32) __U);
+_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_unpackhi_epi16(__A, __B),
+ (__v32hi)_mm512_setzero_hi());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi8 (__m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) _mm512_setzero_qi(),
- (__mmask64) -1);
+_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
+ 0, 64+0, 1, 64+1,
+ 2, 64+2, 3, 64+3,
+ 4, 64+4, 5, 64+5,
+ 6, 64+6, 7, 64+7,
+ 16, 64+16, 17, 64+17,
+ 18, 64+18, 19, 64+19,
+ 20, 64+20, 21, 64+21,
+ 22, 64+22, 23, 64+23,
+ 32, 64+32, 33, 64+33,
+ 34, 64+34, 35, 64+35,
+ 36, 64+36, 37, 64+37,
+ 38, 64+38, 39, 64+39,
+ 48, 64+48, 49, 64+49,
+ 50, 64+50, 51, 64+51,
+ 52, 64+52, 53, 64+53,
+ 54, 64+54, 55, 64+55);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
- __m512i __B) {
- return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) __W,
- (__mmask64) __U);
+_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_unpacklo_epi8(__A, __B),
+ (__v64qi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
- (__v64qi) __B,
- (__v64qi) _mm512_setzero_qi(),
- (__mmask64) __U);
+_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_unpacklo_epi8(__A, __B),
+ (__v64qi)_mm512_setzero_qi());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_unpacklo_epi16 (__m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) _mm512_setzero_hi(),
- (__mmask32) -1);
+_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
+ 0, 32+0, 1, 32+1,
+ 2, 32+2, 3, 32+3,
+ 8, 32+8, 9, 32+9,
+ 10, 32+10, 11, 32+11,
+ 16, 32+16, 17, 32+17,
+ 18, 32+18, 19, 32+19,
+ 24, 32+24, 25, 32+25,
+ 26, 32+26, 27, 32+27);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_unpacklo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
- __m512i __B) {
- return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) __W,
- (__mmask32) __U);
+_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_unpacklo_epi16(__A, __B),
+ (__v32hi)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_unpacklo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
- return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
- (__v32hi) __B,
- (__v32hi) _mm512_setzero_hi(),
- (__mmask32) __U);
+_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_unpacklo_epi16(__A, __B),
+ (__v32hi)_mm512_setzero_hi());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepi8_epi16 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi8_epi16 (__mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepu8_epi16 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu8_epi16 (__mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi(),
+ (__mmask32) __U);
+}
+
+
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), \
- (p), (__mmask64)-1); })
+ (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)-1); })
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), \
- (p), (__mmask64)(m)); })
+ (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)(m)); })
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), \
- (p), (__mmask64)-1); })
+ (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)-1); })
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), \
- (p), (__mmask64)(m)); })
+ (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)(m)); })
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), \
- (p), (__mmask32)-1); })
+ (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)-1); })
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), \
- (p), (__mmask32)(m)); })
+ (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)(m)); })
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), \
- (p), (__mmask32)-1); })
+ (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)-1); })
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
- (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), \
- (p), (__mmask32)(m)); })
+ (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)(m)); })
+
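+/* PSHUFHW semantics: within each 128-bit lane, words 0-3 pass through
+   unchanged and words 4-7 are permuted by successive 2-bit fields of imm. */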
+#define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \
+ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ 0, 1, 2, 3, \
+ 4 + (((imm) >> 0) & 0x3), \
+ 4 + (((imm) >> 2) & 0x3), \
+ 4 + (((imm) >> 4) & 0x3), \
+ 4 + (((imm) >> 6) & 0x3), \
+ 8, 9, 10, 11, \
+ 12 + (((imm) >> 0) & 0x3), \
+ 12 + (((imm) >> 2) & 0x3), \
+ 12 + (((imm) >> 4) & 0x3), \
+ 12 + (((imm) >> 6) & 0x3), \
+ 16, 17, 18, 19, \
+ 20 + (((imm) >> 0) & 0x3), \
+ 20 + (((imm) >> 2) & 0x3), \
+ 20 + (((imm) >> 4) & 0x3), \
+ 20 + (((imm) >> 6) & 0x3), \
+ 24, 25, 26, 27, \
+ 28 + (((imm) >> 0) & 0x3), \
+ 28 + (((imm) >> 2) & 0x3), \
+ 28 + (((imm) >> 4) & 0x3), \
+ 28 + (((imm) >> 6) & 0x3)); })
+
+#define _mm512_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflehi_epi16((A), \
+ (imm)), \
+ (__v32hi)(__m512i)(W)); })
+
+#define _mm512_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflehi_epi16((A), \
+ (imm)), \
+ (__v32hi)_mm512_setzero_hi()); })
+
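+/* PSHUFLW semantics: the mirror image of the above; words 4-7 of each
+   128-bit lane pass through and words 0-3 are permuted by imm. */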
+#define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \
+ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ 0 + (((imm) >> 0) & 0x3), \
+ 0 + (((imm) >> 2) & 0x3), \
+ 0 + (((imm) >> 4) & 0x3), \
+ 0 + (((imm) >> 6) & 0x3), \
+ 4, 5, 6, 7, \
+ 8 + (((imm) >> 0) & 0x3), \
+ 8 + (((imm) >> 2) & 0x3), \
+ 8 + (((imm) >> 4) & 0x3), \
+ 8 + (((imm) >> 6) & 0x3), \
+ 12, 13, 14, 15, \
+ 16 + (((imm) >> 0) & 0x3), \
+ 16 + (((imm) >> 2) & 0x3), \
+ 16 + (((imm) >> 4) & 0x3), \
+ 16 + (((imm) >> 6) & 0x3), \
+ 20, 21, 22, 23, \
+ 24 + (((imm) >> 0) & 0x3), \
+ 24 + (((imm) >> 2) & 0x3), \
+ 24 + (((imm) >> 4) & 0x3), \
+ 24 + (((imm) >> 6) & 0x3), \
+ 28, 29, 30, 31); })
+
+
+#define _mm512_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflelo_epi16((A), \
+ (imm)), \
+ (__v32hi)(__m512i)(W)); })
+
+
+#define _mm512_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflelo_epi16((A), \
+ (imm)), \
+ (__v32hi)_mm512_setzero_hi()); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sllv_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sllv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sll_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sll_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sll_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+#define _mm512_slli_epi16(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)-1); })
+
+#define _mm512_mask_slli_epi16(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)); })
+
+#define _mm512_maskz_slli_epi16(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)(U)); })
+
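+/* Byte-shift each 128-bit lane of (a) left by imm bytes, shifting in
+   zeros: operand 0 of the shuffle is the zero vector, so any count of
+   16 or more (imm & 0xF0) selects only zeros and clears the lane. */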
+#define _mm512_bslli_epi128(a, imm) __extension__ ({ \
+ (__m512i)__builtin_shufflevector( \
+ (__v64qi)_mm512_setzero_si512(), \
+ (__v64qi)(__m512i)(a), \
+ ((char)(imm)&0xF0) ? 0 : ((char)(imm)>0x0 ? 16 : 64) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 1 : ((char)(imm)>0x1 ? 17 : 65) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 2 : ((char)(imm)>0x2 ? 18 : 66) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 3 : ((char)(imm)>0x3 ? 19 : 67) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 4 : ((char)(imm)>0x4 ? 20 : 68) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 5 : ((char)(imm)>0x5 ? 21 : 69) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 6 : ((char)(imm)>0x6 ? 22 : 70) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 7 : ((char)(imm)>0x7 ? 23 : 71) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 8 : ((char)(imm)>0x8 ? 24 : 72) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 9 : ((char)(imm)>0x9 ? 25 : 73) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 10 : ((char)(imm)>0xA ? 26 : 74) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 11 : ((char)(imm)>0xB ? 27 : 75) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 12 : ((char)(imm)>0xC ? 28 : 76) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 13 : ((char)(imm)>0xD ? 29 : 77) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 14 : ((char)(imm)>0xE ? 30 : 78) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 15 : ((char)(imm)>0xF ? 31 : 79) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 16 : ((char)(imm)>0x0 ? 32 : 80) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 17 : ((char)(imm)>0x1 ? 33 : 81) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 18 : ((char)(imm)>0x2 ? 34 : 82) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 19 : ((char)(imm)>0x3 ? 35 : 83) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 20 : ((char)(imm)>0x4 ? 36 : 84) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 21 : ((char)(imm)>0x5 ? 37 : 85) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 22 : ((char)(imm)>0x6 ? 38 : 86) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 23 : ((char)(imm)>0x7 ? 39 : 87) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 24 : ((char)(imm)>0x8 ? 40 : 88) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 25 : ((char)(imm)>0x9 ? 41 : 89) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 26 : ((char)(imm)>0xA ? 42 : 90) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 27 : ((char)(imm)>0xB ? 43 : 91) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 28 : ((char)(imm)>0xC ? 44 : 92) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 29 : ((char)(imm)>0xD ? 45 : 93) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 30 : ((char)(imm)>0xE ? 46 : 94) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 47 : 95) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 32 : ((char)(imm)>0x0 ? 48 : 96) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 33 : ((char)(imm)>0x1 ? 49 : 97) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 34 : ((char)(imm)>0x2 ? 50 : 98) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 35 : ((char)(imm)>0x3 ? 51 : 99) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 36 : ((char)(imm)>0x4 ? 52 : 100) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 37 : ((char)(imm)>0x5 ? 53 : 101) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 38 : ((char)(imm)>0x6 ? 54 : 102) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 39 : ((char)(imm)>0x7 ? 55 : 103) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 40 : ((char)(imm)>0x8 ? 56 : 104) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 41 : ((char)(imm)>0x9 ? 57 : 105) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 42 : ((char)(imm)>0xA ? 58 : 106) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 43 : ((char)(imm)>0xB ? 59 : 107) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 44 : ((char)(imm)>0xC ? 60 : 108) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 45 : ((char)(imm)>0xD ? 61 : 109) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 46 : ((char)(imm)>0xE ? 62 : 110) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 47 : ((char)(imm)>0xF ? 63 : 111) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 48 : ((char)(imm)>0x0 ? 64 : 112) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 49 : ((char)(imm)>0x1 ? 65 : 113) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 50 : ((char)(imm)>0x2 ? 66 : 114) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 51 : ((char)(imm)>0x3 ? 67 : 115) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 52 : ((char)(imm)>0x4 ? 68 : 116) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 53 : ((char)(imm)>0x5 ? 69 : 117) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 54 : ((char)(imm)>0x6 ? 70 : 118) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 55 : ((char)(imm)>0x7 ? 71 : 119) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 56 : ((char)(imm)>0x8 ? 72 : 120) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 57 : ((char)(imm)>0x9 ? 73 : 121) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 58 : ((char)(imm)>0xA ? 74 : 122) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 59 : ((char)(imm)>0xB ? 75 : 123) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 60 : ((char)(imm)>0xC ? 76 : 124) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 61 : ((char)(imm)>0xD ? 77 : 125) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 62 : ((char)(imm)>0xE ? 78 : 126) - (char)(imm), \
+ ((char)(imm)&0xF0) ? 63 : ((char)(imm)>0xF ? 79 : 127) - (char)(imm)); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srlv_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srlv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srlv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srav_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srav_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srav_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sra_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sra_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sra_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+#define _mm512_srai_epi16(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)-1); })
+
+#define _mm512_mask_srai_epi16(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)); })
+
+#define _mm512_maskz_srai_epi16(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)(U)); })
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srl_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srl_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+#define _mm512_srli_epi16(A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)-1); })
+
+#define _mm512_mask_srli_epi16(W, U, A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)); })
+
+#define _mm512_maskz_srli_epi16(U, A, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)(U)); })
+
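+/* Byte-shift each 128-bit lane of (a) right by imm bytes, zero-filling
+   from the top; as above, counts of 16 or more clear the lane. */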
+#define _mm512_bsrli_epi128(a, imm) __extension__ ({ \
+ (__m512i)__builtin_shufflevector( \
+ (__v64qi)(__m512i)(a), \
+ (__v64qi)_mm512_setzero_si512(), \
+ ((char)(imm)&0xF0) ? 64 : (char)(imm) + ((char)(imm)>0xF ? 48 : 0), \
+ ((char)(imm)&0xF0) ? 65 : (char)(imm) + ((char)(imm)>0xE ? 49 : 1), \
+ ((char)(imm)&0xF0) ? 66 : (char)(imm) + ((char)(imm)>0xD ? 50 : 2), \
+ ((char)(imm)&0xF0) ? 67 : (char)(imm) + ((char)(imm)>0xC ? 51 : 3), \
+ ((char)(imm)&0xF0) ? 68 : (char)(imm) + ((char)(imm)>0xB ? 52 : 4), \
+ ((char)(imm)&0xF0) ? 69 : (char)(imm) + ((char)(imm)>0xA ? 53 : 5), \
+ ((char)(imm)&0xF0) ? 70 : (char)(imm) + ((char)(imm)>0x9 ? 54 : 6), \
+ ((char)(imm)&0xF0) ? 71 : (char)(imm) + ((char)(imm)>0x8 ? 55 : 7), \
+ ((char)(imm)&0xF0) ? 72 : (char)(imm) + ((char)(imm)>0x7 ? 56 : 8), \
+ ((char)(imm)&0xF0) ? 73 : (char)(imm) + ((char)(imm)>0x6 ? 57 : 9), \
+ ((char)(imm)&0xF0) ? 74 : (char)(imm) + ((char)(imm)>0x5 ? 58 : 10), \
+ ((char)(imm)&0xF0) ? 75 : (char)(imm) + ((char)(imm)>0x4 ? 59 : 11), \
+ ((char)(imm)&0xF0) ? 76 : (char)(imm) + ((char)(imm)>0x3 ? 60 : 12), \
+ ((char)(imm)&0xF0) ? 77 : (char)(imm) + ((char)(imm)>0x2 ? 61 : 13), \
+ ((char)(imm)&0xF0) ? 78 : (char)(imm) + ((char)(imm)>0x1 ? 62 : 14), \
+ ((char)(imm)&0xF0) ? 79 : (char)(imm) + ((char)(imm)>0x0 ? 63 : 15), \
+ ((char)(imm)&0xF0) ? 80 : (char)(imm) + ((char)(imm)>0xF ? 64 : 16), \
+ ((char)(imm)&0xF0) ? 81 : (char)(imm) + ((char)(imm)>0xE ? 65 : 17), \
+ ((char)(imm)&0xF0) ? 82 : (char)(imm) + ((char)(imm)>0xD ? 66 : 18), \
+ ((char)(imm)&0xF0) ? 83 : (char)(imm) + ((char)(imm)>0xC ? 67 : 19), \
+ ((char)(imm)&0xF0) ? 84 : (char)(imm) + ((char)(imm)>0xB ? 68 : 20), \
+ ((char)(imm)&0xF0) ? 85 : (char)(imm) + ((char)(imm)>0xA ? 69 : 21), \
+ ((char)(imm)&0xF0) ? 86 : (char)(imm) + ((char)(imm)>0x9 ? 70 : 22), \
+ ((char)(imm)&0xF0) ? 87 : (char)(imm) + ((char)(imm)>0x8 ? 71 : 23), \
+ ((char)(imm)&0xF0) ? 88 : (char)(imm) + ((char)(imm)>0x7 ? 72 : 24), \
+ ((char)(imm)&0xF0) ? 89 : (char)(imm) + ((char)(imm)>0x6 ? 73 : 25), \
+ ((char)(imm)&0xF0) ? 90 : (char)(imm) + ((char)(imm)>0x5 ? 74 : 26), \
+ ((char)(imm)&0xF0) ? 91 : (char)(imm) + ((char)(imm)>0x4 ? 75 : 27), \
+ ((char)(imm)&0xF0) ? 92 : (char)(imm) + ((char)(imm)>0x3 ? 76 : 28), \
+ ((char)(imm)&0xF0) ? 93 : (char)(imm) + ((char)(imm)>0x2 ? 77 : 29), \
+ ((char)(imm)&0xF0) ? 94 : (char)(imm) + ((char)(imm)>0x1 ? 78 : 30), \
+ ((char)(imm)&0xF0) ? 95 : (char)(imm) + ((char)(imm)>0x0 ? 79 : 31), \
+ ((char)(imm)&0xF0) ? 96 : (char)(imm) + ((char)(imm)>0xF ? 80 : 32), \
+ ((char)(imm)&0xF0) ? 97 : (char)(imm) + ((char)(imm)>0xE ? 81 : 33), \
+ ((char)(imm)&0xF0) ? 98 : (char)(imm) + ((char)(imm)>0xD ? 82 : 34), \
+ ((char)(imm)&0xF0) ? 99 : (char)(imm) + ((char)(imm)>0xC ? 83 : 35), \
+ ((char)(imm)&0xF0) ? 100 : (char)(imm) + ((char)(imm)>0xB ? 84 : 36), \
+ ((char)(imm)&0xF0) ? 101 : (char)(imm) + ((char)(imm)>0xA ? 85 : 37), \
+ ((char)(imm)&0xF0) ? 102 : (char)(imm) + ((char)(imm)>0x9 ? 86 : 38), \
+ ((char)(imm)&0xF0) ? 103 : (char)(imm) + ((char)(imm)>0x8 ? 87 : 39), \
+ ((char)(imm)&0xF0) ? 104 : (char)(imm) + ((char)(imm)>0x7 ? 88 : 40), \
+ ((char)(imm)&0xF0) ? 105 : (char)(imm) + ((char)(imm)>0x6 ? 89 : 41), \
+ ((char)(imm)&0xF0) ? 106 : (char)(imm) + ((char)(imm)>0x5 ? 90 : 42), \
+ ((char)(imm)&0xF0) ? 107 : (char)(imm) + ((char)(imm)>0x4 ? 91 : 43), \
+ ((char)(imm)&0xF0) ? 108 : (char)(imm) + ((char)(imm)>0x3 ? 92 : 44), \
+ ((char)(imm)&0xF0) ? 109 : (char)(imm) + ((char)(imm)>0x2 ? 93 : 45), \
+ ((char)(imm)&0xF0) ? 110 : (char)(imm) + ((char)(imm)>0x1 ? 94 : 46), \
+ ((char)(imm)&0xF0) ? 111 : (char)(imm) + ((char)(imm)>0x0 ? 95 : 47), \
+ ((char)(imm)&0xF0) ? 112 : (char)(imm) + ((char)(imm)>0xF ? 96 : 48), \
+ ((char)(imm)&0xF0) ? 113 : (char)(imm) + ((char)(imm)>0xE ? 97 : 49), \
+ ((char)(imm)&0xF0) ? 114 : (char)(imm) + ((char)(imm)>0xD ? 98 : 50), \
+ ((char)(imm)&0xF0) ? 115 : (char)(imm) + ((char)(imm)>0xC ? 99 : 51), \
+ ((char)(imm)&0xF0) ? 116 : (char)(imm) + ((char)(imm)>0xB ? 100 : 52), \
+ ((char)(imm)&0xF0) ? 117 : (char)(imm) + ((char)(imm)>0xA ? 101 : 53), \
+ ((char)(imm)&0xF0) ? 118 : (char)(imm) + ((char)(imm)>0x9 ? 102 : 54), \
+ ((char)(imm)&0xF0) ? 119 : (char)(imm) + ((char)(imm)>0x8 ? 103 : 55), \
+ ((char)(imm)&0xF0) ? 120 : (char)(imm) + ((char)(imm)>0x7 ? 104 : 56), \
+ ((char)(imm)&0xF0) ? 121 : (char)(imm) + ((char)(imm)>0x6 ? 105 : 57), \
+ ((char)(imm)&0xF0) ? 122 : (char)(imm) + ((char)(imm)>0x5 ? 106 : 58), \
+ ((char)(imm)&0xF0) ? 123 : (char)(imm) + ((char)(imm)>0x4 ? 107 : 59), \
+ ((char)(imm)&0xF0) ? 124 : (char)(imm) + ((char)(imm)>0x3 ? 108 : 60), \
+ ((char)(imm)&0xF0) ? 125 : (char)(imm) + ((char)(imm)>0x2 ? 109 : 61), \
+ ((char)(imm)&0xF0) ? 126 : (char)(imm) + ((char)(imm)>0x1 ? 110 : 62), \
+ ((char)(imm)&0xF0) ? 127 : (char)(imm) + ((char)(imm)>0x0 ? 111 : 63)); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
+ (__v32hi) __A,
+ (__v32hi) __W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
+ (__v32hi) __A,
+ (__v32hi) _mm512_setzero_hi ());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
+ (__v64qi) __A,
+ (__v64qi) __W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
+ (__v64qi) __A,
+ (__v64qi) _mm512_setzero_hi ());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
+ (__v64qi) __O,
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
+ (__v64qi)
+ _mm512_setzero_qi(),
+ __M);
+}
+
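+/* Mask concatenation (KUNPCKDQ/KUNPCKWD): the low half of the result is
+   the low half of __B and the high half is the low half of __A. */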
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
+ (__mmask64) __B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+ (__mmask32) __B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+ (__v64qi)
+ _mm512_setzero_hi (),
+ (__mmask64) __U);
+}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
+{
+ __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
+ (__v32hi) __A,
+ (__mmask32) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
+{
+ __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
+ (__v64qi) __A,
+ (__mmask64) __U);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_test_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
+ (__v64qi) __B, __U);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_test_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
+ (__v32hi) __B, __U);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
+ (__v64qi) __B, __U);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
+ (__v32hi) __B, __U);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_mm512_movepi8_mask (__m512i __A)
+{
+ return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm512_movepi16_mask (__m512i __A)
+{
+ return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_movm_epi8 (__mmask64 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_movm_epi16 (__mmask32 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
+}
+
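+/* Broadcast byte 0 of __A to all 64 lanes with an all-zero shuffle index. */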
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcastb_epi8 (__m128i __A)
+{
+ return (__m512i)__builtin_shufflevector((__v16qi) __A,
+ (__v16qi)_mm_undefined_si128(),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectb_512(__M,
+ (__v64qi) _mm512_broadcastb_epi8(__A),
+ (__v64qi) __O);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectb_512(__M,
+ (__v64qi) _mm512_broadcastb_epi8(__A),
+ (__v64qi) _mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
+ (__v32hi) __O,
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
+ (__v32hi) _mm512_setzero_hi(),
+ __M);
+}
+
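+/* Broadcast word 0 of __A to all 32 lanes, same pattern as above. */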
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcastw_epi16 (__m128i __A)
+{
+ return (__m512i)__builtin_shufflevector((__v8hi) __A,
+ (__v8hi)_mm_undefined_si128(),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectw_512(__M,
+ (__v32hi) _mm512_broadcastw_epi16(__A),
+ (__v32hi) __O);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectw_512(__M,
+ (__v32hi) _mm512_broadcastw_epi16(__A),
+ (__v32hi) _mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
+ (__v32hi) __A,
+ (__v32hi) _mm512_undefined_epi32 (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
+ (__v32hi) __A,
+ (__v32hi) _mm512_setzero_hi(),
+ (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
+ (__v32hi) __A,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+#define _mm512_alignr_epi8(A, B, N) __extension__ ({\
+ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(N), \
+ (__v64qi)_mm512_undefined_pd(), \
+ (__mmask64)-1); })
+
+#define _mm512_mask_alignr_epi8(W, U, A, B, N) __extension__ ({\
+ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(N), \
+ (__v64qi)(__m512i)(W), \
+ (__mmask64)(U)); })
+
+#define _mm512_maskz_alignr_epi8(U, A, B, N) __extension__ ({\
+ (__m512i)__builtin_ia32_palignr512_mask((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(N), \
+ (__v64qi)_mm512_setzero_si512(), \
+ (__mmask64)(U)); })
+
+#define _mm512_dbsad_epu8(A, B, imm) __extension__ ({\
+ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(imm), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ (__mmask32)-1); })
+
+#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({\
+ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(imm), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)); })
+
+#define _mm512_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({\
+ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(imm), \
+ (__v32hi)_mm512_setzero_hi(), \
+ (__mmask32)(U)); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sad_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
+ (__v64qi) __B);
+}
+
#undef __DEFAULT_FN_ATTRS
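
The rewritten unpack intrinsics above all reduce to one pattern: compute the full-width result (here with __builtin_shufflevector), then blend it against the writemask source with __builtin_ia32_selectb_512 / __builtin_ia32_selectw_512. A minimal scalar sketch of that select step, using a hypothetical plain-C helper for the 32 x 16-bit case:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical scalar model of __builtin_ia32_selectw_512: bit i of the
   writemask picks the freshly computed lane; a clear bit keeps src[i]
   (the old destination for the mask_ forms, zero for the maskz_ forms). */
static void select_w512(uint32_t mask, const int16_t op[32],
                        const int16_t src[32], int16_t dst[32])
{
    for (int i = 0; i < 32; ++i)
        dst[i] = ((mask >> i) & 1) ? op[i] : src[i];
}

int main(void)
{
    int16_t op[32], src[32], dst[32];
    for (int i = 0; i < 32; ++i) { op[i] = (int16_t)(100 + i); src[i] = -1; }
    select_w512(0x0000000F, op, src, dst);        /* take lanes 0..3 only */
    printf("%d %d %d\n", dst[0], dst[3], dst[4]); /* prints: 100 103 -1 */
    return 0;
}
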
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512cdintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512cdintrin.h
index 3894b29f5725..23c423584a7a 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512cdintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512cdintrin.h
@@ -126,6 +126,19 @@ _mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
(__v8di) _mm512_setzero_si512 (),
(__mmask8) __U);
}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m512i) __builtin_ia32_broadcastmb512 (__A);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m512i) __builtin_ia32_broadcastmw512 (__A);
+}
+
#undef __DEFAULT_FN_ATTRS
#endif
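
The two avx512cd intrinsics added above broadcast a mask register's value, zero-extended, into every element of the destination (the VPBROADCASTMB2Q and VPBROADCASTMW2D instructions). A scalar sketch of that behavior, with hypothetical plain-array stand-ins for the vector types:

#include <stdint.h>

/* _mm512_broadcastmb_epi64: the 8-bit mask, zero-extended to 64 bits,
   replicated into all eight quadword lanes. */
static void broadcastmb_epi64(uint8_t mask, uint64_t out[8])
{
    for (int i = 0; i < 8; ++i)
        out[i] = mask;
}

/* _mm512_broadcastmw_epi32: the 16-bit mask, zero-extended to 32 bits,
   replicated into all sixteen doubleword lanes. */
static void broadcastmw_epi32(uint16_t mask, uint32_t out[16])
{
    for (int i = 0; i < 16; ++i)
        out[i] = mask;
}
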
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
index afee4903ba77..13665e4c6668 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512dqintrin.h
@@ -33,7 +33,7 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
- return (__m512i) ((__v8di) __A * (__v8di) __B);
+ return (__m512i) ((__v8du) __A * (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -55,7 +55,7 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8di) __A ^ (__v8di) __B);
+ return (__m512d) ((__v8du) __A ^ (__v8du) __B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -77,7 +77,7 @@ _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16si) __A ^ (__v16si) __B);
+ return (__m512) ((__v16su) __A ^ (__v16su) __B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -99,7 +99,7 @@ _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8di) __A | (__v8di) __B);
+ return (__m512d) ((__v8du) __A | (__v8du) __B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -121,7 +121,7 @@ _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16si) __A | (__v16si) __B);
+ return (__m512) ((__v16su) __A | (__v16su) __B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -143,7 +143,7 @@ _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd (__m512d __A, __m512d __B) {
- return (__m512d) ((__v8di) __A & (__v8di) __B);
+ return (__m512d) ((__v8du) __A & (__v8du) __B);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -165,7 +165,7 @@ _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps (__m512 __A, __m512 __B) {
- return (__m512) ((__v16si) __A & (__v16si) __B);
+ return (__m512) ((__v16su) __A & (__v16su) __B);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -261,17 +261,20 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundpd_epi64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundpd_epi64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvt_roundpd_epi64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R); })
+#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu64 (__m512d __A) {
@@ -297,17 +300,20 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundpd_epu64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundpd_epu64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvt_roundpd_epu64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi64 (__m256 __A) {
@@ -333,17 +339,20 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundps_epi64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundps_epi64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvt_roundps_epi64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu64 (__m256 __A) {
@@ -369,17 +378,20 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundps_epu64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundps_epu64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvt_roundps_epu64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -406,17 +418,20 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundepi64_pd(__A, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, \
- (__v8df) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvt_roundepi64_pd(__U, __A, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepi64_ps (__m512i __A) {
@@ -442,17 +457,20 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundepi64_ps(__A, __R) __extension__ ({ \
- (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, __R) __extension__ ({ \
- (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, \
- (__v8sf) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm512_maskz_cvt_roundepi64_ps(__U, __A, __R) __extension__ ({ \
- (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -479,17 +497,20 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvtt_roundpd_epi64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvtt_roundpd_epi64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvtt_roundpd_epi64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu64 (__m512d __A) {
@@ -515,17 +536,20 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvtt_roundpd_epu64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvtt_roundpd_epu64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvtt_roundpd_epu64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi64 (__m256 __A) {
@@ -551,17 +575,20 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvtt_roundps_epi64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) -1, __R);})
+#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvtt_roundps_epi64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvtt_roundps_epi64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu64 (__m256 __A) {
@@ -587,17 +614,20 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvtt_roundps_epu64(__A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(),(__mmask8) -1, __R);})
+#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvtt_roundps_epu64(__W, __U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, \
- (__v8di) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvtt_roundps_epu64(__U, __A, __R) __extension__ ({ \
- (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, \
- (__v8di) _mm512_setzero_si512(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu64_pd (__m512i __A) {
@@ -623,18 +653,21 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundepu64_pd(__A, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, \
- (__v8df) __W, (__mmask8) __U, __R);})
+#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm512_maskz_cvt_roundepu64_pd(__U, __A, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m256 __DEFAULT_FN_ATTRS
@@ -661,117 +694,637 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvt_roundepu64_ps(__A, __R) __extension__ ({ \
- (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) -1, __R);})
+#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_range_pd(A, B, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_range_ps(A, B, C) __extension__ ({ \
+ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
+ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
+ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) -1, (int)(C),\
+ (int)(R)); })
+
+#define _mm_range_ss(A, B, C) _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W),\
+ (__mmask8)(U), (int)(C),\
+ (int)(R)); })
+
+#define _mm_mask_range_ss(W, U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C),\
+ (int)(R)); })
+
+#define _mm_maskz_range_ss(U, A, B, C) _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8) -1, (int)(C),\
+ (int)(R)); })
+
+#define _mm_range_sd(A, B, C) _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W),\
+ (__mmask8)(U), (int)(C),\
+ (int)(R)); })
+
+#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C),\
+ (int)(R)); })
+
+#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_reduce_pd(A, B) __extension__ ({ \
+ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
+ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
+ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_reduce_ps(A, B) __extension__ ({ \
+ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
+ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
+ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\
+ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\
+ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\
+ (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\
+ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\
+ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\
+ (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm_reduce_ss(A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+ (int)(C), _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(C), _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+ (int)(C), (int)(R)); })
+
+#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(C), (int)(R)); })
+
+#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
+ (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C), (int)(R)); })
+
+#define _mm_reduce_sd(A, B, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(C), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(C), _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(C), (int)(R)); })
+
+#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(C), (int)(R)); })
+
+#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C), (int)(R)); })
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_movepi32_mask (__m512i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
+}
-#define _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, __R) __extension__ ({ \
- (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, \
- (__v8sf) __W, (__mmask8) __U, __R);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_movm_epi32 (__mmask16 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
+}
-#define _mm512_maskz_cvt_roundepu64_ps(__U, __A, __R) __extension__ ({ \
- (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) __U, __R);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_movm_epi64 (__mmask8 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
+}
-#define _mm512_range_pd(__A, __B, __C) __extension__ ({ \
- (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C,\
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, \
- _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_movepi64_mask (__m512i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
+}
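/* Illustrative sketch (hypothetical helper, not part of this header):
   _mm512_movm_epi32 expands each mask bit into an all-ones or all-zero
   32-bit lane, and _mm512_movepi32_mask packs the lane sign bits back
   into a mask, so the two are inverses and the round trip below returns
   __M unchanged; the epi64/__mmask8 pair behaves the same way. */
static __inline__ __mmask16
__sketch_movm_roundtrip (__mmask16 __M)
{
  return _mm512_movepi32_mask (_mm512_movm_epi32 (__M));
}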
-#define _mm512_mask_range_pd(__W, __U, __A, __B, __C) __extension__ ({ \
- (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C,\
- (__v8df) __W, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);})
-#define _mm512_maskz_range_pd(__U, __A, __B, __C) __extension__ ({ \
- (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, \
- _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_broadcast_f32x2 (__m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+ (__v16sf)_mm512_undefined_ps(),
+ (__mmask16) -1);
+}
-#define _mm512_range_round_pd(__A, __B, __C, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+ (__v16sf)
+ __O, __M);
+}
-#define _mm512_mask_range_round_pd(__W, __U, __A, __B, __C, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
- (__v8df) __W, (__mmask8) __U, __R);})
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+ (__v16sf)_mm512_setzero_ps (),
+ __M);
+}
-#define _mm512_maskz_range_round_pd(__U, __A, __B, __C, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, (__v8df) __B, __C, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_broadcast_f32x8 (__m256 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+ (__v16sf)_mm512_undefined_ps(),
+ (__mmask16) -1);
+}
-#define _mm512_range_ps(__A, __B, __C) __extension__ ({ \
- (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, __C, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, \
- _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+ (__v16sf)__O,
+ __M);
+}
-#define _mm512_mask_range_ps(__W, __U, __A, __B, __C) __extension__ ({ \
- (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
- __C, (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+ (__v16sf)_mm512_setzero_ps (),
+ __M);
+}
-#define _mm512_maskz_range_ps(__U, __A, __B, __C) __extension__ ({ \
- (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,(__v16sf) __B, \
- __C, (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, \
- _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_broadcast_f64x2 (__m128d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+ (__v8df)_mm512_undefined_pd(),
+ (__mmask8) -1);
+}
-#define _mm512_range_round_ps(__A, __B, __C, __R) __extension__ ({ \
- (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
- __C, (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R);})
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+ (__v8df)
+ __O, __M);
+}
-#define _mm512_mask_range_round_ps(__W, __U, __A, __B, __C, __R) __extension__ ({ \
- (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
- __C, (__v16sf) __W, (__mmask16) __U, __R);})
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+ (__v8df)_mm512_setzero_pd (),
+ __M);
+}
-#define _mm512_maskz_range_round_ps(__U, __A, __B, __C, __R) __extension__ ({ \
- (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, (__v16sf) __B, \
- __C, (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, __R);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcast_i32x2 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+ (__v16si)_mm512_setzero_si512(),
+ (__mmask16) -1);
+}
-#define _mm512_reduce_pd(__A, __B) __extension__ ({ \
- (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+ (__v16si)
+ __O, __M);
+}
-#define _mm512_mask_reduce_pd(__W, __U, __A, __B) __extension__ ({ \
- (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
- (__v8df) __W,(__mmask8) __U, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+ (__v16si)_mm512_setzero_si512 (),
+ __M);
+}
-#define _mm512_maskz_reduce_pd(__U, __A, __B) __extension__ ({ \
- (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcast_i32x8 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+ (__v16si)_mm512_setzero_si512(),
+ (__mmask16) -1);
+}
-#define _mm512_reduce_ps(__A, __B) __extension__ ({ \
- (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+ (__v16si)__O,
+ __M);
+}
-#define _mm512_mask_reduce_ps(__W, __U, __A, __B) __extension__ ({ \
- (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
- (__v16sf) __W, (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
-#define _mm512_maskz_reduce_ps(__U, __A, __B) __extension__ ({ \
- (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, _MM_FROUND_CUR_DIRECTION);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcast_i64x2 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+ (__v8di)_mm512_setzero_si512(),
+ (__mmask8) -1);
+}
-#define _mm512_reduce_round_pd(__A, __B, __R) __extension__ ({\
- (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+ (__v8di)
+ __O, __M);
+}
-#define _mm512_mask_reduce_round_pd(__W, __U, __A, __B, __R) __extension__ ({\
- (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
- (__v8df) __W,(__mmask8) __U, __R);})
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+ (__v8di)_mm512_setzero_si512 (),
+ __M);
+}
-#define _mm512_maskz_reduce_round_pd(__U, __A, __B, __R) __extension__ ({\
- (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
-
-#define _mm512_reduce_round_ps(__A, __B, __R) __extension__ ({\
- (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R);})
-
-#define _mm512_mask_reduce_round_ps(__W, __U, __A, __B, __R) __extension__ ({\
- (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
- (__v16sf) __W, (__mmask16) __U, __R);})
-
-#define _mm512_maskz_reduce_round_ps(__U, __A, __B, __R) __extension__ ({\
- (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, __R);})
+#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)(U)); })
+
+#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U)); })
+
+#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(imm), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(imm), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
+#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
+ (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
+ (int)(imm), (__mmask16)(U)); })
+
+#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
+ (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
+ (int)(imm), (__mmask16)-1); })
+
+#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)(U)); })
#undef __DEFAULT_FN_ATTRS
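
Usage sketch for the mask/maskz macro families above (helper name and
compile flag are illustrative; assumes a compiler targeting -mavx512dq).
The VFPCLASS immediate selects categories by bit: 0x01 matches quiet NaN
and 0x80 signaling NaN, so 0x81 yields a lane mask of all NaN elements:

#include <immintrin.h>

static __inline__ __mmask8
sketch_isnan_pd (__m512d __A)
{
  /* One mask bit per 64-bit lane that is a QNaN or SNaN. */
  return _mm512_fpclass_pd_mask (__A, 0x01 | 0x80);
}
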
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
index 40a912189e5d..8ff212c42211 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512erintrin.h
@@ -31,66 +31,66 @@
#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (R)); })
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), \
- (__mmask8)(M), (R)); })
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)); })
#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (R)); })
+ (__mmask8)(M), (int)(R)); })
#define _mm512_exp2a23_pd(A) \
- _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_pd(S, M, A) \
- _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+ _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_pd(M, A) \
- _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask8)-1, (R)); })
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), \
- (__mmask8)(M), (R)); })
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)); })
#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \
(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask8)(M), (R)); })
+ (__mmask16)(M), (int)(R)); })
#define _mm512_exp2a23_ps(A) \
- _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_ps(S, M, A) \
- _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+ _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_ps(M, A) \
- _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
// rsqrt28
#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (R)); })
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), \
- (__mmask8)(M), (R)); })
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)); })
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (R)); })
+ (__mmask8)(M), (int)(R)); })
#define _mm512_rsqrt28_pd(A) \
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -104,17 +104,17 @@
#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), \
- (__mmask16)(M), (R)); })
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)); })
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \
(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (R)); })
+ (__mmask16)(M), (int)(R)); })
#define _mm512_rsqrt28_ps(A) \
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -126,22 +126,22 @@
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (R)); })
+ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (R)); })
+ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (R)); })
+ (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_rsqrt28_ss(A, B) \
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -153,22 +153,22 @@
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (R)); })
+ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (R)); })
+ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (R)); })
+ (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_rsqrt28_sd(A, B) \
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -177,23 +177,23 @@
_mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rsqrt28_sd(M, A, B) \
- _mm_mask_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
// rcp28
#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (R)); })
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), \
- (__mmask8)(M), (R)); })
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)); })
#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \
(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (R)); })
+ (__mmask8)(M), (int)(R)); })
#define _mm512_rcp28_pd(A) \
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -207,17 +207,17 @@
#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), \
- (__mmask16)(M), (R)); })
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)); })
#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \
(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (R)); })
+ (__mmask16)(M), (int)(R)); })
#define _mm512_rcp28_ps(A) \
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -229,22 +229,22 @@
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (R)); })
+ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (R)); })
+ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
- (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (R)); })
+ (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_rcp28_ss(A, B) \
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -256,22 +256,22 @@
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (R)); })
+ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (R)); })
+ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
- (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (R)); })
+ (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)); })
#define _mm_rcp28_sd(A, B) \
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
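
Usage sketch for the rounding-argument convention above (helper name is
illustrative; assumes -mavx512er). _MM_FROUND_CUR_DIRECTION uses the
current MXCSR rounding mode, while _MM_FROUND_NO_EXC (0x08, from
<smmintrin.h>) requests suppress-all-exceptions (SAE):

#include <immintrin.h>

static __inline__ __m512d
sketch_rcp28 (__m512d __A)
{
  /* 2^-28-accurate reciprocal approximation, raising no FP exceptions. */
  return _mm512_rcp28_round_pd (__A, _MM_FROUND_NO_EXC);
}
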
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
index 8dcdc710d5c3..0bf6582345d4 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h
@@ -27,11 +27,19 @@
#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H
+typedef char __v64qi __attribute__((__vector_size__(64)));
+typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
+/* Unsigned types */
+typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
+typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
+typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
+typedef unsigned int __v16su __attribute__((__vector_size__(64)));
+
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));
@@ -46,6 +54,111 @@ typedef unsigned short __mmask16;
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
+typedef enum
+{
+ _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
+ _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
+ _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
+ _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
+ _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
+ _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
+ _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
+ _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
+ _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
+ _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
+ _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
+ _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
+ _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
+ _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
+ _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
+ _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
+ _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
+ _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
+ _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
+ _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
+ _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
+ _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
+ _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
+ _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
+ _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
+ _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
+ _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
+ _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
+ _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
+ _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
+ _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
+ _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
+ _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
+ _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
+ _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
+ _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
+ _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
+ _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
+ _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
+ _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
+ _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
+ _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
+ _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
+ _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
+ _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
+ _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
+ _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
+ _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
+ _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
+ _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
+ _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
+ _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
+ _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
+ _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
+ _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
+ _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
+ _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
+ _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
+ _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
+ _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
+ _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
+ _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
+ _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
+ _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
+ _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
+ _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
+ _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
+ _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
+ _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
+ _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
+ _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
+ _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
+ _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
+ _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
+ _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
+ _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
+ _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
+ _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
+ _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
+ _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
+ _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
+ _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
+ _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
+ _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
+ _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
+ _MM_PERM_DDDD = 0xFF
+} _MM_PERM_ENUM;
+
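/* Illustrative note: each _MM_PERM_* letter selects a 32-bit source lane
   (A = 0 ... D = 3), most-significant position first: _MM_PERM_DCBA
   (0xE4) is the identity and _MM_PERM_ABCD (0x1B) reverses the dwords
   within each 128-bit chunk. A hypothetical helper built on
   _mm512_shuffle_epi32, which is defined later in this header: */
static __inline__ __m512i
__sketch_reverse_dwords (__m512i __A)
{
  return _mm512_shuffle_epi32 (__A, _MM_PERM_ABCD);
}
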
+typedef enum
+{
+ _MM_MANT_NORM_1_2, /* interval [1, 2) */
+ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
+ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
+ _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
+} _MM_MANTISSA_NORM_ENUM;
+
+typedef enum
+{
+ _MM_MANT_SIGN_src, /* sign = sign(SRC) */
+ _MM_MANT_SIGN_zero, /* sign = 0 */
+ _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
+} _MM_MANTISSA_SIGN_ENUM;
+
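/* Illustrative note: the two enums above parameterize the getmant family
   (e.g. _mm512_getmant_pd, defined further down in this header), which
   extracts each element's mantissa normalized into the chosen interval
   with the chosen sign treatment. Hypothetical helper: */
static __inline__ __m512d
__sketch_getmant (__m512d __A)
{
  /* Mantissa in [1, 2), keeping the sign of the source element. */
  return _mm512_getmant_pd (__A, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}
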
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
@@ -57,30 +170,81 @@ _mm512_setzero_si512(void)
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
+#define _mm512_setzero_epi32 _mm512_setzero_si512
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_undefined_pd()
+_mm512_undefined_pd(void)
{
return (__m512d)__builtin_ia32_undef512();
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_undefined()
+_mm512_undefined(void)
{
return (__m512)__builtin_ia32_undef512();
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_undefined_ps()
+_mm512_undefined_ps(void)
{
return (__m512)__builtin_ia32_undef512();
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_undefined_epi32()
+_mm512_undefined_epi32(void)
{
return (__m512i)__builtin_ia32_undef512();
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcastd_epi32 (__m128i __A)
+{
+ return (__m512i)__builtin_shufflevector((__v4si) __A,
+ (__v4si)_mm_undefined_si128(),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectd_512(__M,
+ (__v16si) _mm512_broadcastd_epi32(__A),
+ (__v16si) __O);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectd_512(__M,
+ (__v16si) _mm512_broadcastd_epi32(__A),
+ (__v16si) _mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcastq_epi64 (__m128i __A)
+{
+ return (__m512i)__builtin_shufflevector((__v2di) __A,
+ (__v2di) _mm_undefined_si128(),
+ 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectq_512(__M,
+ (__v8di) _mm512_broadcastq_epi64(__A),
+ (__v8di) __O);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+ return (__m512i)__builtin_ia32_selectq_512(__M,
+ (__v8di) _mm512_broadcastq_epi64(__A),
+ (__v8di) _mm512_setzero_si512());
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
@@ -112,6 +276,9 @@ _mm512_setzero_ps(void)
return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
+
+#define _mm512_setzero _mm512_setzero_ps
+
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
@@ -132,6 +299,28 @@ _mm512_set1_pd(double __w)
}
static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_set1_epi8(char __w)
+{
+ return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w };
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_set1_epi16(short __w)
+{
+ return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w };
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
@@ -145,21 +334,62 @@ _mm512_set1_epi64(long long __d)
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_broadcastss_ps(__m128 __X)
+_mm512_broadcastss_ps(__m128 __A)
+{
+ return (__m512)__builtin_shufflevector((__v4sf) __A,
+ (__v4sf)_mm_undefined_ps(),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
- float __f = __X[0];
- return (__v16sf){ __f, __f, __f, __f,
- __f, __f, __f, __f,
- __f, __f, __f, __f,
- __f, __f, __f, __f };
+ return (__m512i)(__v16si)
+ { __D, __C, __B, __A, __D, __C, __B, __A,
+ __D, __C, __B, __A, __D, __C, __B, __A };
}
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_set4_epi64 (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return (__m512i) (__v8di)
+ { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_set4_pd (double __A, double __B, double __C, double __D)
+{
+ return (__m512d)
+ { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_set4_ps (float __A, float __B, float __C, float __D)
+{
+ return (__m512)
+ { __D, __C, __B, __A, __D, __C, __B, __A,
+ __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+#define _mm512_setr4_epi32(e0,e1,e2,e3) \
+ _mm512_set4_epi32((e3),(e2),(e1),(e0))
+
+#define _mm512_setr4_epi64(e0,e1,e2,e3) \
+ _mm512_set4_epi64((e3),(e2),(e1),(e0))
+
+#define _mm512_setr4_pd(e0,e1,e2,e3) \
+ _mm512_set4_pd((e3),(e2),(e1),(e0))
+
+#define _mm512_setr4_ps(e0,e1,e2,e3) \
+ _mm512_set4_ps((e3),(e2),(e1),(e0))
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_broadcastsd_pd(__m128d __X)
+_mm512_broadcastsd_pd(__m128d __A)
{
- double __d = __X[0];
- return (__v8df){ __d, __d, __d, __d,
- __d, __d, __d, __d };
+ return (__m512d)__builtin_shufflevector((__v2df) __A,
+ (__v2df) _mm_undefined_pd(),
+ 0, 0, 0, 0, 0, 0, 0, 0);
}
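/* Illustrative note: the broadcasts above replicate lane 0 by listing
   index 0 once per destination element; the second shufflevector operand
   is a placeholder that is never selected. A hypothetical 256-bit
   analogue of the same pattern: */
static __inline__ __m256d
__sketch_broadcastsd_pd256 (__m128d __A)
{
  return (__m256d)__builtin_shufflevector ((__v2df) __A,
                                           (__v2df) _mm_undefined_pd (),
                                           0, 0, 0, 0);
}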
/* Cast between vector types */
@@ -183,272 +413,327 @@ _mm512_castpd512_pd128(__m512d __a)
return __builtin_shufflevector(__a, __a, 0, 1);
}
+static __inline __m256d __DEFAULT_FN_ATTRS
+_mm512_castpd512_pd256 (__m512d __A)
+{
+ return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
+}
+
static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}
+static __inline __m256 __DEFAULT_FN_ATTRS
+_mm512_castps512_ps256 (__m512 __A)
+{
+ return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_castpd_ps (__m512d __A)
+{
+ return (__m512) (__A);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_castpd_si512 (__m512d __A)
+{
+ return (__m512i) (__A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_castpd128_pd512 (__m128d __A)
+{
+ return __builtin_shufflevector(__A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_castps_pd (__m512 __A)
+{
+ return (__m512d) (__A);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_castps_si512 (__m512 __A)
+{
+ return (__m512i) (__A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_castps128_ps512 (__m128 __A)
+{
+ return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_castsi128_si512 (__m128i __A)
+{
+ return __builtin_shufflevector(__A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_castsi256_si512 (__m256i __A)
+{
+ return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_castsi512_ps (__m512i __A)
+{
+ return (__m512) (__A);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_castsi512_pd (__m512i __A)
+{
+ return (__m512d) (__A);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS
+_mm512_castsi512_si128 (__m512i __A)
+{
+ return (__m128i)__builtin_shufflevector(__A, __A, 0, 1);
+}
+
+static __inline __m256i __DEFAULT_FN_ATTRS
+_mm512_castsi512_si256 (__m512i __A)
+{
+ return (__m256i)__builtin_shufflevector(__A, __A, 0, 1, 2, 3);
+}
+
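/* Illustrative note: these casts are pure reinterpretations and compile
   to no instructions; in the widening casts the -1 shuffle indices mark
   don't-care lanes, so e.g. _mm512_castpd128_pd512 leaves bits 511:128
   undefined. A hypothetical helper that zeroes the upper lanes instead,
   using _mm512_insertf64x2 from avx512dqintrin.h: */
static __inline__ __m512d
__sketch_zext_pd128_pd512 (__m128d __A)
{
  return _mm512_insertf64x2 (_mm512_setzero_pd (), __A, 0);
}
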
/* Bitwise operators */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
- return __a & __b;
+ return (__m512i)((__v16su)__a & (__v16su)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
- (__v16si) __b,
- (__v16si) __src,
- (__mmask16) __k);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
+ (__v16si) _mm512_and_epi32(__a, __b),
+ (__v16si) __src);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
- (__v16si) __b,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __k);
+ return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
+ __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
- return __a & __b;
+ return (__m512i)((__v8du)__a & (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
- (__v8di) __b,
- (__v8di) __src,
- (__mmask8) __k);
+ return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
+ (__v8di) _mm512_and_epi64(__a, __b),
+ (__v8di) __src);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
- (__v8di) __b,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __k);
+ return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
+ __k, __a, __b);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_andnot_si512 (__m512i __A, __m512i __B)
+{
+ return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1);
+ return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si) __W,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+ (__v16si)_mm512_andnot_epi32(__A, __B),
+ (__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
- (__v16si) __B,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
+ return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
+ __U, __A, __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_andnot_epi64 (__m512i __A, __m512i __B)
+_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) -1);
+ return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di) __W, __U);
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+ (__v8di)_mm512_andnot_epi64(__A, __B),
+ (__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
- (__v8di) __B,
- (__v8di)
- _mm512_setzero_pd (),
- __U);
+ return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
+ __U, __A, __B);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
- return __a | __b;
+ return (__m512i)((__v16su)__a | (__v16su)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
- (__v16si) __b,
- (__v16si) __src,
- (__mmask16) __k);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
+ (__v16si)_mm512_or_epi32(__a, __b),
+ (__v16si)__src);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
- (__v16si) __b,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __k);
+ return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
- return __a | __b;
+ return (__m512i)((__v8du)__a | (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
- (__v8di) __b,
- (__v8di) __src,
- (__mmask8) __k);
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
+ (__v8di)_mm512_or_epi64(__a, __b),
+ (__v8di)__src);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
- (__v8di) __b,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __k);
+ return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
- return __a ^ __b;
+ return (__m512i)((__v16su)__a ^ (__v16su)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
- (__v16si) __b,
- (__v16si) __src,
- (__mmask16) __k);
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
+ (__v16si)_mm512_xor_epi32(__a, __b),
+ (__v16si)__src);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
- (__v16si) __b,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __k);
+ return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
- return __a ^ __b;
+ return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
- (__v8di) __b,
- (__v8di) __src,
- (__mmask8) __k);
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
+ (__v8di)_mm512_xor_epi64(__a, __b),
+ (__v8di)__src);
}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
- return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
- (__v8di) __b,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __k);
+ return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_si512(__m512i __a, __m512i __b)
{
- return __a & __b;
+ return (__m512i)((__v8du)__a & (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_si512(__m512i __a, __m512i __b)
{
- return __a | __b;
+ return (__m512i)((__v8du)__a | (__v8du)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_si512(__m512i __a, __m512i __b)
{
- return __a ^ __b;
+ return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
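/* Illustrative note: routing the __m512i operands through the unsigned
   element vectors (__v16su, __v8du) makes the intended lane width
   explicit and, for the add/sub operators below, gives well-defined
   wraparound rather than the undefined behavior of signed integer
   overflow; the bitwise operators use the same types for consistency. */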
+
/* Arithmetic */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
- return __a + __b;
+ return (__m512d)((__v8df)__a + (__v8df)__b);
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
- return __a + __b;
+ return (__m512)((__v16sf)__a + (__v16sf)__b);
}
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
- return __a * __b;
+ return (__m512d)((__v8df)__a * (__v8df)__b);
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
- return __a * __b;
+ return (__m512)((__v16sf)__a * (__v16sf)__b);
}
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
- return __a - __b;
+ return (__m512d)((__v8df)__a - (__v8df)__b);
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
- return __a - __b;
+ return (__m512)((__v16sf)__a - (__v16sf)__b);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v8di) __A + (__v8di) __B);
+ return (__m512i) ((__v8du) __A + (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -473,7 +758,7 @@ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v8di) __A - (__v8di) __B);
+ return (__m512i) ((__v8du) __A - (__v8du) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -498,7 +783,7 @@ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v16si) __A + (__v16si) __B);
+ return (__m512i) ((__v16su) __A + (__v16su) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -523,7 +808,7 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v16si) __A - (__v16si) __B);
+ return (__m512i) ((__v16su) __A - (__v16su) __B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -545,6 +830,24 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
(__mmask16) __U);
}
+#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
@@ -556,6 +859,45 @@ _mm512_max_pd(__m512d __A, __m512d __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
@@ -567,9 +909,30 @@ _mm512_max_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U,
@@ -578,28 +941,34 @@ _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+#define _mm_max_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) __W, (__mmask8) __U,__R); })
+#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U,
@@ -608,24 +977,30 @@ _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+#define _mm_max_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) __W, (__mmask8) __U,__R); })
+#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
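/* Editor's usage sketch (not part of the header): the new *_round_* macros
 * take the rounding/SAE control as a compile-time int; for max/min only the
 * SAE bit matters, so _MM_FROUND_CUR_DIRECTION is the usual choice. Assumes
 * <immintrin.h> and -mavx512f. */
static __m512d demo_masked_max(__m512d w, __mmask8 u, __m512d a, __m512d b)
{
    /* Lanes with their u bit set get max(a,b); the rest pass through w. */
    return _mm512_mask_max_round_pd(w, u, a, b, _MM_FROUND_CUR_DIRECTION);
}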
static __inline __m512i
__DEFAULT_FN_ATTRS
@@ -638,6 +1013,24 @@ _mm512_max_epi32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
@@ -648,6 +1041,24 @@ _mm512_max_epu32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
@@ -658,6 +1069,24 @@ _mm512_max_epi64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
@@ -668,6 +1097,42 @@ _mm512_max_epu64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
@@ -679,6 +1144,45 @@ _mm512_min_pd(__m512d __A, __m512d __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
@@ -690,9 +1194,30 @@ _mm512_min_ps(__m512 __A, __m512 __B)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U,
@@ -701,28 +1226,34 @@ _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+#define _mm_min_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) __W, (__mmask8) __U,__R); })
+#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U,
@@ -731,24 +1262,30 @@ _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+#define _mm_min_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) __W, (__mmask8) __U,__R); })
+#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
static __inline __m512i
__DEFAULT_FN_ATTRS
@@ -761,6 +1298,24 @@ _mm512_min_epi32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
@@ -771,6 +1326,24 @@ _mm512_min_epu32(__m512i __A, __m512i __B)
(__mmask16) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
@@ -781,6 +1354,24 @@ _mm512_min_epi64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
@@ -791,6 +1382,24 @@ _mm512_min_epu64(__m512i __A, __m512i __B)
(__mmask8) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W, __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
@@ -850,7 +1459,7 @@ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i) ((__v16si) __A * (__v16si) __B);
+ return (__m512i) ((__v16su) __A * (__v16su) __B);
}
static __inline __m512i __DEFAULT_FN_ATTRS
@@ -871,6 +1480,21 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
(__v16si) __W, __M);
}
+#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
@@ -880,6 +1504,40 @@ _mm512_sqrt_pd(__m512d __a)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
@@ -889,6 +1547,24 @@ _mm512_sqrt_ps(__m512 __a)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
+{
+ return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
@@ -897,6 +1573,23 @@ _mm512_rsqrt14_pd(__m512d __A)
_mm512_setzero_pd (),
                                                  (__mmask8) -1);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
@@ -906,26 +1599,79 @@ _mm512_rsqrt14_ps(__m512 __A)
(__mmask16) -1);
}
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
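/* Editor's usage sketch (not part of the header): rsqrt14 returns an
 * approximate 1/sqrt(x) (relative error on the order of 2^-14 per the ISA
 * manual), so add a Newton-Raphson step when more precision is needed.
 * Assumes <immintrin.h> and -mavx512f. */
static __m512 demo_rsqrt_refined(__m512 x)
{
    __m512 y = _mm512_rsqrt14_ps(x);                  /* ~14-bit estimate */
    __m512 xyy = _mm512_mul_ps(x, _mm512_mul_ps(y, y));
    /* y' = 0.5 * y * (3 - x*y*y) roughly doubles the accurate bits. */
    return _mm512_mul_ps(_mm512_mul_ps(_mm512_set1_ps(0.5f), y),
                         _mm512_sub_ps(_mm512_set1_ps(3.0f), xyy));
}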
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
(__mmask8) -1);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
@@ -935,6 +1681,23 @@ _mm512_rcp14_pd(__m512d __A)
(__mmask8) -1);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
@@ -943,26 +1706,80 @@ _mm512_rcp14_ps(__m512 __A)
_mm512_setzero_ps (),
(__mmask16) -1);
}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) -1);
}
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
+ return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
(__mmask8) -1);
}
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
@@ -972,6 +1789,15 @@ _mm512_floor_ps(__m512 __A)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_FLOOR,
+ (__v16sf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
@@ -981,6 +1807,24 @@ _mm512_floor_pd(__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_FLOOR,
+ (__v8df) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+ _MM_FROUND_CEIL,
+ (__v16sf) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
@@ -999,6 +1843,15 @@ _mm512_ceil_pd(__m512d __A)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+ _MM_FROUND_CEIL,
+ (__v8df) __W, __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
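/* Editor's usage sketch (not part of the header): floor/ceil are thin
 * wrappers over rndscale with _MM_FROUND_FLOOR / _MM_FROUND_CEIL, and the
 * masked forms merge unselected lanes from the pass-through operand.
 * Assumes <immintrin.h> and -mavx512f. */
static __m512d demo_masked_floor(__m512d src, __mmask8 k, __m512d x)
{
    /* Lanes selected by k are floored; the rest keep their value from src. */
    return _mm512_mask_floor_pd(src, k, x);
}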
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
@@ -1008,6 +1861,23 @@ _mm512_abs_epi64(__m512i __A)
(__mmask8) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
@@ -1017,9 +1887,26 @@ _mm512_abs_epi32(__m512i __A)
(__mmask16) -1);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U,
@@ -1028,28 +1915,34 @@ _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+#define _mm_add_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) __W, (__mmask8) __U,__R); })
+#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U,
@@ -1058,23 +1951,29 @@ _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+#define _mm_add_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) __W, (__mmask8) __U,__R); })
+#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -1112,33 +2011,45 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
-
-#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \
- (__v8df) __W, (__mmask8) __U, __R); })
-
-#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); })
-
-#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); })
-
-#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) __W, (__mmask16)__U, __R); })
-
-#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); })
+#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
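/* Editor's usage sketch (not part of the header): when an embedded rounding
 * mode is used, the SAE bit must be set as well, hence the conventional
 * "mode | _MM_FROUND_NO_EXC" combination. Assumes <immintrin.h> and
 * -mavx512f. */
static __m512 demo_add_toward_zero(__m512 a, __m512 b)
{
    return _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}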
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U,
@@ -1147,27 +2058,33 @@ _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) __W, (__mmask8) __U,__R); })
+#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U,
@@ -1176,24 +2093,30 @@ _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) __W, (__mmask8) __U,__R); })
+#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -1233,33 +2156,45 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
-
-#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) __W, (__mmask8) __U, __R); })
-
-#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
-
-#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
-
-#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) __W, (__mmask16) __U, __R); });
-
-#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
+#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+                                       (int)(R)); })
+
+#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+                                        (__mmask16)(U), (int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U,
@@ -1268,27 +2203,33 @@ _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) __W, (__mmask8) __U,__R); })
+#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U,
@@ -1297,24 +2238,30 @@ _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) __W, (__mmask8) __U,__R); })
+#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -1354,33 +2301,45 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
-
-#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) __W, (__mmask8) __U, __R); })
-
-#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
-
-#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
-
-#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) __W, (__mmask16) __U, __R); });
-
-#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
+#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+                                       (int)(R)); })
+
+#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+                                        (__mmask16)(U), (int)(R)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U,
@@ -1389,28 +2348,34 @@ _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
- return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
+ return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); })
+#define _mm_div_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) __W, (__mmask8) __U,__R); })
+#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
-#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \
- (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); })
+#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U,
@@ -1419,24 +2384,36 @@ _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
- return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
+ return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
(__v2df) __B,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); })
+#define _mm_div_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
-#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) __W, (__mmask8) __U,__R); })
+#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
-#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \
- (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); })
+#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_div_pd(__m512d __a, __m512d __b)
+{
+ return (__m512d)((__v8df)__a/(__v8df)__b);
+}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -1457,6 +2434,12 @@ _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
_MM_FROUND_CUR_DIRECTION);
}
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_div_ps(__m512 __a, __m512 __b)
+{
+ return (__m512)((__v16sf)__a/(__v16sf)__b);
+}
+
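/* Editor's usage sketch (not part of the header): the new unmasked divides
 * are plain IEEE lane-wise '/' on the vector types, so no builtin is needed.
 * Assumes <immintrin.h> and -mavx512f. */
static __m512 demo_normalize(__m512 v, __m512 norms)
{
    return _mm512_div_ps(v, norms); /* lane-wise v[i] / norms[i] */
}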
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
@@ -1476,108 +2459,186 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\
- (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); })
-
-#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) __W, (__mmask8) __U, __R); })
-
-#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \
- (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \
- (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);})
-
-#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);})
-
-#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) __W, (__mmask16) __U, __R); });
-
-#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \
- (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \
- (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);});
+#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+                                       (int)(R)); })
+
+#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+                                        (__mmask16)(U), (int)(R)); })
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
- -1, _MM_FROUND_CUR_DIRECTION); })
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)(__m512)(A), (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
+ (__v16sf)(__m512)(A), (__mmask16)(B), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(A), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
+ (__v16sf)(__m512)(A), (__mmask16)(B), \
+ (int)(R)); })
+
+#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(A), (int)(R)); })
+
+#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
- -1, _MM_FROUND_CUR_DIRECTION); })
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)(__m512d)(A), (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
+ (__v8df)(__m512d)(A), (__mmask8)(B), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(A), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
+ (__v8df)(__m512d)(A), (__mmask8)(B), \
+ (int)(R)); })
+
+#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(A), (int)(R)); })
+
+#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
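/* A usage sketch for the roundscale family (illustrative; x assumed
   __m512d). Bits [7:4] of the immediate give the scale M, so values are
   rounded to a multiple of 2^-M; the low bits select the rounding
   behavior, with 0x01 rounding down:

     __m512d f = _mm512_roundscale_pd(x, 0x01);  // floor to whole numbers
*/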
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) -1, (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), (__mmask8)-1, \
+ (int)(R)); })
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) -1, (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) -1, (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), (__mmask8)-1, \
+ (int)(R)); })
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) -1, (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)); })
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
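/* A usage sketch for the fused multiply-add rounding macros (illustrative;
   a, b, c assumed __m512d). The product and sum are rounded once, under
   the rounding mode passed as an immediate:

     __m512d r = _mm512_fmadd_round_pd(a, b, c,
                     _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  // a*b + c
*/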
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -1701,75 +2762,87 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
}
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) -1, (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), (__mmask16)-1, \
+ (int)(R)); })
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) -1, (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) -1, (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), (__mmask16)-1, \
+ (int)(R)); })
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) -1, (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -1893,45 +2966,52 @@ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) -1, (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) -1, (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
- (__v8df) (B), -(__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -2005,45 +3085,52 @@ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
}
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) -1, (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) -1, (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
- (__v16sf) (B), -(__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -2117,9 +3204,10 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
}
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -2133,9 +3221,10 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -2149,9 +3238,10 @@ _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
}
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -2165,9 +3255,10 @@ _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
}
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -2181,9 +3272,10 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
}
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -2197,9 +3289,10 @@ _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
}
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -2213,15 +3306,17 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
}
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
- (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
- (__v8df) (B), (__v8df) (C), \
- (__mmask8) (U), (R)); })
+ (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
@@ -2245,15 +3340,17 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
}
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
- (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
- (__v16sf) (B), (__v16sf) (C), \
- (__mmask16) (U), (R)); })
+ (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)); })
static __inline__ __m512 __DEFAULT_FN_ATTRS
@@ -2289,6 +3386,29 @@ _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
(__v16si) __B,
(__mmask16) -1);
}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
+ /* idx */ ,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
+ /* idx */ ,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
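/* A usage sketch for the two-source permutes (illustrative; a, b, idx
   assumed __m512i). Each index lane selects from the 32-dword
   concatenation of a (indices 0..15) and b (indices 16..31); lanes whose
   mask bit is clear keep a in the mask form, or become zero in the maskz
   form:

     __m512i r = _mm512_mask_permutex2var_epi32(a, 0x00FF, idx, b);
                                                 // upper 8 lanes keep a
*/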
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
@@ -2299,98 +3419,140 @@ _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
(__mmask8) -1);
}
-static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
+ __m512i __B)
{
- return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
- /* idx */ ,
- (__v8df) __A,
- (__v8df) __B,
- (__mmask8) -1);
+ return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
+ /* idx */ ,
+ (__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) __U);
}
-static __inline __m512 __DEFAULT_FN_ATTRS
-_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
+ __m512i __I, __m512i __B)
{
- return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
- /* idx */ ,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) -1);
+ return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
+ /* idx */ ,
+ (__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) __U);
}
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), \
- (I), (__v8di)_mm512_setzero_si512(), \
+ (__v8di)(__m512i)(B), (int)(I), \
+ (__v8di)_mm512_setzero_si512(), \
(__mmask8)-1); })
+#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
+ (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(imm), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
+ (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), \
- (I), (__v16si)_mm512_setzero_si512(), \
+ (__v16si)(__m512i)(B), (int)(I), \
+ (__v16si)_mm512_setzero_si512(), \
(__mmask16)-1); })
+#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
+ (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(imm), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
+ (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
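/* A usage sketch for the alignr macros (illustrative; a, b assumed
   __m512i). The quadword form shifts the concatenation a:b (a high) right
   by the immediate number of 64-bit elements and keeps the low eight, so
   an immediate of 1 yields {b[1..7], a[0]}:

     __m512i r = _mm512_alignr_epi64(a, b, 1);
*/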
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
- (__m256d) \
- __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
- (I), \
- (__v4df)_mm256_setzero_si256(), \
- (__mmask8) -1); })
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
-#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
- (__m128) \
- __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
- (I), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8) -1); })
+#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
+ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
+#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
+ (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)); })
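/* A usage sketch for the extract macros (illustrative; v assumed __m512,
   d assumed __m512d). The immediate selects which 128-bit (or 256-bit)
   lane is returned:

     __m128  lo = _mm512_extractf32x4_ps(v, 0);   // lowest four floats
     __m256d hi = _mm512_extractf64x4_pd(d, 1);   // upper four doubles
*/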
/* Vector Blend */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
- return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
(__v8df) __W,
- (__mmask8) __U);
+ (__v8df) __A);
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
- return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
(__v16sf) __W,
- (__mmask16) __U);
+ (__v16sf) __A);
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
- return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
+ return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
(__v8di) __W,
- (__mmask8) __U);
+ (__v8di) __A);
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
- return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
+ return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
(__v16si) __W,
- (__mmask16) __U);
+ (__v16si) __A);
}
/* Compare */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (P), (__mmask16)-1, (R)); })
+ (__v16sf)(__m512)(B), (int)(P), \
+ (__mmask16)-1, (int)(R)); })
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (P), (__mmask16)(U), (R)); })
+ (__v16sf)(__m512)(B), (int)(P), \
+ (__mmask16)(U), (int)(R)); })
#define _mm512_cmp_ps_mask(A, B, P) \
_mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
@@ -2400,13 +3562,13 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
(__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (P), (__mmask8)-1, (R)); })
+ (__v8df)(__m512d)(B), (int)(P), \
+ (__mmask8)-1, (int)(R)); })
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
(__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (P), (__mmask8)(U), (R)); })
+ (__v8df)(__m512d)(B), (int)(P), \
+ (__mmask8)(U), (int)(R)); })
#define _mm512_cmp_pd_mask(A, B, P) \
_mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
@@ -2416,6 +3578,22 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
/* Conversion */
+#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)); })
+
+
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
@@ -2426,15 +3604,80 @@ _mm512_cvttps_epu32(__m512 __A)
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
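/* A usage sketch for the truncating conversions (illustrative; w assumed
   __m512i, x assumed __m512). The "tt" forms round toward zero; lanes
   whose mask bit is clear keep w:

     __m512i u = _mm512_mask_cvttps_epu32(w, 0x00FF, x);
*/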
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
- (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)(U), (int)(R)); })
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
- (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
(__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)(U), (int)(R)); })
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_cvtepu32_ps (__m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf) _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
@@ -2445,6 +3688,49 @@ _mm512_cvtepi32_pd(__m256i __A)
(__mmask8) -1);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_cvtepi32_ps (__m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf) _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
+{
+ return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
@@ -2454,15 +3740,109 @@ _mm512_cvtepu32_pd(__m256i __A)
(__mmask8) -1);
}
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
- (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
(__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1, (R)); })
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_cvtpd_ps (__m512d __A)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf) _mm256_undefined_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
+{
+ return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+ (__v8sf) _mm256_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_undefined_si256(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)(__m256i)(U), \
+ (__mmask16)(W)); })
+
+#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(W)); })
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
(__v16hi)_mm256_setzero_si256(), \
- -1); })
+ (__mmask16)-1); })
+
+#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)(__m256i)(U), \
+ (__mmask16)(W)); })
+
+#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(W)); })
+
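/* A usage sketch for the half-precision conversions (illustrative; x
   assumed __m512). Sixteen floats pack into sixteen halves in a __m256i,
   with the rounding mode given as an immediate; _mm512_cvtph_ps below
   converts back:

     __m256i h = _mm512_cvtps_ph(x, _MM_FROUND_TO_NEAREST_INT);
     __m512  f = _mm512_cvtph_ps(h);
*/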
+#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
@@ -2474,15 +3854,39 @@ _mm512_cvtph_ps(__m256i __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512i __DEFAULT_FN_ATTRS
-_mm512_cvttps_epi32(__m512 __a)
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
- return (__m512i)
- __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
- (__v16si) _mm512_setzero_si512 (),
- (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
+{
+ return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
+#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)); })
+
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
@@ -2492,67 +3896,437 @@ _mm512_cvttpd_epi32(__m512d __a)
_MM_FROUND_CUR_DIRECTION);
}
-#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
- (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1, (R)); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+ (__v8si) _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
+ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
(__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)); })
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_cvttps_epi32(__m512 __a)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
+ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
(__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtps_epi32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si) _mm512_undefined_epi32 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
- (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
+ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
(__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1, (R)); })
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtpd_epi32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
- (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
+ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
(__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1, (R)); })
+ (__mmask16)(U), (int)(R)); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtps_epu32 (__m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_epi32 (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
- (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
+ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
(__v8si)_mm256_setzero_si256(), \
- (__mmask8) -1, (R)); })
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtpd_epu32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
/* Unpack and Interleave */
+
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
- return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+ return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
+ 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpackhi_pd(__A, __B),
+ (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpackhi_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
- return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+ return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
+ 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpacklo_pd(__A, __B),
+ (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
+ (__v8df)_mm512_unpacklo_pd(__A, __B),
+ (__v8df)_mm512_setzero_pd());
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
- return __builtin_shufflevector(__a, __b,
- 2, 18, 3, 19,
- 2+4, 18+4, 3+4, 19+4,
- 2+8, 18+8, 3+8, 19+8,
- 2+12, 18+12, 3+12, 19+12);
+ return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
+ 2, 18, 3, 19,
+ 2+4, 18+4, 3+4, 19+4,
+ 2+8, 18+8, 3+8, 19+8,
+ 2+12, 18+12, 3+12, 19+12);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpackhi_ps(__A, __B),
+ (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpackhi_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
- return __builtin_shufflevector(__a, __b,
- 0, 16, 1, 17,
- 0+4, 16+4, 1+4, 17+4,
- 0+8, 16+8, 1+8, 17+8,
- 0+12, 16+12, 1+12, 17+12);
+ return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
+ 0, 16, 1, 17,
+ 0+4, 16+4, 1+4, 17+4,
+ 0+8, 16+8, 1+8, 17+8,
+ 0+12, 16+12, 1+12, 17+12);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpacklo_ps(__A, __B),
+ (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
+ (__v16sf)_mm512_unpacklo_ps(__A, __B),
+ (__v16sf)_mm512_setzero_ps());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
+ 2, 18, 3, 19,
+ 2+4, 18+4, 3+4, 19+4,
+ 2+8, 18+8, 3+8, 19+8,
+ 2+12, 18+12, 3+12, 19+12);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpackhi_epi32(__A, __B),
+ (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpackhi_epi32(__A, __B),
+ (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
+ 0, 16, 1, 17,
+ 0+4, 16+4, 1+4, 17+4,
+ 0+8, 16+8, 1+8, 17+8,
+ 0+12, 16+12, 1+12, 17+12);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpacklo_epi32(__A, __B),
+ (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
+ (__v16si)_mm512_unpacklo_epi32(__A, __B),
+ (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
+ 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpackhi_epi64(__A, __B),
+ (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpackhi_epi64(__A, __B),
+ (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
+ 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpacklo_epi64(__A, __B),
+ (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
+ (__v8di)_mm512_unpacklo_epi64(__A, __B),
+ (__v8di)_mm512_setzero_si512());
}
/* Bit Test */
@@ -2565,6 +4339,13 @@ _mm512_test_epi32_mask(__m512i __A, __m512i __B)
(__mmask16) -1);
}
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
+ (__v16si) __B, __U);
+}
+
static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
@@ -2573,57 +4354,88 @@ _mm512_test_epi64_mask(__m512i __A, __m512i __B)
(__mmask8) -1);
}
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
+}
+
+
/* SIMD load ops */
static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_loadu_si512 (void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
- return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
+ return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
- return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
+ return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
- return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
+ return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
+_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
- return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U);
-}
-
-static __inline __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
-{
- return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U);
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
+ (__v8df) __W,
+ (__mmask8) __U);
}
static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
- return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
@@ -2648,7 +4460,7 @@ _mm512_loadu_ps(float const *__p)
}
static __inline __m512 __DEFAULT_FN_ATTRS
-_mm512_load_ps(double const *__p)
+_mm512_load_ps(float const *__p)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
(__v16sf)
@@ -2656,8 +4468,25 @@ _mm512_load_ps(double const *__p)
(__mmask16) -1);
}
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
static __inline __m512d __DEFAULT_FN_ATTRS
-_mm512_load_pd(float const *__p)
+_mm512_load_pd(double const *__p)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
(__v8df)
@@ -2665,45 +4494,87 @@ _mm512_load_pd(float const *__p)
(__mmask8) -1);
}
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_si512 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_epi32 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_load_epi64 (void const *__P)
+{
+ return *(__m512i *) __P;
+}
+
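/* A usage sketch for the loads (illustrative; p and fp assumed to point
   at readable data). The load_* forms require 64-byte alignment, the
   loadu forms do not, and the masked loadu forms do not read lanes whose
   mask bit is clear:

     __m512i v = _mm512_loadu_si512(p);              // any alignment
     __m512  t = _mm512_maskz_loadu_ps(0x000F, fp);  // first 4 floats, rest zero
*/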
/* SIMD store ops */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
- __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
+ __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
(__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS
+_mm512_storeu_si512 (void *__P, __m512i __A)
+{
+ __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
+ (__mmask16) -1);
+}
+
+static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
- __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
+ __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
(__mmask16) __U);
}
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
- __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
+ __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
- __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
+ __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
}
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
- __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
+ __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
(__mmask16) __U);
}
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
- __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
+ __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
}
static __inline void __DEFAULT_FN_ATTRS
@@ -2731,6 +4602,24 @@ _mm512_store_ps(void *__P, __m512 __A)
*(__m512*)__P = __A;
}
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_si512 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_epi32 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
+static __inline void __DEFAULT_FN_ATTRS
+_mm512_store_epi64 (void *__P, __m512i __A)
+{
+ *(__m512i *) __P = __A;
+}
+
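+/* As with the aligned loads, _mm512_store_si512/_mm512_store_epi32/
+ * _mm512_store_epi64 assume a 64-byte-aligned __P; use the storeu forms
+ * otherwise. */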
/* Mask ops */
static __inline __mmask16 __DEFAULT_FN_ATTRS
@@ -3029,46 +4918,4625 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
__u);
}
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepi8_epi32 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepi8_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepi32_epi64 (__m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepi16_epi32 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepi16_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepu8_epi32 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepu8_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepu32_epi64 (__m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
+{
+ return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepu16_epi32 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_cvtepu16_epi64 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_rorv_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_rorv_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+
+
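+/* For the cmp/ucmp macros below, p is an immediate comparison predicate
+ * (e.g. _MM_CMPINT_EQ, _MM_CMPINT_LT); the cmp forms compare as signed
+ * integers, the ucmp forms as unsigned. */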
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (p), \
+ (__v16si)(__m512i)(b), (int)(p), \
(__mmask16)-1); })
#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (p), \
+ (__v16si)(__m512i)(b), (int)(p), \
(__mmask16)-1); })
#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (p), \
+ (__v8di)(__m512i)(b), (int)(p), \
(__mmask8)-1); })
#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (p), \
+ (__v8di)(__m512i)(b), (int)(p), \
(__mmask8)-1); })
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (p), \
+ (__v16si)(__m512i)(b), (int)(p), \
(__mmask16)(m)); })
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (p), \
+ (__v16si)(__m512i)(b), (int)(p), \
(__mmask16)(m)); })
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (p), \
+ (__v8di)(__m512i)(b), (int)(p), \
(__mmask8)(m)); })
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (p), \
+ (__v8di)(__m512i)(b), (int)(p), \
(__mmask8)(m)); })
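+
+/* rol_epi32/rol_epi64 rotate each lane left by the immediate b; for
+ * example, _mm512_rol_epi32(v, 8) rotates every 32-bit lane of v left by
+ * 8 bits. */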
+#define _mm512_rol_epi32(a, b) __extension__ ({ \
+ (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
+ (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
+ (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_rol_epi64(a, b) __extension__ ({ \
+ (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
+ (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
+ (__v8di)(__m512i)(W), (__mmask8)(U)); })
+
+#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
+ (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_rolv_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_rolv_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#define _mm512_ror_epi32(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_ror_epi64(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)(__m512i)(W), (__mmask8)(U)); })
+
+#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
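+/* slli/srli shift every lane by an immediate count; as with the underlying
+ * instructions, counts at or past the lane width produce zero rather than
+ * being masked to the lane width. */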
+#define _mm512_slli_epi32(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_slli_epi32(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_slli_epi32(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_slli_epi64(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_slli_epi64(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_slli_epi64(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
+#define _mm512_srli_epi32(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_srli_epi32(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_srli_epi64(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+ __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
+ (__v16si) __A,
+ (__v16si) __W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
+ (__v16si) __A,
+ (__v16si) _mm512_setzero_si512 ());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
+ (__v8di) __A,
+ (__v8di) __W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
+ (__v8di) __A,
+ (__v8di) _mm512_setzero_si512 ());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+ __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_movedup_pd (__m512d __A)
+{
+ return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
+ 0, 0, 2, 2, 4, 4, 6, 6);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_movedup_pd(__A),
+ (__v8df)__W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+ (__v8df)_mm512_movedup_pd(__A),
+ (__v8df)_mm512_setzero_pd());
+}
+
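+/* For the fixupimm family below, each element of C carries a per-class
+ * fixup table and imm selects which special-value responses (NaN, zero,
+ * infinities, ...) are applied; see the VFIXUPIMM documentation for the
+ * exact encoding. */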
+#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), \
+ (int)(imm), (__mmask8)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), \
+ (int)(imm), (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
+ (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), \
+ (int)(imm), (__mmask16)(U), \
+ (int)(R)); })
+
+#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), \
+ (int)(imm), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+
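+/* getexp extracts the exponent: getexp(x) computes floor(log2(|x|)) and
+ * returns it as a floating-point value. */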
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_getexp_sd (__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
+                 (__v2df) __B,
+                 (__v2df) _mm_setzero_pd (),
+                 (__mmask8) -1,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
+ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
+ (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_getexp_ss (__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+                 (__v4sf) __B,
+                 (__v4sf) _mm_setzero_ps (),
+                 (__mmask8) -1,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
+ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
+ (__v4sf) __B,
+                 (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
+ (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
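+/* For the getmant macros below, C selects the normalization interval
+ * (an _MM_MANT_NORM_* value) and D the sign control (_MM_MANT_SIGN_*);
+ * the two are packed into a single immediate as (D << 2) | C. */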
+#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
+ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
+ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
+ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
+ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
+ (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
+ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
+ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
+ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
+ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
+ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+                                              (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
+ (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kmov (__mmask16 __A)
+{
+ return __A;
+}
+
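+/* comi_round compares the low elements of A and B; P is a comparison
+ * predicate and R an exception control (_MM_FROUND_CUR_DIRECTION or
+ * _MM_FROUND_NO_EXC). */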
+#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
+ (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
+ (int)(P), (int)(R)); })
+
+#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
+ (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (int)(P), (int)(R)); })
+
+#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+
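+/* In the mask2_permutex2var forms, lanes whose mask bit is clear keep the
+ * corresponding element of the index operand __I rather than the permuted
+ * result. */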
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
+ __mmask16 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
+ (__v16si) __I
+ /* idx */ ,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
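+/* sll/srl/sra take a single shift count from the low 64 bits of __B and
+ * apply it to every lane; the sllv/srlv/srav variants below shift each
+ * lane by its own per-lane count from __Y. */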
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sll_epi32 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sll_epi64 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+                 _mm512_undefined_epi32 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sra_epi32 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sra_epi64 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srav_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srav_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srl_epi32 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
+ (__v4si) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srl_epi64 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
+ (__v2di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
+ (__v16si) __Y,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
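+/* ternarylogic evaluates an arbitrary three-input boolean function per bit:
+ * imm is the 8-entry truth table indexed by (a << 2) | (b << 1) | c, so
+ * imm = 0xCA gives the bitwise select a ? b : c. */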
+#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (__v16si)(__m512i)(C), \
+ (int)(imm), (__mmask16)(U)); })
+
+#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+
+#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
+
+#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
+
+#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
+ (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvtsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
+ (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
+ (int)(R)); })
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvtsd_u64 (__m128d __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
+
+#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
+
+#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
+
+#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
+
+#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
+ (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvtss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
+ (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
+ (int)(R)); })
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvtss_u64 (__m128 __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
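+/* The cvtt_* conversions truncate toward zero regardless of the current
+ * rounding mode; their R argument only controls exception suppression
+ * (_MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC). */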
+#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
+
+#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_cvttsd_i32 (__m128d __A)
+{
+ return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+
+#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+
+static __inline__ long long __DEFAULT_FN_ATTRS
+_mm_cvttsd_i64 (__m128d __A)
+{
+ return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
+ (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvttsd_u32 (__m128d __A)
+{
+ return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
+ (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
+ (int)(R)); })
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvttsd_u64 (__m128d __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
+
+#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
+ (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm_cvttss_i32 (__m128 __A)
+{
+ return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
+
+#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
+ (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
+
+static __inline__ long long __DEFAULT_FN_ATTRS
+_mm_cvttss_i64 (__m128 __A)
+{
+ return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
+ (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
+
+static __inline__ unsigned __DEFAULT_FN_ATTRS
+_mm_cvttss_u32 (__m128 __A)
+{
+ return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
+ (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
+ (int)(R)); })
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_cvttss_u64 (__m128 __A)
+{
+  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A,
+                 _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
+ (__v8di) __I
+ /* idx */ ,
+ (__v8df) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
+ (__v16si) __I
+ /* idx */ ,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
+ __mmask8 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
+ (__v8di) __I
+ /* idx */ ,
+ (__v8di) __B,
+ (__mmask8) __U);
+}
+
+#define _mm512_permute_pd(X, C) __extension__ ({ \
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
+ (__v8df)_mm512_undefined_pd(), \
+ 0 + (((C) >> 0) & 0x1), \
+ 0 + (((C) >> 1) & 0x1), \
+ 2 + (((C) >> 2) & 0x1), \
+ 2 + (((C) >> 3) & 0x1), \
+ 4 + (((C) >> 4) & 0x1), \
+ 4 + (((C) >> 5) & 0x1), \
+ 6 + (((C) >> 6) & 0x1), \
+ 6 + (((C) >> 7) & 0x1)); })
+
+#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permute_pd((X), (C)), \
+ (__v8df)(__m512d)(W)); })
+
+#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permute_pd((X), (C)), \
+ (__v8df)_mm512_setzero_pd()); })
+
+#define _mm512_permute_ps(X, C) __extension__ ({ \
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
+ (__v16sf)_mm512_undefined_ps(), \
+ 0 + (((C) >> 0) & 0x3), \
+ 0 + (((C) >> 2) & 0x3), \
+ 0 + (((C) >> 4) & 0x3), \
+ 0 + (((C) >> 6) & 0x3), \
+ 4 + (((C) >> 0) & 0x3), \
+ 4 + (((C) >> 2) & 0x3), \
+ 4 + (((C) >> 4) & 0x3), \
+ 4 + (((C) >> 6) & 0x3), \
+ 8 + (((C) >> 0) & 0x3), \
+ 8 + (((C) >> 2) & 0x3), \
+ 8 + (((C) >> 4) & 0x3), \
+ 8 + (((C) >> 6) & 0x3), \
+ 12 + (((C) >> 0) & 0x3), \
+ 12 + (((C) >> 2) & 0x3), \
+ 12 + (((C) >> 4) & 0x3), \
+ 12 + (((C) >> 6) & 0x3)); })
+
+#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_permute_ps((X), (C)), \
+ (__v16sf)(__m512)(W)); })
+
+#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_permute_ps((X), (C)), \
+ (__v16sf)_mm512_setzero_ps()); })
+
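+/* Example: _mm512_permute_pd(x, 0x55) swaps the two doubles within each
+ * 128-bit lane, since each control bit of C selects one element of its
+ * pair. */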
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_permutevar_pd (__m512d __A, __m512i __C)
+{
+ return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+ (__v8di) __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
+{
+ return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+ (__v8di) __C,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
+{
+ return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
+ (__v8di) __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_permutevar_ps (__m512 __A, __m512i __C)
+{
+ return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
+ (__v16si) __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
+{
+ return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
+ (__v16si) __C,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
+{
+ return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
+ (__v16si) __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+static __inline __m512d __DEFAULT_FN_ATTRS
+_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
+ /* idx */ ,
+ (__v8df) __A,
+ (__v8df) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
+ /* idx */ ,
+ (__v8df) __A,
+ (__v8df) __B,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
+ /* idx */ ,
+ (__v8df) __A,
+ (__v8df) __B,
+ (__mmask8) __U);
+}
+
+static __inline __m512 __DEFAULT_FN_ATTRS
+_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
+ /* idx */ ,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
+ /* idx */ ,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
+ /* idx */ ,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
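+/* testn sets mask bit k when (__A[k] & __B[k]) == 0, i.e. it reports the
+ * lanes whose bitwise AND is all zero. */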
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
+ (__v16si) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
+ (__v8di) __B, __U);
+}
+
+#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvttpd_epu32 (__m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
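+/* roundscale rounds to 2^-M precision, where M is taken from the upper
+ * four bits of the immediate and the low bits select the rounding mode;
+ * see the VRNDSCALE encoding for the exact layout. */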
+#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(imm), \
+ (int)(R)); })
+
+#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(imm), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(imm), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)); })
+
+#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(I), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)); })
+
+#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(imm), \
+ (int)(R)); })
+
+#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(imm), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(I), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)); })
+
+#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(I), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)); })
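+
+/* VRNDSCALE computes 2^-M * round(2^M * x): imm bits [7:4] hold M (the number
+ * of binary fraction bits to keep) and bits [1:0] the rounding mode, so an imm
+ * of 0 rounds to an integer.  Illustrative: _mm_roundscale_sd(__a, __b, 0)
+ * rounds __b[0] to an integral value and copies __a's upper element. */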
+
+#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_scalef_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
+ (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_scalef_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
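+
+/* VSCALEF computes __A * 2^floor(__B) element-wise; e.g. (illustrative)
+ * _mm512_scalef_pd(_mm512_set1_pd(3.0), _mm512_set1_pd(2.0)) gives 12.0 in
+ * every lane. */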
+
+#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_scalef_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_scalef_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_srai_epi32(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_srai_epi32(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_srai_epi32(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_srai_epi64(A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_srai_epi64(W, U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_srai_epi64(U, A, B) __extension__ ({ \
+ (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
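+
+/* Arithmetic right shifts replicate the sign bit, so negative values stay
+ * negative: _mm512_srai_epi32(__v, 1) maps -8 to -4 (illustrative), whereas
+ * the logical _mm512_srli_epi32 would map -8 to 0x7FFFFFFC. */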
+
+#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U)); })
+
+#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(imm), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
+
+#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(imm), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
+#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ 0 + (((M) >> 0) & 0x1), \
+ 8 + (((M) >> 1) & 0x1), \
+ 2 + (((M) >> 2) & 0x1), \
+ 10 + (((M) >> 3) & 0x1), \
+ 4 + (((M) >> 4) & 0x1), \
+ 12 + (((M) >> 5) & 0x1), \
+ 6 + (((M) >> 6) & 0x1), \
+ 14 + (((M) >> 7) & 0x1)); })
+
+#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
+ (__v8df)(__m512d)(W)); })
+
+#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
+ (__v8df)_mm512_setzero_pd()); })
+
+#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ 0 + (((M) >> 0) & 0x3), \
+ 0 + (((M) >> 2) & 0x3), \
+ 16 + (((M) >> 4) & 0x3), \
+ 16 + (((M) >> 6) & 0x3), \
+ 4 + (((M) >> 0) & 0x3), \
+ 4 + (((M) >> 2) & 0x3), \
+ 20 + (((M) >> 4) & 0x3), \
+ 20 + (((M) >> 6) & 0x3), \
+ 8 + (((M) >> 0) & 0x3), \
+ 8 + (((M) >> 2) & 0x3), \
+ 24 + (((M) >> 4) & 0x3), \
+ 24 + (((M) >> 6) & 0x3), \
+ 12 + (((M) >> 0) & 0x3), \
+ 12 + (((M) >> 2) & 0x3), \
+ 28 + (((M) >> 4) & 0x3), \
+ 28 + (((M) >> 6) & 0x3)); })
+
+#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+ (__v16sf)(__m512)(W)); })
+
+#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+ (__v16sf)_mm512_setzero_ps()); })
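+
+/* _mm512_shuffle_{f,i}32x4 and _mm512_shuffle_{f,i}64x2 move whole 128-bit
+ * lanes: the four 2-bit fields of imm select, low to high, result lanes 0-1
+ * from A and result lanes 2-3 from B.  _mm512_shuffle_pd/_mm512_shuffle_ps
+ * instead shuffle within each 128-bit lane, as the index arithmetic above
+ * spells out. */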
+
+#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_broadcast_f32x4 (__m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf) __O,
+ __M);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __M);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_broadcast_f64x4 (__m256d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
+ (__v8df) __O,
+ __M);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcast_i32x4 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
+ (__v16si)
+ _mm512_undefined_epi32 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
+ (__v16si) __O,
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcast_i64x4 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
+ (__v8di)
+ _mm512_undefined_epi32 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
+ (__v8di) __O,
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(__M,
+ (__v8df) _mm512_broadcastsd_pd(__A),
+ (__v8df) __O);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+ return (__m512d)__builtin_ia32_selectpd_512(__M,
+ (__v8df) _mm512_broadcastsd_pd(__A),
+ (__v8df) _mm512_setzero_pd());
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
+{
+ return (__m512)__builtin_ia32_selectps_512(__M,
+ (__v16sf) _mm512_broadcastss_ps(__A),
+ (__v16sf) __O);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
+{
+ return (__m512)__builtin_ia32_selectps_512(__M,
+ (__v16sf) _mm512_broadcastss_ps(__A),
+ (__v16sf) _mm512_setzero_ps());
+}
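+
+/* The f32x4/i32x4 broadcasts replicate a 128-bit source four times and the
+ * f64x4/i64x4 forms replicate a 256-bit source twice; the sd/ss forms splat
+ * one scalar across the vector.  Illustrative:
+ * _mm512_broadcast_f32x4(__x) yields {__x, __x, __x, __x}. */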
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi) __O, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi64_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi64_epi32 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi64_epi16 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtusepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtusepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi) __O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtusepi64_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtusepi64_epi32 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtusepi64_epi16 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtepi32_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtepi32_epi16 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi) __O, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
+{
+ __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_epi8 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_epi32 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_epi16 (__m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
+{
+ __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
+}
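+
+/* The three narrowing families above differ only in overflow handling:
+ * cvtepi* truncates (keeps the low bits), cvtsepi* saturates as signed and
+ * cvtusepi* saturates as unsigned.  Narrowing the 32-bit value 300 to 8 bits
+ * (illustrative): truncate -> 44 (0x2C), signed saturate -> 127, unsigned
+ * saturate -> 255. */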
+
+#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
+ (__v4df)(__m256d)(B), (int)(imm), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
+ (__v4df)(__m256d)(B), (int)(imm), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
+ (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
+ (__v4df)(__m256d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U)); })
+
+#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
+ (__v4sf)(__m128)(B), (int)(imm), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
+ (__v4sf)(__m128)(B), (int)(imm), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
+ (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
+ (__v4sf)(__m128)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U)); })
+
+#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
+ (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U)); })
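+
+/* The extract forms return the imm-selected 128-bit (32x4) or 256-bit (64x4)
+ * lane of A, and the insert forms overwrite that lane of A with B, leaving the
+ * rest intact; e.g. (illustrative) _mm512_extracti32x4_epi32(__v, 3) returns
+ * bits 511:384 of __v. */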
+
+#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
+ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
+ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
+ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
+ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2)|(B)), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
+ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2)|(B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
+ (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2)|(B)), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
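+
+/* VGETMANT rewrites each element as sign * mantissa with the mantissa
+ * normalized into the interval selected by B (e.g. _MM_MANT_NORM_1_2 for
+ * [1,2)) and the sign handled per C; the two are packed as imm[1:0] and
+ * imm[3:2] above.  Illustrative: getmant of 8.0 with interval [1,2) is 1.0. */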
+
+#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_getexp_pd (__m512d __A)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df) _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)); })
+
+#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)); })
+
+#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)); })
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_getexp_ps (__m512 __A)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf) _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
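+
+/* VGETEXP returns floor(log2(|x|)) of each element as a floating-point value,
+ * i.e. the unbiased exponent; e.g. (illustrative) getexp of 8.0 is 3.0 and
+ * getexp of 0.5 is -1.0. */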
+
+#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
+ (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
+ (float const *)(addr), \
+ (__v8di)(__m512i)(index), (__mmask8)-1, \
+ (int)(scale)); })
+
+#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
+ (float const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
+ (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
+ (int const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)-1, (int)(scale)); })
+
+#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
+ (int const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
+ (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
+ (double const *)(addr), \
+ (__v8di)(__m512i)(index), (__mmask8)-1, \
+ (int)(scale)); })
+
+#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
+ (double const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
+ (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
+ (long long const *)(addr), \
+ (__v8di)(__m512i)(index), (__mmask8)-1, \
+ (int)(scale)); })
+
+#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
+ (long long const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
+ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
+ (float const *)(addr), \
+ (__v16sf)(__m512)(index), \
+ (__mmask16)-1, (int)(scale)); })
+
+#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
+ (float const *)(addr), \
+ (__v16sf)(__m512)(index), \
+ (__mmask16)(mask), (int)(scale)); })
+
+#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
+ (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
+ (int const *)(addr), \
+ (__v16si)(__m512i)(index), \
+ (__mmask16)-1, (int)(scale)); })
+
+#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
+ (int const *)(addr), \
+ (__v16si)(__m512i)(index), \
+ (__mmask16)(mask), (int)(scale)); })
+
+#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
+ (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
+ (double const *)(addr), \
+ (__v8si)(__m256i)(index), (__mmask8)-1, \
+ (int)(scale)); })
+
+#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
+ (double const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
+ (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
+ (long long const *)(addr), \
+ (__v8si)(__m256i)(index), (__mmask8)-1, \
+ (int)(scale)); })
+
+#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
+ (long long const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
+ (__v8di)(__m512i)(index), \
+ (__v8sf)(__m256)(v1), (int)(scale)); })
+
+#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
+ (__v8di)(__m512i)(index), \
+ (__v8sf)(__m256)(v1), (int)(scale)); })
+
+#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
+ (__v8di)(__m512i)(index), \
+ (__v8si)(__m256i)(v1), (int)(scale)); })
+
+#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
+ (__v8di)(__m512i)(index), \
+ (__v8si)(__m256i)(v1), (int)(scale)); })
+
+#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
+ (__v8di)(__m512i)(index), \
+ (__v8df)(__m512d)(v1), (int)(scale)); })
+
+#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
+ (__v8di)(__m512i)(index), \
+ (__v8df)(__m512d)(v1), (int)(scale)); })
+
+#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
+ (__v8di)(__m512i)(index), \
+ (__v8di)(__m512i)(v1), (int)(scale)); })
+
+#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
+ (__v8di)(__m512i)(index), \
+ (__v8di)(__m512i)(v1), (int)(scale)); })
+
+#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
+ (__v16si)(__m512i)(index), \
+ (__v16sf)(__m512)(v1), (int)(scale)); })
+
+#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
+ (__v16si)(__m512i)(index), \
+ (__v16sf)(__m512)(v1), (int)(scale)); })
+
+#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
+ (__v16si)(__m512i)(index), \
+ (__v16si)(__m512i)(v1), (int)(scale)); })
+
+#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
+ (__v16si)(__m512i)(index), \
+ (__v16si)(__m512i)(v1), (int)(scale)); })
+
+#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
+ (__v8si)(__m256i)(index), \
+ (__v8df)(__m512d)(v1), (int)(scale)); })
+
+#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
+ (__v8si)(__m256i)(index), \
+ (__v8df)(__m512d)(v1), (int)(scale)); })
+
+#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
+ (__v8si)(__m256i)(index), \
+ (__v8di)(__m512i)(v1), (int)(scale)); })
+
+#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
+ __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
+ (__v8si)(__m256i)(index), \
+ (__v8di)(__m512i)(v1), (int)(scale)); })
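+
+/* For every gather/scatter above, element i is loaded from or stored to
+ * (char *)addr + index[i] * scale, where scale must be 1, 2, 4 or 8.  In the
+ * masked forms only elements whose mask bit is set touch memory, and
+ * masked-off gather lanes keep their v1_old value. */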
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
+ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ (__v4sf) __X,
+ (__v4sf) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A,
+ -(__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
+ (__v4sf) __X,
+ -(__v4sf) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(X), \
+ -(__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_mask(-(__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
+ (__v4sf) __X,
+ (__v4sf) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
+ (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A,
+ -(__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_mask(-(__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
+ (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
+ (__v4sf) __X,
+ -(__v4sf) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
+ (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(X), \
+ -(__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)); })
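+
+/* All fmsub/fnmadd/fnmsub scalar-ss forms above reuse the single vfmaddss3
+ * builtin by negating operands, per the identities a*b - c == a*b + (-c) and
+ * -(a*b) + c == (-a)*b + c; the _sd forms below do the same for doubles. */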
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ (__v2df) __X,
+ (__v2df) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A,
+ -(__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
+ (__v2df) __X,
+ -(__v2df) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(X), \
+ -(__v2df)(__m128d)(Y), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask(-(__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
+ (__v2df) __X,
+ (__v2df) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A,
+ -(__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask(-(__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
+ (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
+ (__v2df) __X,
+ -(__v2df) __Y,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
+ (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(X), \
+ -(__v2df)(__m128d)(Y), \
+ (__mmask8)(U), (int)(R)); })
+
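+/* Editor's sketch (illustrative, not upstream): all of the scalar FMA
+ * variants above funnel into one fused multiply-add builtin, with the
+ * sign pattern folded into the operands: fmadd = a*b+c, fmsub = a*b-c,
+ * fnmadd = -(a*b)+c, fnmsub = -(a*b)-c.  Per Intel's documented
+ * semantics (the helper name is hypothetical): */
+static __inline__ double __DEFAULT_FN_ATTRS
+__editor_fnmsub_sd_demo (void)
+{
+ __m128d __w = _mm_set_sd (8.0), __a = _mm_set_sd (2.0), __b = _mm_set_sd (3.0);
+ /* mask bit 0 set: lane 0 = -(8.0*2.0) - 3.0 = -19.0; lane 1 kept from __w */
+ return _mm_cvtsd_f64 (_mm_mask_fnmsub_sd (__w, (__mmask8)1, __a, __b));
+}
+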
+#define _mm512_permutex_pd(X, C) __extension__ ({ \
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
+ (__v8df)_mm512_undefined_pd(), \
+ 0 + (((C) >> 0) & 0x3), \
+ 0 + (((C) >> 2) & 0x3), \
+ 0 + (((C) >> 4) & 0x3), \
+ 0 + (((C) >> 6) & 0x3), \
+ 4 + (((C) >> 0) & 0x3), \
+ 4 + (((C) >> 2) & 0x3), \
+ 4 + (((C) >> 4) & 0x3), \
+ 4 + (((C) >> 6) & 0x3)); })
+
+#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permutex_pd((X), (C)), \
+ (__v8df)(__m512d)(W)); })
+
+#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permutex_pd((X), (C)), \
+ (__v8df)_mm512_setzero_pd()); })
+
+#define _mm512_permutex_epi64(X, C) __extension__ ({ \
+ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
+ (__v8di)_mm512_undefined_epi32(), \
+ 0 + (((C) >> 0) & 0x3), \
+ 0 + (((C) >> 2) & 0x3), \
+ 0 + (((C) >> 4) & 0x3), \
+ 0 + (((C) >> 6) & 0x3), \
+ 4 + (((C) >> 0) & 0x3), \
+ 4 + (((C) >> 2) & 0x3), \
+ 4 + (((C) >> 4) & 0x3), \
+ 4 + (((C) >> 6) & 0x3)); })
+
+#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_permutex_epi64((X), (C)), \
+ (__v8di)(__m512i)(W)); })
+
+#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_permutex_epi64((X), (C)), \
+ (__v8di)_mm512_setzero_si512()); })
+
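+/* Editor's sketch (illustrative, not upstream): the 8-bit immediate is
+ * four 2-bit selectors applied identically to each 256-bit half of the
+ * source, as the 0+/4+ shufflevector indices above spell out.  0x1B
+ * selects 3,2,1,0, reversing the elements within each half: */
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+__editor_permutex_demo (__m512d __X)
+{
+ return _mm512_permutex_pd (__X, 0x1B);
+}
+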
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
+{
+ return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
+ (__v8di) __X,
+ (__v8df) _mm512_undefined_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
+{
+ return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
+ (__v8di) __X,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
+{
+ return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
+ (__v8di) __X,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+ (__v8di) __X,
+ (__v8di) _mm512_setzero_si512 (),
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+ (__v8di) __X,
+ (__v8di) _mm512_undefined_epi32 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+ (__v8di) __X,
+ (__v8di) __W,
+ __M);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
+{
+ return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
+ (__v16si) __X,
+ (__v16sf) _mm512_undefined_ps (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
+{
+ return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
+ (__v16si) __X,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
+{
+ return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
+ (__v16si) __X,
+ (__v16sf) _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+ (__v16si) __X,
+ (__v16si) _mm512_setzero_si512 (),
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+ (__v16si) __X,
+ (__v16si) _mm512_undefined_epi32 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+ (__v16si) __X,
+ (__v16si) __W,
+ __M);
+}
+
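+/* Editor's sketch (illustrative, not upstream): unlike the immediate
+ * permutes above, permutexvar takes its selectors in a vector register,
+ * so the shuffle can cross lane boundaries and vary at run time.  This
+ * hypothetical helper reverses all 16 dwords: */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+__editor_reverse_epi32_demo (__m512i __V)
+{
+ const __m512i __idx = __extension__ (__m512i)(__v16si)
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ return _mm512_permutexvar_epi32 (__idx, __V);
+}
+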
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kand (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kandn (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kor (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm512_kortestc (__mmask16 __A, __mmask16 __B)
+{
+ return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm512_kortestz (__mmask16 __A, __mmask16 __B)
+{
+ return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kxnor (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_kxor (__mmask16 __A, __mmask16 __B)
+{
+ return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
+}
+
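+/* Editor's sketch (illustrative, not upstream): the k* helpers operate
+ * on 16-bit mask registers directly; kortestz reports whether the OR of
+ * its two operands is all zero, giving a cheap "no lane matched" test: */
+static __inline__ int __DEFAULT_FN_ATTRS
+__editor_mask_demo (__mmask16 __A, __mmask16 __B)
+{
+ __mmask16 __both = _mm512_kand (__A, __B);
+ /* 1 iff __A and __B have no set bit in common */
+ return _mm512_kortestz (__both, __both);
+}
+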
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_stream_si512 (__m512i * __P, __m512i __A)
+{
+ __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_stream_load_si512 (void *__P)
+{
+ return __builtin_ia32_movntdqa512 ((__v8di *)__P);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_stream_pd (double *__P, __m512d __A)
+{
+ __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_stream_ps (float *__P, __m512 __A)
+{
+ __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
+}
+
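+/* Editor's sketch (illustrative, not upstream): the stream forms issue
+ * non-temporal stores that bypass the caches.  Per Intel's documentation
+ * the destination must be 64-byte aligned, and a store fence should
+ * order the data before other agents read it: */
+static __inline__ void __DEFAULT_FN_ATTRS
+__editor_stream_zero_demo (double *__P /* assumed 64-byte aligned */)
+{
+ _mm512_stream_pd (__P, _mm512_setzero_pd ());
+ _mm_sfence (); /* make the non-temporal store globally visible in order */
+}
+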
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
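+/* Editor's sketch (illustrative, not upstream): compress packs the
+ * mask-selected elements into the low lanes in order; the maskz form
+ * zeroes whatever is left above them.  With 0xAAAA the odd dwords land
+ * contiguously in lanes 0..7 and lanes 8..15 become zero: */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+__editor_compress_demo (__m512i __V)
+{
+ return _mm512_maskz_compress_epi32 ((__mmask16)0xAAAA, __V);
+}
+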
+#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)(M), (int)(R)); })
+
+#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)(M), \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)(M), (int)(R)); })
+
+#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)(M), \
+ _MM_FROUND_CUR_DIRECTION); })
+
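+/* Editor's sketch (illustrative, not upstream): these return the lane-0
+ * comparison as a 1-bit mask instead of an all-ones/all-zeros lane; the
+ * predicate constants (_CMP_LT_OS and friends) are the AVX ones from
+ * <avxintrin.h>: */
+static __inline__ int __DEFAULT_FN_ATTRS
+__editor_cmp_sd_demo (__m128d __A, __m128d __B)
+{
+ /* 1 when __A[0] < __B[0] (ordered, signaling), else 0 */
+ return (int)_mm_cmp_sd_mask (__A, __B, _CMP_LT_OS);
+}
+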
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_movehdup_ps (__m512 __A)
+{
+ return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_movehdup_ps(__A),
+ (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_movehdup_ps(__A),
+ (__v16sf)_mm512_setzero_ps());
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_moveldup_ps (__m512 __A)
+{
+ return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_moveldup_ps(__A),
+ (__v16sf)__W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_moveldup_ps(__A),
+ (__v16sf)_mm512_setzero_ps());
+}
+
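+/* Editor's sketch (illustrative, not upstream): movehdup duplicates the
+ * odd (high) float of each pair and moveldup the even (low) one, per the
+ * shufflevector index lists above: */
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+__editor_movedup_demo (__m512 __A)
+{
+ /* lanes (0,1) both get __A[1], lanes (2,3) get __A[3], and so on */
+ return _mm512_movehdup_ps (__A);
+}
+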
+#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
+ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
+ (__v16si)_mm512_undefined_epi32(), \
+ 0 + (((I) >> 0) & 0x3), \
+ 0 + (((I) >> 2) & 0x3), \
+ 0 + (((I) >> 4) & 0x3), \
+ 0 + (((I) >> 6) & 0x3), \
+ 4 + (((I) >> 0) & 0x3), \
+ 4 + (((I) >> 2) & 0x3), \
+ 4 + (((I) >> 4) & 0x3), \
+ 4 + (((I) >> 6) & 0x3), \
+ 8 + (((I) >> 0) & 0x3), \
+ 8 + (((I) >> 2) & 0x3), \
+ 8 + (((I) >> 4) & 0x3), \
+ 8 + (((I) >> 6) & 0x3), \
+ 12 + (((I) >> 0) & 0x3), \
+ 12 + (((I) >> 2) & 0x3), \
+ 12 + (((I) >> 4) & 0x3), \
+ 12 + (((I) >> 6) & 0x3)); })
+
+#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_epi32((A), (I)), \
+ (__v16si)(__m512i)(W)); })
+
+#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_epi32((A), (I)), \
+ (__v16si)_mm512_setzero_si512()); })
+
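+/* Editor's sketch (illustrative, not upstream): as with SSE2 pshufd, the
+ * 8-bit immediate holds four 2-bit selectors applied within every
+ * 128-bit lane.  0x4E selects 2,3,0,1, rotating each lane by two dwords: */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+__editor_shuffle_demo (__m512i __A)
+{
+ return _mm512_shuffle_epi32 (__A, 0x4E);
+}
+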
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
+ (__v8df) _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
+ (__v8di) _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
+ (__v8df) _mm512_setzero_pd(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
+ (__v8di) _mm512_setzero_si512(),
+ (__mmask8) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
+ (__v16sf) _mm512_setzero_ps(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
+ (__v16si) _mm512_setzero_si512(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
+ (__v16sf) _mm512_setzero_ps(),
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
+ (__v16si) _mm512_setzero_si512(),
+ (__mmask16) __U);
+}
+
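+/* Editor's sketch (illustrative, not upstream): expand is the inverse of
+ * compress -- consecutive low source elements scatter to the positions
+ * whose mask bit is set, and the expandloadu forms read only the
+ * elements they need from unaligned memory: */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+__editor_expand_demo (__m512i __Packed)
+{
+ /* odd lanes receive __Packed lanes 0..7 in order; even lanes become 0 */
+ return _mm512_maskz_expand_epi32 ((__mmask16)0xAAAA, __Packed);
+}
+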
+#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_cvtps_pd (__m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
+{
+ return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
+ (__v8df) __A,
+ (__v8df) __W);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
+{
+ return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
+ (__v8df) __A,
+ (__v8df) _mm512_setzero_pd ());
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
+ (__v16sf) __A,
+ (__v16sf) __W);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
+{
+ return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
+ (__v16sf) __A,
+ (__v16sf) _mm512_setzero_ps ());
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
+{
+ __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
+{
+ __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
+{
+ __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
+ (__mmask16) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
+{
+ __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
+ (__mmask16) __U);
+}
+
+#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
+{
+ return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
+ (__v2df)(__B),
+ (__v4sf)(__W),
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
+{
+ return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
+ (__v2df)(__B),
+ (__v4sf)_mm_setzero_ps(),
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvtss_i32 _mm_cvtss_si32
+#define _mm_cvtss_i64 _mm_cvtss_si64
+#define _mm_cvtsd_i32 _mm_cvtsd_si32
+#define _mm_cvtsd_i64 _mm_cvtsd_si64
+#define _mm_cvti32_sd _mm_cvtsi32_sd
+#define _mm_cvti64_sd _mm_cvtsi64_sd
+#define _mm_cvti32_ss _mm_cvtsi32_ss
+#define _mm_cvti64_ss _mm_cvtsi64_ss
+
+#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
+ (int)(R)); })
+
+#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
+ (int)(R)); })
+
+#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
+
+#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
+
+#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
+ (int)(R)); })
+
+#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
+ (int)(R)); })
+
+#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)-1, (int)(R)); })
+
+#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)); })
+
+#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
+{
+ return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
+ (__v4sf)(__B),
+ (__v2df)(__W),
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
+{
+ return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
+ (__v4sf)(__B),
+ (__v2df)_mm_setzero_pd(),
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_cvtu32_sd (__m128d __A, unsigned __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
+}
+
+#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
+ (unsigned long long)(B), (int)(R)); })
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
+{
+ return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
+ (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_cvtu32_ss (__m128 __A, unsigned __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
+ (unsigned long long)(B), (int)(R)); })
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
+{
+ return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
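+/* Editor's sketch (illustrative, not upstream): the cvtu* forms convert
+ * from unsigned scalars, so values above the signed maximum stay
+ * positive, where _mm_cvtsi32_ss would see a negative int: */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+__editor_cvtu32_demo (__m128 __A)
+{
+ return _mm_cvtu32_ss (__A, 4000000000U); /* lane 0 becomes 4.0e9f */
+}
+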
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
+ __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
+ __M);
+}
+
+static __inline __m512i __DEFAULT_FN_ATTRS
+_mm512_set_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H,
+ int __I, int __J, int __K, int __L,
+ int __M, int __N, int __O, int __P)
+{
+ return __extension__ (__m512i)(__v16si)
+ { __P, __O, __N, __M, __L, __K, __J, __I,
+ __H, __G, __F, __E, __D, __C, __B, __A };
+}
+
+#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
+ e8,e9,e10,e11,e12,e13,e14,e15) \
+ _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
+ (e5),(e4),(e3),(e2),(e1),(e0))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_set_epi64 (long long __A, long long __B, long long __C,
+ long long __D, long long __E, long long __F,
+ long long __G, long long __H)
+{
+ return __extension__ (__m512i) (__v8di)
+ { __H, __G, __F, __E, __D, __C, __B, __A };
+}
+
+#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
+ _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_set_pd (double __A, double __B, double __C, double __D,
+ double __E, double __F, double __G, double __H)
+{
+ return __extension__ (__m512d)
+ { __H, __G, __F, __E, __D, __C, __B, __A };
+}
+
+#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
+ _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_set_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H,
+ float __I, float __J, float __K, float __L,
+ float __M, float __N, float __O, float __P)
+{
+ return __extension__ (__m512)
+ { __P, __O, __N, __M, __L, __K, __J, __I,
+ __H, __G, __F, __E, __D, __C, __B, __A };
+}
+
+#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
+ _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
+ (e4),(e3),(e2),(e1),(e0))
+
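+/* Editor's sketch (illustrative, not upstream): the set forms list
+ * elements from the highest lane down, which is why the braced
+ * initializers above are reversed; setr takes the same values in memory
+ * order.  This helper puts 0 in lane 0 and 7 in lane 7, exactly like
+ * _mm512_setr_epi64(0,1,2,3,4,5,6,7): */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+__editor_lane_order_demo (void)
+{
+ return _mm512_set_epi64 (7, 6, 5, 4, 3, 2, 1, 0);
+}
+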
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_abs_ps(__m512 A)
+{
+ return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF), (__m512i)A);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_abs_ps(__m512 W, __mmask16 K, __m512 A)
+{
+ return (__m512)_mm512_mask_and_epi32((__m512i)W, K, _mm512_set1_epi32(0x7FFFFFFF), (__m512i)A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_abs_pd(__m512d A)
+{
+ return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), (__v8di)A);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_abs_pd(__m512d W, __mmask8 K, __m512d A)
+{
+ return (__m512d)_mm512_mask_and_epi64((__v8di)W, K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), (__v8di)A);
+}
+
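+/* Editor's sketch (illustrative, not upstream): the abs helpers above
+ * clear the IEEE-754 sign bit with a bitwise AND -- 0x7FFFFFFF masks bit
+ * 31 of each float, 0x7FFFFFFFFFFFFFFF bit 63 of each double.  Scalar
+ * model of the same trick: */
+static __inline__ float
+__editor_fabsf_model (float __X)
+{
+ union { float __f; unsigned int __u; } __v = { __X };
+ __v.__u &= 0x7FFFFFFFU; /* drop the sign bit; magnitude is untouched */
+ return __v.__f;
+}
+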
#undef __DEFAULT_FN_ATTRS
#endif // __AVX512FINTRIN_H
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512ifmaintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512ifmaintrin.h
new file mode 100644
index 000000000000..5defbaea8bcc
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512ifmaintrin.h
@@ -0,0 +1,92 @@
+/*===------------- avx512ifmaintrin.h - IFMA intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __IFMAINTRIN_H
+#define __IFMAINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
+{
+ return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __Z,
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
+ (__v8di) __X,
+ (__v8di) __Y,
+ (__mmask8) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
+{
+ return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __Z,
+ (__mmask8) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
+{
+ return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __Z,
+ (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
+ (__v8di) __X,
+ (__v8di) __Y,
+ (__mmask8) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
+{
+ return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
+ (__v8di) __Y,
+ (__v8di) __Z,
+ (__mmask8) __M);
+}
+
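+/* Editor's sketch (illustrative, not upstream): each madd52 lane
+ * multiplies the low 52 bits of __Y and __Z into a 104-bit product and
+ * adds its low (lo) or high (hi) 52 bits to the 64-bit accumulator __X,
+ * the building block for 52-bit-limb big-integer arithmetic.  Scalar
+ * model of the lo form (uses the __int128 extension): */
+static __inline__ unsigned long long
+__editor_madd52lo_model (unsigned long long __X, unsigned long long __Y,
+ unsigned long long __Z)
+{
+ const unsigned long long __m52 = (1ULL << 52) - 1;
+ unsigned __int128 __p = (unsigned __int128)(__Y & __m52) * (__Z & __m52);
+ return __X + (unsigned long long)(__p & __m52); /* hi form: __p >> 52 */
+}
+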
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512ifmavlintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512ifmavlintrin.h
new file mode 100644
index 000000000000..131ee5cb4f88
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512ifmavlintrin.h
@@ -0,0 +1,149 @@
+/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __IFMAVLINTRIN_H
+#define __IFMAVLINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl")))
+
+
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z,
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
+ (__v2di) __X,
+ (__v2di) __Y,
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z,
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
+ (__v4di) __X,
+ (__v4di) __Y,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z,
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z,
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
+ (__v2di) __X,
+ (__v2di) __Y,
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
+{
+ return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __Z,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z,
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
+ (__v4di) __X,
+ (__v4di) __Y,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
+{
+ return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __Z,
+ (__mmask8) __M);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512pfintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512pfintrin.h
new file mode 100644
index 000000000000..c7fa3cf313e3
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512pfintrin.h
@@ -0,0 +1,111 @@
+/*===------------- avx512pfintrin.h - PF intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512PFINTRIN_H
+#define __AVX512PFINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512pf")))
+
+#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
+ (long long const *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
+ (long long const *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfdps((__mmask16)(mask), \
+ (__v16si)(__m512i)(index), (int const *)(addr), \
+ (int)(scale), (int)(hint)); })
+
+#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfdps((__mmask16) -1, \
+ (__v16si)(__m512i)(index), (int const *)(addr), \
+ (int)(scale), (int)(hint)); })
+
+#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (long long const *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
+ (long long const *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (int const *)(addr), (int)(scale), (int)(hint)); })
+
+#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) __extension__ ({\
+ __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
+ (int const *)(addr), (int)(scale), (int)(hint)); })
+
+#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
+ (long long *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
+ (long long *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
+ (int *)(addr), (int)(scale), (int)(hint)); })
+
+#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfdps((__mmask16)(mask), \
+ (__v16si)(__m512i)(index), (int *)(addr), \
+ (int)(scale), (int)(hint)); })
+
+#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
+ (long long *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (long long *)(addr), (int)(scale), \
+ (int)(hint)); })
+
+#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
+ (int *)(addr), (int)(scale), (int)(hint)); })
+
+#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) __extension__ ({\
+ __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (int *)(addr), (int)(scale), (int)(hint)); })
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vbmiintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vbmiintrin.h
new file mode 100644
index 000000000000..837238eda97f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vbmiintrin.h
@@ -0,0 +1,137 @@
+/*===------------- avx512vbmiintrin.h - VBMI intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __VBMIINTRIN_H
+#define __VBMIINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi")))
+
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
+ __mmask64 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
+ (__v64qi) __I
+ /* idx */ ,
+ (__v64qi) __B,
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
+ /* idx */ ,
+ (__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
+ /* idx */ ,
+ (__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
+ /* idx */ ,
+ (__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
+ (__v64qi) __A,
+ (__v64qi) _mm512_undefined_epi32 (),
+ (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
+ (__v64qi) __A,
+ (__v64qi) _mm512_setzero_si512(),
+ (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
+ (__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v64qi) _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v64qi) _mm512_undefined_epi32 (),
+ (__mmask64) -1);
+}
+
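+/* Editor's sketch (illustrative, not upstream): vpmultishiftqb extracts,
+ * for each result byte, an unaligned 8-bit field from the corresponding
+ * 64-bit lane of __Y, starting at the bit offset held in that byte of
+ * __X and wrapping around the 64-bit value.  Scalar model of one byte: */
+static __inline__ unsigned char
+__editor_multishift_byte_model (unsigned long long __Q, unsigned char __C)
+{
+ unsigned __r = __C & 63; /* bit offset, modulo 64 */
+ /* rotate right by __r, then keep the low byte */
+ return (unsigned char)((__Q >> __r) | (__Q << ((64 - __r) & 63)));
+}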
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vbmivlintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vbmivlintrin.h
new file mode 100644
index 000000000000..105c6d142fa6
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vbmivlintrin.h
@@ -0,0 +1,247 @@
+/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __VBMIVLINTRIN_H
+#define __VBMIVLINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))
+
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
+ (__v16qi) __I
+ /* idx */ ,
+ (__v16qi) __B,
+ (__mmask16)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
+ __mmask32 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
+ (__v32qi) __I
+ /* idx */ ,
+ (__v32qi) __B,
+ (__mmask32)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
+ /* idx */ ,
+ (__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
+ /* idx */ ,
+ (__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
+ /* idx */ ,
+ (__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
+ /* idx */ ,
+ (__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
+ /* idx */ ,
+ (__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
+ /* idx */ ,
+ (__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
+ (__v16qi) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
+ (__v16qi) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
+ (__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
+ (__v32qi) __A,
+ (__v32qi) _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
+ (__v32qi) __A,
+ (__v32qi) _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
+ (__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v16qi)
+ _mm_undefined_si128 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v32qi)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
index b4542d69ab08..990e992a113f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlbwintrin.h
@@ -31,6 +31,11 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_setzero_hi(void) {
+ return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
/* Integer compare */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
@@ -781,33 +786,33 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
{
- return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
- (__v16qi) __W,
- (__mmask16) __U);
+ return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
+ (__v16qi) __W,
+ (__v16qi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
{
- return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
+ return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
(__v32qi) __W,
- (__mmask32) __U);
+ (__v32qi) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
{
- return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
+ return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
(__v8hi) __W,
- (__mmask8) __U);
+ (__v8hi) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
{
- return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
+ return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
(__v16hi) __W,
- (__mmask16) __U);
+ (__v16hi) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
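
The hunk above retires the dedicated blendm builtins in favor of the generic per-element select builtins; the intrinsics' observable behavior is unchanged. A scalar model of what __builtin_ia32_selectb_128(U, X, Y) computes (illustrative only, not the compiler's implementation):

/* Lane i of the result takes x[i] when mask bit i is set, y[i] otherwise;
   the vector builtin does the same across all 16 byte lanes at once. */
static void selectb_128_model(unsigned u, const signed char x[16],
                              const signed char y[16], signed char out[16]) {
  for (int i = 0; i < 16; ++i)
    out[i] = ((u >> i) & 1) ? x[i] : y[i];
}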
@@ -1994,6 +1999,25 @@ _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
__M);
}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
+}
+
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm256_cvtepi16_epi8 (__m256i __A) {
return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
@@ -2015,6 +2039,23 @@ _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
__M);
}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ __builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
+}
+
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
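
The new *_cvt*epi16_storeu_epi8 forms write the narrowed bytes directly to memory under a write mask, which lets a truncating copy handle its ragged tail without a scalar loop. A hedged sketch combining them with the masked loads added later in this file (the function name is an assumption):

#include <immintrin.h>
#include <stddef.h>

/* Truncate n 16-bit values to bytes, eight at a time; the final partial
   vector is both loaded and stored under the same write mask. */
static void narrow16to8(const short *src, char *dst, size_t n) {
  size_t i = 0;
  for (; i + 8 <= n; i += 8)
    _mm_mask_cvtepi16_storeu_epi8(
        dst + i, (__mmask8)0xFF,
        _mm_loadu_si128((const __m128i *)(src + i)));
  if (i < n) {
    __mmask8 m = (__mmask8)((1U << (n - i)) - 1);
    _mm_mask_cvtepi16_storeu_epi8(dst + i, m,
                                  _mm_maskz_loadu_epi16(m, src + i));
  }
}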
@@ -2116,220 +2157,1249 @@ _mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
- __m128i __B) {
- return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
- (__v16qi) __B,
- (__v16qi) __W,
- (__mmask16) __U);
+_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+ (__v16qi)_mm_unpackhi_epi8(__A, __B),
+ (__v16qi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
- return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
- (__v16qi) __B,
- (__v16qi) _mm_setzero_si128(),
- (__mmask16) __U);
+_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+ (__v16qi)_mm_unpackhi_epi8(__A, __B),
+ (__v16qi)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
- __m256i __B) {
- return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
- (__v32qi) __B,
- (__v32qi) __W,
- (__mmask32) __U);
+_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+ (__v32qi)_mm256_unpackhi_epi8(__A, __B),
+ (__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
- return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
- (__v32qi) __B,
- (__v32qi) _mm256_setzero_si256(),
- (__mmask32) __U);
+_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+ (__v32qi)_mm256_unpackhi_epi8(__A, __B),
+ (__v32qi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
- __m128i __B) {
- return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
- (__v8hi) __B,
- (__v8hi) __W,
- (__mmask8) __U);
+_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+ (__v8hi)_mm_unpackhi_epi16(__A, __B),
+ (__v8hi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
- return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
- (__v8hi) __B,
- (__v8hi) _mm_setzero_si128(),
- (__mmask8) __U);
+_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+ (__v8hi)_mm_unpackhi_epi16(__A, __B),
+ (__v8hi) _mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
- __m256i __B) {
- return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
- (__v16hi) __B,
- (__v16hi) __W,
- (__mmask16) __U);
+_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+ (__v16hi)_mm256_unpackhi_epi16(__A, __B),
+ (__v16hi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
- (__v16hi) __B,
- (__v16hi) _mm256_setzero_si256(),
- (__mmask16) __U);
+_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+ (__v16hi)_mm256_unpackhi_epi16(__A, __B),
+ (__v16hi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
- __m128i __B) {
- return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
- (__v16qi) __B,
- (__v16qi) __W,
- (__mmask16) __U);
+_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+ (__v16qi)_mm_unpacklo_epi8(__A, __B),
+ (__v16qi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
- return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
- (__v16qi) __B,
- (__v16qi) _mm_setzero_si128(),
- (__mmask16) __U);
+_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+ (__v16qi)_mm_unpacklo_epi8(__A, __B),
+ (__v16qi)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
- __m256i __B) {
- return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
- (__v32qi) __B,
- (__v32qi) __W,
- (__mmask32) __U);
+_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+ (__v32qi)_mm256_unpacklo_epi8(__A, __B),
+ (__v32qi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
- return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
- (__v32qi) __B,
- (__v32qi) _mm256_setzero_si256(),
- (__mmask32) __U);
+_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+ (__v32qi)_mm256_unpacklo_epi8(__A, __B),
+ (__v32qi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
- __m128i __B) {
- return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
- (__v8hi) __B,
- (__v8hi) __W,
- (__mmask8) __U);
+_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+ (__v8hi)_mm_unpacklo_epi16(__A, __B),
+ (__v8hi)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
- return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
- (__v8hi) __B,
- (__v8hi) _mm_setzero_si128(),
- (__mmask8) __U);
+_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+ (__v8hi)_mm_unpacklo_epi16(__A, __B),
+ (__v8hi) _mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
- __m256i __B) {
- return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
- (__v16hi) __B,
- (__v16hi) __W,
- (__mmask16) __U);
+_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+ (__v16hi)_mm256_unpacklo_epi16(__A, __B),
+ (__v16hi)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
- (__v16hi) __B,
- (__v16hi) _mm256_setzero_si256(),
- (__mmask16) __U);
+_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+ (__v16hi)_mm256_unpacklo_epi16(__A, __B),
+ (__v16hi)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
}
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), \
- (p), (__mmask16)-1); })
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)-1); })
#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), \
- (p), (__mmask16)(m)); })
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)(m)); })
#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), \
- (p), (__mmask16)-1); })
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)-1); })
#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), \
- (p), (__mmask16)(m)); })
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)(m)); })
#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), \
- (p), (__mmask32)-1); })
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)-1); })
#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), \
- (p), (__mmask32)(m)); })
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)(m)); })
#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), \
- (p), (__mmask32)-1); })
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)-1); })
#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
(__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), \
- (p), (__mmask32)(m)); })
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)(m)); })
#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), \
- (p), (__mmask8)-1); })
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), \
- (p), (__mmask8)(m)); })
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), \
- (p), (__mmask8)-1); })
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), \
- (p), (__mmask8)(m)); })
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), \
- (p), (__mmask16)-1); })
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)-1); })
#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), \
- (p), (__mmask16)(m)); })
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)(m)); })
#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), \
- (p), (__mmask16)-1); })
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)-1); })
#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
(__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), \
- (p), (__mmask16)(m)); })
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)(m)); })
+
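The only change to the compare macros above is the explicit (int)(p) cast on the predicate, which keeps an enum or wider argument from tripping the builtin's type check. Usage is unchanged; a small example (predicate 2 is the unsigned less-or-equal comparison, _MM_CMPINT_LE in avx512fintrin.h):

/* Count the byte lanes of a that are <= the corresponding lane of b,
   comparing as unsigned; the compare yields one mask bit per lane. */
static int count_le_bytes(__m128i a, __m128i b) {
  __mmask16 m = _mm_cmp_epu8_mask(a, b, 2);
  return __builtin_popcount((unsigned)m);
}
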
+#define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
+ (__v8hi)(__m128i)(W)); })
+
+#define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
+ (__v8hi)_mm_setzero_hi()); })
+
+#define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
+ (__v16hi)(__m256i)(W)); })
+
+#define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
+ (__v16hi)_mm256_setzero_si256()); })
+
+#define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
+ (__v8hi)(__m128i)(W)); })
+
+#define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
+ (__v8hi)_mm_setzero_hi()); })
+
+#define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflelo_epi16((A), \
+ (imm)), \
+ (__v16hi)(__m256i)(W)); })
+
+#define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflelo_epi16((A), \
+ (imm)), \
+ (__v16hi)_mm256_setzero_si256()); })
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_sllv_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sllv_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+#define _mm_mask_slli_epi16(W, U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_psllwi128_mask((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_slli_epi16(U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_psllwi128_mask((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_slli_epi16(W, U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_psllwi256_mask((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm256_maskz_slli_epi16(U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_psllwi256_mask((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(U)); })
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_srlv_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_srlv_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_srav_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_srav_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+#define _mm_mask_srai_epi16(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrawi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_srai_epi16(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrawi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_srai_epi16(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrawi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm256_maskz_srai_epi16(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrawi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(U)); })
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+#define _mm_mask_srli_epi16(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrlwi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_srli_epi16(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrlwi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_srli_epi16(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrlwi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm256_maskz_srli_epi16(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrlwi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(U)); })
+
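The sllv/srlv/srav additions above bring per-element variable shift counts down to 16-bit lanes. A brief sketch (illustrative):

/* Shift each 16-bit lane of v left by the count in the matching lane of
   counts; lanes whose count exceeds 15 come back as zero. */
static __m128i shift_each_lane(__m128i v, __m128i counts) {
  return _mm_sllv_epi16(v, counts);
}
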
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
+ (__v8hi) __A,
+ (__v8hi) __W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
+ (__v8hi) __A,
+ (__v8hi) _mm_setzero_hi ());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
+ (__v16hi) __A,
+ (__v16hi) __W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
+ (__v16hi) __A,
+ (__v16hi) _mm256_setzero_si256 ());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
+ (__v16qi) __A,
+ (__v16qi) __W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
+ (__v16qi) __A,
+ (__v16qi) _mm_setzero_hi ());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
+ (__v32qi) __A,
+ (__v32qi) __W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
+ (__v32qi) __A,
+ (__v32qi) _mm256_setzero_si256 ());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
+ (__v32qi) __O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
+ (__v8hi) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
+{
+ __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
+ (__v16hi) __A,
+ (__mmask16) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
+{
+ __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
+ (__v16qi) __A,
+ (__mmask16) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
+{
+ __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
+ (__v32qi) __A,
+ (__mmask32) __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm_test_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
+ (__v16qi) __B, __U);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_test_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
+ (__v32qi) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_test_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm256_test_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
+ (__v16hi) __B, __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm_testn_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
+ (__v16qi) __B, __U);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
+ (__v32qi) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_testn_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
+ (__v16hi) __B, __U);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm_movepi8_mask (__m128i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_mm256_movepi8_mask (__m256i __A)
+{
+ return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_movepi16_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
+}
+
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm256_movepi16_mask (__m256i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_movm_epi8 (__mmask16 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_movm_epi8 (__mmask32 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_movm_epi16 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_movm_epi16 (__mmask16 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectb_128(__M,
+ (__v16qi) _mm_broadcastb_epi8(__A),
+ (__v16qi) __O);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectb_128(__M,
+ (__v16qi) _mm_broadcastb_epi8(__A),
+ (__v16qi) _mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectb_256(__M,
+ (__v32qi) _mm256_broadcastb_epi8(__A),
+ (__v32qi) __O);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectb_256(__M,
+ (__v32qi) _mm256_broadcastb_epi8(__A),
+ (__v32qi) _mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectw_128(__M,
+ (__v8hi) _mm_broadcastw_epi16(__A),
+ (__v8hi) __O);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectw_128(__M,
+ (__v8hi) _mm_broadcastw_epi16(__A),
+ (__v8hi) _mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectw_256(__M,
+ (__v16hi) _mm256_broadcastw_epi16(__A),
+ (__v16hi) __O);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectw_256(__M,
+ (__v16hi) _mm256_broadcastw_epi16(__A),
+ (__v16hi) _mm256_setzero_si256());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
+ (__v16hi) __O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
+ (__v16hi) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
+ (__v8hi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
+ (__v8hi) __A,
+ (__v8hi) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
+ (__v8hi) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
+ (__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
+ (__v16hi) __A,
+ (__v16hi) _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
+ (__v16hi) __A,
+ (__v16hi) _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
+ (__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+#define _mm_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \
+ (__m128i)__builtin_ia32_palignr128_mask((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(N), \
+ (__v16qi)(__m128i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \
+ (__m128i)__builtin_ia32_palignr128_mask((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(N), \
+ (__v16qi)_mm_setzero_si128(), \
+ (__mmask16)(U)); })
+
+#define _mm256_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \
+ (__m256i)__builtin_ia32_palignr256_mask((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), (int)(N), \
+ (__v32qi)(__m256i)(W), \
+ (__mmask32)(U)); })
+
+#define _mm256_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \
+ (__m256i)__builtin_ia32_palignr256_mask((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), (int)(N), \
+ (__v32qi)_mm256_setzero_si256(), \
+ (__mmask32)(U)); })
+
+#define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(imm), \
+ (__v8hi)_mm_setzero_hi(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(imm), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(imm), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_dbsad_epu8(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), (int)(imm), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)-1); })
+
+#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), (int)(imm), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)); })
+
+#define _mm256_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), (int)(imm), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(U)); })
#undef __DEFAULT_FN_ATTRS
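
Among the additions above, the mask/vector move pairs deserve a note: movepi8_mask packs each byte's sign bit into a mask register, and movm_epi8 expands a mask back into 0x00/0xFF bytes. A small sketch (function names are illustrative):

#include <immintrin.h>

/* One mask bit per byte: bit i is set iff byte i of v is negative. */
static __mmask16 negative_bytes(__m128i v) {
  return _mm_movepi8_mask(v);
}

/* Round-trip: expand the mask to 0xFF/0x00 bytes, e.g. to AND it back
   into vector data. */
static __m128i mask_as_bytes(__mmask16 m) {
  return _mm_movm_epi8(m);
}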
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlcdintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlcdintrin.h
new file mode 100644
index 000000000000..7b02e2e1f92a
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlcdintrin.h
@@ -0,0 +1,263 @@
+/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics -----------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLCDINTRIN_H
+#define __AVX512VLCDINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd")))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_broadcastmb128 (__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_broadcastmb256 (__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m128i) __builtin_ia32_broadcastmw128 (__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m256i) __builtin_ia32_broadcastmw256 (__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_conflict_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_conflict_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di) _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di) _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_conflict_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si) _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_conflict_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si) _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_lzcnt_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_lzcnt_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_lzcnt_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_lzcnt_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __AVX512VLCDINTRIN_H */
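
A short sketch of the conflict-detection intrinsics the new header exposes: vpconflictd records, per element, which lower-indexed elements hold the same value, so an all-zero result means every lane is distinct (the function name and the SSE4.1 ptest reduction are illustrative):

#include <immintrin.h>

/* Returns nonzero iff all four 32-bit lanes of v hold distinct values:
   the conflict result is zero exactly when no lane repeats an earlier one. */
static int all_lanes_distinct(__m128i v) {
  __m128i c = _mm_conflict_epi32(v);
  return _mm_testz_si128(c, c); /* 1 iff c is all zeros */
}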
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
index dfd858e013da..8187bcd6b28e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vldqintrin.h
@@ -33,7 +33,7 @@
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
- return (__m256i) ((__v4di) __A * (__v4di) __B);
+ return (__m256i) ((__v4du) __A * (__v4du) __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -55,7 +55,7 @@ _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
- return (__m128i) ((__v2di) __A * (__v2di) __B);
+ return (__m128i) ((__v2du) __A * (__v2du) __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
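
The two mullo_epi64 hunks only change the element type of the C vector multiply from signed to unsigned. The low 64 bits of a product are identical either way, so this is purely about avoiding signed-overflow undefined behavior in the vector expression; the scalar analogue of the idiom:

#include <stdint.h>

/* Multiply in unsigned arithmetic, where wraparound is well defined,
   then reinterpret; the low 64 result bits match the signed product. */
static int64_t mul_low64(int64_t a, int64_t b) {
  return (int64_t)((uint64_t)a * (uint64_t)b);
}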
@@ -852,101 +852,413 @@ _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
(__mmask8) __U);
}
-#define _mm_range_pd(__A, __B, __C) __extension__ ({ \
- (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, (__v2df) __B, __C, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1); })
-
-#define _mm_mask_range_pd(__W, __U, __A, __B, __C) __extension__ ({ \
- (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, (__v2df) __B, __C, \
- (__v2df) __W, (__mmask8) __U); })
-
-#define _mm_maskz_range_pd(__U, __A, __B, __C) __extension__ ({ \
- (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, (__v2df) __B, __C, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U); })
+#define _mm_range_pd(A, B, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm256_range_pd(A, B, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm_range_ps(A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)(W), (__mmask8)(U)); })
+
+#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm256_range_ps(A, B, C) __extension__ ({ \
+ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \
+ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)(W), (__mmask8)(U)); })
+
+#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \
+ (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm_reduce_pd(A, B) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \
+ (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm256_reduce_pd(A, B) __extension__ ({ \
+ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
+ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \
+ (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm_reduce_ps(A, B) __extension__ ({ \
+ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \
+ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \
+ (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm256_reduce_ps(A, B) __extension__ ({ \
+ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \
+ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \
+ (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
+
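The rewritten macros above take any convertible expression for each argument, since every operand is now cast explicitly; the mask form merges from W where a mask bit is clear, while the maskz form zeroes that lane. A minimal usage sketch, assuming imm8 0 selects VRANGEPD's minimum operation and an avx512dq/avx512vl target:

#include <immintrin.h>

static __m256d range_demo(__m256d w, __m256d a, __m256d b)
{
  __m256d merged = _mm256_mask_range_pd(w, 0x5, a, b, 0); /* lanes 0,2 computed; 1,3 from w */
  return _mm256_maskz_range_pd(0x5, merged, b, 0);        /* lanes 1,3 zeroed */
}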
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_movepi32_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_movepi32_mask (__m256i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
+}
-#define _mm256_range_pd(__A, __B, __C) __extension__ ({ \
- (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, (__v4df) __B, __C, \
- (__v4df) _mm256_setzero_pd(), (__mmask8) -1); })
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_movm_epi32 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
+}
-#define _mm256_mask_range_pd(__W, __U, __A, __B, __C) __extension__ ({ \
- (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, (__v4df) __B, __C, \
- (__v4df) __W, (__mmask8) __U); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_movm_epi32 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
+}
-#define _mm256_maskz_range_pd(__U, __A, __B, __C) __extension__ ({ \
- (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, (__v4df) __B, __C, \
- (__v4df) _mm256_setzero_pd(), (__mmask8) __U); })
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_movm_epi64 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
+}
-#define _mm_range_ps(__A, __B, __C) __extension__ ({ \
- (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, (__v4sf) __B, __C, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_movm_epi64 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
+}
-#define _mm_mask_range_ps(__W, __U, __A, __B, __C) __extension__ ({ \
- (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, (__v4sf) __B, __C, \
- (__v4sf) __W, (__mmask8) __U); })
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_movepi64_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
+}
-#define _mm_maskz_range_ps(__U, __A, __B, __C) __extension__ ({ \
- (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, (__v4sf) __B, __C, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U); })
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_movepi64_mask (__m256i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
+}
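The mask/vector moves above are inverses on in-range bits: movm expands each mask bit into an all-ones or all-zeros lane, and movepi gathers the lane sign bits back into a mask. A round-trip sketch (illustrative):

#include <immintrin.h>

static __mmask8 mask_roundtrip(__mmask8 m)
{
  __m128i v = _mm_movm_epi32(m); /* bit i -> lane i, all ones or all zeros */
  return _mm_movepi32_mask(v);   /* recovers the low 4 bits of m */
}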
-#define _mm256_range_ps(__A, __B, __C) __extension__ ({ \
- (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, (__v8sf) __B, __C, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) -1); })
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_broadcast_f32x2 (__m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf)_mm256_undefined_ps(),
+ (__mmask8) -1);
+}
-#define _mm256_mask_range_ps(__W, __U, __A, __B, __C) __extension__ ({ \
- (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, (__v8sf) __B, __C, \
- (__v8sf) __W, (__mmask8) __U); })
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf) __O,
+ __M);
+}
-#define _mm256_maskz_range_ps(__U, __A, __B, __C) __extension__ ({ \
- (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, (__v8sf) __B, __C, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); })
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf) _mm256_setzero_ps (),
+ __M);
+}
-#define _mm_reduce_pd(__A, __B) __extension__ ({ \
- (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) -1); })
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_broadcast_f64x2 (__m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+ (__v4df)_mm256_undefined_pd(),
+ (__mmask8) -1);
+}
-#define _mm_mask_reduce_pd(__W, __U, __A, __B) __extension__ ({ \
- (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, \
- (__v2df) __W, (__mmask8) __U); })
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+ (__v4df) __O,
+ __M);
+}
-#define _mm_maskz_reduce_pd(__U, __A, __B) __extension__ ({ \
- (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, \
- (__v2df) _mm_setzero_pd(), (__mmask8) __U); })
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+ (__v4df) _mm256_setzero_pd (),
+ __M);
+}
-#define _mm256_reduce_pd(__A, __B) __extension__ ({ \
- (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, \
- (__v4df) _mm256_setzero_pd(), (__mmask8) -1); })
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_broadcast_i32x2 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+ (__v4si)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
-#define _mm256_mask_reduce_pd(__W, __U, __A, __B) __extension__ ({ \
- (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, \
- (__v4df) __W, (__mmask8) __U); })
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+ (__v4si) __O,
+ __M);
+}
-#define _mm256_maskz_reduce_pd(__U, __A, __B) __extension__ ({ \
- (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, \
- (__v4df) _mm256_setzero_pd(), (__mmask8) __U); })
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
-#define _mm_reduce_ps(__A, __B) __extension__ ({ \
- (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcast_i32x2 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+ (__v8si)_mm256_undefined_si256(),
+ (__mmask8) -1);
+}
-#define _mm_mask_reduce_ps(__W, __U, __A, __B) __extension__ ({ \
- (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, \
- (__v4sf) __W, (__mmask8) __U); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+ (__v8si) __O,
+ __M);
+}
-#define _mm_maskz_reduce_ps(__U, __A, __B) __extension__ ({ \
- (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
+}
-#define _mm256_reduce_ps(__A, __B) __extension__ ({ \
- (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) -1); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcast_i64x2 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
+ (__v4di)_mm256_undefined_si256(),
+ (__mmask8) -1);
+}
-#define _mm256_mask_reduce_ps(__W, __U, __A, __B) __extension__ ({ \
- (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \
- (__v8sf) __W, (__mmask8) __U); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
+ (__v4di) __O,
+ __M);
+}
-#define _mm256_maskz_reduce_ps(__U, __A, __B) __extension__ ({ \
- (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); })
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
+ (__v4di) _mm256_setzero_si256 (),
+ __M);
+}
+
+#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)(U)); })
+
+#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
+ (__v2di)(__m128i)(B), \
+ (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
+ (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__mmask8)-1); })
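The fpclass macros above test each lane against a set of category bits selected by imm. A sketch of a per-lane NaN test, assuming the VFPCLASS encoding in which 0x01 flags QNaN and 0x80 flags SNaN:

#include <immintrin.h>

static __mmask8 nan_lanes(__m256d x)
{
  return _mm256_fpclass_pd_mask(x, 0x81); /* 0x01 | 0x80: quiet or signaling NaN */
}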
#undef __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
index 8f13536fbb0e..295ce291f7ce 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
@@ -29,17 +29,22 @@
#define __AVX512VLINTRIN_H
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
-#define __DEFAULT_FN_ATTRS_BOTH __attribute__((__always_inline__, __nodebug__, __target__("avx512vl, avx512bw")))
+
+/* Doesn't require avx512vl; used in avx512dqintrin.h. */
+static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
+_mm_setzero_di(void) {
+ return (__m128i)(__v2di){ 0LL, 0LL};
+}
/* Integer compare */
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
__u);
@@ -57,13 +62,13 @@ _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
__u);
@@ -81,13 +86,13 @@ _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
__u);
@@ -105,13 +110,13 @@ _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
__u);
@@ -226,13 +231,13 @@ _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
__u);
@@ -250,13 +255,13 @@ _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
__u);
@@ -274,13 +279,13 @@ _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
__u);
@@ -298,13 +303,13 @@ _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
(__mmask8)-1);
}
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS_BOTH
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
__u);
@@ -885,437 +890,352 @@ _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si) __W,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_and_si256(__A, __B),
+ (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
+ return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_and_si128(__A, __B),
+ (__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si) __W,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_andnot_si256(__A, __B),
+ (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
+ return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
+ __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
- __m128i __B)
+_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_andnot_si128(__A, __B),
+ (__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si) __W,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_or_si256(__A, __B),
+ (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
+ return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_or_si128(__A, __B),
+ (__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si) __W,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_xor_si256(__A, __B),
+ (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
- (__v8si) __B,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
+ return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
- return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_xor_si128(__A, __B),
+ (__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
- (__v4si) __B,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di) __W, __U);
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_and_si256(__A, __B),
+ (__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di)
- _mm256_setzero_pd (),
- __U);
+ return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
- __m128i __B)
+_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di) __W, __U);
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_and_si128(__A, __B),
+ (__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di)
- _mm_setzero_pd (),
- __U);
+ return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di) __W, __U);
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_andnot_si256(__A, __B),
+ (__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di)
- _mm256_setzero_pd (),
- __U);
+ return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
+ __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
- __m128i __B)
+_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di) __W, __U);
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_andnot_si128(__A, __B),
+ (__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di)
- _mm_setzero_pd (),
- __U);
+ return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di) __W,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_or_si256(__A, __B),
+ (__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
+ return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di) __W,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_or_si128(__A, __B),
+ (__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- __m256i __B)
+_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di) __W,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_xor_si256(__A, __B),
+ (__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
- (__v4di) __B,
- (__v4di)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
+ return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
__m128i __B)
{
- return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di) __W,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_xor_si128(__A, __B),
+ (__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
- (__v2di) __B,
- (__v2di)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
}
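Every masked logical op above now follows one pattern: compute the full-width result with the plain AVX2 intrinsic, then select per lane between that result and the pass-through operand; maskz is the same select against a zero destination. A scalar model of the select step (illustrative, not the builtin itself):

#include <stdint.h>

static void select_epi32_model(uint8_t u, const int32_t res[8],
                               const int32_t w[8], int32_t out[8])
{
  for (int i = 0; i < 8; ++i)
    out[i] = ((u >> i) & 1) ? res[i] : w[i]; /* bit set: take the new result */
}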
#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), \
- (p), (__mmask8)-1); })
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), \
- (p), (__mmask8)(m)); })
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), \
- (p), (__mmask8)-1); })
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), \
- (p), (__mmask8)(m)); })
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), \
- (p), (__mmask8)-1); })
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), \
- (p), (__mmask8)(m)); })
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), \
- (p), (__mmask8)-1); })
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), \
- (p), (__mmask8)(m)); })
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), \
- (p), (__mmask8)-1); })
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), \
- (p), (__mmask8)(m)); })
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), \
- (p), (__mmask8)-1); })
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), \
- (p), (__mmask8)(m)); })
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), \
- (p), (__mmask8)-1); })
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), \
- (p), (__mmask8)(m)); })
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), \
- (p), (__mmask8)-1); })
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), \
- (p), (__mmask8)(m)); })
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), \
- (p), (__mmask8)-1); })
+ (__v8sf)(__m256)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), \
- (p), (__mmask8)(m)); })
+ (__v8sf)(__m256)(b), (int)(p), \
+ (__mmask8)(m)); })
#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
- (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256)(a), \
- (__v4df)(__m256)(b), \
- (p), (__mmask8)-1); })
+ (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(p), \
+ (__mmask8)-1); })
#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
- (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256)(a), \
- (__v4df)(__m256)(b), \
- (p), (__mmask8)(m)); })
+ (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(p), \
+ (__mmask8)(m)); })
-#define _mm128_cmp_ps_mask(a, b, p) __extension__ ({ \
+#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), \
- (p), (__mmask8)-1); })
+ (__v4sf)(__m128)(b), (int)(p), \
+ (__mmask8)-1); })
-#define _mm128_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
+#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
(__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), \
- (p), (__mmask8)(m)); })
+ (__v4sf)(__m128)(b), (int)(p), \
+ (__mmask8)(m)); })
-#define _mm128_cmp_pd_mask(a, b, p) __extension__ ({ \
- (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \
- (__v2df)(__m128)(b), \
- (p), (__mmask8)-1); })
+#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (int)(p), \
+ (__mmask8)-1); })
-#define _mm128_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
- (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \
- (__v2df)(__m128)(b), \
- (p), (__mmask8)(m)); })
+#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (int)(p), \
+ (__mmask8)(m)); })
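The compare macros above also gained the (int) cast on the predicate, and the old _mm128_ prefixes were misspellings now corrected to _mm_. A usage sketch with one of the standard _CMP_ predicates:

#include <immintrin.h>

static __mmask8 lanes_less(__m128d a, __m128d b)
{
  return _mm_cmp_pd_mask(a, b, _CMP_LT_OS); /* bit i set when a[i] < b[i] */
}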
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
@@ -2044,58 +1964,58 @@ _mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
- return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
+ return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
(__v4si) __W,
- (__mmask8) __U);
+ (__v4si) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
- return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
+ return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
(__v8si) __W,
- (__mmask8) __U);
+ (__v8si) __A);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
- return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
+ return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
(__v2df) __W,
- (__mmask8) __U);
+ (__v2df) __A);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
- return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
+ return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
(__v4df) __W,
- (__mmask8) __U);
+ (__v4df) __A);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
- return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
+ return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
(__v4sf) __W,
- (__mmask8) __U);
+ (__v4sf) __A);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
- return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
+ return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
(__v8sf) __W,
- (__mmask8) __U);
+ (__v8sf) __A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
- return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
+ return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
(__v2di) __W,
- (__mmask8) __U);
+ (__v2di) __A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
- return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
+ return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
(__v4di) __W,
- (__mmask8) __U);
+ (__v4di) __A);
}
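Blend reuses the same select builtin with the operands swapped: a set mask bit picks the lane from W, a clear bit keeps A. For example (mask chosen for illustration):

#include <immintrin.h>

static __m128i blend_demo(__m128i a, __m128i w)
{
  return _mm_mask_blend_epi32(0x3, a, w); /* lanes 0-1 from w, lanes 2-3 from a */
}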
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -3833,61 +3753,79 @@ _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
__M);
}
-#define _mm_roundscale_pd(__A, __imm) __extension__ ({ \
- (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, \
- __imm, (__v2df) _mm_setzero_pd (), (__mmask8) -1); })
+#define _mm_roundscale_pd(A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1); })
-#define _mm_mask_roundscale_pd(__W, __U, __A, __imm) __extension__ ({ \
- (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, __imm, \
- (__v2df) __W, (__mmask8) __U); })
+#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)); })
-#define _mm_maskz_roundscale_pd(__U, __A, __imm) __extension__ ({ \
- (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A, __imm, \
- (__v2df) _mm_setzero_pd (), (__mmask8) __U); })
+#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)); })
-#define _mm256_roundscale_pd(__A, __imm) __extension__ ({ \
- (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, __imm, \
- (__v4df) _mm256_setzero_pd (), (__mmask8) -1); })
+#define _mm256_roundscale_pd(A, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
-#define _mm256_mask_roundscale_pd(__W, __U, __A, __imm) __extension__ ({ \
- (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, __imm, \
- (__v4df) __W, (__mmask8) __U); })
+#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
-#define _mm256_maskz_roundscale_pd(__U, __A, __imm) __extension__ ({ \
- (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A, __imm, \
- (__v4df) _mm256_setzero_pd(), (__mmask8) __U); })
+#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
-#define _mm_roundscale_ps(__A, __imm) __extension__ ({ \
- (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) -1); })
+#define _mm_roundscale_ps(A, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1); })
-#define _mm_mask_roundscale_ps(__W, __U, __A, __imm) __extension__ ({ \
- (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, \
- (__v4sf) __W, (__mmask8) __U); })
+#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)); })
-#define _mm_maskz_roundscale_ps(__U, __A, __imm) __extension__ ({ \
- (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A, __imm, \
- (__v4sf) _mm_setzero_ps(), (__mmask8) __U); })
+#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)); })
-#define _mm256_roundscale_ps(__A, __imm) __extension__ ({ \
- (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,__imm, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) -1); })
+#define _mm256_roundscale_ps(A, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
-#define _mm256_mask_roundscale_ps(__W, __U, __A,__imm) __extension__ ({ \
- (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, __imm, \
- (__v8sf) __W, (__mmask8) __U); })
+#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)); })
-#define _mm256_maskz_roundscale_ps(__U, __A, __imm) __extension__ ({ \
- (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A, __imm, \
- (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); })
+#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
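For roundscale, the high nibble of the immediate gives the number of fraction bits M (the result is rounded to multiples of 2^-M) and the low bits select the rounding mode, 0 being round-to-nearest-even; that layout is the VRNDSCALE encoding, stated here as an assumption. A sketch:

#include <immintrin.h>

static __m256d round_to_halves(__m256d x)
{
  return _mm256_roundscale_pd(x, 0x10); /* M = 1: nearest multiple of 0.5 */
}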
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_scalef_pd (__m128d __A, __m128d __B) {
@@ -3996,153 +3934,165 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
(__mmask8) __U);
}
-#define _mm_i64scatter_pd(__addr,__index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv2df(__addr, (__mmask8) 0xFF, (__v2di) __index, \
- (__v2df) __v1, __scale); })
-
-#define _mm_mask_i64scatter_pd(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index, \
- (__v2df) __v1, __scale); })
-
-
-#define _mm_i64scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF, \
- (__v2di) __index, (__v2di) __v1, __scale); })
-
-#define _mm_mask_i64scatter_epi64(__addr, __mask, __index, __v1,\
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,\
- (__v2di) __v1, __scale); })
-
-#define _mm256_i64scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,\
- (__v4di) __index, (__v4df) __v1, __scale); })
-
-#define _mm256_mask_i64scatter_pd(__addr, __mask, __index, __v1,\
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,\
- (__v4df) __v1, __scale); })
-
-#define _mm256_i64scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF, (__v4di) __index,\
- (__v4di) __v1, __scale); })
-
-#define _mm256_mask_i64scatter_epi64(__addr, __mask, __index, __v1,\
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,\
- (__v4di) __v1, __scale); })
-
-#define _mm_i64scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,\
- (__v2di) __index, (__v4sf) __v1, __scale); })
-
-#define _mm_mask_i64scatter_ps(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,\
- (__v4sf) __v1, __scale); })
-
-#define _mm_i64scatter_epi32(__addr, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,\
- (__v2di) __index, (__v4si) __v1, __scale); })
-
-#define _mm_mask_i64scatter_epi32(__addr, __mask, __index, __v1,\
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,\
- (__v4si) __v1, __scale); })
-
-#define _mm256_i64scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF, (__v4di) __index, \
- (__v4sf) __v1, __scale); })
-
-#define _mm256_mask_i64scatter_ps(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index, \
- (__v4sf) __v1, __scale); })
-
-#define _mm256_i64scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF, \
- (__v4di) __index, (__v4si) __v1, __scale); })
-
-#define _mm256_mask_i64scatter_epi32(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scatterdiv8si(__addr, __mask, (__v4di) __index, \
- (__v4si) __v1, __scale); })
-
-#define _mm_i32scatter_pd(__addr, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF, \
- (__v4si) __index, (__v2df) __v1, __scale); })
-
-#define _mm_mask_i32scatter_pd(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,\
- (__v2df) __v1, __scale); })
-
-#define _mm_i32scatter_epi64(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF, \
- (__v4si) __index, (__v2di) __v1, __scale); })
-
-#define _mm_mask_i32scatter_epi64(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index, \
- (__v2di) __v1, __scale); })
-
-#define _mm256_i32scatter_pd(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF, \
- (__v4si) __index, (__v4df) __v1, __scale); })
-
-#define _mm256_mask_i32scatter_pd(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index, \
- (__v4df) __v1, __scale); })
-
-#define _mm256_i32scatter_epi64(__addr, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF, \
- (__v4si) __index, (__v4di) __v1, __scale); })
-
-#define _mm256_mask_i32scatter_epi64(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index, \
- (__v4di) __v1, __scale); })
-
-#define _mm_i32scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF, \
- (__v4si) __index, (__v4sf) __v1, __scale); })
-
-#define _mm_mask_i32scatter_ps(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index, \
- (__v4sf) __v1, __scale); })
-
-#define _mm_i32scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF, \
- (__v4si) __index, (__v4si) __v1, __scale); })
-
-#define _mm_mask_i32scatter_epi32(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,\
- (__v4si) __v1, __scale); })
-
-#define _mm256_i32scatter_ps(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF, \
- (__v8si) __index, (__v8sf) __v1, __scale); })
-
-#define _mm256_mask_i32scatter_ps(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,\
- (__v8sf) __v1, __scale); })
-
-#define _mm256_i32scatter_epi32(__addr, __index, __v1, __scale) __extension__ ({ \
- __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF, \
- (__v8si) __index, (__v8si) __v1, __scale); })
-
-#define _mm256_mask_i32scatter_epi32(__addr, __mask, __index, __v1, \
- __scale) __extension__ ({ \
- __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index, \
- (__v8si) __v1, __scale); })
+#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
+ (__v2di)(__m128i)(index), \
+ (__v2df)(__m128d)(v1), (int)(scale)); })
+
+#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
+ (__v2di)(__m128i)(index), \
+ (__v2df)(__m128d)(v1), (int)(scale)); })
+
+#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
+ (__v2di)(__m128i)(index), \
+ (__v2di)(__m128i)(v1), (int)(scale)); })
+
+#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
+ (__v2di)(__m128i)(index), \
+ (__v2di)(__m128i)(v1), (int)(scale)); })
+
+#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
+ (__v4di)(__m256i)(index), \
+ (__v4df)(__m256d)(v1), (int)(scale)); })
+
+#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
+ (__v4di)(__m256i)(index), \
+ (__v4df)(__m256d)(v1), (int)(scale)); })
+
+#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
+ (__v4di)(__m256i)(index), \
+ (__v4di)(__m256i)(v1), (int)(scale)); })
+
+#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
+ (__v4di)(__m256i)(index), \
+ (__v4di)(__m256i)(v1), (int)(scale)); })
+
+#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
+ (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
+ (int)(scale)); })
+
+#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
+ (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
+ (int)(scale)); })
+
+#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
+ (__v2di)(__m128i)(index), \
+ (__v4si)(__m128i)(v1), (int)(scale)); })
+
+#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
+ (__v2di)(__m128i)(index), \
+ (__v4si)(__m128i)(v1), (int)(scale)); })
+
+#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
+ (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
+ (int)(scale)); })
+
+#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
+ (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
+ (int)(scale)); })
+
+#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
+ (__v4di)(__m256i)(index), \
+ (__v4si)(__m128i)(v1), (int)(scale)); })
+
+#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
+ (__v4di)(__m256i)(index), \
+ (__v4si)(__m128i)(v1), (int)(scale)); })
+
+#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
+ (__v4si)(__m128i)(index), \
+ (__v2df)(__m128d)(v1), (int)(scale)); })
+
+#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
+ (__v4si)(__m128i)(index), \
+ (__v2df)(__m128d)(v1), (int)(scale)); })
+
+#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
+ (__v4si)(__m128i)(index), \
+ (__v2di)(__m128i)(v1), (int)(scale)); })
+
+#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
+ (__v4si)(__m128i)(index), \
+ (__v2di)(__m128i)(v1), (int)(scale)); })
+
+#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
+ (__v4si)(__m128i)(index), \
+ (__v4df)(__m256d)(v1), (int)(scale)); })
+
+#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
+ (__v4si)(__m128i)(index), \
+ (__v4df)(__m256d)(v1), (int)(scale)); })
+
+#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
+ (__v4si)(__m128i)(index), \
+ (__v4di)(__m256i)(v1), (int)(scale)); })
+
+#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
+ (__v4si)(__m128i)(index), \
+ (__v4di)(__m256i)(v1), (int)(scale)); })
+
+#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
+ (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
+ (int)(scale)); })
+
+#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
+ (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
+ (int)(scale)); })
+
+#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
+ (__v4si)(__m128i)(index), \
+ (__v4si)(__m128i)(v1), (int)(scale)); })
+
+#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
+ (__v4si)(__m128i)(index), \
+ (__v4si)(__m128i)(v1), (int)(scale)); })
+
+#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
+ (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
+ (int)(scale)); })
+
+#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
+ (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
+ (int)(scale)); })
+
+#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
+ (__v8si)(__m256i)(index), \
+ (__v8si)(__m256i)(v1), (int)(scale)); })
+
+#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
+ __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
+ (__v8si)(__m256i)(index), \
+ (__v8si)(__m256i)(v1), (int)(scale)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A) {
@@ -4600,7 +4550,4621 @@ _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
__U);
}
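+/* Masked sign extension (vpmovsx): widen the low elements of the source to
+   the wider element type.  The mask forms take unselected lanes from __W;
+   the maskz forms zero them. */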
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
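+/* Masked zero extension (vpmovzx): as above, but the low source elements are
+   zero extended rather than sign extended. */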
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
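+/* Rotate left by an immediate bit count, with merge-masking (mask) and
+   zero-masking (maskz) variants.  The count must be a compile-time constant,
+   hence the macro form. */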
+#define _mm_rol_epi32(a, b) __extension__ ({ \
+ (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({ \
+ (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
+ (__v4si)(__m128i)(w), (__mmask8)(u)); })
+
+#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({ \
+ (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(u)); })
+
+#define _mm256_rol_epi32(a, b) __extension__ ({ \
+ (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({ \
+ (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
+ (__v8si)(__m256i)(w), (__mmask8)(u)); })
+
+#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({ \
+ (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(u)); })
+
+#define _mm_rol_epi64(a, b) __extension__ ({ \
+ (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({ \
+ (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
+ (__v2di)(__m128i)(w), (__mmask8)(u)); })
+
+#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({ \
+ (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)(u)); })
+
+#define _mm256_rol_epi64(a, b) __extension__ ({ \
+ (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({ \
+ (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
+ (__v4di)(__m256i)(w), (__mmask8)(u)); })
+
+#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({ \
+ (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(u)); })
+
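+/* Rotate left by a per-element variable count taken from __B. */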
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_rolv_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_rolv_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_rolv_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_rolv_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
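+/* Rotate right by an immediate bit count; same masking conventions as the
+   rotate-left macros above. */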
+#define _mm_ror_epi32(A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)(W), (__mmask8)(U)); })
+
+#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_ror_epi32(A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)(W), (__mmask8)(U)); })
+
+#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm_ror_epi64(A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)(W), (__mmask8)(U)); })
+
+#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)(U)); })
+
+#define _mm256_ror_epi64(A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)(W), (__mmask8)(U)); })
+
+#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
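+/* Masked logical shift left: the sll forms shift all elements by the count
+   in the low 64 bits of __B, the slli macros by an immediate count. */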
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#define _mm_mask_slli_epi32(W, U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_slli_epi32(U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_slli_epi32(W, U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_slli_epi32(U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#define _mm_mask_slli_epi64(W, U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_slli_epi64(U, A, B) __extension__ ({ \
+ (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_slli_epi64(W, U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_slli_epi64(U, A, B) __extension__ ({ \
+ (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
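+/* Rotate right by a per-element variable count taken from __B. */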
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_rorv_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_rorv_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_rorv_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_rorv_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
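+/* Masked variable shift left: each element of __X is shifted by the count in
+   the corresponding element of __Y. */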
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
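+/* Masked variable logical shift right, with per-element counts from __Y. */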
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
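+/* Masked logical shift right, by the count in the low 64 bits of __B (srl)
+   or by an immediate (srli). */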
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#define _mm_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_srli_epi32(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_srli_epi32(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#define _mm_mask_srli_epi64(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_srli_epi64(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_srli_epi64(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_srli_epi64(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
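+/* Masked variable arithmetic shift right.  The 64-bit forms (vpsravq) are
+   new to AVX-512; the 32-bit forms only add masking to the AVX2
+   intrinsics. */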
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_srav_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_srav_epi64 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
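+/* Masked register-to-register moves, expressed with the generic select
+   builtins: lanes whose mask bit is set come from __A, the rest from __W
+   (mask) or are zeroed (maskz). */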
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
+ (__v4si) __A,
+ (__v4si) __W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
+ (__v4si) __A,
+ (__v4si) _mm_setzero_si128 ());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
+ (__v8si) __A,
+ (__v8si) __W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
+ (__v8si) __A,
+ (__v8si) _mm256_setzero_si256 ());
+}
+
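+/* Masked aligned integer loads and stores (vmovdqa32/vmovdqa64).  __P must
+   be suitably aligned; a masked-off element is taken from __W or zero on a
+   load and is left untouched in memory on a store. */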
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
+ (__v2di) __A,
+ (__v2di) __W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
+ (__v2di) __A,
+ (__v2di) _mm_setzero_di ());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
+ (__v4di) __A,
+ (__v4di) __W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
+ (__v4di) __A,
+ (__v4di) _mm256_setzero_si256 ());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
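+/* Masked movedup: duplicate the even-indexed double elements, then blend
+   with __W or zero under the mask. */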
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_movedup_pd(__A),
+ (__v2df)__W);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_movedup_pd(__A),
+ (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_movedup_pd(__A),
+ (__v4df)__W);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_movedup_pd(__A),
+ (__v4df)_mm256_setzero_pd());
+}
+
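+/* Masked broadcast of a scalar from a general-purpose register into every
+   selected element. */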
+#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
+ (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
+ (__v4si)(__m128i)(O), \
+ (__mmask8)(M)); })
+
+#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
+ (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(M)); })
+
+#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
+ (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
+ (__v8si)(__m256i)(O), \
+ (__mmask8)(M)); })
+
+#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
+ (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(M)); })
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
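+/* vfixupimm: fix up special floating-point values (NaN, +/-0, +/-Inf,
+   denormals) in the sources according to the per-element lookup table in C
+   and the immediate.  The maskz forms call a separate _maskz builtin. */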
+#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
+ (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), \
+ (int)(imm), (__mmask8)(U)); })
+
+#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (__v4di)(__m256i)(C), \
+ (int)(imm), (__mmask8)(U)); })
+
+#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
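+/* Masked vector loads and stores: the load/store forms require an aligned
+   pointer, the loadu/storeu forms accept an unaligned one. */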
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_load_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storeapd128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_storeapd256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storeupd128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_storeupd256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storeups128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_storeups256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
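+/* Masked unpack high/low: the unmasked AVX intrinsic performs the
+   interleave and a select builtin applies the mask. */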
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_unpackhi_pd(__A, __B),
+ (__v2df)__W);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_unpackhi_pd(__A, __B),
+ (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_unpackhi_pd(__A, __B),
+ (__v4df)__W);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_unpackhi_pd(__A, __B),
+ (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_unpackhi_ps(__A, __B),
+ (__v4sf)__W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_unpackhi_ps(__A, __B),
+ (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_unpackhi_ps(__A, __B),
+ (__v8sf)__W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_unpackhi_ps(__A, __B),
+ (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_unpacklo_pd(__A, __B),
+ (__v2df)__W);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+ (__v2df)_mm_unpacklo_pd(__A, __B),
+ (__v2df)_mm_setzero_pd());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_unpacklo_pd(__A, __B),
+ (__v4df)__W);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+ (__v4df)_mm256_unpacklo_pd(__A, __B),
+ (__v4df)_mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_unpacklo_ps(__A, __B),
+ (__v4sf)__W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_unpacklo_ps(__A, __B),
+ (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_unpacklo_ps(__A, __B),
+ (__v8sf)__W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_unpacklo_ps(__A, __B),
+ (__v8sf)_mm256_setzero_ps());
+}
+
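+/* vrcp14: approximate reciprocal with a maximum relative error of 2^-14. */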
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_rcp14_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_rcp14_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_rcp14_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_rcp14_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
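+/* Masked forms of the immediate VPERMILPD/VPERMILPS permutes: compute the
+   unmasked permute, then select between it and W (or zero) under U. */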
+#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_permute_pd((X), (C)), \
+ (__v2df)(__m128d)(W)); })
+
+#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_permute_pd((X), (C)), \
+ (__v2df)_mm_setzero_pd()); })
+
+#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permute_pd((X), (C)), \
+ (__v4df)(__m256d)(W)); })
+
+#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permute_pd((X), (C)), \
+ (__v4df)_mm256_setzero_pd()); })
+
+#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_permute_ps((X), (C)), \
+ (__v4sf)(__m128)(W)); })
+
+#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_permute_ps((X), (C)), \
+ (__v4sf)_mm_setzero_ps()); })
+
+#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_permute_ps((X), (C)), \
+ (__v8sf)(__m256)(W)); })
+
+#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_permute_ps((X), (C)), \
+ (__v8sf)_mm256_setzero_ps()); })
+
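+/* Variable VPERMILPD/VPERMILPS: per-element shuffle controls come from the
+   integer vector __C. */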
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
+ (__v2di) __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
+ (__v2di) __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
+ (__v4di) __C,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
+ (__v4di) __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
+ (__v4si) __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
+ (__v4si) __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
+ (__v8si) __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
+ (__v8si) __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
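+/* VPTESTM/VPTESTNM: AND the two operands and set each mask bit when the
+   resulting element is nonzero (test) or zero (testn). */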
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_test_epi32_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
+ (__v4si) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_test_epi32_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
+ (__v8si) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_test_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
+ (__v2di) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_test_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
+ (__v4di) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_testn_epi32_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
+ (__v4si) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
+ (__v8si) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_testn_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
+ (__v2di) __B, __U);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
+ (__v4di) __B, __U);
+}
+
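+/* Masked integer unpacks: select between the AVX2 unpack result and __W
+   (or zero) under the mask. */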
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_unpackhi_epi32(__A, __B),
+ (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_unpackhi_epi32(__A, __B),
+ (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_unpackhi_epi32(__A, __B),
+ (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_unpackhi_epi32(__A, __B),
+ (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_unpackhi_epi64(__A, __B),
+ (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_unpackhi_epi64(__A, __B),
+ (__v2di)_mm_setzero_di());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_unpackhi_epi64(__A, __B),
+ (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_unpackhi_epi64(__A, __B),
+ (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_unpacklo_epi32(__A, __B),
+ (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_unpacklo_epi32(__A, __B),
+ (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_unpacklo_epi32(__A, __B),
+ (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_unpacklo_epi32(__A, __B),
+ (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_unpacklo_epi64(__A, __B),
+ (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_unpacklo_epi64(__A, __B),
+ (__v2di)_mm_setzero_di());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_unpacklo_epi64(__A, __B),
+ (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_unpacklo_epi64(__A, __B),
+ (__v4di)_mm256_setzero_si256());
+}
+
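+/* Masked arithmetic right shifts, with the shift count taken from __B. */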
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#define _mm_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_srai_epi32(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_srai_epi32(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
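+/* VPSRAQ: 64-bit arithmetic right shift is new in AVX-512, so the unmasked
+   forms are defined here as well. */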
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_sra_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_sra_epi64 (__m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#define _mm_srai_epi64(A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_srai_epi64(A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
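+/* VPTERNLOG: imm8 is a truth table; bit (a<<2 | b<<1 | c) of the immediate
+   gives the result bit for input bits a, b, c taken from A, B and C. */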
+#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)); })
+
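+/* SHUF{F,I}{32X4,64X2}: imm8 selects one 128-bit lane of A for the low half
+   of the result and one 128-bit lane of B for the high half. */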
+#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
+ (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_shuffle_pd((A), (B), (M)), \
+ (__v2df)(__m128d)(W)); })
+
+#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_shuffle_pd((A), (B), (M)), \
+ (__v2df)_mm_setzero_pd()); })
+
+#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
+ (__v4df)(__m256d)(W)); })
+
+#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
+ (__v4df)_mm256_setzero_pd()); })
+
+#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
+ (__v4sf)(__m128)(W)); })
+
+#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
+ (__v4sf)_mm_setzero_ps()); })
+
+#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
+ (__v8sf)(__m256)(W)); })
+
+#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
+ (__v8sf)_mm256_setzero_ps()); })
+
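+/* VRSQRT14PS/PD: approximate reciprocal square root of each element,
+   accurate to a relative error of at most 2^-14. */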
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_rsqrt14_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_rsqrt14_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_rsqrt14_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_rsqrt14_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
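+/* Broadcasts: replicate a 128-bit lane or a single element across the
+   destination. */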
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_broadcast_f32x4 (__m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+ (__v8sf)_mm256_undefined_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+ (__v8sf) __O,
+ __M);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+ (__v8sf) _mm256_setzero_ps (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcast_i32x4 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+ (__v8si)_mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+ (__v8si)
+ __O, __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
+ __A,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m256d)__builtin_ia32_selectpd_256(__M,
+ (__v4df) _mm256_broadcastsd_pd(__A),
+ (__v4df) __O);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+ return (__m256d)__builtin_ia32_selectpd_256(__M,
+ (__v4df) _mm256_broadcastsd_pd(__A),
+ (__v4df) _mm256_setzero_pd());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m128)__builtin_ia32_selectps_128(__M,
+ (__v4sf) _mm_broadcastss_ps(__A),
+ (__v4sf) __O);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+ return (__m128)__builtin_ia32_selectps_128(__M,
+ (__v4sf) _mm_broadcastss_ps(__A),
+ (__v4sf) _mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256)__builtin_ia32_selectps_256(__M,
+ (__v8sf) _mm256_broadcastss_ps(__A),
+ (__v8sf) __O);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+ return (__m256)__builtin_ia32_selectps_256(__M,
+ (__v8sf) _mm256_broadcastss_ps(__A),
+ (__v8sf) _mm256_setzero_ps());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectd_128(__M,
+ (__v4si) _mm_broadcastd_epi32(__A),
+ (__v4si) __O);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectd_128(__M,
+ (__v4si) _mm_broadcastd_epi32(__A),
+ (__v4si) _mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectd_256(__M,
+ (__v8si) _mm256_broadcastd_epi32(__A),
+ (__v8si) __O);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectd_256(__M,
+ (__v8si) _mm256_broadcastd_epi32(__A),
+ (__v8si) _mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectq_128(__M,
+ (__v2di) _mm_broadcastq_epi64(__A),
+ (__v2di) __O);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i)__builtin_ia32_selectq_128(__M,
+ (__v2di) _mm_broadcastq_epi64(__A),
+ (__v2di) _mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectq_256(__M,
+ (__v4di) _mm256_broadcastq_epi64(__A),
+ (__v4di) __O);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i)__builtin_ia32_selectq_256(__M,
+ (__v4di) _mm256_broadcastq_epi64(__A),
+ (__v4di) _mm256_setzero_si256());
+}
+
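+/* VPMOVS*: narrowing conversions with signed saturation; the *_storeu_*
+   forms write only the selected converted elements to memory. */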
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtsepi32_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtsepi32_epi8 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtsepi32_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi)_mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi)__O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtsepi32_epi16 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtsepi64_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtsepi64_epi8 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtsepi64_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+ (__v4si)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+ (__v4si) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtsepi64_epi32 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+ (__v4si)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+ (__v4si)__O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtsepi64_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtsepi64_epi16 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
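+/* VPMOVUS*: narrowing conversions with unsigned saturation. */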
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtusepi32_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtusepi32_epi8 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtusepi32_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtusepi32_epi16 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi) _mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtusepi64_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtusepi64_epi8 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtusepi64_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+ (__v4si)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+ (__v4si) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtusepi64_epi32 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+ (__v4si)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+ (__v4si) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtusepi64_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtusepi64_epi16 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
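+/* VPMOV*: narrowing conversions by truncation. */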
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi32_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtepi32_epi8 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi32_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtepi32_epi16 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi)_mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi64_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi) _mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtepi64_epi8 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi) _mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi64_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+ (__v4si)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+ (__v4si) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtepi64_epi32 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+ (__v4si) _mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+ (__v4si) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+ (__v4si) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_cvtepi64_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi) _mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi)__O,
+ __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_cvtepi64_epi16 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi)_mm_undefined_si128(),
+ (__mmask8) -1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi) _mm_setzero_si128 (),
+ __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
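+/* Extract or insert the 128-bit lane selected by imm. */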
+#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
+ (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
+ (__v4sf)(__m128)(B), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
+ (__v4sf)(__m128)(B), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
+ (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
+ (__v4sf)(__m128)(B), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
+ (__v4si)(__m128i)(B), \
+ (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
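/* Usage sketch (illustrative only, not part of this patch): the imm
 * argument of the extract/insert macros selects a 128-bit lane (0 or 1)
 * and must be a compile-time constant. demo_swap_lanes is a hypothetical
 * helper that swaps the two lanes of a [8 x float] vector. */
static inline __m256 demo_swap_lanes(__m256 v)
{
  __m128 lo = _mm256_extractf32x4_ps(v, 0);
  __m128 hi = _mm256_extractf32x4_ps(v, 1);
  __m256 r  = _mm256_insertf32x4(v, hi, 0);
  return _mm256_insertf32x4(r, lo, 1);
}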
+
+#define _mm_getmant_pd(A, B, C) __extension__({\
+ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
+ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
+ (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)); })
+
+#define _mm_getmant_ps(A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
+ (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)); })
+
+#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
+ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
+ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
+ (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)); })
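/* Usage sketch (illustrative only, not part of this patch): B selects the
 * mantissa normalization interval and C the sign control; the macros pack
 * them as ((C)<<2) | (B) into the builtin's immediate. The _MM_MANT_*
 * enumerators are assumed to come from avx512fintrin.h. */
static inline __m256d demo_getmant(__m256d v)
{
  /* Mantissa normalized to [1, 2), sign taken from the source. */
  return _mm256_getmant_pd(v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}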
+
+#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
+ (double const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
+ (long long const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
+ (double const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
+ (long long const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
+ (float const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
+ (int const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
+ (float const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
+ (int const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
+ (double const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
+ (long long const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
+ (double const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
+ (long long const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
+ (float const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
+ (int const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
+ (float const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
+
+#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
+ (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
+ (int const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)); })
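/* Usage sketch (illustrative only, not part of this patch): gather four
 * floats through 32-bit indices, loading only where the mask bit is set;
 * masked-off lanes keep `fallback`. scale must be 1, 2, 4 or 8. */
static inline __m128 demo_gather(const float *base, __m128i idx,
                                 __m128 fallback, __mmask8 m)
{
  return _mm_mmask_i32gather_ps(fallback, m, idx, base, 4);
}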
+
+#define _mm256_permutex_pd(X, C) __extension__ ({ \
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
+ (__v4df)_mm256_undefined_pd(), \
+ ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
+ ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
+
+#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permutex_pd((X), (C)), \
+ (__v4df)(__m256d)(W)); })
+
+#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permutex_pd((X), (C)), \
+ (__v4df)_mm256_setzero_pd()); })
+
+#define _mm256_permutex_epi64(X, C) __extension__ ({ \
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
+ (__v4di)_mm256_undefined_si256(), \
+ ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
+ ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
+
+#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_permutex_epi64((X), (C)), \
+ (__v4di)(__m256i)(W)); })
+
+#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_permutex_epi64((X), (C)), \
+ (__v4di)_mm256_setzero_si256()); })
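/* Usage sketch (illustrative only, not part of this patch): the immediate
 * is consumed two bits per destination lane, so 0x1B (0b00011011)
 * reverses the four 64-bit lanes. */
static inline __m256d demo_reverse_pd(__m256d v)
{
  return _mm256_permutex_pd(v, 0x1B);
}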
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df) _mm256_undefined_pd (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
+ __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df) _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di) _mm256_setzero_si256 (),
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di) _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di) __W,
+ __M);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
+ __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf) _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_permutexvar_ps (__m256i __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf) _mm256_undefined_ps (),
+ (__mmask8) -1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si) __W,
+ (__mmask8) __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si) _mm256_undefined_si256(),
+ (__mmask8) -1);
+}
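/* Usage sketch (illustrative only, not part of this patch): unlike the
 * immediate permutex forms above, permutexvar takes its lane indices from
 * a vector, so they may be computed at run time. An all-zero index vector
 * broadcasts lane 0. */
static inline __m256d demo_broadcast_lane0(__m256d v)
{
  return _mm256_permutexvar_pd(_mm256_setzero_si256(), v);
}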
+
+#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
+
+#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)-1); })
+
+#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
+ (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm), \
+ (__v2di)_mm_setzero_di(), \
+ (__mmask8)(U)); })
+
+#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1); })
+
+#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
+ (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)); })
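/* Usage sketch (illustrative only, not part of this patch): VALIGND
 * concatenates a (high) with b (low) and shifts the pair right by imm
 * 32-bit elements, so imm=1 yields {b1, b2, b3, a0} in lane order. */
static inline __m128i demo_alignr(__m128i a, __m128i b)
{
  return _mm_alignr_epi32(a, b, 1);
}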
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_movehdup_ps(__A),
+ (__v4sf)__W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_movehdup_ps(__A),
+ (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_movehdup_ps(__A),
+ (__v8sf)__W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_movehdup_ps(__A),
+ (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_moveldup_ps(__A),
+ (__v4sf)__W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_moveldup_ps(__A),
+ (__v4sf)_mm_setzero_ps());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_moveldup_ps(__A),
+ (__v8sf)__W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_moveldup_ps(__A),
+ (__v8sf)_mm256_setzero_ps());
+}
+
+#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_epi32((A), (I)), \
+ (__v8si)(__m256i)(W)); })
+
+#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_epi32((A), (I)), \
+ (__v8si)_mm256_setzero_si256()); })
+
+#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shuffle_epi32((A), (I)), \
+ (__v4si)(__m128i)(W)); })
+
+#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shuffle_epi32((A), (I)), \
+ (__v4si)_mm_setzero_si128()); })
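/* Usage sketch (illustrative only, not part of this patch): the masked
 * shuffles above wrap the unmasked shuffle in __builtin_ia32_selectd_*,
 * so a zero-masked shuffle clears exactly the lanes whose mask bit is 0.
 * Here imm 0x00 broadcasts lane 0 and mask 0x3 keeps only lanes 0-1. */
static inline __m128i demo_masked_shuffle(__m128i v)
{
  return _mm_maskz_shuffle_epi32(0x3, v, 0x00);
}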
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
+ (__v2df) __A,
+ (__v2df) __W);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
+ (__v2df) __A,
+ (__v2df) _mm_setzero_pd ());
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
+ (__v4df) __A,
+ (__v4df) __W);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
+ (__v4df) __A,
+ (__v4df) _mm256_setzero_pd ());
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
+ (__v4sf) __A,
+ (__v4sf) __W);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
+ (__v4sf) __A,
+ (__v4sf) _mm_setzero_ps ());
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
+ (__v8sf) __A,
+ (__v8sf) __W);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
+ (__v8sf) __A,
+ (__v8sf) _mm256_setzero_ps ());
+}
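/* Usage sketch (illustrative only, not part of this patch): mask_mov is a
 * pure mask-driven blend, convenient for merging two precomputed results;
 * lanes with the mask bit set take the last operand. */
static inline __m256d demo_blend(__m256d if_set, __m256d if_clear,
                                 __mmask8 m)
{
  return _mm256_mask_mov_pd(if_clear, m, if_set);
}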
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
+ (__v8sf) _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS
+_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
+ (__v8hi) _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
+ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
+ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
+
+static __inline __m128i __DEFAULT_FN_ATTRS
+_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+static __inline __m128i __DEFAULT_FN_ATTRS
+_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
+ (__v8hi) _mm_setzero_si128(),
+ (__mmask8) __U);
+}
+#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
+ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)); })
+
+#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
+ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)); })
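/* Usage sketch (illustrative only, not part of this patch): round-trip
 * four floats through half precision under the current rounding mode;
 * masked-off lanes are zeroed and values outside half range lose
 * precision. */
static inline __m128 demo_half_roundtrip(__m128 v, __mmask8 m)
{
  __m128i h = _mm_maskz_cvtps_ph(m, v);
  return _mm_maskz_cvtph_ps(m, h);
}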
+
+
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_BOTH
#endif /* __AVX512VLINTRIN_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/avxintrin.h b/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
index 6d1ca5473dcf..32e8546817b3 100644
--- a/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/avxintrin.h
@@ -35,6 +35,12 @@ typedef int __v8si __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+/* Unsigned types */
+typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
+typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
+typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
+typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));
+
/* We need an explicitly signed variant for char. Note that this shouldn't
* appear in the interface though. */
typedef signed char __v32qs __attribute__((__vector_size__(32)));
@@ -47,193 +53,703 @@ typedef long long __m256i __attribute__((__vector_size__(32)));
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
/* Arithmetic */
+/// \brief Adds two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDPD / ADDPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x double] containing the sums of both
+/// operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_add_pd(__m256d __a, __m256d __b)
{
- return __a+__b;
-}
-
+ return (__m256d)((__v4df)__a+(__v4df)__b);
+}
+
+/// \brief Adds two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDPS / ADDPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x float] containing the sums of both
+/// operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_add_ps(__m256 __a, __m256 __b)
{
- return __a+__b;
-}
-
+ return (__m256)((__v8sf)__a+(__v8sf)__b);
+}
+
+/// \brief Subtracts two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSUBPD / SUBPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing the minuend.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing the subtrahend.
+/// \returns A 256-bit vector of [4 x double] containing the differences between
+/// both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_sub_pd(__m256d __a, __m256d __b)
{
- return __a-__b;
-}
-
+ return (__m256d)((__v4df)__a-(__v4df)__b);
+}
+
+/// \brief Subtracts two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSUBPS / SUBPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing the minuend.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing the subtrahend.
+/// \returns A 256-bit vector of [8 x float] containing the differences between
+/// both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_sub_ps(__m256 __a, __m256 __b)
{
- return __a-__b;
-}
-
+ return (__m256)((__v8sf)__a-(__v8sf)__b);
+}
+
+/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
+/// two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDSUBPD / ADDSUBPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing the left source operand.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing the right source operand.
+/// \returns A 256-bit vector of [4 x double] containing the alternating sums
+/// and differences between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_addsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
}
+/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
+/// two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDSUBPS / ADDSUBPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing the left source operand.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing the right source operand.
+/// \returns A 256-bit vector of [8 x float] containing the alternating sums and
+/// differences between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_addsub_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
}
+/// \brief Divides two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VDIVPD / DIVPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing the dividend.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing the divisor.
+/// \returns A 256-bit vector of [4 x double] containing the quotients of both
+/// operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_div_pd(__m256d __a, __m256d __b)
{
- return __a / __b;
-}
-
+ return (__m256d)((__v4df)__a/(__v4df)__b);
+}
+
+/// \brief Divides two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VDIVPS / DIVPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing the dividend.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing the divisor.
+/// \returns A 256-bit vector of [8 x float] containing the quotients of both
+/// operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_div_ps(__m256 __a, __m256 __b)
{
- return __a / __b;
-}
-
+ return (__m256)((__v8sf)__a/(__v8sf)__b);
+}
+
+/// \brief Compares two 256-bit vectors of [4 x double] and returns the greater
+/// of each pair of values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMAXPD / MAXPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the operands.
+/// \returns A 256-bit vector of [4 x double] containing the maximum values
+/// between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_max_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
}
+/// \brief Compares two 256-bit vectors of [8 x float] and returns the greater
+/// of each pair of values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMAXPS / MAXPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the operands.
+/// \returns A 256-bit vector of [8 x float] containing the maximum values
+/// between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_max_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
}
+/// \brief Compares two 256-bit vectors of [4 x double] and returns the lesser
+/// of each pair of values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMINPD / MINPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the operands.
+/// \returns A 256-bit vector of [4 x double] containing the minimum values
+/// between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_min_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
}
+/// \brief Compares two 256-bit vectors of [8 x float] and returns the lesser
+/// of each pair of values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMINPS / MINPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the operands.
+/// \returns A 256-bit vector of [8 x float] containing the minimum values
+/// between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_min_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
}
+/// \brief Multiplies two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMULPD / MULPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the operands.
+/// \returns A 256-bit vector of [4 x double] containing the products of both
+/// operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_mul_pd(__m256d __a, __m256d __b)
{
- return __a * __b;
-}
-
+ return (__m256d)((__v4df)__a * (__v4df)__b);
+}
+
+/// \brief Multiplies two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMULPS / MULPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the operands.
+/// \returns A 256-bit vector of [8 x float] containing the products of both
+/// operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_mul_ps(__m256 __a, __m256 __b)
{
- return __a * __b;
+ return (__m256)((__v8sf)__a * (__v8sf)__b);
}
+/// \brief Calculates the square roots of the values in a 256-bit vector of
+/// [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSQRTPD / SQRTPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double].
+/// \returns A 256-bit vector of [4 x double] containing the square roots of the
+/// values in the operand.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_sqrt_pd(__m256d __a)
{
return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
}
+/// \brief Calculates the square roots of the values in a 256-bit vector of
+/// [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSQRTPS / SQRTPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the square roots of the
+/// values in the operand.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_sqrt_ps(__m256 __a)
{
return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
}
+/// \brief Calculates the reciprocal square roots of the values in a 256-bit
+/// vector of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VRSQRTPS / RSQRTPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the reciprocal square
+/// roots of the values in the operand.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_rsqrt_ps(__m256 __a)
{
return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
}
+/// \brief Calculates the reciprocals of the values in a 256-bit vector of
+/// [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VRCPPS / RCPPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the reciprocals of the
+/// values in the operand.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_rcp_ps(__m256 __a)
{
return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
}
+/// \brief Rounds the values in a 256-bit vector of [4 x double] as specified
+/// by the byte operand. The source values are rounded to integer values and
+/// returned as 64-bit double-precision floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_round_pd(__m256d V, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VROUNDPD / ROUNDPD instruction.
+///
+/// \param V
+/// A 256-bit vector of [4 x double].
+/// \param M
+/// An integer value that specifies the rounding operation.
+/// Bits [7:4] are reserved.
+/// Bit [3] is a precision exception value:
+/// 0: A normal PE exception is used.
+/// 1: The PE field is not updated.
+/// Bit [2] is the rounding control source:
+/// 0: Use bits [1:0] of M.
+/// 1: Use the current MXCSR setting.
+/// Bits [1:0] contain the rounding control definition:
+/// 00: Nearest.
+/// 01: Downward (toward negative infinity).
+/// 10: Upward (toward positive infinity).
+/// 11: Truncated.
+/// \returns A 256-bit vector of [4 x double] containing the rounded values.
#define _mm256_round_pd(V, M) __extension__ ({ \
(__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)); })
+/// \brief Rounds the values stored in a 256-bit vector of [8 x float] as
+/// specified by the byte operand. The source values are rounded to integer
+/// values and returned as floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_round_ps(__m256 V, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VROUNDPS / ROUNDPS instruction.
+///
+/// \param V
+/// A 256-bit vector of [8 x float].
+/// \param M
+/// An integer value that specifies the rounding operation.
+/// Bits [7:4] are reserved.
+/// Bit [3] is a precision exception value:
+/// 0: A normal PE exception is used.
+/// 1: The PE field is not updated.
+/// Bit [2] is the rounding control source:
+/// 0: Use bits [1:0] of M.
+/// 1: Use the current MXCSR setting.
+/// Bits [1:0] contain the rounding control definition:
+/// 00: Nearest.
+/// 01: Downward (toward negative infinity).
+/// 10: Upward (toward positive infinity).
+/// 11: Truncated.
+/// \returns A 256-bit vector of [8 x float] containing the rounded values.
#define _mm256_round_ps(V, M) __extension__ ({ \
(__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)); })
+/// \brief Rounds up the values stored in a 256-bit vector of [4 x double]. The
+/// source values are rounded up to integer values and returned as 64-bit
+/// double-precision floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_ceil_pd(__m256d V);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VROUNDPD / ROUNDPD instruction.
+///
+/// \param V
+/// A 256-bit vector of [4 x double].
+/// \returns A 256-bit vector of [4 x double] containing the rounded up values.
#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)
+
+/// \brief Rounds down the values stored in a 256-bit vector of [4 x double].
+/// The source values are rounded down to integer values and returned as
+/// 64-bit double-precision floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_floor_pd(__m256d V);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VROUNDPD / ROUNDPD instruction.
+///
+/// \param V
+/// A 256-bit vector of [4 x double].
+/// \returns A 256-bit vector of [4 x double] containing the rounded down
+/// values.
#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)
+
+/// \brief Rounds up the values stored in a 256-bit vector of [8 x float]. The
+/// source values are rounded up to integer values and returned as
+/// floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_ceil_ps(__m256 V);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VROUNDPS / ROUNDPS instruction.
+///
+/// \param V
+/// A 256-bit vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the rounded up values.
#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)
+
+/// \brief Rounds down the values stored in a 256-bit vector of [8 x float]. The
+/// source values are rounded down to integer values and returned as
+/// floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_floor_ps(__m256 V);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VROUNDPS / ROUNDPS instruction.
+///
+/// \param V
+/// A 256-bit vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the rounded down values.
#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
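/* Usage sketch (illustrative only, not part of this patch): ceil and
 * floor above are just fixed rounding controls; truncation toward zero
 * uses _MM_FROUND_TRUNC, which is assumed to come from smmintrin.h like
 * _MM_FROUND_CEIL and _MM_FROUND_FLOOR. */
static inline __m256d demo_trunc_pd(__m256d v)
{
  return _mm256_round_pd(v, _MM_FROUND_TRUNC);
}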
/* Logical */
+/// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VANDPD / ANDPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
+/// values between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_and_pd(__m256d __a, __m256d __b)
{
- return (__m256d)((__v4di)__a & (__v4di)__b);
-}
-
+ return (__m256d)((__v4du)__a & (__v4du)__b);
+}
+
+/// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VANDPS / ANDPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
+/// values between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_and_ps(__m256 __a, __m256 __b)
{
- return (__m256)((__v8si)__a & (__v8si)__b);
-}
-
+ return (__m256)((__v8su)__a & (__v8su)__b);
+}
+
+/// \brief Performs a bitwise AND of two 256-bit vectors of [4 x double], using
+/// the one's complement of the values contained in the first source operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VANDNPD / ANDNPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing the left source operand. The
+/// one's complement of this value is used in the bitwise AND.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing the right source operand.
+/// \returns A 256-bit vector of [4 x double] containing the bitwise AND of the
+/// values of the second operand and the one's complement of the first
+/// operand.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_andnot_pd(__m256d __a, __m256d __b)
{
- return (__m256d)(~(__v4di)__a & (__v4di)__b);
-}
-
+ return (__m256d)(~(__v4du)__a & (__v4du)__b);
+}
+
+/// \brief Performs a bitwise AND of two 256-bit vectors of [8 x float], using
+/// the one's complement of the values contained in the first source operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VANDNPS / ANDNPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing the left source operand. The
+/// one's complement of this value is used in the bitwise AND.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing the right source operand.
+/// \returns A 256-bit vector of [8 x float] containing the bitwise AND of the
+/// values of the second operand and the one's complement of the first
+/// operand.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_andnot_ps(__m256 __a, __m256 __b)
{
- return (__m256)(~(__v8si)__a & (__v8si)__b);
-}
-
+ return (__m256)(~(__v8su)__a & (__v8su)__b);
+}
+
+/// \brief Performs a bitwise OR of two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VORPD / ORPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x double] containing the bitwise OR of the
+/// values between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_or_pd(__m256d __a, __m256d __b)
{
- return (__m256d)((__v4di)__a | (__v4di)__b);
-}
-
+ return (__m256d)((__v4du)__a | (__v4du)__b);
+}
+
+/// \brief Performs a bitwise OR of two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VORPS / ORPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x float] containing the bitwise OR of the
+/// values between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_or_ps(__m256 __a, __m256 __b)
{
- return (__m256)((__v8si)__a | (__v8si)__b);
-}
-
+ return (__m256)((__v8su)__a | (__v8su)__b);
+}
+
+/// \brief Performs a bitwise XOR of two 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VXORPD / XORPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x double] containing the bitwise XOR of the
+/// values between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_xor_pd(__m256d __a, __m256d __b)
{
- return (__m256d)((__v4di)__a ^ (__v4di)__b);
-}
-
+ return (__m256d)((__v4du)__a ^ (__v4du)__b);
+}
+
+/// \brief Performs a bitwise XOR of two 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VXORPS / XORPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x float] containing the bitwise XOR of the
+/// values between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_xor_ps(__m256 __a, __m256 __b)
{
- return (__m256)((__v8si)__a ^ (__v8si)__b);
+ return (__m256)((__v8su)__a ^ (__v8su)__b);
}
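/* Usage sketch (illustrative only, not part of this patch): a common use
 * of these bitwise ops is clearing the sign bits, i.e. a vector fabs. */
static inline __m256d demo_fabs_pd(__m256d v)
{
  const __m256d sign = _mm256_set1_pd(-0.0); /* only the sign bit set */
  return _mm256_andnot_pd(sign, v);          /* ~sign & v */
}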
/* Horizontal arithmetic */
+/// \brief Horizontally adds the adjacent pairs of values contained in two
+/// 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHADDPD / HADDPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// The horizontal sums of the values are returned in the even-indexed
+/// elements of a vector of [4 x double].
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// The horizontal sums of the values are returned in the odd-indexed
+/// elements of a vector of [4 x double].
+/// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
+/// both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_hadd_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in two
+/// 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHADDPS / HADDPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// The horizontal sums of the values are returned in the elements with
+/// index 0, 1, 4, 5 of a vector of [8 x float].
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// The horizontal sums of the values are returned in the elements with
+/// index 2, 3, 6, 7 of a vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
+/// both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_hadd_ps(__m256 __a, __m256 __b)
{
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
}
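/* Usage sketch (illustrative only, not part of this patch): per the lane
 * ordering documented above, the result of the call below is
 * {a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7}. */
static inline __m256 demo_hadd(__m256 a, __m256 b)
{
  return _mm256_hadd_ps(a, b);
}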
+/// \brief Horizontally subtracts the adjacent pairs of values contained in two
+/// 256-bit vectors of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHSUBPD / HSUBPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// The horizontal differences between the values are returned in the
+/// even-indexed elements of a vector of [4 x double].
+/// \param __b
+/// A 256-bit vector of [4 x double] containing one of the source operands.
+/// The horizontal differences between the values are returned in the
+/// odd-indexed elements of a vector of [4 x double].
+/// \returns A 256-bit vector of [4 x double] containing the horizontal
+/// differences of both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_hsub_pd(__m256d __a, __m256d __b)
{
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
}
+/// \brief Horizontally subtracts the adjacent pairs of values contained in two
+/// 256-bit vectors of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHSUBPS / HSUBPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// The horizontal differences between the values are returned in the
+/// elements with index 0, 1, 4, 5 of a vector of [8 x float].
+/// \param __b
+/// A 256-bit vector of [8 x float] containing one of the source operands.
+/// The horizontal differences between the values are returned in the
+/// elements with index 2, 3, 6, 7 of a vector of [8 x float].
+/// \returns A 256-bit vector of [8 x float] containing the horizontal
+/// differences of both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_hsub_ps(__m256 __a, __m256 __b)
{
@@ -241,71 +757,600 @@ _mm256_hsub_ps(__m256 __a, __m256 __b)
}
/* Vector permutations */
+/// \brief Copies the values in a 128-bit vector of [2 x double] as specified
+/// by the 128-bit integer vector operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPERMILPD / PERMILPD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double].
+/// \param __c
+/// A 128-bit integer vector operand specifying how the values are to be
+/// copied.
+/// Bit [1]:
+/// 0: Bits [63:0] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// Bit [65]:
+/// 0: Bits [63:0] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// \returns A 128-bit vector of [2 x double] containing the copied values.
static __inline __m128d __DEFAULT_FN_ATTRS
_mm_permutevar_pd(__m128d __a, __m128i __c)
{
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
}
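/* Usage sketch (illustrative only, not part of this patch): only bit 1 of
 * each 64-bit index element is consulted, so an index vector of {2, 0}
 * (element 0 = 2, bit 1 set) swaps the two doubles. */
static inline __m128d demo_swap_pd(__m128d v)
{
  return _mm_permutevar_pd(v, _mm_set_epi64x(0, 2));
}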
+/// \brief Copies the values in a 256-bit vector of [4 x double] as
+/// specified by the 256-bit integer vector operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPERMILPD / PERMILPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double].
+/// \param __c
+/// A 256-bit integer vector operand specifying how the values are to be
+/// copied.
+/// Bit [1]:
+/// 0: Bits [63:0] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// Bit [65]:
+/// 0: Bits [63:0] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// Bit [129]:
+/// 0: Bits [191:128] of the source are copied to bits [191:128] of the
+/// returned vector.
+/// 1: Bits [255:192] of the source are copied to bits [191:128] of the
+/// returned vector.
+/// Bit [193]:
+/// 0: Bits [191:128] of the source are copied to bits [255:192] of the
+/// returned vector.
+/// 1: Bits [255:192] of the source are copied to bits [255:192] of the
+/// returned vector.
+/// \returns A 256-bit vector of [4 x double] containing the copied values.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_permutevar_pd(__m256d __a, __m256i __c)
{
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
}
+/// \brief Copies the values stored in a 128-bit vector of [4 x float] as
+/// specified by the 128-bit integer vector operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __c
+/// A 128-bit integer vector operand specifying how the values are to be
+/// copied.
+/// Bits [1:0]:
+/// 00: Bits [31:0] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// Bits [33:32]:
+/// 00: Bits [31:0] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// Bits [65:64]:
+/// 00: Bits [31:0] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// Bits [97:96]:
+/// 00: Bits [31:0] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// \returns A 128-bit vector of [4 x float] containing the copied values.
static __inline __m128 __DEFAULT_FN_ATTRS
_mm_permutevar_ps(__m128 __a, __m128i __c)
{
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
}
+/// \brief Copies the values stored in a 256-bit vector of [8 x float] as
+/// specified by the 256-bit integer vector operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float].
+/// \param __c
+/// A 256-bit integer vector operand specifying how the values are to be
+/// copied.
+/// Bits [1:0]:
+/// 00: Bits [31:0] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// Bits [33:32]:
+/// 00: Bits [31:0] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// Bits [65:64]:
+/// 00: Bits [31:0] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// Bits [97:96]:
+/// 00: Bits [31:0] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// Bits [129:128]:
+/// 00: Bits [159:128] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// Bits [161:160]:
+/// 00: Bits [159:128] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// Bits [193:192]:
+/// 00: Bits [159:128] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// Bits [225:224]:
+/// 00: Bits [159:128] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// \returns A 256-bit vector of [8 x float] containing the copied values.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_permutevar_ps(__m256 __a, __m256i __c)
{
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
}
+/// \brief Copies the values in a 128-bit vector of [2 x double] as
+/// specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_permute_pd(__m128d A, const int C);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERMILPD / PERMILPD instruction.
+///
+/// \param A
+/// A 128-bit vector of [2 x double].
+/// \param C
+/// An immediate integer operand specifying how the values are to be copied.
+/// Bit [0]:
+/// 0: Bits [63:0] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// Bit [1]:
+/// 0: Bits [63:0] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_permute_pd(A, C) __extension__ ({ \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
- (__v2df)_mm_setzero_pd(), \
- (C) & 0x1, ((C) & 0x2) >> 1); })
-
+ (__v2df)_mm_undefined_pd(), \
+ ((C) >> 0) & 0x1, ((C) >> 1) & 0x1); })
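A minimal sketch of the immediate encoding (hypothetical helper, AVX assumed): with C = 0x1, bit 0 selects the high double for position 0 and bit 1 selects the low double for position 1, swapping the two elements:

#include <immintrin.h>

/* Sketch: swap the two doubles of a 128-bit vector. */
static __m128d swap_pd(__m128d v)
{
  return _mm_permute_pd(v, 0x1);
}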
+
+/// \brief Copies the values in a 256-bit vector of [4 x double] as
+/// specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_permute_pd(__m256d A, const int C);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERMILPD / PERMILPD instruction.
+///
+/// \param A
+/// A 256-bit vector of [4 x double].
+/// \param C
+/// An immediate integer operand specifying how the values are to be copied.
+/// Bit [0]:
+/// 0: Bits [63:0] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [63:0] of the
+/// returned vector.
+/// Bit [1]:
+/// 0: Bits [63:0] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
+/// returned vector.
+/// Bit [2]:
+/// 0: Bits [191:128] of the source are copied to bits [191:128] of the
+/// returned vector.
+/// 1: Bits [255:192] of the source are copied to bits [191:128] of the
+/// returned vector.
+/// Bit [3]:
+/// 0: Bits [191:128] of the source are copied to bits [255:192] of the
+/// returned vector.
+/// 1: Bits [255:192] of the source are copied to bits [255:192] of the
+/// returned vector.
+/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute_pd(A, C) __extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
- (__v4df)_mm256_setzero_pd(), \
- (C) & 0x1, ((C) & 0x2) >> 1, \
- 2 + (((C) & 0x4) >> 2), \
- 2 + (((C) & 0x8) >> 3)); })
-
+ (__v4df)_mm256_undefined_pd(), \
+ 0 + (((C) >> 0) & 0x1), \
+ 0 + (((C) >> 1) & 0x1), \
+ 2 + (((C) >> 2) & 0x1), \
+ 2 + (((C) >> 3) & 0x1)); })
+
+/// \brief Copies the values in a 128-bit vector of [4 x float] as
+/// specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128 _mm_permute_ps(__m128 A, const int C);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
+///
+/// \param A
+/// A 128-bit vector of [4 x float].
+/// \param C
+/// An immediate integer operand specifying how the values are to be copied.
+/// Bits [1:0]:
+/// 00: Bits [31:0] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// Bits [3:2]:
+/// 00: Bits [31:0] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// Bits [5:4]:
+/// 00: Bits [31:0] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// Bits [7:6]:
+/// 00: Bits [31:0] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_permute_ps(A, C) __extension__ ({ \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
- (__v4sf)_mm_setzero_ps(), \
- (C) & 0x3, ((C) & 0xc) >> 2, \
- ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); })
-
+ (__v4sf)_mm_undefined_ps(), \
+ ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
+ ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
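Two small sketches of the 2-bit field encoding (invented helper names, AVX assumed): 0x00 broadcasts element 0, and 0x1B (binary 00 01 10 11) reverses the vector:

#include <immintrin.h>

/* Sketch: 0x00 selects element 0 for every position. */
static __m128 bcast0_ps(__m128 v) { return _mm_permute_ps(v, 0x00); }

/* Sketch: fields 3, 2, 1, 0 (low to high position) reverse the elements. */
static __m128 reverse_ps(__m128 v) { return _mm_permute_ps(v, 0x1B); }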
+
+/// \brief Copies the values in a 256-bit vector of [8 x float] as
+/// specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_permute_ps(__m256 A, const int C);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
+///
+/// \param A
+/// A 256-bit vector of [8 x float].
+/// \param C
+/// An immediate integer operand specifying how the values are to be copied.
+/// Bits [1:0]:
+/// 00: Bits [31:0] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [31:0] of the
+/// returned vector.
+/// Bits [3:2]:
+/// 00: Bits [31:0] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [63:32] of the
+/// returned vector.
+/// Bits [5:4]:
+/// 00: Bits [31:0] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [95:64] of the
+/// returned vector.
+/// Bits [7:6]:
+/// 00: Bits [31:0] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 01: Bits [63:32] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 10: Bits [95:64] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
+/// returned vector.
+/// Bits [1:0]:
+/// 00: Bits [159:128] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [159:128] of the
+/// returned vector.
+/// Bits [3:2]:
+/// 00: Bits [159:128] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [191:160] of the
+/// returned vector.
+/// Bits [5:4]:
+/// 00: Bits [159:128] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [223:192] of the
+/// returned vector.
+/// Bits [7:6]:
+/// 00: Bits [159:128] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// 01: Bits [191:160] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// 10: Bits [223:192] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// 11: Bits [255:224] of the source are copied to bits [255:224] of the
+/// returned vector.
+/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute_ps(A, C) __extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (C) & 0x3, ((C) & 0xc) >> 2, \
- ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \
- 4 + (((C) & 0x03) >> 0), \
- 4 + (((C) & 0x0c) >> 2), \
- 4 + (((C) & 0x30) >> 4), \
- 4 + (((C) & 0xc0) >> 6)); })
-
+ (__v8sf)_mm256_undefined_ps(), \
+ 0 + (((C) >> 0) & 0x3), \
+ 0 + (((C) >> 2) & 0x3), \
+ 0 + (((C) >> 4) & 0x3), \
+ 0 + (((C) >> 6) & 0x3), \
+ 4 + (((C) >> 0) & 0x3), \
+ 4 + (((C) >> 2) & 0x3), \
+ 4 + (((C) >> 4) & 0x3), \
+ 4 + (((C) >> 6) & 0x3)); })
+
+/// \brief Permutes 128-bit data values stored in two 256-bit vectors of
+/// [4 x double], as specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERM2F128 / PERM2F128 instruction.
+///
+/// \param V1
+/// A 256-bit vector of [4 x double].
+/// \param V2
+/// A 256-bit vector of [4 x double].
+/// \param M
+/// An immediate integer operand specifying how the values are to be
+/// permuted.
+/// Bits [1:0]:
+/// 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the
+/// destination.
+/// 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the
+/// destination.
+/// 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the
+/// destination.
+/// 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the
+/// destination.
+/// Bits [5:4]:
+/// 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the
+/// destination.
+/// 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the
+/// destination.
+/// 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the
+/// destination.
+/// 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the
+/// destination.
+/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \
(__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), (M)); })
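A sketch of a common use (hypothetical helper, AVX assumed): passing the same vector twice with M = 0x01 routes its high half to the low half of the result and, via bits [5:4] = 00, its low half to the high half, i.e. a 128-bit lane swap:

#include <immintrin.h>

/* Sketch: swap the two 128-bit halves of one vector. */
static __m256d swap_lanes_pd(__m256d v)
{
  return _mm256_permute2f128_pd(v, v, 0x01);
}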
+/// \brief Permutes 128-bit data values stored in two 256-bit vectors of
+/// [8 x float], as specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERM2F128 / PERM2F128 instruction.
+///
+/// \param V1
+/// A 256-bit vector of [8 x float].
+/// \param V2
+/// A 256-bit vector of [8 x float].
+/// \param M
+/// An immediate integer operand specifying how the values are to be
+/// permuted.
+/// Bits [1:0]:
+/// 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the
+/// destination.
+/// 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the
+/// destination.
+/// 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the
+/// destination.
+/// 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the
+/// destination.
+/// Bits [5:4]:
+/// 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the
+/// destination.
+/// 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the
+/// destination.
+/// 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the
+/// destination.
+/// 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the
+/// destination.
+/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \
(__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })
+/// \brief Permutes 128-bit data values stored in two 256-bit integer vectors,
+/// as specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPERM2F128 / PERM2F128 instruction.
+///
+/// \param V1
+/// A 256-bit integer vector.
+/// \param V2
+/// A 256-bit integer vector.
+/// \param M
+/// An immediate integer operand specifying how the values are to be copied.
+/// Bits [1:0]:
+/// 00: Bits [127:0] of operand V1 are copied to bits [127:0] of the
+/// destination.
+/// 01: Bits [255:128] of operand V1 are copied to bits [127:0] of the
+/// destination.
+/// 10: Bits [127:0] of operand V2 are copied to bits [127:0] of the
+/// destination.
+/// 11: Bits [255:128] of operand V2 are copied to bits [127:0] of the
+/// destination.
+/// Bits [5:4]:
+/// 00: Bits [127:0] of operand V1 are copied to bits [255:128] of the
+/// destination.
+/// 01: Bits [255:128] of operand V1 are copied to bits [255:128] of the
+/// destination.
+/// 10: Bits [127:0] of operand V2 are copied to bits [255:128] of the
+/// destination.
+/// 11: Bits [255:128] of operand V2 are copied to bits [255:128] of the
+/// destination.
+/// \returns A 256-bit integer vector containing the copied values.
#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \
(__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
(__v8si)(__m256i)(V2), (M)); })
/* Vector Blend */
+/// \brief Merges 64-bit double-precision data values stored in either of the
+/// two 256-bit vectors of [4 x double], as specified by the immediate
+/// integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VBLENDPD / BLENDPD instruction.
+///
+/// \param V1
+/// A 256-bit vector of [4 x double].
+/// \param V2
+/// A 256-bit vector of [4 x double].
+/// \param M
+/// An immediate integer operand, with mask bits [3:0] specifying how the
+/// values are to be copied. The position of the mask bit corresponds to the
+/// index of a copied value. When a mask bit is 0, the corresponding 64-bit
+/// element in operand V1 is copied to the same position in the destination.
+/// When a mask bit is 1, the corresponding 64-bit element in operand V2 is
+/// copied to the same position in the destination.
+/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \
(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
(__v4df)(__m256d)(V2), \
@@ -314,6 +1359,30 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
(((M) & 0x04) ? 6 : 2), \
(((M) & 0x08) ? 7 : 3)); })
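A sketch of the mask (hypothetical helper, AVX assumed): M = 0x0A sets mask bits 1 and 3, so the result takes elements 0 and 2 from V1 and elements 1 and 3 from V2:

#include <immintrin.h>

/* Sketch: alternate elements from a and b. */
static __m256d alternate_pd(__m256d a, __m256d b)
{
  return _mm256_blend_pd(a, b, 0x0A);
}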
+/// \brief Merges 32-bit single-precision data values stored in either of the
+/// two 256-bit vectors of [8 x float], as specified by the immediate
+/// integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VBLENDPS / BLENDPS instruction.
+///
+/// \param V1
+/// A 256-bit vector of [8 x float].
+/// \param V2
+/// A 256-bit vector of [8 x float].
+/// \param M
+/// An immediate integer operand, with mask bits [7:0] specifying how the
+/// values are to be copied. The position of the mask bit corresponds to the
+/// index of a copied value. When a mask bit is 0, the corresponding 32-bit
+/// element in operand V1 is copied to the same position in the destination.
+/// When a mask bit is 1, the corresponding 32-bit element in operand V2 is
+/// copied to the same position in the destination.
+/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \
(__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), \
@@ -326,6 +1395,27 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
(((M) & 0x40) ? 14 : 6), \
(((M) & 0x80) ? 15 : 7)); })
+/// \brief Merges 64-bit double-precision data values stored in either of the
+/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector
+/// operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VBLENDVPD / BLENDVPD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double].
+/// \param __b
+/// A 256-bit vector of [4 x double].
+/// \param __c
+/// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying
+/// how the values are to be copied. The position of the mask bit corresponds
+/// to the most significant bit of a copied value. When a mask bit is 0, the
+/// corresponding 64-bit element in operand __a is copied to the same
+/// position in the destination. When a mask bit is 1, the corresponding
+/// 64-bit element in operand __b is copied to the same position in the
+/// destination.
+/// \returns A 256-bit vector of [4 x double] containing the copied values.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
{
@@ -333,6 +1423,27 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
(__v4df)__a, (__v4df)__b, (__v4df)__c);
}
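A sketch combining the compare and blend intrinsics in this header (invented helper; a real maximum would normally use _mm256_max_pd): lanes where a < b compare to all 1's, so blendv selects those elements from b:

#include <immintrin.h>

/* Sketch: elementwise maximum via compare + variable blend. */
static __m256d max_pd_sketch(__m256d a, __m256d b)
{
  __m256d lt = _mm256_cmp_pd(a, b, _CMP_LT_OQ); /* all 1's where a < b */
  return _mm256_blendv_pd(a, b, lt);            /* take b in those lanes */
}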
+/// \brief Merges 32-bit single-precision data values stored in either of the
+/// two 256-bit vectors of [8 x float], as specified by the 256-bit vector
+/// operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VBLENDVPS / BLENDVPS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float].
+/// \param __b
+/// A 256-bit vector of [8 x float].
+/// \param __c
+/// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,
+/// and 31 specifying how the values are to be copied. The position of the
+/// mask bit corresponds to the most significant bit of a copied value. When
+/// a mask bit is 0, the corresponding 32-bit element in operand __a is
+/// copied to the same position in the destination. When a mask bit is 1, the
+/// corresponding 32-bit element in operand __b is copied to the same
+/// position in the destination.
+/// \returns A 256-bit vector of [8 x float] containing the copied values.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
{
@@ -341,30 +1452,154 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
}
/* Vector Dot Product */
+/// \brief Computes two dot products in parallel, using the lower and upper
+/// halves of two [8 x float] vectors as input to the two computations, and
+/// returning the two dot products in the lower and upper halves of the
+/// [8 x float] result. The immediate integer operand controls which
+/// input elements will contribute to the dot product, and where the final
+/// results are returned. In general, for each dot product, the four
+/// corresponding elements of the input vectors are multiplied; the first
+/// two and second two products are summed, then the two sums are added to
+/// form the final result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VDPPS / DPPS instruction.
+///
+/// \param V1
+/// A vector of [8 x float] values, treated as two [4 x float] vectors.
+/// \param V2
+/// A vector of [8 x float] values, treated as two [4 x float] vectors.
+/// \param M
+/// An immediate integer argument. Bits [7:4] determine which elements of
+/// the input vectors are used, with bit [4] corresponding to the lowest
+/// element and bit [7] corresponding to the highest element of each [4 x
+/// float] subvector. If a bit is set, the corresponding elements from the
+/// two input vectors are used as an input for dot product; otherwise that
+/// input is treated as zero. Bits [3:0] determine which elements of the
+/// result will receive a copy of the final dot product, with bit [0]
+/// corresponding to the lowest element and bit [3] corresponding to the
+/// highest element of each [4 x float] subvector. If a bit is set, the dot
+/// product is returned in the corresponding element; otherwise that element
+/// is set to zero. The bitmask is applied in the same way to each of the
+/// two parallel dot product computations.
+/// \returns A 256-bit vector of [8 x float] containing the two dot products.
#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \
(__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
(__v8sf)(__m256)(V2), (M)); })
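A sketch of the mask encoding (hypothetical helper, AVX assumed): M = 0xF1 uses all four inputs of each half (bits [7:4] = 1111) and writes each dot product to the lowest element of its half (bits [3:0] = 0001):

#include <immintrin.h>

/* Sketch: two full 4-element dot products, one per 128-bit half. */
static __m256 dot2_ps(__m256 v1, __m256 v2)
{
  return _mm256_dp_ps(v1, v2, 0xF1);
}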
/* Vector shuffle */
+/// \brief Selects 8 float values from the 256-bit operands of [8 x float], as
+/// specified by the immediate value operand. The four selected elements in
+/// each operand are copied to the destination according to the bits
+/// specified in the immediate operand. The selected elements from the first
+/// 256-bit operand are copied to bits [63:0] and bits [191:128] of the
+/// destination, and the selected elements from the second 256-bit operand
+/// are copied to bits [127:64] and bits [255:192] of the destination. For
+/// example, if bits [7:0] of the immediate operand contain a value of 0xFF,
+/// the 256-bit destination vector would contain the following values: b[7],
+/// b[7], a[7], a[7], b[3], b[3], a[3], a[3].
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VSHUFPS / SHUFPS instruction.
+///
+/// \param a
+/// A 256-bit vector of [8 x float]. The four selected elements in this
+/// operand are copied to bits [63:0] and bits [191:128] in the destination,
+/// according to the bits specified in the immediate operand.
+/// \param b
+/// A 256-bit vector of [8 x float]. The four selected elements in this
+/// operand are copied to bits [127:64] and bits [255:192] in the
+/// destination, according to the bits specified in the immediate operand.
+/// \param mask
+/// An immediate value containing an 8-bit value specifying which elements to
+/// copy from a and b. Bits [3:0] specify the values copied from operand a.
+/// Bits [7:4] specify the values copied from operand b.
+/// The element positions in the 256-bit destination are assigned values as
+/// follows, according to the bit value assignments described below:
+/// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the
+/// destination.
+/// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the
+/// destination.
+/// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the
+/// destination.
+/// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in
+/// the destination.
+/// Bit value assignments:
+/// 00: Bits [31:0] and [159:128] are copied from the selected operand.
+/// 01: Bits [63:32] and [191:160] are copied from the selected operand.
+/// 10: Bits [95:64] and [223:192] are copied from the selected operand.
+/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
+/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \
- (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), \
- (mask) & 0x3, \
- ((mask) & 0xc) >> 2, \
- (((mask) & 0x30) >> 4) + 8, \
- (((mask) & 0xc0) >> 6) + 8, \
- ((mask) & 0x3) + 4, \
- (((mask) & 0xc) >> 2) + 4, \
- (((mask) & 0x30) >> 4) + 12, \
- (((mask) & 0xc0) >> 6) + 12); })
-
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), \
+ 0 + (((mask) >> 0) & 0x3), \
+ 0 + (((mask) >> 2) & 0x3), \
+ 8 + (((mask) >> 4) & 0x3), \
+ 8 + (((mask) >> 6) & 0x3), \
+ 4 + (((mask) >> 0) & 0x3), \
+ 4 + (((mask) >> 2) & 0x3), \
+ 12 + (((mask) >> 4) & 0x3), \
+ 12 + (((mask) >> 6) & 0x3)); })
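A sketch of the mask encoding (invented helper, AVX assumed): mask 0x44 (binary 01 00 01 00) picks elements 0 and 1 of a for the low pair and elements 0 and 1 of b for the high pair of each 128-bit half:

#include <immintrin.h>

/* Sketch: per half, the result is { a0, a1, b0, b1 }. */
static __m256 lo_pairs_ps(__m256 a, __m256 b)
{
  return _mm256_shuffle_ps(a, b, 0x44);
}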
+
+/// \brief Selects four double-precision values from the 256-bit operands of
+/// [4 x double], as specified by the immediate value operand. The selected
+/// elements from the first 256-bit operand are copied to bits [63:0] and
+/// bits [191:128] in the destination, and the selected elements from the
+/// second 256-bit operand are copied to bits [127:64] and bits [255:192] in
+/// the destination. For example, if bits [3:0] of the immediate operand
+/// contain a value of 0xF, the 256-bit destination vector would contain the
+/// following values: b[3], a[3], b[1], a[1].
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VSHUFPD / SHUFPD instruction.
+///
+/// \param a
+/// A 256-bit vector of [4 x double].
+/// \param b
+/// A 256-bit vector of [4 x double].
+/// \param mask
+/// An immediate value containing an 8-bit value specifying which elements to
+/// copy from a and b:
+/// Bit [0]=0: Bits [63:0] are copied from a to bits [63:0] of the
+/// destination.
+/// Bit [0]=1: Bits [127:64] are copied from a to bits [63:0] of the
+/// destination.
+/// Bit [1]=0: Bits [63:0] are copied from b to bits [127:64] of the
+/// destination.
+/// Bit [1]=1: Bits [127:64] are copied from b to bits [127:64] of the
+/// destination.
+/// Bit [2]=0: Bits [191:128] are copied from a to bits [191:128] of the
+/// destination.
+/// Bit [2]=1: Bits [255:192] are copied from a to bits [191:128] of the
+/// destination.
+/// Bit [3]=0: Bits [191:128] are copied from b to bits [255:192] of the
+/// destination.
+/// Bit [3]=1: Bits [255:192] are copied from b to bits [255:192] of the
+/// destination.
+/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \
- (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), \
- (mask) & 0x1, \
- (((mask) & 0x2) >> 1) + 4, \
- (((mask) & 0x4) >> 2) + 2, \
- (((mask) & 0x8) >> 3) + 6); })
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), \
+ 0 + (((mask) >> 0) & 0x1), \
+ 4 + (((mask) >> 1) & 0x1), \
+ 2 + (((mask) >> 2) & 0x1), \
+ 6 + (((mask) >> 3) & 0x1)); })
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
@@ -400,30 +1635,235 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
+/// \brief Compares each of the corresponding double-precision values of two
+/// 128-bit vectors of [2 x double], using the operation specified by the
+/// immediate integer operand. Returns a [2 x double] vector consisting of
+/// two doubles corresponding to the two comparison results: zero if the
+/// comparison is false, and all 1's if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCMPPD / CMPPD instruction.
+///
+/// \param a
+/// A 128-bit vector of [2 x double].
+/// \param b
+/// A 128-bit vector of [2 x double].
+/// \param c
+/// An immediate integer operand, with bits [4:0] specifying which comparison
+/// operation to use:
+/// 00h, 08h, 10h, 18h: Equal
+/// 01h, 09h, 11h, 19h: Less than
+/// 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
+/// operands)
+/// 03h, 0Bh, 13h, 1Bh: Unordered
+/// 04h, 0Ch, 14h, 1Ch: Not equal
+/// 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
+/// 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
+/// (swapped operands)
+/// 07h, 0Fh, 17h, 1Fh: Ordered
+/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) __extension__ ({ \
(__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })
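A sketch using the predicate macros defined above (hypothetical helper, AVX assumed): an ordered, non-signaling less-than compare yields an all-1's mask in each lane where a < b:

#include <immintrin.h>

/* Sketch: per-lane a < b mask. */
static __m128d lt_mask_pd(__m128d a, __m128d b)
{
  return _mm_cmp_pd(a, b, _CMP_LT_OQ);
}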
+/// \brief Compares each of the corresponding values of two 128-bit vectors of
+/// [4 x float], using the operation specified by the immediate integer
+/// operand. Returns a [4 x float] vector consisting of four floats
+/// corresponding to the four comparison results: zero if the comparison is
+/// false, and all 1's if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCMPPS / CMPPS instruction.
+///
+/// \param a
+/// A 128-bit vector of [4 x float].
+/// \param b
+/// A 128-bit vector of [4 x float].
+/// \param c
+/// An immediate integer operand, with bits [4:0] specifying which comparison
+/// operation to use:
+/// 00h, 08h, 10h, 18h: Equal
+/// 01h, 09h, 11h, 19h: Less than
+/// 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
+/// operands)
+/// 03h, 0Bh, 13h, 1Bh: Unordered
+/// 04h, 0Ch, 14h, 1Ch: Not equal
+/// 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
+/// 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
+/// (swapped operands)
+/// 07h, 0Fh, 17h, 1Fh: Ordered
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) __extension__ ({ \
(__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })
+/// \brief Compares each of the corresponding double-precision values of two
+/// 256-bit vectors of [4 x double], using the operation specified by the
+/// immediate integer operand. Returns a [4 x double] vector consisting of
+/// four doubles corresponding to the four comparison results: zero if the
+/// comparison is false, and all 1's if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCMPPD / CMPPD instruction.
+///
+/// \param a
+/// A 256-bit vector of [4 x double].
+/// \param b
+/// A 256-bit vector of [4 x double].
+/// \param c
+/// An immediate integer operand, with bits [4:0] specifying which comparison
+/// operation to use:
+/// 00h, 08h, 10h, 18h: Equal
+/// 01h, 09h, 11h, 19h: Less than
+/// 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
+/// operands)
+/// 03h, 0Bh, 13h, 1Bh: Unordered
+/// 04h, 0Ch, 14h, 1Ch: Not equal
+/// 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
+/// 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
+/// (swapped operands)
+/// 07h, 0Fh, 17h, 1Fh: Ordered
+/// \returns A 256-bit vector of [4 x double] containing the comparison results.
#define _mm256_cmp_pd(a, b, c) __extension__ ({ \
(__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
(__v4df)(__m256d)(b), (c)); })
+/// \brief Compares each of the corresponding values of two 256-bit vectors of
+/// [8 x float], using the operation specified by the immediate integer
+/// operand. Returns a [8 x float] vector consisting of eight floats
+/// corresponding to the eight comparison results: zero if the comparison is
+/// false, and all 1's if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCMPPS / CMPPS instruction.
+///
+/// \param a
+/// A 256-bit vector of [8 x float].
+/// \param b
+/// A 256-bit vector of [8 x float].
+/// \param c
+/// An immediate integer operand, with bits [4:0] specifying which comparison
+/// operation to use:
+/// 00h, 08h, 10h, 18h: Equal
+/// 01h, 09h, 11h, 19h: Less than
+/// 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
+/// operands)
+/// 03h, 0Bh, 13h, 1Bh: Unordered
+/// 04h, 0Ch, 14h, 1Ch: Not equal
+/// 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
+/// 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
+/// (swapped operands)
+/// 07h, 0Fh, 17h, 1Fh: Ordered
+/// \returns A 256-bit vector of [8 x float] containing the comparison results.
#define _mm256_cmp_ps(a, b, c) __extension__ ({ \
(__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
(__v8sf)(__m256)(b), (c)); })
+/// \brief Compares the lower double-precision values of two 128-bit vectors
+/// of [2 x double], using the operation specified by the immediate integer
+/// operand. If the result is true, the lower 64 bits of the destination
+/// vector are set to all 1's; otherwise they are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCMPSD / CMPSD instruction.
+///
+/// \param a
+/// A 128-bit vector of [2 x double].
+/// \param b
+/// A 128-bit vector of [2 x double].
+/// \param c
+/// An immediate integer operand, with bits [4:0] specifying which comparison
+/// operation to use:
+/// 00h, 08h, 10h, 18h: Equal
+/// 01h, 09h, 11h, 19h: Less than
+/// 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
+/// operands)
+/// 03h, 0Bh, 13h, 1Bh: Unordered
+/// 04h, 0Ch, 14h, 1Ch: Not equal
+/// 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
+/// 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
+/// (swapped operands)
+/// 07h, 0Fh, 17h, 1Fh: Ordered
+/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) __extension__ ({ \
(__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
(__v2df)(__m128d)(b), (c)); })
+/// \brief Compares the lower single-precision values of two 128-bit vectors
+/// of [4 x float], using the operation specified by the immediate integer
+/// operand. If the result is true, the lower 32 bits of the destination
+/// vector are set to all 1's; otherwise they are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCMPSS / CMPSS instruction.
+///
+/// \param a
+/// A 128-bit vector of [4 x float].
+/// \param b
+/// A 128-bit vector of [4 x float].
+/// \param c
+/// An immediate integer operand, with bits [4:0] specifying which comparison
+/// operation to use:
+/// 00h, 08h, 10h, 18h: Equal
+/// 01h, 09h, 11h, 19h: Less than
+/// 02h, 0Ah, 12h, 1Ah: Less than or equal / Greater than or equal (swapped
+/// operands)
+/// 03h, 0Bh, 13h, 1Bh: Unordered
+/// 04h, 0Ch, 14h, 1Ch: Not equal
+/// 05h, 0Dh, 15h, 1Dh: Not less than / Not greater than (swapped operands)
+/// 06h, 0Eh, 16h, 1Eh: Not less than or equal / Not greater than or equal
+/// (swapped operands)
+/// 07h, 0Fh, 17h, 1Fh: Ordered
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) __extension__ ({ \
(__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
(__v4sf)(__m128)(b), (c)); })
+/// \brief Takes an [8 x i32] vector and returns the vector element value
+/// indexed by the immediate constant operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
+/// EXTRACTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x i32].
+/// \param __imm
+/// An immediate integer operand with bits [2:0] determining which vector
+/// element is extracted and returned.
+/// \returns A 32-bit integer containing the extracted 32 bits of extended
+/// packed data.
static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi32(__m256i __a, const int __imm)
{
@@ -431,21 +1871,66 @@ _mm256_extract_epi32(__m256i __a, const int __imm)
return __b[__imm & 7];
}
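A short usage sketch (invented helper, AVX assumed); the index is masked to the low three bits, so 2 selects the third 32-bit element:

#include <immintrin.h>

/* Sketch: read element 2 of an [8 x i32] vector. */
static int third_epi32(__m256i v)
{
  return _mm256_extract_epi32(v, 2);
}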
+/// \brief Takes a [16 x i16] vector and returns the vector element value
+/// indexed by the immediate constant operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
+/// EXTRACTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A 256-bit integer vector of [16 x i16].
+/// \param __imm
+/// An immediate integer operand with bits [3:0] determining which vector
+/// element is extracted and returned.
+/// \returns A 32-bit integer containing the extracted 16 bits of zero-extended
+/// packed data.
static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi16(__m256i __a, const int __imm)
{
__v16hi __b = (__v16hi)__a;
- return __b[__imm & 15];
-}
-
+ return (unsigned short)__b[__imm & 15];
+}
+
+/// \brief Takes a [32 x i8] vector and returns the vector element value
+/// indexed by the immediate constant operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
+/// EXTRACTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A 256-bit integer vector of [32 x i8].
+/// \param __imm
+/// An immediate integer operand with bits [4:0] determining which vector
+/// element is extracted and returned.
+/// \returns A 32-bit integer containing the extracted 8 bits of zero-extended
+/// packed data.
static __inline int __DEFAULT_FN_ATTRS
_mm256_extract_epi8(__m256i __a, const int __imm)
{
__v32qi __b = (__v32qi)__a;
- return __b[__imm & 31];
+ return (unsigned char)__b[__imm & 31];
}
#ifdef __x86_64__
+/// \brief Takes a [4 x i64] vector and returns the vector element value
+/// indexed by the immediate constant operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VEXTRACTF128+COMPOSITE /
+/// EXTRACTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A 256-bit integer vector of [4 x i64].
+/// \param __imm
+/// An immediate integer operand with bits [1:0] determining which vector
+/// element is extracted and returned.
+/// \returns A 64-bit integer containing the extracted 64 bits of extended
+/// packed data.
static __inline long long __DEFAULT_FN_ATTRS
_mm256_extract_epi64(__m256i __a, const int __imm)
{
@@ -454,6 +1939,24 @@ _mm256_extract_epi64(__m256i __a, const int __imm)
}
#endif
+/// \brief Takes an [8 x i32] vector and replaces the vector element value
+/// indexed by the immediate constant operand by a new value. Returns the
+/// modified vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
+/// INSERTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A vector of [8 x i32] to be used by the insert operation.
+/// \param __b
+/// An integer value. The replacement value for the insert operation.
+/// \param __imm
+/// An immediate integer specifying the index of the vector element to be
+/// replaced.
+/// \returns A copy of vector __a, after replacing its element indexed by __imm
+/// with __b.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi32(__m256i __a, int __b, int const __imm)
{
@@ -462,6 +1965,25 @@ _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
return (__m256i)__c;
}
+
+/// \brief Takes a [16 x i16] vector and replaces the vector element value
+/// indexed by the immediate constant operand with a new value. Returns the
+/// modified vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
+/// INSERTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A vector of [16 x i16] to be used by the insert operation.
+/// \param __b
+/// An i16 integer value. The replacement value for the insert operation.
+/// \param __imm
+/// An immediate integer specifying the index of the vector element to be
+/// replaced.
+/// \returns A copy of vector __a, after replacing its element indexed by __imm
+/// with __b.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi16(__m256i __a, int __b, int const __imm)
{
@@ -470,6 +1992,24 @@ _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
return (__m256i)__c;
}
+/// \brief Takes a [32 x i8] vector and replaces the vector element value
+/// indexed by the immediate constant operand with a new value. Returns the
+/// modified vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
+/// INSERTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A vector of [32 x i8] to be used by the insert operation.
+/// \param __b
+/// An i8 integer value. The replacement value for the insert operation.
+/// \param __imm
+/// An immediate integer specifying the index of the vector element to be
+/// replaced.
+/// \returns A copy of vector __a, after replacing its element indexed by __imm
+/// with __b.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi8(__m256i __a, int __b, int const __imm)
{
@@ -479,6 +2019,24 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
}
#ifdef __x86_64__
+/// \brief Takes a [4 x i64] vector and replaces the vector element value
+/// indexed by the immediate constant operand with a new value. Returns the
+/// modified vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VINSERTF128+COMPOSITE /
+/// INSERTF128+COMPOSITE instruction.
+///
+/// \param __a
+/// A vector of [4 x i64] to be used by the insert operation.
+/// \param __b
+/// A 64-bit integer value. The replacement value for the insert operation.
+/// \param __imm
+/// An immediate integer specifying the index of the vector element to be
+/// replaced.
+/// \returns A copy of vector __a, after replacing its element indexed by __imm
+/// with __b.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
{
@@ -489,24 +2047,61 @@ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
#endif
/* Conversion */
+/// \brief Converts a vector of [4 x i32] into a vector of [4 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTDQ2PD / CVTDQ2PD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector of [4 x i32].
+/// \returns A 256-bit vector of [4 x double] containing the converted values.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtepi32_pd(__m128i __a)
{
- return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
+ return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);
}
+/// \brief Converts a vector of [8 x i32] into a vector of [8 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTDQ2PS / CVTDQ2PS instruction.
+///
+/// \param __a
+/// A 256-bit integer vector.
+/// \returns A 256-bit vector of [8 x float] containing the converted values.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_cvtepi32_ps(__m256i __a)
{
return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
}
+/// \brief Converts a 256-bit vector of [4 x double] into a 128-bit vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTPD2PS / CVTPD2PS instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x double].
+/// \returns A 128-bit vector of [4 x float] containing the converted values.
static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_cvtpd_ps(__m256d __a)
{
return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
}
+/// \brief Converts a vector of [8 x float] into a vector of [8 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTPS2DQ / CVTPS2DQ instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x float].
+/// \returns A 256-bit integer vector containing the converted values.
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_cvtps_epi32(__m256 __a)
{
@@ -516,7 +2111,7 @@ _mm256_cvtps_epi32(__m256 __a)
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_cvtps_pd(__m128 __a)
{
- return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
+ return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
}
static __inline __m128i __DEFAULT_FN_ATTRS
@@ -537,48 +2132,67 @@ _mm256_cvttps_epi32(__m256 __a)
return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
}
+static __inline double __DEFAULT_FN_ATTRS
+_mm256_cvtsd_f64(__m256d __a)
+{
+ return __a[0];
+}
+
+static __inline int __DEFAULT_FN_ATTRS
+_mm256_cvtsi256_si32(__m256i __a)
+{
+ __v8si __b = (__v8si)__a;
+ return __b[0];
+}
+
+static __inline float __DEFAULT_FN_ATTRS
+_mm256_cvtss_f32(__m256 __a)
+{
+ return __a[0];
+}
+
/* Vector replicate */
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_movehdup_ps(__m256 __a)
{
- return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
+ return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);
}
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_moveldup_ps(__m256 __a)
{
- return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
+ return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);
}
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_movedup_pd(__m256d __a)
{
- return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
+ return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);
}
/* Unpack and Interleave */
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_unpackhi_pd(__m256d __a, __m256d __b)
{
- return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
+ return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
}
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_unpacklo_pd(__m256d __a, __m256d __b)
{
- return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
+ return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
}
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_unpackhi_ps(__m256 __a, __m256 __b)
{
- return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
+ return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
}
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_unpacklo_ps(__m256 __a, __m256 __b)
{
- return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
+ return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
}
/* Bit Test */
@@ -723,13 +2337,13 @@ _mm256_broadcast_ss(float const *__a)
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_broadcast_pd(__m128d const *__a)
{
- return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
+ return (__m256d)__builtin_ia32_vbroadcastf128_pd256((__v2df const *)__a);
}
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_broadcast_ps(__m128 const *__a)
{
- return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
+ return (__m256)__builtin_ia32_vbroadcastf128_ps256((__v4sf const *)__a);
}
/* SIMD load ops */
@@ -800,13 +2414,19 @@ _mm256_store_ps(float *__p, __m256 __a)
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_pd(double *__p, __m256d __a)
{
- __builtin_ia32_storeupd256(__p, (__v4df)__a);
+ struct __storeu_pd {
+ __m256d __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_pd*)__p)->__v = __a;
}
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_ps(float *__p, __m256 __a)
{
- __builtin_ia32_storeups256(__p, (__v8sf)__a);
+ struct __storeu_ps {
+ __m256 __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_ps*)__p)->__v = __a;
}
static __inline void __DEFAULT_FN_ATTRS
@@ -818,7 +2438,10 @@ _mm256_store_si256(__m256i *__p, __m256i __a)
static __inline void __DEFAULT_FN_ATTRS
_mm256_storeu_si256(__m256i *__p, __m256i __a)
{
- __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
+ struct __storeu_si256 {
+ __m256i __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si256*)__p)->__v = __a;
}
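The packed, may_alias wrapper structs above are what let these stores target any byte address; as a sketch (invented helper, and the buffer size is the caller's obligation), storing at a deliberately misaligned offset:

#include <immintrin.h>

/* Sketch: store 8 floats starting 4 bytes into the buffer. */
static void store_off4(float *buf /* at least 9 floats */, __m256 v)
{
  _mm256_storeu_ps(buf + 1, v);
}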
/* Conditional load ops */
@@ -876,36 +2499,36 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
{
- __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
+ __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a);
}
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
{
- __builtin_ia32_movntpd256(__a, (__v4df)__b);
+ __builtin_nontemporal_store((__v4df)__b, (__v4df*)__a);
}
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
{
- __builtin_ia32_movntps256(__p, (__v8sf)__a);
+ __builtin_nontemporal_store((__v8sf)__a, (__v8sf*)__p);
}
/* Create vectors */
static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_undefined_pd()
+_mm256_undefined_pd(void)
{
return (__m256d)__builtin_ia32_undef256();
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_undefined_ps()
+_mm256_undefined_ps(void)
{
return (__m256)__builtin_ia32_undef256();
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_undefined_si256()
+_mm256_undefined_si256(void)
{
return (__m256i)__builtin_ia32_undef256();
}
@@ -1117,37 +2740,37 @@ _mm256_castsi256_pd(__m256i __a)
static __inline __m128d __DEFAULT_FN_ATTRS
_mm256_castpd256_pd128(__m256d __a)
{
- return __builtin_shufflevector(__a, __a, 0, 1);
+ return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);
}
static __inline __m128 __DEFAULT_FN_ATTRS
_mm256_castps256_ps128(__m256 __a)
{
- return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
+ return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);
}
static __inline __m128i __DEFAULT_FN_ATTRS
_mm256_castsi256_si128(__m256i __a)
{
- return __builtin_shufflevector(__a, __a, 0, 1);
+ return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);
}
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_castpd128_pd256(__m128d __a)
{
- return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
+ return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);
}
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_castps128_ps256(__m128 __a)
{
- return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);
}
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_castsi128_si256(__m128i __a)
{
- return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
+ return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);
}
/*
@@ -1194,7 +2817,7 @@ _mm256_castsi128_si256(__m128i __a)
#define _mm256_extractf128_ps(V, M) __extension__ ({ \
(__m128)__builtin_shufflevector( \
(__v8sf)(__m256)(V), \
- (__v8sf)(_mm256_setzero_ps()), \
+ (__v8sf)(_mm256_undefined_ps()), \
(((M) & 1) ? 4 : 0), \
(((M) & 1) ? 5 : 1), \
(((M) & 1) ? 6 : 2), \
@@ -1203,14 +2826,14 @@ _mm256_castsi128_si256(__m128i __a)
#define _mm256_extractf128_pd(V, M) __extension__ ({ \
(__m128d)__builtin_shufflevector( \
(__v4df)(__m256d)(V), \
- (__v4df)(_mm256_setzero_pd()), \
+ (__v4df)(_mm256_undefined_pd()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
#define _mm256_extractf128_si256(V, M) __extension__ ({ \
(__m128i)__builtin_shufflevector( \
(__v4di)(__m256i)(V), \
- (__v4di)(_mm256_setzero_si256()), \
+ (__v4di)(_mm256_undefined_si256()), \
(((M) & 1) ? 2 : 0), \
(((M) & 1) ? 3 : 1) );})
@@ -1218,35 +2841,22 @@ _mm256_castsi128_si256(__m128i __a)
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
{
- struct __loadu_ps {
- __m128 __v;
- } __attribute__((__packed__, __may_alias__));
-
- __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
- return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
+ __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));
+ return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);
}
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
{
- struct __loadu_pd {
- __m128d __v;
- } __attribute__((__packed__, __may_alias__));
-
- __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
- return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
+ __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));
+ return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);
}
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
{
- struct __loadu_si128 {
- __m128i __v;
- } __attribute__((__packed__, __may_alias__));
- __m256i __v256 = _mm256_castsi128_si256(
- ((struct __loadu_si128*)__addr_lo)->__v);
- return _mm256_insertf128_si256(__v256,
- ((struct __loadu_si128*)__addr_hi)->__v, 1);
+ __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
+ return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
}
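A sketch of the two-half load (hypothetical helper): the halves may come from unrelated, unaligned addresses, with the lo pointer filling bits [127:0] and hi filling bits [255:128]:

#include <immintrin.h>

/* Sketch: assemble one 256-bit vector from two unaligned 128-bit loads. */
static __m256i join_m128i(__m128i const *hi, __m128i const *lo)
{
  return _mm256_loadu2_m128i(hi, lo);
}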
/* SIMD store ops (unaligned) */
@@ -1256,9 +2866,9 @@ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
__m128 __v128;
__v128 = _mm256_castps256_ps128(__a);
- __builtin_ia32_storeups(__addr_lo, __v128);
+ _mm_storeu_ps(__addr_lo, __v128);
__v128 = _mm256_extractf128_ps(__a, 1);
- __builtin_ia32_storeups(__addr_hi, __v128);
+ _mm_storeu_ps(__addr_hi, __v128);
}
static __inline void __DEFAULT_FN_ATTRS
@@ -1267,9 +2877,9 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
__m128d __v128;
__v128 = _mm256_castpd256_pd128(__a);
- __builtin_ia32_storeupd(__addr_lo, __v128);
+ _mm_storeu_pd(__addr_lo, __v128);
__v128 = _mm256_extractf128_pd(__a, 1);
- __builtin_ia32_storeupd(__addr_hi, __v128);
+ _mm_storeu_pd(__addr_hi, __v128);
}
static __inline void __DEFAULT_FN_ATTRS
@@ -1278,14 +2888,14 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
__m128i __v128;
__v128 = _mm256_castsi256_si128(__a);
- __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128);
+ _mm_storeu_si128(__addr_lo, __v128);
__v128 = _mm256_extractf128_si256(__a, 1);
- __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
+ _mm_storeu_si128(__addr_hi, __v128);
}
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_set_m128 (__m128 __hi, __m128 __lo) {
- return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
+ return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
}
static __inline __m256d __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
index da98792d8307..30acfaeb9f3b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/bmiintrin.h
@@ -28,12 +28,107 @@
#ifndef __BMIINTRIN_H
#define __BMIINTRIN_H
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned short _tzcnt_u16(unsigned short a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param a
+/// An unsigned 16-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 16-bit integer containing the number of trailing zero
+/// bits in the operand.
#define _tzcnt_u16(a) (__tzcnt_u16((a)))
+
+/// \brief Performs a bitwise AND of the second operand with the one's
+/// complement of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _andn_u32(unsigned int a, unsigned int b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ANDN instruction.
+///
+/// \param a
+/// An unsigned integer containing one of the operands.
+/// \param b
+/// An unsigned integer containing one of the operands.
+/// \returns An unsigned integer containing the bitwise AND of the second
+/// operand with the one's complement of the first operand.
#define _andn_u32(a, b) (__andn_u32((a), (b)))
+
/* _bextr_u32 != __bextr_u32 */
+/// \brief Clears all bits in the source except the least significant bit that
+/// is set to 1, and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _blsi_u32(unsigned int a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSI instruction.
+///
+/// \param a
+/// An unsigned integer whose bits are to be cleared.
+/// \returns An unsigned integer containing the result of clearing the bits from
+/// the source operand.
#define _blsi_u32(a) (__blsi_u32((a)))
+
+/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
+/// including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _blsmsk_u32(unsigned int a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param a
+/// An unsigned integer used to create the mask.
+/// \returns An unsigned integer containing the newly created mask.
#define _blsmsk_u32(a) (__blsmsk_u32((a)))
+
+/// \brief Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _blsr_u32(unsigned int a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param a
+/// An unsigned integer containing the operand to be cleared.
+/// \returns An unsigned integer containing the result of clearing the source
+/// operand.
#define _blsr_u32(a) (__blsr_u32((a)))
+
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned int _tzcnt_u32(unsigned int a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param a
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+/// bits in the operand.
#define _tzcnt_u32(a) (__tzcnt_u32((a)))
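Since the hunk above only shows the macro spellings, here is a minimal sketch of what they compute for one hand-worked input; it assumes a BMI-capable target (compiled with -mbmi), and the values in the comments are worked out by hand:
#include <x86intrin.h>
void bmi32_demo(void) {
  unsigned int a = 0x2C;                /* 0b101100 */
  unsigned int i = _blsi_u32(a);        /* 0x04: lowest set bit isolated */
  unsigned int m = _blsmsk_u32(a);      /* 0x07: mask through that bit   */
  unsigned int r = _blsr_u32(a);        /* 0x28: lowest set bit cleared  */
  unsigned int t = _tzcnt_u32(a);       /* 2: trailing zero count        */
  unsigned int n = _andn_u32(a, 0xFFu); /* 0xD3: ~a & 0xFF               */
  (void)i; (void)m; (void)r; (void)t; (void)n;
}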
/* Define the default attributes for the functions in this file. */
@@ -44,12 +139,35 @@
to use it as a potentially faster version of BSF. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 16-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 16-bit integer containing the number of trailing zero
+/// bits in the operand.
static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)
{
return __X ? __builtin_ctzs(__X) : 16;
}
+/// \brief Performs a bitwise AND of the second operand with the one's
+/// complement of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ANDN instruction.
+///
+/// \param __X
+/// An unsigned integer containing one of the operands.
+/// \param __Y
+/// An unsigned integer containing one of the operands.
+/// \returns An unsigned integer containing the bitwise AND of the second
+/// operand with the one's complement of the first operand.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__andn_u32(unsigned int __X, unsigned int __Y)
{
@@ -57,6 +175,21 @@ __andn_u32(unsigned int __X, unsigned int __Y)
}
/* AMD-specified, double-leading-underscore version of BEXTR */
+/// \brief Extracts the specified bits from the first operand and returns them
+/// in the least significant bits of the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BEXTR instruction.
+///
+/// \param __X
+/// An unsigned integer whose bits are to be extracted.
+/// \param __Y
+/// An unsigned integer used to specify which bits are extracted. Bits [7:0]
+/// specify the index of the least significant bit. Bits [15:8] specify the
+/// number of bits to be extracted.
+/// \returns An unsigned integer whose least significant bits contain the
+/// extracted bits.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__bextr_u32(unsigned int __X, unsigned int __Y)
{
@@ -64,45 +197,214 @@ __bextr_u32(unsigned int __X, unsigned int __Y)
}
/* Intel-specified, single-leading-underscore version of BEXTR */
+/// \brief Extracts the specified bits from the first operand and returns them
+/// in the least significant bits of the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BEXTR instruction.
+///
+/// \param __X
+/// An unsigned integer whose bits are to be extracted.
+/// \param __Y
+/// An unsigned integer used to specify the index of the least significant
+/// bit for the bits to be extracted. Bits [7:0] specify the index.
+/// \param __Z
+/// An unsigned integer used to specify the number of bits to be extracted.
+/// Bits [7:0] specify the number of bits.
+/// \returns An unsigned integer whose least significant bits contain the
+/// extracted bits.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
{
return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
}
+/// \brief Clears all bits in the source except for the least significant bit
+/// containing a value of 1 and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BLSI instruction.
+///
+/// \param __X
+/// An unsigned integer whose bits are to be cleared.
+/// \returns An unsigned integer containing the result of clearing the bits from
+/// the source operand.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsi_u32(unsigned int __X)
{
return __X & -__X;
}
+/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
+///    including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param __X
+/// An unsigned integer used to create the mask.
+/// \returns An unsigned integer containing the newly created mask.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)
{
return __X ^ (__X - 1);
}
+/// \brief Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param __X
+/// An unsigned integer containing the operand to be cleared.
+/// \returns An unsigned integer containing the result of clearing the source
+/// operand.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsr_u32(unsigned int __X)
{
return __X & (__X - 1);
}
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 32-bit integer containing the number of trailing zero
+/// bits in the operand.
static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)
{
return __X ? __builtin_ctz(__X) : 32;
}
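A common idiom built from the two definitions above combines __tzcnt_u32 with __blsr_u32 to walk the set bits of a word lowest-first; a minimal sketch, assuming a BMI target, with visit_set_bits as a hypothetical name:
#include <x86intrin.h>
static void visit_set_bits(unsigned int bits, void (*fn)(unsigned int)) {
  while (bits) {
    fn(__tzcnt_u32(bits));   /* index of the lowest set bit */
    bits = __blsr_u32(bits); /* clear that bit and continue */
  }
}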
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 32-bit integer whose trailing zeros are to be counted.
+/// \returns A 32-bit integer containing the number of trailing zero
+/// bits in the operand.
+static __inline__ int __RELAXED_FN_ATTRS
+_mm_tzcnt_32(unsigned int __X)
+{
+ return __X ? __builtin_ctz(__X) : 32;
+}
+
#ifdef __x86_64__
+/// \brief Performs a bitwise AND of the second operand with the one's
+/// complement of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _andn_u64 (unsigned long long a, unsigned long long b);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c ANDN instruction.
+///
+/// \param a
+/// An unsigned 64-bit integer containing one of the operands.
+/// \param b
+/// An unsigned 64-bit integer containing one of the operands.
+/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
+/// operand with the one's complement of the first operand.
#define _andn_u64(a, b) (__andn_u64((a), (b)))
+
/* _bextr_u64 != __bextr_u64 */
+/// \brief Clears all bits in the source except for the least significant bit
+/// containing a value of 1 and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _blsi_u64(unsigned long long a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSI instruction.
+///
+/// \param a
+/// An unsigned 64-bit integer whose bits are to be cleared.
+/// \returns An unsigned 64-bit integer containing the result of clearing the
+/// bits from the source operand.
#define _blsi_u64(a) (__blsi_u64((a)))
+
+/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
+///    including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _blsmsk_u64(unsigned long long a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param a
+/// An unsigned 64-bit integer used to create the mask.
+/// \returns An unsigned 64-bit integer containing the newly created mask.
#define _blsmsk_u64(a) (__blsmsk_u64((a)))
+
+/// \brief Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _blsr_u64(unsigned long long a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param a
+/// An unsigned 64-bit integer containing the operand to be cleared.
+/// \returns An unsigned 64-bit integer containing the result of clearing the
+/// source operand.
#define _blsr_u64(a) (__blsr_u64((a)))
+
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned long long _tzcnt_u64(unsigned long long a);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param a
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+/// bits in the operand.
#define _tzcnt_u64(a) (__tzcnt_u64((a)))
+/// \brief Performs a bitwise AND of the second operand with the one's
+/// complement of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c ANDN instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer containing one of the operands.
+/// \param __Y
+/// An unsigned 64-bit integer containing one of the operands.
+/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
+/// operand with the one's complement of the first operand.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
@@ -110,6 +412,21 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
}
/* AMD-specified, double-leading-underscore version of BEXTR */
+/// \brief Extracts the specified bits from the first operand and returns them
+/// in the least significant bits of the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BEXTR instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose bits are to be extracted.
+/// \param __Y
+/// An unsigned 64-bit integer used to specify which bits are extracted. Bits
+/// [7:0] specify the index of the least significant bit. Bits [15:8] specify
+/// the number of bits to be extracted.
+/// \returns An unsigned 64-bit integer whose least significant bits contain the
+/// extracted bits.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__bextr_u64(unsigned long long __X, unsigned long long __Y)
{
@@ -117,36 +434,112 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y)
}
/* Intel-specified, single-leading-underscore version of BEXTR */
+/// \brief Extracts the specified bits from the first operand and returns them
+/// in the least significant bits of the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BEXTR instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose bits are to be extracted.
+/// \param __Y
+/// An unsigned integer used to specify the index of the least significant
+/// bit for the bits to be extracted. Bits [7:0] specify the index.
+/// \param __Z
+/// An unsigned integer used to specify the number of bits to be extracted.
+/// Bits [7:0] specify the number of bits.
+/// \returns An unsigned 64-bit integer whose least significant bits contain the
+/// extracted bits.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
{
return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
}
+/// \brief Clears all bits in the source except for the least significant bit
+/// containing a value of 1 and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BLSI instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose bits are to be cleared.
+/// \returns An unsigned 64-bit integer containing the result of clearing the
+/// bits from the source operand.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsi_u64(unsigned long long __X)
{
return __X & -__X;
}
+/// \brief Creates a mask whose bits are set to 1, using bit 0 up to and
+///    including the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BLSMSK instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer used to create the mask.
+/// \returns An unsigned 64-bit integer containing the newly created mask.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)
{
return __X ^ (__X - 1);
}
+/// \brief Clears the least significant bit that is set to 1 in the source
+/// operand and returns the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c BLSR instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer containing the operand to be cleared.
+/// \returns An unsigned 64-bit integer containing the result of clearing the
+/// source operand.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsr_u64(unsigned long long __X)
{
return __X & (__X - 1);
}
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns An unsigned 64-bit integer containing the number of trailing zero
+/// bits in the operand.
static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)
{
return __X ? __builtin_ctzll(__X) : 64;
}
+/// \brief Counts the number of trailing zero bits in the operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c TZCNT instruction.
+///
+/// \param __X
+/// An unsigned 64-bit integer whose trailing zeros are to be counted.
+/// \returns A 64-bit integer containing the number of trailing zero
+/// bits in the operand.
+static __inline__ long long __RELAXED_FN_ATTRS
+_mm_tzcnt_64(unsigned long long __X)
+{
+ return __X ? __builtin_ctzll(__X) : 64;
+}
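One point these definitions make explicit: unlike a raw BSF, the TZCNT wrappers are well defined for a zero input. A minimal sketch on an x86-64 BMI target:
#include <x86intrin.h>
void tzcnt64_demo(void) {
  long long z = _mm_tzcnt_64(0ULL);     /* 64: zero input is well defined */
  long long t = _mm_tzcnt_64(0x100ULL); /* 8 */
  (void)z; (void)t;
}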
+
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/clflushoptintrin.h b/contrib/llvm/tools/clang/lib/Headers/clflushoptintrin.h
new file mode 100644
index 000000000000..60e0ead76275
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/clflushoptintrin.h
@@ -0,0 +1,41 @@
+/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __CLFLUSHOPTINTRIN_H
+#define __CLFLUSHOPTINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt")))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_clflushopt(char * __m) {
+ __builtin_ia32_clflushopt(__m);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
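A minimal usage sketch for the new intrinsic, assuming a -mclflushopt build; pairing the flush with _mm_sfence is a common pattern when later stores must be ordered after it, not something this header mandates:
#include <immintrin.h>
void flush_line(char *p) {
  _mm_clflushopt(p); /* flush the cache line containing p */
  _mm_sfence();      /* order the flush before subsequent stores */
}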
diff --git a/contrib/llvm/tools/clang/lib/Headers/cpuid.h b/contrib/llvm/tools/clang/lib/Headers/cpuid.h
index 5da02e0e5152..400dcfacd552 100644
--- a/contrib/llvm/tools/clang/lib/Headers/cpuid.h
+++ b/contrib/llvm/tools/clang/lib/Headers/cpuid.h
@@ -82,6 +82,7 @@
/* Features in %ecx for level 1 */
#define bit_SSE3 0x00000001
#define bit_PCLMULQDQ 0x00000002
+#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */
#define bit_DTES64 0x00000004
#define bit_MONITOR 0x00000008
#define bit_DSCPL 0x00000010
@@ -98,15 +99,19 @@
#define bit_PCID 0x00020000
#define bit_DCA 0x00040000
#define bit_SSE41 0x00080000
+#define bit_SSE4_1 bit_SSE41 /* for gcc compat */
#define bit_SSE42 0x00100000
+#define bit_SSE4_2 bit_SSE42 /* for gcc compat */
#define bit_x2APIC 0x00200000
#define bit_MOVBE 0x00400000
#define bit_POPCNT 0x00800000
#define bit_TSCDeadline 0x01000000
#define bit_AESNI 0x02000000
+#define bit_AES bit_AESNI /* for gcc compat */
#define bit_XSAVE 0x04000000
#define bit_OSXSAVE 0x08000000
#define bit_AVX 0x10000000
+#define bit_F16C 0x20000000
#define bit_RDRND 0x40000000
/* Features in %edx for level 1 */
@@ -119,6 +124,7 @@
#define bit_PAE 0x00000040
#define bit_MCE 0x00000080
#define bit_CX8 0x00000100
+#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */
#define bit_APIC 0x00000200
#define bit_SEP 0x00000800
#define bit_MTRR 0x00001000
@@ -133,7 +139,7 @@
#define bit_ACPI 0x00400000
#define bit_MMX 0x00800000
#define bit_FXSR 0x01000000
-#define bit_FXSAVE bit_FXSR /* for gcc compat */
+#define bit_FXSAVE bit_FXSR /* for gcc compat */
#define bit_SSE 0x02000000
#define bit_SSE2 0x04000000
#define bit_SS 0x08000000
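A sketch of how the new gcc-compatible aliases get used together with this header's __get_cpuid helper; have_sse41 is a hypothetical name:
#include <cpuid.h>
int have_sse41(void) {
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return 0;                     /* CPUID leaf 1 unavailable */
  return (ecx & bit_SSE4_1) != 0; /* alias of bit_SSE41 added above */
}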
diff --git a/contrib/llvm/tools/clang/lib/Headers/cuda_builtin_vars.h b/contrib/llvm/tools/clang/lib/Headers/cuda_builtin_vars.h
index 901356b3d5ce..6f5eb9c78d85 100644
--- a/contrib/llvm/tools/clang/lib/Headers/cuda_builtin_vars.h
+++ b/contrib/llvm/tools/clang/lib/Headers/cuda_builtin_vars.h
@@ -24,16 +24,20 @@
#ifndef __CUDA_BUILTIN_VARS_H
#define __CUDA_BUILTIN_VARS_H
+// Forward declarations from vector_types.h.
+struct uint3;
+struct dim3;
+
// The file implements built-in CUDA variables using __declspec(property).
// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
// All read accesses of built-in variable fields get converted into calls to a
-// getter function which in turn would call appropriate builtin to fetch the
+// getter function which in turn calls the appropriate builtin to fetch the
// value.
//
// Example:
// int x = threadIdx.x;
// IR output:
-// %0 = call i32 @llvm.ptx.read.tid.x() #3
+// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
// PTX output:
// mov.u32 %r2, %tid.x;
@@ -60,33 +64,45 @@
__attribute__((device)) TypeName *operator&() const __DELETE
struct __cuda_builtin_threadIdx_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
+ // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
};
struct __cuda_builtin_blockIdx_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
+ // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
+ // uint3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator uint3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
};
struct __cuda_builtin_blockDim_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
+ // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
};
struct __cuda_builtin_gridDim_t {
- __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
- __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
- __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+ __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
+ __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
+ __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
+ // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
+ // dim3). This function is defined after we pull in vector_types.h.
+ __attribute__((device)) operator dim3() const;
private:
__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
};
diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
index cfc2c7161460..70d6d726110a 100644
--- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h
@@ -35,6 +35,11 @@ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));
+/* Unsigned types */
+typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
+typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+
/* We need an explicitly signed variant for char. Note that this shouldn't
* appear in the interface though. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));
@@ -54,7 +59,7 @@ _mm_add_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_add_pd(__m128d __a, __m128d __b)
{
- return __a + __b;
+ return (__m128d)((__v2df)__a + (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -67,7 +72,7 @@ _mm_sub_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sub_pd(__m128d __a, __m128d __b)
{
- return __a - __b;
+ return (__m128d)((__v2df)__a - (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -80,7 +85,7 @@ _mm_mul_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mul_pd(__m128d __a, __m128d __b)
{
- return __a * __b;
+ return (__m128d)((__v2df)__a * (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -93,325 +98,326 @@ _mm_div_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_div_pd(__m128d __a, __m128d __b)
{
- return __a / __b;
+ return (__m128d)((__v2df)__a / (__v2df)__b);
}
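With the casts in place, the packed double arithmetic above still reads naturally at the call site; a minimal sketch, assuming an SSE2 target:
#include <emmintrin.h>
void pd_arith_demo(double out[2]) {
  double a[2] = {1.0, 2.0}, b[2] = {4.0, 8.0};
  __m128d va = _mm_loadu_pd(a);
  __m128d vb = _mm_loadu_pd(b);
  /* (a + b) / b, elementwise: {5.0/4.0, 10.0/8.0} */
  _mm_storeu_pd(out, _mm_div_pd(_mm_add_pd(va, vb), vb));
}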
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sqrt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_sqrtsd(__b);
+ __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_sqrt_pd(__m128d __a)
{
- return __builtin_ia32_sqrtpd(__a);
+ return __builtin_ia32_sqrtpd((__v2df)__a);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_min_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_minsd(__a, __b);
+ return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_min_pd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_minpd(__a, __b);
+ return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_max_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_maxsd(__a, __b);
+ return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_max_pd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_maxpd(__a, __b);
+ return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_and_pd(__m128d __a, __m128d __b)
{
- return (__m128d)((__v4si)__a & (__v4si)__b);
+ return (__m128d)((__v4su)__a & (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_andnot_pd(__m128d __a, __m128d __b)
{
- return (__m128d)(~(__v4si)__a & (__v4si)__b);
+ return (__m128d)(~(__v4su)__a & (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_or_pd(__m128d __a, __m128d __b)
{
- return (__m128d)((__v4si)__a | (__v4si)__b);
+ return (__m128d)((__v4su)__a | (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_xor_pd(__m128d __a, __m128d __b)
{
- return (__m128d)((__v4si)__a ^ (__v4si)__b);
+ return (__m128d)((__v4su)__a ^ (__v4su)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpeq_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmplt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmple_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmplepd(__a, __b);
+ return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpgt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
+ return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpge_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmplepd(__b, __a);
+ return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpord_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpunord_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpneq_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnlt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnle_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpngt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
+ return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnge_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
+ return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
}
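The packed compares above return per-lane masks of all ones or all zeros, which is what makes mask-and-select idioms like the following sketch work; note its NaN behavior differs from _mm_min_pd:
#include <emmintrin.h>
static __m128d select_min(__m128d a, __m128d b) {
  __m128d lt = _mm_cmplt_pd(a, b);        /* all-ones lanes where a < b   */
  return _mm_or_pd(_mm_and_pd(lt, a),     /* keep a where the mask is set */
                   _mm_andnot_pd(lt, b)); /* keep b elsewhere             */
}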
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpeq_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmplt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmple_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmplesd(__a, __b);
+ return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpgt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
+ __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpge_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmplesd(__b, __a);
+ __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpord_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpunord_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpneq_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnlt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnle_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
+ return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpngt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
+ __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cmpnge_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
+ __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_comisdeq(__a, __b);
+ return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_comisdlt(__a, __b);
+ return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_comisdle(__a, __b);
+ return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_comisdgt(__a, __b);
+ return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_comisdge(__a, __b);
+ return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_comisdneq(__a, __b);
+ return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomieq_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_ucomisdeq(__a, __b);
+ return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomilt_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_ucomisdlt(__a, __b);
+ return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomile_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_ucomisdle(__a, __b);
+ return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomigt_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_ucomisdgt(__a, __b);
+ return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomige_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_ucomisdge(__a, __b);
+ return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomineq_sd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_ucomisdneq(__a, __b);
+ return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
}
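Unlike the packed compares, the comi/ucomi family above returns a plain int computed from the low lanes; a minimal sketch:
#include <emmintrin.h>
int scalar_less(double x, double y) {
  /* Compares only the low elements; 1 if x < y, else 0. */
  return _mm_comilt_sd(_mm_set_sd(x), _mm_set_sd(y));
}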
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpd_ps(__m128d __a)
{
- return __builtin_ia32_cvtpd2ps(__a);
+ return __builtin_ia32_cvtpd2ps((__v2df)__a);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtps_pd(__m128 __a)
{
- return __builtin_ia32_cvtps2pd(__a);
+ return (__m128d) __builtin_convertvector(
+ __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtepi32_pd(__m128i __a)
{
- return __builtin_ia32_cvtdq2pd((__v4si)__a);
+ return (__m128d) __builtin_convertvector(
+ __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtpd_epi32(__m128d __a)
{
- return __builtin_ia32_cvtpd2dq(__a);
+ return __builtin_ia32_cvtpd2dq((__v2df)__a);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtsd_si32(__m128d __a)
{
- return __builtin_ia32_cvtsd2si(__a);
+ return __builtin_ia32_cvtsd2si((__v2df)__a);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtsd_ss(__m128 __a, __m128d __b)
{
- __a[0] = __b[0];
- return __a;
+ return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -431,25 +437,25 @@ _mm_cvtss_sd(__m128d __a, __m128 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvttpd_epi32(__m128d __a)
{
- return (__m128i)__builtin_ia32_cvttpd2dq(__a);
+ return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_si32(__m128d __a)
{
- return __a[0];
+ return __builtin_ia32_cvttsd2si((__v2df)__a);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtpd_pi32(__m128d __a)
{
- return (__m64)__builtin_ia32_cvtpd2pi(__a);
+ return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvttpd_pi32(__m128d __a)
{
- return (__m64)__builtin_ia32_cvttpd2pi(__a);
+ return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -486,7 +492,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_loadr_pd(double const *__dp)
{
__m128d __u = *(__m128d*)__dp;
- return __builtin_shufflevector(__u, __u, 1, 0);
+ return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
@@ -498,6 +504,16 @@ _mm_loadu_pd(double const *__dp)
return ((struct __loadu_pd*)__dp)->__v;
}
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_loadu_si64(void const *__a)
+{
+ struct __loadu_si64 {
+ long long __v;
+ } __attribute__((__packed__, __may_alias__));
+ long long __u = ((struct __loadu_si64*)__a)->__v;
+ return (__m128i){__u, 0L};
+}
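A usage sketch for the new _mm_loadu_si64: it reads 8 bytes from an address with no alignment requirement and zeroes the upper half of the result:
#include <emmintrin.h>
__m128i load_low_qword(const unsigned char *p) {
  return _mm_loadu_si64(p); /* low 64 bits from p, high 64 bits zero */
}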
+
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_load_sd(double const *__dp)
{
@@ -529,7 +545,7 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_undefined_pd()
+_mm_undefined_pd(void)
{
return (__m128d)__builtin_ia32_undef128();
}
@@ -580,31 +596,37 @@ _mm_store_sd(double *__dp, __m128d __a)
}
static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+ *(__m128d*)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
_mm_store1_pd(double *__dp, __m128d __a)
{
- struct __mm_store1_pd_struct {
- double __u[2];
- } __attribute__((__packed__, __may_alias__));
- ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
- ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+ __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+ _mm_store_pd(__dp, __a);
}
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
{
- *(__m128d *)__dp = __a;
+ return _mm_store1_pd(__dp, __a);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_pd(double *__dp, __m128d __a)
{
- __builtin_ia32_storeupd(__dp, __a);
+ struct __storeu_pd {
+ __m128d __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_pd*)__dp)->__v = __a;
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storer_pd(double *__dp, __m128d __a)
{
- __a = __builtin_shufflevector(__a, __a, 1, 0);
+ __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
*(__m128d *)__dp = __a;
}
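A sketch contrasting the three aligned-store layouts touched above; out must be 16-byte aligned for these calls:
#include <emmintrin.h>
void store_demo(double out[2]) { /* out assumed 16-byte aligned */
  __m128d v = _mm_set_pd(2.0, 1.0); /* element 0 = 1.0, element 1 = 2.0 */
  _mm_store_pd(out, v);  /* out = {1.0, 2.0} */
  _mm_store1_pd(out, v); /* out = {1.0, 1.0}: low element splat */
  _mm_storer_pd(out, v); /* out = {2.0, 1.0}: reversed order    */
}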
@@ -629,31 +651,31 @@ _mm_storel_pd(double *__dp, __m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi8(__m128i __a, __m128i __b)
{
- return (__m128i)((__v16qi)__a + (__v16qi)__b);
+ return (__m128i)((__v16qu)__a + (__v16qu)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)((__v8hi)__a + (__v8hi)__b);
+ return (__m128i)((__v8hu)__a + (__v8hu)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi32(__m128i __a, __m128i __b)
{
- return (__m128i)((__v4si)__a + (__v4si)__b);
+ return (__m128i)((__v4su)__a + (__v4su)__b);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_si64(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_paddq(__a, __b);
+ return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_add_epi64(__m128i __a, __m128i __b)
{
- return __a + __b;
+ return (__m128i)((__v2du)__a + (__v2du)__b);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -734,268 +756,792 @@ _mm_mulhi_epu16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Multiplies the corresponding elements of two [8 x short] vectors and
+/// returns a vector containing the low-order 16 bits of each 32-bit product
+/// in the corresponding element.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPMULLW / PMULLW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 128-bit integer vector containing one of the source operands.
+/// \returns A 128-bit integer vector containing the products of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)((__v8hi)__a * (__v8hi)__b);
-}
-
+ return (__m128i)((__v8hu)__a * (__v8hu)__b);
+}
+
+/// \brief Multiplies 32-bit unsigned integer values contained in the lower bits
+/// of the two 64-bit integer vectors and returns the 64-bit unsigned
+/// product.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMULUDQ instruction.
+///
+/// \param __a
+/// A 64-bit integer containing one of the source operands.
+/// \param __b
+/// A 64-bit integer containing one of the source operands.
+/// \returns A 64-bit integer vector containing the product of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mul_su32(__m64 __a, __m64 __b)
{
return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
}
+/// \brief Multiplies 32-bit unsigned integer values contained in the lower
+/// bits of the corresponding elements of two [2 x i64] vectors, and returns
+/// the 64-bit products in the corresponding elements of a [2 x i64] vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPMULUDQ / PMULUDQ instruction.
+///
+/// \param __a
+/// A [2 x i64] vector containing one of the source operands.
+/// \param __b
+/// A [2 x i64] vector containing one of the source operands.
+/// \returns A [2 x i64] vector containing the product of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mul_epu32(__m128i __a, __m128i __b)
{
return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
}
+/// \brief Computes the absolute differences of corresponding 8-bit integer
+/// values in two 128-bit vectors. Sums the first 8 absolute differences, and
+///    separately sums the second 8 absolute differences. Packs these two
+/// unsigned 16-bit integer sums into the upper and lower elements of a
+/// [2 x i64] vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSADBW / PSADBW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 128-bit integer vector containing one of the source operands.
+/// \returns A [2 x i64] vector containing the sums of the sets of absolute
+/// differences between both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sad_epu8(__m128i __a, __m128i __b)
{
return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
}
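psadbw leaves one partial sum per 64-bit half, so a full 16-byte sum of absolute differences folds the two halves together; a minimal sketch, with sad16 a hypothetical name:
#include <emmintrin.h>
unsigned int sad16(__m128i a, __m128i b) {
  __m128i s = _mm_sad_epu8(a, b); /* two partial sums, one per 64-bit half */
  return (unsigned int)_mm_cvtsi128_si32(s) +
         (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(s, 8));
}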
+/// \brief Subtracts the corresponding 8-bit integer values in the operands.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBB / PSUBB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the differences of the values
+/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi8(__m128i __a, __m128i __b)
{
- return (__m128i)((__v16qi)__a - (__v16qi)__b);
+ return (__m128i)((__v16qu)__a - (__v16qu)__b);
}
+/// \brief Subtracts the corresponding 16-bit integer values in the operands.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBW / PSUBW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the differences of the values
+/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi16(__m128i __a, __m128i __b)
{
- return (__m128i)((__v8hi)__a - (__v8hi)__b);
+ return (__m128i)((__v8hu)__a - (__v8hu)__b);
}
+/// \brief Subtracts the corresponding 32-bit integer values in the operands.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBD / PSUBD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the differences of the values
+/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi32(__m128i __a, __m128i __b)
{
- return (__m128i)((__v4si)__a - (__v4si)__b);
-}
-
+ return (__m128i)((__v4su)__a - (__v4su)__b);
+}
+
+/// \brief Subtracts signed or unsigned 64-bit integer values and writes the
+/// difference to the corresponding bits in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBQ instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the minuend.
+/// \param __b
+/// A 64-bit integer vector containing the subtrahend.
+/// \returns A 64-bit integer vector containing the difference of the values in
+/// the operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_si64(__m64 __a, __m64 __b)
{
- return (__m64)__builtin_ia32_psubq(__a, __b);
+ return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
}
+/// \brief Subtracts the corresponding elements of two [2 x i64] vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBQ / PSUBQ instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the differences of the values
+/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sub_epi64(__m128i __a, __m128i __b)
{
- return __a - __b;
-}
-
+ return (__m128i)((__v2du)__a - (__v2du)__b);
+}
+
+/// \brief Subtracts corresponding 8-bit signed integer values in the input and
+/// returns the differences in the corresponding bytes in the destination.
+/// Differences greater than 7Fh are saturated to 7Fh, and differences less
+/// than 80h are saturated to 80h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBSB / PSUBSB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the differences of the values
+/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
}
+/// \brief Subtracts corresponding 16-bit signed integer values in the input and
+///    returns the differences in the corresponding words in the destination.
+///    Differences greater than 7FFFh are saturated to 7FFFh, and differences less
+/// than 8000h are saturated to 8000h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBSW / PSUBSW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the differences of the values
+/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Subtracts corresponding 8-bit unsigned integer values in the input
+/// and returns the differences in the corresponding bytes in the
+/// destination. Differences less than 00h are saturated to 00h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBUSB / PSUBUSB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the unsigned integer
+/// differences of the values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epu8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
}
+/// \brief Subtracts corresponding 16-bit unsigned integer values in the input
+///    and returns the differences in the corresponding words in the
+/// destination. Differences less than 0000h are saturated to 0000h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSUBUSW / PSUBUSW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the minuends.
+/// \param __b
+/// A 128-bit integer vector containing the subtrahends.
+/// \returns A 128-bit integer vector containing the unsigned integer
+/// differences of the values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_subs_epu16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
}
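A small sketch of what the unsigned saturation above buys: the subtraction clamps at zero instead of wrapping:
#include <emmintrin.h>
__m128i sat_sub_demo(void) {
  __m128i a = _mm_set1_epi8(10);
  __m128i b = _mm_set1_epi8(25);
  /* Every byte clamps to 0 rather than wrapping to 241. */
  return _mm_subs_epu8(a, b);
}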
+/// \brief Performs a bitwise AND of two 128-bit integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPAND / PAND instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 128-bit integer vector containing one of the source operands.
+/// \returns A 128-bit integer vector containing the bitwise AND of the values
+/// in both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_and_si128(__m128i __a, __m128i __b)
{
- return __a & __b;
+ return (__m128i)((__v2du)__a & (__v2du)__b);
}
+/// \brief Performs a bitwise AND of two 128-bit integer vectors, using the
+/// one's complement of the values contained in the first source operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPANDN / PANDN instruction.
+///
+/// \param __a
+/// A 128-bit vector containing the left source operand. The one's complement
+/// of this value is used in the bitwise AND.
+/// \param __b
+/// A 128-bit vector containing the right source operand.
+/// \returns A 128-bit integer vector containing the bitwise AND of the one's
+/// complement of the first operand and the values in the second operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_andnot_si128(__m128i __a, __m128i __b)
{
- return ~__a & __b;
-}
-
+ return (__m128i)(~(__v2du)__a & (__v2du)__b);
+}
+/// \brief Performs a bitwise OR of two 128-bit integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPOR / POR instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 128-bit integer vector containing one of the source operands.
+/// \returns A 128-bit integer vector containing the bitwise OR of the values
+/// in both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_or_si128(__m128i __a, __m128i __b)
{
- return __a | __b;
+ return (__m128i)((__v2du)__a | (__v2du)__b);
}
+/// \brief Performs a bitwise exclusive OR of two 128-bit integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPXOR / PXOR instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 128-bit integer vector containing one of the source operands.
+/// \returns A 128-bit integer vector containing the bitwise exclusive OR of the
+/// values in both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_xor_si128(__m128i __a, __m128i __b)
{
- return __a ^ __b;
-}
-
-#define _mm_slli_si128(a, imm) __extension__ ({ \
- (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(), \
- (__v16qi)(__m128i)(a), \
- ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
- ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); })
+ return (__m128i)((__v2du)__a ^ (__v2du)__b);
+}
+
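The andnot form above is the usual way to clear bits under a mask; a one-line sketch:
#include <emmintrin.h>
static __m128i clear_masked(__m128i v, __m128i mask) {
  return _mm_andnot_si128(mask, v); /* ~mask & v: drop the masked bits */
}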
+/// \brief Left-shifts the 128-bit integer vector operand by the specified
+/// number of bytes. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_slli_si128(__m128i a, const int imm);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPSLLDQ / PSLLDQ instruction.
+///
+/// \param a
+/// A 128-bit integer vector containing the source operand.
+/// \param imm
+/// An immediate value specifying the number of bytes to left-shift
+/// operand a.
+/// \returns A 128-bit integer vector containing the left-shifted value.
+#define _mm_slli_si128(a, imm) __extension__ ({ \
+ (__m128i)__builtin_shufflevector( \
+ (__v16qi)_mm_setzero_si128(), \
+ (__v16qi)(__m128i)(a), \
+ ((char)(imm)&0xF0) ? 0 : 16 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 1 : 17 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 2 : 18 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 3 : 19 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 4 : 20 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 5 : 21 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 6 : 22 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 7 : 23 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 8 : 24 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 9 : 25 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 10 : 26 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 11 : 27 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 12 : 28 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 13 : 29 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 14 : 30 - (char)(imm), \
+ ((char)(imm)&0xF0) ? 15 : 31 - (char)(imm)); })
#define _mm_bslli_si128(a, imm) \
_mm_slli_si128((a), (imm))
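The shift count for the byte-shift macro above must be a compile-time constant, since it expands into shufflevector lane indices; a minimal sketch:
#include <emmintrin.h>
__m128i shift_left_two_bytes(__m128i v) {
  return _mm_slli_si128(v, 2); /* whole-register shift left by 2 bytes */
}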
+/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSLLW / PSLLW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to left-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi16(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
}
+/// \brief Left-shifts each 16-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSLLW / PSLLW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to left-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi16(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
}
+/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSLLD / PSLLD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to left-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi32(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
}
+/// \brief Left-shifts each 32-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSLLD / PSLLD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to left-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi32(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
}
+/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSLLQ / PSLLQ instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to left-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_slli_epi64(__m128i __a, int __count)
{
- return __builtin_ia32_psllqi128(__a, __count);
+ return __builtin_ia32_psllqi128((__v2di)__a, __count);
}
+/// \brief Left-shifts each 64-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. Low-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSLLQ / PSLLQ instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to left-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the left-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sll_epi64(__m128i __a, __m128i __count)
{
- return __builtin_ia32_psllq128(__a, __count);
+ return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
}
+/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. High-order bits are filled with the sign
+/// bit of the initial value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRAW / PSRAW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to right-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srai_epi16(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
}
+/// \brief Right-shifts each 16-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. High-order bits are filled with the sign
+/// bit of the initial value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRAW / PSRAW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to right-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sra_epi16(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
}
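The immediate and vector-count forms differ only in how the shift count is supplied; for signed data the arithmetic/logical distinction is what matters. A short sketch of the difference:

__m128i v = _mm_set1_epi16(-16);     /* 0xFFF0 in each lane */
__m128i a = _mm_srai_epi16(v, 2);    /* arithmetic: sign-filled, each lane -4 */
__m128i l = _mm_srli_epi16(v, 2);    /* logical: zero-filled, each lane 0x3FFC */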
+/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. High-order bits are filled with the sign
+/// bit of the initial value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRAD / PSRAD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to right-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srai_epi32(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
}
+/// \brief Right-shifts each 32-bit value in the 128-bit integer vector operand
+/// by the specified number of bits. High-order bits are filled with the sign
+/// bit of the initial value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRAD / PSRAD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to right-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sra_epi32(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
}
-#define _mm_srli_si128(a, imm) __extension__ ({ \
- (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a), \
- (__v16qi)_mm_setzero_si128(), \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
- ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); })
+/// \brief Right-shifts the 128-bit integer vector operand by the specified
+/// number of bytes. High-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_srli_si128(__m128i a, const int imm);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPSRLDQ / PSRLDQ instruction.
+///
+/// \param a
+/// A 128-bit integer vector containing the source operand.
+/// \param imm
+/// An immediate value specifying the number of bytes to right-shift operand
+/// a.
+/// \returns A 128-bit integer vector containing the right-shifted value.
+#define _mm_srli_si128(a, imm) __extension__ ({ \
+ (__m128i)__builtin_shufflevector( \
+ (__v16qi)(__m128i)(a), \
+ (__v16qi)_mm_setzero_si128(), \
+ ((char)(imm)&0xF0) ? 16 : (char)(imm) + 0, \
+ ((char)(imm)&0xF0) ? 17 : (char)(imm) + 1, \
+ ((char)(imm)&0xF0) ? 18 : (char)(imm) + 2, \
+ ((char)(imm)&0xF0) ? 19 : (char)(imm) + 3, \
+ ((char)(imm)&0xF0) ? 20 : (char)(imm) + 4, \
+ ((char)(imm)&0xF0) ? 21 : (char)(imm) + 5, \
+ ((char)(imm)&0xF0) ? 22 : (char)(imm) + 6, \
+ ((char)(imm)&0xF0) ? 23 : (char)(imm) + 7, \
+ ((char)(imm)&0xF0) ? 24 : (char)(imm) + 8, \
+ ((char)(imm)&0xF0) ? 25 : (char)(imm) + 9, \
+ ((char)(imm)&0xF0) ? 26 : (char)(imm) + 10, \
+ ((char)(imm)&0xF0) ? 27 : (char)(imm) + 11, \
+ ((char)(imm)&0xF0) ? 28 : (char)(imm) + 12, \
+ ((char)(imm)&0xF0) ? 29 : (char)(imm) + 13, \
+ ((char)(imm)&0xF0) ? 30 : (char)(imm) + 14, \
+ ((char)(imm)&0xF0) ? 31 : (char)(imm) + 15); })
#define _mm_bsrli_si128(a, imm) \
_mm_srli_si128((a), (imm))
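The (char)(imm) & 0xF0 test routes out-of-range counts to the zero vector: shufflevector indices 16-31 select lanes of _mm_setzero_si128(), so any byte count above 15 yields all zeros. A short usage sketch:

__m128i b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                         7, 6, 5, 4, 3, 2, 1, 0);
__m128i r = _mm_srli_si128(b, 4);    /* byte i of r == byte i+4 of b;
                                        the top 4 bytes become zero */
__m128i z = _mm_srli_si128(b, 16);   /* count > 15: all bytes are zero */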
+/// \brief Right-shifts each of the 16-bit values in the 128-bit integer
+/// vector operand by the specified number of bits. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRLW / PSRLW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to right-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi16(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
}
+/// \brief Right-shifts each of the 16-bit values in the 128-bit integer
+/// vector operand by the specified number of bits. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRLW / PSRLW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to right-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi16(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
}
+/// \brief Right-shifts each of the 32-bit values in the 128-bit integer
+/// vector operand by the specified number of bits. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRLD / PSRLD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to right-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi32(__m128i __a, int __count)
{
return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
}
+/// \brief Right-shifts each of the 32-bit values in the 128-bit integer
+/// vector operand by the specified number of bits. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRLD / PSRLD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to right-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi32(__m128i __a, __m128i __count)
{
return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
}
+/// \brief Right-shifts each of the 64-bit values in the 128-bit integer
+/// vector operand by the specified number of bits. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRLQ / PSRLQ instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// An integer value specifying the number of bits to right-shift each value
+/// in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srli_epi64(__m128i __a, int __count)
{
- return __builtin_ia32_psrlqi128(__a, __count);
+ return __builtin_ia32_psrlqi128((__v2di)__a, __count);
}
+/// \brief Right-shifts each of the 64-bit values in the 128-bit integer
+/// vector operand by the specified number of bits. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSRLQ / PSRLQ instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the source operand.
+/// \param __count
+/// A 128-bit integer vector in which bits [63:0] specify the number of bits
+/// to right-shift each value in operand __a.
+/// \returns A 128-bit integer vector containing the right-shifted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_srl_epi64(__m128i __a, __m128i __count)
{
- return __builtin_ia32_psrlq128(__a, __count);
+ return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
}
+/// \brief Compares each of the corresponding 8-bit values of the 128-bit
+/// integer vectors for equality. Each comparison yields 0h for false, FFh
+/// for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPEQB / PCMPEQB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpeq_epi8(__m128i __a, __m128i __b)
{
return (__m128i)((__v16qi)__a == (__v16qi)__b);
}
+/// \brief Compares each of the corresponding 16-bit values of the 128-bit
+/// integer vectors for equality. Each comparison yields 0h for false, FFFFh
+/// for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPEQW / PCMPEQW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpeq_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a == (__v8hi)__b);
}
+/// \brief Compares each of the corresponding 32-bit values of the 128-bit
+/// integer vectors for equality. Each comparison yields 0h for false,
+/// FFFFFFFFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPEQD / PCMPEQD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpeq_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4si)__a == (__v4si)__b);
}
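The per-lane masks (all ones for true, all zeros for false) combine naturally with _mm_movemask_epi8 to produce a scalar bitmask. A minimal sketch:

__m128i a = _mm_set_epi32(1, 2, 3, 4);   /* arguments run high lane to low */
__m128i b = _mm_set_epi32(1, 9, 3, 9);
__m128i m = _mm_cmpeq_epi32(a, b);       /* 0xFFFFFFFF where lanes match */
int bits  = _mm_movemask_epi8(m);        /* 0xF0F0: lanes 1 and 3 match */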
+/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
+/// integer vectors to determine if the values in the first operand are
+/// greater than those in the second operand. Each comparison yields 0h for
+/// false, FFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPGTB / PCMPGTB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi8(__m128i __a, __m128i __b)
{
@@ -1004,30 +1550,100 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
return (__m128i)((__v16qs)__a > (__v16qs)__b);
}
+/// \brief Compares each of the corresponding signed 16-bit values of the
+/// 128-bit integer vectors to determine if the values in the first operand
+/// are greater than those in the second operand. Each comparison yields 0h
+/// for false, FFFFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPGTW / PCMPGTW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi16(__m128i __a, __m128i __b)
{
return (__m128i)((__v8hi)__a > (__v8hi)__b);
}
+/// \brief Compares each of the corresponding signed 32-bit values of the
+/// 128-bit integer vectors to determine if the values in the first operand
+/// are greater than those in the second operand. Each comparison yields 0h
+/// for false, FFFFFFFFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPGTD / PCMPGTD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmpgt_epi32(__m128i __a, __m128i __b)
{
return (__m128i)((__v4si)__a > (__v4si)__b);
}
+/// \brief Compares each of the corresponding signed 8-bit values of the 128-bit
+/// integer vectors to determine if the values in the first operand are less
+/// than those in the second operand. Each comparison yields 0h for false,
+/// FFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPGTB / PCMPGTB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmplt_epi8(__m128i __a, __m128i __b)
{
return _mm_cmpgt_epi8(__b, __a);
}
+/// \brief Compares each of the corresponding signed 16-bit values of the
+/// 128-bit integer vectors to determine if the values in the first operand
+/// are less than those in the second operand. Each comparison yields 0h for
+/// false, FFFFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPGTW / PCMPGTW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmplt_epi16(__m128i __a, __m128i __b)
{
return _mm_cmpgt_epi16(__b, __a);
}
+/// \brief Compares each of the corresponding signed 32-bit values of the
+/// 128-bit integer vectors to determine if the values in the first operand
+/// are less than those in the second operand. Each comparison yields 0h for
+/// false, FFFFFFFFh for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPCMPGTD / PCMPGTD instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \param __b
+/// A 128-bit integer vector.
+/// \returns A 128-bit integer vector containing the comparison results.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmplt_epi32(__m128i __a, __m128i __b)
{
@@ -1035,6 +1651,23 @@ _mm_cmplt_epi32(__m128i __a, __m128i __b)
}
#ifdef __x86_64__
+/// \brief Converts a 64-bit signed integer value from the second operand
+/// into a double-precision value and returns it in the lower element of a
+/// 128-bit vector of [2 x double]; the upper element of the returned vector
+/// is copied from the upper element of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSI2SD / CVTSI2SD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are
+/// copied to the upper 64 bits of the destination.
+/// \param __b
+/// A 64-bit signed integer operand containing the value to be converted.
+/// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the
+/// converted value of the second operand. The upper 64 bits are copied from
+/// the upper 64 bits of the first operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtsi64_sd(__m128d __a, long long __b)
{
@@ -1042,37 +1675,98 @@ _mm_cvtsi64_sd(__m128d __a, long long __b)
return __a;
}
+/// \brief Converts the first (lower) element of a vector of [2 x double] into a
+/// 64-bit signed integer value, according to the current rounding mode.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSD2SI / CVTSD2SI instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
+/// conversion.
+/// \returns A 64-bit signed integer containing the converted value.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtsd_si64(__m128d __a)
{
- return __builtin_ia32_cvtsd2si64(__a);
+ return __builtin_ia32_cvtsd2si64((__v2df)__a);
}
+/// \brief Converts the first (lower) element of a vector of [2 x double] into a
+/// 64-bit signed integer value, truncating the result when it is inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTTSD2SI / CVTTSD2SI instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the
+/// conversion.
+/// \returns A 64-bit signed integer containing the converted value.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_si64(__m128d __a)
{
- return __a[0];
+ return __builtin_ia32_cvttsd2si64((__v2df)__a);
}
#endif
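A short sketch of the rounding difference between the two scalar conversions (x86-64 only):

__m128d d = _mm_set_sd(2.7);
long long r = _mm_cvtsd_si64(d);    /* 3 under the default rounding mode */
long long t = _mm_cvttsd_si64(d);   /* 2: truncated toward zero */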
+/// \brief Converts a vector of [4 x i32] into a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTDQ2PS / CVTDQ2PS instruction.
+///
+/// \param __a
+/// A 128-bit integer vector.
+/// \returns A 128-bit vector of [4 x float] containing the converted values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtepi32_ps(__m128i __a)
{
return __builtin_ia32_cvtdq2ps((__v4si)__a);
}
+/// \brief Converts a vector of [4 x float] into a vector of [4 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTPS2DQ / CVTPS2DQ instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit integer vector of [4 x i32] containing the converted
+/// values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtps_epi32(__m128 __a)
{
- return (__m128i)__builtin_ia32_cvtps2dq(__a);
+ return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
}
+/// \brief Converts a vector of [4 x float] into a vector of [4 x i32],
+/// truncating the result when it is inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTTPS2DQ / CVTTPS2DQ instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x i32] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvttps_epi32(__m128 __a)
{
- return (__m128i)__builtin_ia32_cvttps2dq(__a);
+ return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
}
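The packed conversions follow the same pattern; a sketch contrasting round-to-nearest-even with truncation:

__m128 f = _mm_set_ps(-2.5f, 2.5f, -1.5f, 1.5f);   /* high lane to low */
__m128i n = _mm_cvtps_epi32(f);    /* nearest-even, lanes 0..3: 2, -2, 2, -2 */
__m128i t = _mm_cvttps_epi32(f);   /* toward zero,  lanes 0..3: 1, -1, 2, -2 */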
+/// \brief Returns a vector of [4 x i32] where the lowest element is the input
+/// operand and the remaining elements are zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
+///
+/// \param __a
+/// A 32-bit signed integer operand.
+/// \returns A 128-bit vector of [4 x i32].
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtsi32_si128(int __a)
{
@@ -1080,6 +1774,16 @@ _mm_cvtsi32_si128(int __a)
}
#ifdef __x86_64__
+/// \brief Returns a vector of [2 x i64] where the lower element is the input
+/// operand and the upper element is zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
+///
+/// \param __a
+/// A 64-bit signed integer operand containing the value to be converted.
+/// \returns A 128-bit vector of [2 x i64] containing the converted value.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtsi64_si128(long long __a)
{
@@ -1087,6 +1791,17 @@ _mm_cvtsi64_si128(long long __a)
}
#endif
+/// \brief Moves the least significant 32 bits of a vector of [4 x i32] to a
+/// 32-bit signed integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
+///
+/// \param __a
+/// A vector of [4 x i32]. The least significant 32 bits are moved to the
+/// destination.
+/// \returns A 32-bit signed integer containing the moved value.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtsi128_si32(__m128i __a)
{
@@ -1095,6 +1810,17 @@ _mm_cvtsi128_si32(__m128i __a)
}
#ifdef __x86_64__
+/// \brief Moves the least significant 64 bits of a vector of [2 x i64] to a
+/// 64-bit signed integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
+///
+/// \param __a
+/// A vector of [2 x i64]. The least significant 64 bits are moved to the
+/// destination.
+/// \returns A 64-bit signed integer containing the moved value.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtsi128_si64(__m128i __a)
{
@@ -1102,12 +1828,32 @@ _mm_cvtsi128_si64(__m128i __a)
}
#endif
+/// \brief Moves packed integer values from an aligned 128-bit memory location
+/// to elements in a 128-bit integer vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVDQA / MOVDQA instruction.
+///
+/// \param __p
+/// An aligned pointer to a memory location containing integer values.
+/// \returns A 128-bit integer vector containing the moved values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_load_si128(__m128i const *__p)
{
return *__p;
}
+/// \brief Moves packed integer values from an unaligned 128-bit memory location
+/// to elements in a 128-bit integer vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVDQU / MOVDQU instruction.
+///
+/// \param __p
+/// A pointer to a memory location containing integer values.
+/// \returns A 128-bit integer vector containing the moved values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadu_si128(__m128i const *__p)
{
@@ -1117,6 +1863,18 @@ _mm_loadu_si128(__m128i const *__p)
return ((struct __loadu_si128*)__p)->__v;
}
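The packed, may_alias struct is how the header expresses an unaligned, aliasing-safe load without undefined behavior; the compiler lowers the member access to an unaligned vector move. The same idiom in isolation (a sketch with illustrative names):

static inline unsigned long long load_u64_unaligned(const void *p) {
  struct u64_un { unsigned long long v; }
      __attribute__((__packed__, __may_alias__));
  return ((const struct u64_un *)p)->v;   /* emitted as an unaligned load */
}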
+/// \brief Returns a vector of [2 x i64] where the lower element is loaded
+/// from the specified 64-bit memory location and the upper element is zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / MOVQ instruction.
+///
+/// \param __p
+/// A pointer to a 64-bit memory location. Bits [63:0] at this location are
+/// written to bits [63:0] of the destination.
+/// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the
+/// moved value. The higher order bits are cleared.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_loadl_epi64(__m128i const *__p)
{
@@ -1126,66 +1884,270 @@ _mm_loadl_epi64(__m128i const *__p)
return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
}
+/// \brief Generates a 128-bit vector of [4 x i32] with unspecified content.
+/// This could be used as an argument to another intrinsic function where the
+/// argument is required but the value is not actually used.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic has no corresponding instruction.
+///
+/// \returns A 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_undefined_si128()
+_mm_undefined_si128(void)
{
return (__m128i)__builtin_ia32_undef128();
}
+/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
+/// the specified 64-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __q1
+/// A 64-bit integer value used to initialize the upper 64 bits of the
+/// destination vector of [2 x i64].
+/// \param __q0
+/// A 64-bit integer value used to initialize the lower 64 bits of the
+/// destination vector of [2 x i64].
+/// \returns An initialized 128-bit vector of [2 x i64] containing the values
+/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi64x(long long __q1, long long __q0)
{
return (__m128i){ __q0, __q1 };
}
+/// \brief Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
+/// the specified 64-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __q1
+/// A 64-bit integer value used to initialize the upper 64 bits of the
+/// destination vector of [2 x i64].
+/// \param __q0
+/// A 64-bit integer value used to initialize the lower 64 bits of the
+/// destination vector of [2 x i64].
+/// \returns An initialized 128-bit vector of [2 x i64] containing the values
+/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi64(__m64 __q1, __m64 __q0)
{
return (__m128i){ (long long)__q0, (long long)__q1 };
}
+/// \brief Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
+/// the specified 32-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __i3
+/// A 32-bit integer value used to initialize bits [127:96] of the
+/// destination vector.
+/// \param __i2
+/// A 32-bit integer value used to initialize bits [95:64] of the destination
+/// vector.
+/// \param __i1
+/// A 32-bit integer value used to initialize bits [63:32] of the destination
+/// vector.
+/// \param __i0
+/// A 32-bit integer value used to initialize bits [31:0] of the destination
+/// vector.
+/// \returns An initialized 128-bit vector of [4 x i32] containing the values
+/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
{
return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
}
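Note the argument order: _mm_set_epi32 and its siblings take elements from the most significant down to the least significant, so the last argument lands in lane 0. A sketch:

__m128i v = _mm_set_epi32(3, 2, 1, 0);   /* lane 0 == 0, lane 3 == 3 */
int out[4];
_mm_storeu_si128((__m128i *)out, v);     /* out == {0, 1, 2, 3} */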
+/// \brief Initializes the 16-bit values in a 128-bit vector of [8 x i16] with
+/// the specified 16-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __w7
+/// A 16-bit integer value used to initialize bits [127:112] of the
+/// destination vector.
+/// \param __w6
+/// A 16-bit integer value used to initialize bits [111:96] of the
+/// destination vector.
+/// \param __w5
+/// A 16-bit integer value used to initialize bits [95:80] of the destination
+/// vector.
+/// \param __w4
+/// A 16-bit integer value used to initialize bits [79:64] of the destination
+/// vector.
+/// \param __w3
+/// A 16-bit integer value used to initialize bits [63:48] of the destination
+/// vector.
+/// \param __w2
+/// A 16-bit integer value used to initialize bits [47:32] of the destination
+/// vector.
+/// \param __w1
+/// A 16-bit integer value used to initialize bits [31:16] of the destination
+/// vector.
+/// \param __w0
+/// A 16-bit integer value used to initialize bits [15:0] of the destination
+/// vector.
+/// \returns An initialized 128-bit vector of [8 x i16] containing the values
+/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
{
return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
}
+/// \brief Initializes the 8-bit values in a 128-bit vector of [16 x i8] with
+/// the specified 8-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __b15
+/// Initializes bits [127:120] of the destination vector.
+/// \param __b14
+/// Initializes bits [119:112] of the destination vector.
+/// \param __b13
+/// Initializes bits [111:104] of the destination vector.
+/// \param __b12
+/// Initializes bits [103:96] of the destination vector.
+/// \param __b11
+/// Initializes bits [95:88] of the destination vector.
+/// \param __b10
+/// Initializes bits [87:80] of the destination vector.
+/// \param __b9
+/// Initializes bits [79:72] of the destination vector.
+/// \param __b8
+/// Initializes bits [71:64] of the destination vector.
+/// \param __b7
+/// Initializes bits [63:56] of the destination vector.
+/// \param __b6
+/// Initializes bits [55:48] of the destination vector.
+/// \param __b5
+/// Initializes bits [47:40] of the destination vector.
+/// \param __b4
+/// Initializes bits [39:32] of the destination vector.
+/// \param __b3
+/// Initializes bits [31:24] of the destination vector.
+/// \param __b2
+/// Initializes bits [23:16] of the destination vector.
+/// \param __b1
+/// Initializes bits [15:8] of the destination vector.
+/// \param __b0
+/// Initializes bits [7:0] of the destination vector.
+/// \returns An initialized 128-bit vector of [16 x i8] containing the values
+/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
{
return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
}
+/// \brief Initializes both values in a 128-bit vector of [2 x i64] with the
+/// specified 64-bit integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __q
+/// A 64-bit integer value used to initialize the elements of the
+/// destination integer vector.
+/// \returns An initialized 128-bit integer vector of [2 x i64] with both
+/// elements containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi64x(long long __q)
{
return (__m128i){ __q, __q };
}
+/// \brief Initializes both values in a 128-bit vector of [2 x i64] with the
+/// specified 64-bit value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __q
+/// A 64-bit value used to initialize the elements of the destination integer
+/// vector.
+/// \returns An initialized 128-bit vector of [2 x i64] with all elements
+/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi64(__m64 __q)
{
return (__m128i){ (long long)__q, (long long)__q };
}
+/// \brief Initializes all values in a 128-bit vector of [4 x i32] with the
+/// specified 32-bit value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __i
+/// A 32-bit value used to initialize the elements of the destination integer
+/// vector.
+/// \returns An initialized 128-bit vector of [4 x i32] with all elements
+/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi32(int __i)
{
return (__m128i)(__v4si){ __i, __i, __i, __i };
}
+/// \brief Initializes all values in a 128-bit vector of [8 x i16] with the
+/// specified 16-bit value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __w
+/// A 16-bit value used to initialize the elements of the destination integer
+/// vector.
+/// \returns An initialized 128-bit vector of [8 x i16] with all elements
+/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi16(short __w)
{
return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
}
+/// \brief Initializes all values in a 128-bit vector of [16 x i8] with the
+/// specified 8-bit value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __b
+/// An 8-bit value used to initialize the elements of the destination integer
+/// vector.
+/// \returns An initialized 128-bit vector of [16 x i8] with all elements
+/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_set1_epi8(char __b)
{
@@ -1231,7 +2193,10 @@ _mm_store_si128(__m128i *__p, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_si128(__m128i *__p, __m128i __b)
{
- __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
+ struct __storeu_si128 {
+ __m128i __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si128*)__p)->__v = __b;
}
static __inline__ void __DEFAULT_FN_ATTRS
@@ -1252,13 +2217,13 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_pd(double *__p, __m128d __a)
{
- __builtin_ia32_movntpd(__p, __a);
+ __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_si128(__m128i *__p, __m128i __a)
{
- __builtin_ia32_movntdq(__p, __a);
+ __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
}
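__builtin_nontemporal_store produces the same streaming (cache-bypassing) stores as the retired IA-32 builtins. Callers still need a store fence before other agents read the data; a hedged sketch (fill_nt is an illustrative name):

#include <emmintrin.h>
#include <stddef.h>

static void fill_nt(__m128i *dst, __m128i v, size_t n) {
  for (size_t i = 0; i < n; ++i)
    _mm_stream_si128(&dst[i], v);   /* bypasses the cache hierarchy */
  _mm_sfence();                     /* order the streaming stores */
}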
static __inline__ void __DEFAULT_FN_ATTRS
@@ -1334,25 +2299,25 @@ _mm_movemask_epi8(__m128i __a)
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
- (__v4si)_mm_setzero_si128(), \
- (imm) & 0x3, ((imm) & 0xc) >> 2, \
- ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
+ (__v4si)_mm_undefined_si128(), \
+ ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
+ ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); })
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
- (__v8hi)_mm_setzero_si128(), \
- (imm) & 0x3, ((imm) & 0xc) >> 2, \
- ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
+ (__v8hi)_mm_undefined_si128(), \
+ ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
+ ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \
4, 5, 6, 7); })
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
(__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
- (__v8hi)_mm_setzero_si128(), \
+ (__v8hi)_mm_undefined_si128(), \
0, 1, 2, 3, \
- 4 + (((imm) & 0x03) >> 0), \
- 4 + (((imm) & 0x0c) >> 2), \
- 4 + (((imm) & 0x30) >> 4), \
- 4 + (((imm) & 0xc0) >> 6)); })
+ 4 + (((imm) >> 0) & 0x3), \
+ 4 + (((imm) >> 2) & 0x3), \
+ 4 + (((imm) >> 4) & 0x3), \
+ 4 + (((imm) >> 6) & 0x3)); })
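With the rewritten index expressions, each two-bit field of imm selects one source lane, which reads naturally with the _MM_SHUFFLE helper from xmmintrin.h. A sketch:

__m128i v = _mm_set_epi32(3, 2, 1, 0);
__m128i b = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));  /* broadcast lane 0 */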
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpackhi_epi8(__m128i __a, __m128i __b)
@@ -1375,7 +2340,7 @@ _mm_unpackhi_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpackhi_epi64(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
+ return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -1399,7 +2364,7 @@ _mm_unpacklo_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_unpacklo_epi64(__m128i __a, __m128i __b)
{
- return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
+ return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
}
static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -1417,30 +2382,31 @@ _mm_movpi64_epi64(__m64 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_move_epi64(__m128i __a)
{
- return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
+ return __builtin_shufflevector((__v2di)__a, (__m128i){ 0 }, 0, 2);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_unpackhi_pd(__m128d __a, __m128d __b)
{
- return __builtin_shufflevector(__a, __b, 1, 2+1);
+ return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_unpacklo_pd(__m128d __a, __m128d __b)
{
- return __builtin_shufflevector(__a, __b, 0, 2+0);
+ return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
}
static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_pd(__m128d __a)
{
- return __builtin_ia32_movmskpd(__a);
+ return __builtin_ia32_movmskpd((__v2df)__a);
}
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
(__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
- (i) & 1, (((i) & 2) >> 1) + 2); })
+ 0 + (((i) >> 0) & 0x1), \
+ 2 + (((i) >> 1) & 0x1)); })
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_castpd_ps(__m128d __a)
diff --git a/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h b/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
index c655d98ee9ab..415bf732fb9f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/f16cintrin.h
@@ -29,11 +29,90 @@
#define __F16CINTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
-#define _mm_cvtps_ph(a, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)); })
+/// \brief Converts a 16-bit half-precision float value into a 32-bit float
+/// value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTPH2PS instruction.
+///
+/// \param __a
+/// A 16-bit half-precision float value.
+/// \returns The converted 32-bit float value.
+static __inline float __DEFAULT_FN_ATTRS
+_cvtsh_ss(unsigned short __a)
+{
+ __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
+ __v4sf r = __builtin_ia32_vcvtph2ps(v);
+ return r[0];
+}
+
+/// \brief Converts a 32-bit single-precision float value to a 16-bit
+/// half-precision float value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// unsigned short _cvtss_sh(float a, const int imm);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTPS2PH instruction.
+///
+/// \param a
+/// A 32-bit single-precision float value to be converted to a 16-bit
+/// half-precision float value.
+/// \param imm
+/// An immediate value controlling rounding using bits [2:0]:
+/// 000: Nearest
+/// 001: Down
+/// 010: Up
+/// 011: Truncate
+/// 1XX: Use MXCSR.RC for rounding
+/// \returns The converted 16-bit half-precision float value.
+#define _cvtss_sh(a, imm) \
+ ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
+ (imm)))[0]))
+
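A round-trip sketch (assumes compilation with -mf16c):

unsigned short h = _cvtss_sh(3.14159f, 0);  /* 0 selects round-to-nearest */
float back = _cvtsh_ss(h);                  /* ~3.1406: half precision keeps
                                               about 3 decimal digits */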
+/// \brief Converts a 128-bit vector containing 32-bit float values into a
+/// 128-bit vector containing 16-bit half-precision float values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VCVTPS2PH instruction.
+///
+/// \param a
+/// A 128-bit vector containing 32-bit float values.
+/// \param imm
+/// An immediate value controlling rounding using bits [2:0]:
+/// 000: Nearest
+/// 001: Down
+/// 010: Up
+/// 011: Truncate
+/// 1XX: Use MXCSR.RC for rounding
+/// \returns A 128-bit vector containing the converted 16-bit half-precision
+/// float values; the converted values are stored in the lower 64 bits.
+#define _mm_cvtps_ph(a, imm) \
+ ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
+/// \brief Converts a 128-bit vector containing 16-bit half-precision float
+/// values into a 128-bit vector containing 32-bit float values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTPH2PS instruction.
+///
+/// \param __a
+/// A 128-bit vector containing 16-bit half-precision float values. The lower
+/// 64 bits are used in the conversion.
+/// \returns A 128-bit vector of [4 x float] containing converted float values.
static __inline __m128 __DEFAULT_FN_ATTRS
_mm_cvtph_ps(__m128i __a)
{
diff --git a/contrib/llvm/tools/clang/lib/Headers/float.h b/contrib/llvm/tools/clang/lib/Headers/float.h
index 238cf76b053c..a28269ebebbe 100644
--- a/contrib/llvm/tools/clang/lib/Headers/float.h
+++ b/contrib/llvm/tools/clang/lib/Headers/float.h
@@ -39,7 +39,9 @@
# undef FLT_MANT_DIG
# undef DBL_MANT_DIG
# undef LDBL_MANT_DIG
-# undef DECIMAL_DIG
+# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)
+# undef DECIMAL_DIG
+# endif
# undef FLT_DIG
# undef DBL_DIG
# undef LDBL_DIG
@@ -68,6 +70,9 @@
# undef FLT_TRUE_MIN
# undef DBL_TRUE_MIN
# undef LDBL_TRUE_MIN
+# undef FLT_DECIMAL_DIG
+# undef DBL_DECIMAL_DIG
+# undef LDBL_DECIMAL_DIG
# endif
#endif
@@ -81,7 +86,9 @@
#define DBL_MANT_DIG __DBL_MANT_DIG__
#define LDBL_MANT_DIG __LDBL_MANT_DIG__
-#define DECIMAL_DIG __DECIMAL_DIG__
+#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)
+# define DECIMAL_DIG __DECIMAL_DIG__
+#endif
#define FLT_DIG __FLT_DIG__
#define DBL_DIG __DBL_DIG__
@@ -119,6 +126,9 @@
# define FLT_TRUE_MIN __FLT_DENORM_MIN__
# define DBL_TRUE_MIN __DBL_DENORM_MIN__
# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
+# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
+# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
+# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
#endif
#endif /* __FLOAT_H */
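The *_DECIMAL_DIG macros (C11 5.2.4.2.2) give the digit counts needed for an exact round trip through decimal text. A sketch:

#include <float.h>
#include <stdio.h>

int main(void) {
  double x = 0.1;
  printf("%.*g\n", DBL_DECIMAL_DIG, x);   /* enough digits to read x back
                                             exactly */
  return 0;
}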
diff --git a/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h b/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
index f1178877b252..11aa8ceacf37 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fma4intrin.h
@@ -36,193 +36,193 @@
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
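The maddsub forms alternate per lane: even lanes compute a*b - c, odd lanes a*b + c. A sketch (assumes compilation with -mfma4):

__m128 a = _mm_set1_ps(1.0f), b = _mm_set1_ps(2.0f), c = _mm_set1_ps(0.5f);
__m128 r = _mm_maddsub_ps(a, b, c);   /* lanes 0..3: 1.5, 2.5, 1.5, 2.5 */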
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
index 114a14380ea0..0e2ef0b1716b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/fmaintrin.h
@@ -34,193 +34,193 @@
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
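These wrappers now pass the concrete vector types the builtins expect; the semantics are unchanged: a fused multiply-add rounds once, after the add. A sketch (assumes compilation with -mfma):

__m128 a = _mm_set1_ps(2.0f), b = _mm_set1_ps(3.0f), c = _mm_set1_ps(1.0f);
__m128 r = _mm_fmadd_ps(a, b, c);     /* each lane: 2*3 + 1 == 7 */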
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
+ return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
+ return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
+ return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
+ return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
#undef __DEFAULT_FN_ATTRS
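The casts added in this hunk only make the element types of the builtin operands explicit; the generated code is unchanged. As a minimal scalar sketch (illustrative C, not the header's implementation), each lane of _mm_fnmsub_ps computes -(a*b) - c, which VFNMSUB fuses into a single rounding step:
#include <stdio.h>

int main(void) {
  float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float b[4] = {5.0f, 6.0f, 7.0f, 8.0f};
  float c[4] = {1.0f, 1.0f, 1.0f, 1.0f};
  float r[4];
  for (int i = 0; i < 4; ++i)
    r[i] = -(a[i] * b[i]) - c[i];   /* lane i of _mm_fnmsub_ps(a, b, c) */
  printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* -6 -13 -22 -33 */
  return 0;
}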
diff --git a/contrib/llvm/tools/clang/lib/Headers/htmintrin.h b/contrib/llvm/tools/clang/lib/Headers/htmintrin.h
index 0088c7ccab93..69c8d7bb57f0 100644
--- a/contrib/llvm/tools/clang/lib/Headers/htmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/htmintrin.h
@@ -164,24 +164,24 @@ struct __htm_tdb {
/* Helper intrinsics to retry tbegin in case of transient failure. */
static __inline int __attribute__((__always_inline__, __nodebug__))
-__builtin_tbegin_retry_null (int retry)
+__builtin_tbegin_retry_null (int __retry)
{
int cc, i = 0;
while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT
- && i++ < retry)
+ && i++ < __retry)
__builtin_tx_assist(i);
return cc;
}
static __inline int __attribute__((__always_inline__, __nodebug__))
-__builtin_tbegin_retry_tdb (void *tdb, int retry)
+__builtin_tbegin_retry_tdb (void *__tdb, int __retry)
{
int cc, i = 0;
- while ((cc = __builtin_tbegin(tdb)) == _HTM_TBEGIN_TRANSIENT
- && i++ < retry)
+ while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT
+ && i++ < __retry)
__builtin_tx_assist(i);
return cc;
@@ -193,24 +193,24 @@ __builtin_tbegin_retry_tdb (void *tdb, int retry)
__builtin_tbegin_retry_tdb(tdb, retry))
static __inline int __attribute__((__always_inline__, __nodebug__))
-__builtin_tbegin_retry_nofloat_null (int retry)
+__builtin_tbegin_retry_nofloat_null (int __retry)
{
int cc, i = 0;
while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT
- && i++ < retry)
+ && i++ < __retry)
__builtin_tx_assist(i);
return cc;
}
static __inline int __attribute__((__always_inline__, __nodebug__))
-__builtin_tbegin_retry_nofloat_tdb (void *tdb, int retry)
+__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)
{
int cc, i = 0;
- while ((cc = __builtin_tbegin_nofloat(tdb)) == _HTM_TBEGIN_TRANSIENT
- && i++ < retry)
+ while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT
+ && i++ < __retry)
__builtin_tx_assist(i);
return cc;
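A hedged usage sketch of the retry helpers renamed above, assuming a SystemZ target built with -mhtm; `counter` and the fallback path are hypothetical:
#include <htmintrin.h>

long counter;

void increment_transactionally(void) {
  /* __builtin_tbegin_retry dispatches to the _null/_tdb helpers above;
     here it retries up to 5 times on transient aborts. */
  if (__builtin_tbegin_retry(0, 5) == _HTM_TBEGIN_STARTED) {
    ++counter;            /* transactional body */
    __builtin_tend();
  } else {
    /* persistent failure: fall back, e.g. to a lock */
  }
}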
diff --git a/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h b/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h
index c7571ecd0661..16dc7056c6b0 100644
--- a/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/htmxlintrin.h
@@ -62,18 +62,18 @@ __TM_simple_begin (void)
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_begin (void* const TM_buff)
+__TM_begin (void* const __TM_buff)
{
- *_TEXASRL_PTR (TM_buff) = 0;
+ *_TEXASRL_PTR (__TM_buff) = 0;
if (__builtin_expect (__builtin_tbegin (0), 1))
return _HTM_TBEGIN_STARTED;
#ifdef __powerpc64__
- *_TEXASR_PTR (TM_buff) = __builtin_get_texasr ();
+ *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr ();
#else
- *_TEXASRU_PTR (TM_buff) = __builtin_get_texasru ();
- *_TEXASRL_PTR (TM_buff) = __builtin_get_texasr ();
+ *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru ();
+ *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr ();
#endif
- *_TFIAR_PTR (TM_buff) = __builtin_get_tfiar ();
+ *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar ();
return 0;
}
@@ -95,9 +95,9 @@ __TM_abort (void)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_named_abort (unsigned char const code)
+__TM_named_abort (unsigned char const __code)
{
- __builtin_tabort (code);
+ __builtin_tabort (__code);
}
extern __inline void
@@ -116,47 +116,47 @@ __TM_suspend (void)
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_user_abort (void* const TM_buff)
+__TM_is_user_abort (void* const __TM_buff)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
return _TEXASRU_ABORT (texasru);
}
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_named_user_abort (void* const TM_buff, unsigned char *code)
+__TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
- *code = _TEXASRU_FAILURE_CODE (texasru);
+ *__code = _TEXASRU_FAILURE_CODE (texasru);
return _TEXASRU_ABORT (texasru);
}
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_illegal (void* const TM_buff)
+__TM_is_illegal (void* const __TM_buff)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
return _TEXASRU_DISALLOWED (texasru);
}
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_footprint_exceeded (void* const TM_buff)
+__TM_is_footprint_exceeded (void* const __TM_buff)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);
}
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_nesting_depth (void* const TM_buff)
+__TM_nesting_depth (void* const __TM_buff)
{
texasrl_t texasrl;
if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)
{
- texasrl = *_TEXASRL_PTR (TM_buff);
+ texasrl = *_TEXASRL_PTR (__TM_buff);
if (!_TEXASR_FAILURE_SUMMARY (texasrl))
texasrl = 0;
}
@@ -168,15 +168,15 @@ __TM_nesting_depth (void* const TM_buff)
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_nested_too_deep(void* const TM_buff)
+__TM_is_nested_too_deep(void* const __TM_buff)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
return _TEXASRU_NESTING_OVERFLOW (texasru);
}
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_conflict(void* const TM_buff)
+__TM_is_conflict(void* const __TM_buff)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
/* Return TEXASR bits 11 (Self-Induced Conflict) through
@@ -186,24 +186,24 @@ __TM_is_conflict(void* const TM_buff)
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_is_failure_persistent(void* const TM_buff)
+__TM_is_failure_persistent(void* const __TM_buff)
{
- texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ texasru_t texasru = *_TEXASRU_PTR (__TM_buff);
return _TEXASRU_FAILURE_PERSISTENT (texasru);
}
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_failure_address(void* const TM_buff)
+__TM_failure_address(void* const __TM_buff)
{
- return *_TFIAR_PTR (TM_buff);
+ return *_TFIAR_PTR (__TM_buff);
}
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-__TM_failure_code(void* const TM_buff)
+__TM_failure_code(void* const __TM_buff)
{
- return *_TEXASR_PTR (TM_buff);
+ return *_TEXASR_PTR (__TM_buff);
}
#ifdef __cplusplus
@@ -227,9 +227,9 @@ __TM_simple_begin ()
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_begin (void* const tdb)
+__TM_begin (void* const __tdb)
{
- return __builtin_tbegin_nofloat (tdb);
+ return __builtin_tbegin_nofloat (__tdb);
}
static __inline long __attribute__((__always_inline__, __nodebug__))
@@ -245,22 +245,22 @@ __TM_abort ()
}
static __inline void __attribute__((__always_inline__, __nodebug__))
-__TM_named_abort (unsigned char const code)
+__TM_named_abort (unsigned char const __code)
{
- return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + code);
+ return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code);
}
static __inline void __attribute__((__always_inline__, __nodebug__))
-__TM_non_transactional_store (void* const addr, long long const value)
+__TM_non_transactional_store (void* const __addr, long long const __value)
{
- __builtin_non_tx_store ((uint64_t*)addr, (uint64_t)value);
+ __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value);
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_nesting_depth (void* const tdb_ptr)
+__TM_nesting_depth (void* const __tdb_ptr)
{
int depth = __builtin_tx_nesting_depth ();
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
if (depth != 0)
return depth;
@@ -273,9 +273,9 @@ __TM_nesting_depth (void* const tdb_ptr)
/* Transaction failure diagnostics */
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_user_abort (void* const tdb_ptr)
+__TM_is_user_abort (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
if (tdb->format != 1)
return 0;
@@ -284,25 +284,25 @@ __TM_is_user_abort (void* const tdb_ptr)
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code)
+__TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
if (tdb->format != 1)
return 0;
if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)
{
- *code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;
+ *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;
return 1;
}
return 0;
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_illegal (void* const tdb_ptr)
+__TM_is_illegal (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
return (tdb->format == 1
&& (tdb->abort_code == 4 /* unfiltered program interruption */
@@ -310,9 +310,9 @@ __TM_is_illegal (void* const tdb_ptr)
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_footprint_exceeded (void* const tdb_ptr)
+__TM_is_footprint_exceeded (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
return (tdb->format == 1
&& (tdb->abort_code == 7 /* fetch overflow */
@@ -320,17 +320,17 @@ __TM_is_footprint_exceeded (void* const tdb_ptr)
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_nested_too_deep (void* const tdb_ptr)
+__TM_is_nested_too_deep (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_conflict (void* const tdb_ptr)
+__TM_is_conflict (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
return (tdb->format == 1
&& (tdb->abort_code == 9 /* fetch conflict */
@@ -338,22 +338,22 @@ __TM_is_conflict (void* const tdb_ptr)
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_is_failure_persistent (long const result)
+__TM_is_failure_persistent (long const __result)
{
- return result == _HTM_TBEGIN_PERSISTENT;
+ return __result == _HTM_TBEGIN_PERSISTENT;
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_failure_address (void* const tdb_ptr)
+__TM_failure_address (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
return tdb->atia;
}
static __inline long __attribute__((__always_inline__, __nodebug__))
-__TM_failure_code (void* const tdb_ptr)
+__TM_failure_code (void* const __tdb_ptr)
{
- struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+ struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;
return tdb->abort_code;
}
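A hedged sketch of the XL-compatible API whose parameters were renamed above, assuming the POWER variant and -mhtm; `shared` is a hypothetical variable:
#include <htmxlintrin.h>

long shared;

void update(void) {
  TM_buff_type buf;   /* scratch area for TEXASR/TFIAR on failure */
  if (__TM_begin(buf) == _HTM_TBEGIN_STARTED) {
    ++shared;
    __TM_end();
  } else if (__TM_is_failure_persistent(buf)) {
    /* retrying cannot succeed; take a non-transactional fallback */
  }
}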
diff --git a/contrib/llvm/tools/clang/lib/Headers/ia32intrin.h b/contrib/llvm/tools/clang/lib/Headers/ia32intrin.h
index b2f82bb59e38..397f3fd13e01 100644
--- a/contrib/llvm/tools/clang/lib/Headers/ia32intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/ia32intrin.h
@@ -74,4 +74,6 @@ __rdtscp(unsigned int *__A) {
#define _rdtsc() __rdtsc()
+#define _rdpmc(A) __rdpmc(A)
+
#endif /* __IA32INTRIN_H */
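_rdpmc(A) is a thin alias for __rdpmc(A), mirroring the existing _rdtsc() spelling. A minimal timing sketch using _rdtsc() (x86 only; _rdpmc additionally requires CR4.PCE or kernel mode, so it is omitted here):
#include <stdio.h>
#include <x86intrin.h>   /* pulls in ia32intrin.h */

int main(void) {
  unsigned long long start = _rdtsc();
  volatile int sink = 0;
  for (int i = 0; i < 1000; ++i)
    sink += i;
  printf("~%llu reference cycles\n", _rdtsc() - start);
  return 0;
}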
diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h
index 637646122653..4b2752353d6f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h
@@ -24,22 +24,45 @@
#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
+#include <clflushoptintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
/* The 256-bit versions of functions in f16cintrin.h.
@@ -54,33 +77,90 @@ _mm256_cvtph_ps(__m128i __a)
{
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
}
+#endif /* __AVX2__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512CD__))
+#include <avx512vlcdintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
+#include <avx512ifmaintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512IFMA__) && defined(__AVX512VL__))
+#include <avx512ifmavlintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
+#include <avx512vbmiintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VBMI__) && defined(__AVX512VL__))
+#include <avx512vbmivlintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
+#include <avx512pfintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
@@ -93,6 +173,18 @@ _rdrand32_step(unsigned int *__p)
return __builtin_ia32_rdrand32_step(__p);
}
+/* __bit_scan_forward */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_bit_scan_forward(int __A) {
+ return __builtin_ctz(__A);
+}
+
+/* __bit_scan_reverse */
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_bit_scan_reverse(int __A) {
+ return 31 - __builtin_clz(__A);
+}
+
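/* A hedged sketch of the two helpers just added: on a non-MSVC build the
   guard above is always satisfied, so they are unconditionally available.
   Input 0 is undefined for both, matching the BSF/BSR instructions. */
#include <stdio.h>
#include <immintrin.h>

int main(void) {
  printf("%d\n", _bit_scan_forward(0x50)); /* 4: index of lowest set bit */
  printf("%d\n", _bit_scan_reverse(0x50)); /* 6: index of highest set bit */
  return 0;
}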
#ifdef __x86_64__
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
@@ -100,7 +192,9 @@ _rdrand64_step(unsigned long long *__p)
return __builtin_ia32_rdrand64_step(__p);
}
#endif
+#endif /* __RDRND__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
@@ -149,23 +243,38 @@ _writegsbase_u64(unsigned long long __V)
{
return __builtin_ia32_wrgsbase64(__V);
}
+
#endif
+#endif /* __FSGSBASE__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
-
#include <xtestintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
#include <xsaveintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
+#endif
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
* whereas others are also available at all times. */
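The net effect of the guards added above: under MSVC (which has no -m feature flags) every sub-header stays exposed unconditionally, while a regular clang build only sees a sub-header when its feature macro is defined. A hedged sketch of a translation unit that adapts to whether -mavx2 was passed:
#include <immintrin.h>
#include <string.h>

int abs_first_lane(int x) {
#ifdef __AVX2__
  /* avx2intrin.h was included by the guard above */
  __m256i v = _mm256_set1_epi32(x);
  __m256i a = _mm256_abs_epi32(v);
  int out;
  memcpy(&out, &a, sizeof(out));
  return out;
#else
  return x < 0 ? -x : x;   /* scalar fallback when the header was skipped */
#endif
}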
diff --git a/contrib/llvm/tools/clang/lib/Headers/Intrin.h b/contrib/llvm/tools/clang/lib/Headers/intrin.h
index 6c1d0d16eabf..f18711ad1ecf 100644
--- a/contrib/llvm/tools/clang/lib/Headers/Intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/intrin.h
@@ -1,4 +1,4 @@
-/* ===-------- Intrin.h ---------------------------------------------------===
+/* ===-------- intrin.h ---------------------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -23,7 +23,7 @@
/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
-#include_next <Intrin.h>
+#include_next <intrin.h>
#else
#ifndef __INTRIN_H
@@ -250,7 +250,6 @@ unsigned long __cdecl _lrotl(unsigned long, int);
static __inline__
unsigned long __cdecl _lrotr(unsigned long, int);
static __inline__
-static __inline__
void _ReadBarrier(void);
static __inline__
void _ReadWriteBarrier(void);
@@ -667,20 +666,20 @@ _InterlockedDecrement64(__int64 volatile *_Value) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedAnd8(char volatile *_Value, char _Mask) {
- return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedAnd16(short volatile *_Value, short _Mask) {
- return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedAnd(long volatile *_Value, long _Mask) {
- return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
- return __atomic_and_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -688,20 +687,20 @@ _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedOr8(char volatile *_Value, char _Mask) {
- return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedOr16(short volatile *_Value, short _Mask) {
- return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedOr(long volatile *_Value, long _Mask) {
- return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
- return __atomic_or_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_or(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -709,20 +708,20 @@ _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
\*----------------------------------------------------------------------------*/
static __inline__ char __DEFAULT_FN_ATTRS
_InterlockedXor8(char volatile *_Value, char _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ short __DEFAULT_FN_ATTRS
_InterlockedXor16(short volatile *_Value, short _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedXor(long volatile *_Value, long _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#ifdef __x86_64__
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
- return __atomic_xor_fetch(_Value, _Mask, __ATOMIC_SEQ_CST);
+ return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_SEQ_CST);
}
#endif
/*----------------------------------------------------------------------------*\
@@ -807,20 +806,24 @@ static __inline__ unsigned char __DEFAULT_FN_ATTRS
__readfsbyte(unsigned long __offset) {
return *__ptr_to_addr_space(257, unsigned char, __offset);
}
-static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
-__readfsqword(unsigned long __offset) {
- return *__ptr_to_addr_space(257, unsigned __int64, __offset);
-}
static __inline__ unsigned short __DEFAULT_FN_ATTRS
__readfsword(unsigned long __offset) {
return *__ptr_to_addr_space(257, unsigned short, __offset);
}
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
+__readfsqword(unsigned long __offset) {
+ return *__ptr_to_addr_space(257, unsigned __int64, __offset);
+}
#endif
#ifdef __x86_64__
static __inline__ unsigned char __DEFAULT_FN_ATTRS
__readgsbyte(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned char, __offset);
}
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
+__readgsword(unsigned long __offset) {
+ return *__ptr_to_addr_space(256, unsigned short, __offset);
+}
static __inline__ unsigned long __DEFAULT_FN_ATTRS
__readgsdword(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned long, __offset);
@@ -829,10 +832,6 @@ static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
__readgsqword(unsigned long __offset) {
return *__ptr_to_addr_space(256, unsigned __int64, __offset);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS
-__readgsword(unsigned long __offset) {
- return *__ptr_to_addr_space(256, unsigned short, __offset);
-}
#endif
#undef __ptr_to_addr_space
/*----------------------------------------------------------------------------*\
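The _Interlocked* fixes in this file matter because MSVC documents these intrinsics as returning the operand's value from before the operation; __atomic_fetch_and/or/xor provide exactly that, whereas the old __atomic_and_fetch family returned the updated value. A hedged illustration with the underlying GCC-style builtin:
#include <stdio.h>

int main(void) {
  long value = 0xFF;
  /* returns the ORIGINAL value, as _InterlockedAnd must */
  long old = __atomic_fetch_and(&value, 0x0F, __ATOMIC_SEQ_CST);
  printf("old=0x%lX new=0x%lX\n", old, value); /* old=0xFF new=0xF */
  return 0;
}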
diff --git a/contrib/llvm/tools/clang/lib/Headers/inttypes.h b/contrib/llvm/tools/clang/lib/Headers/inttypes.h
index 3d59d141deb2..1d8eabab0f8e 100644
--- a/contrib/llvm/tools/clang/lib/Headers/inttypes.h
+++ b/contrib/llvm/tools/clang/lib/Headers/inttypes.h
@@ -23,6 +23,10 @@
#ifndef __CLANG_INTTYPES_H
#define __CLANG_INTTYPES_H
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#error MSVC does not have inttypes.h prior to Visual Studio 2013
+#endif
+
#include_next <inttypes.h>
#if defined(_MSC_VER) && _MSC_VER < 1900
diff --git a/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h b/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
index cb93faf2b6a4..294866c1dc0d 100644
--- a/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
+++ b/contrib/llvm/tools/clang/lib/Headers/mm3dnow.h
@@ -33,7 +33,7 @@ typedef float __v2sf __attribute__((__vector_size__(8)));
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
static __inline__ void __DEFAULT_FN_ATTRS
-_m_femms() {
+_m_femms(void) {
__builtin_ia32_femms();
}
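Writing _m_femms(void) instead of _m_femms() matters in C, where an empty parameter list declares a function taking unspecified arguments rather than none. A hedged illustration of the difference (hypothetical declarations):
void legacy_decl();      /* C: parameters unspecified; stray args go unchecked */
void proper_decl(void);  /* C: exactly zero parameters */

void demo(void) {
  legacy_decl(42);       /* accepted by a C compiler, silently wrong */
  /* proper_decl(42); */ /* error: too many arguments to function call */
}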
diff --git a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
index 162cb1aa1711..cefd6053aa80 100644
--- a/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/mmintrin.h
@@ -26,6 +26,7 @@
typedef long long __m64 __attribute__((__vector_size__(8)));
+typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
@@ -33,366 +34,1314 @@ typedef char __v8qi __attribute__((__vector_size__(8)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
+/// \brief Clears the MMX state by setting the state of the x87 stack registers
+/// to empty.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c EMMS instruction.
+///
static __inline__ void __DEFAULT_FN_ATTRS
_mm_empty(void)
{
__builtin_ia32_emms();
}
+/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
+/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
+///
+/// \param __i
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
+/// parameter. The upper 32 bits are set to 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtsi32_si64(int __i)
{
return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
}
+/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
+/// signed integer.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector.
+/// \returns A 32-bit signed integer value containing the lower 32 bits of the
+/// parameter.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtsi64_si32(__m64 __m)
{
return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
}
+/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
+///
+/// \param __i
+/// A 64-bit signed integer.
+/// \returns A 64-bit integer vector containing the same bitwise pattern as the
+/// parameter.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtsi64_m64(long long __i)
{
return (__m64)__i;
}
+/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector.
+/// \returns A 64-bit signed integer containing the same bitwise pattern as the
+/// parameter.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtm64_si64(__m64 __m)
{
return (long long)__m;
}
+/// \brief Converts 16-bit signed integers from both 64-bit integer vector
+/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
+/// a 64-bit integer vector of [8 x i8] as the result. Positive values
+/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
+/// are saturated to 0x80.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PACKSSWB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
+/// 16-bit signed integer and is converted to an 8-bit signed integer with
+/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
+/// Negative values less than 0x80 are saturated to 0x80. The converted
+/// [4 x i8] values are written to the lower 32 bits of the result.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
+/// 16-bit signed integer and is converted to an 8-bit signed integer with
+/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
+/// Negative values less than 0x80 are saturated to 0x80. The converted
+/// [4 x i8] values are written to the upper 32 bits of the result.
+/// \returns A 64-bit integer vector of [8 x i8] containing the converted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_packs_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}
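/* A hedged worked example of the saturating pack documented above:
   300 clamps to 127 and -300 to -128 on the way down to i8. */
#include <stdio.h>
#include <mmintrin.h>

int main(void) {
  __m64 a = _mm_set_pi16(300, -300, 100, -100); /* element 0 is -100 */
  __m64 b = _mm_set_pi16(1, 2, 3, 4);
  __m64 r = _mm_packs_pi16(a, b);  /* a's lanes -> low half, b's -> high */
  signed char out[8];
  __builtin_memcpy(out, &r, sizeof(out));
  _mm_empty();                     /* clear MMX state before using x87 */
  for (int i = 0; i < 8; ++i)
    printf("%d ", out[i]);         /* -100 100 -128 127 4 3 2 1 */
  return 0;
}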
+/// \brief Converts 32-bit signed integers from both 64-bit integer vector
+/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
+/// a 64-bit integer vector of [4 x i16] as the result. Positive values
+/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
+/// 0x8000 are saturated to 0x8000.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PACKSSDW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
+/// 32-bit signed integer and is converted to a 16-bit signed integer with
+/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
+/// Negative values less than 0x8000 are saturated to 0x8000. The converted
+/// [2 x i16] values are written to the lower 32 bits of the result.
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
+/// 32-bit signed integer and is converted to a 16-bit signed integer with
+/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
+/// Negative values less than 0x8000 are saturated to 0x8000. The converted
+/// [2 x i16] values are written to the upper 32 bits of the result.
+/// \returns A 64-bit integer vector of [4 x i16] containing the converted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_packs_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}
+/// \brief Converts 16-bit signed integers from both 64-bit integer vector
+/// parameters of [4 x i16] into 8-bit unsigned integer values, and
+/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
+/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
+/// to 0.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PACKUSWB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
+/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
+/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
+/// than 0 are saturated to 0. The converted [4 x i8] values are written to
+/// the lower 32 bits of the result.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
+/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
+/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
+/// than 0 are saturated to 0. The converted [4 x i8] values are written to
+/// the upper 32 bits of the result.
+/// \returns A 64-bit integer vector of [8 x i8] containing the converted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_packs_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
+/// and interleaves them into a 64-bit integer vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUNPCKHBW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// Bits [39:32] are written to bits [7:0] of the result.
+/// Bits [47:40] are written to bits [23:16] of the result.
+/// Bits [55:48] are written to bits [39:32] of the result.
+/// Bits [63:56] are written to bits [55:48] of the result.
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// Bits [39:32] are written to bits [15:8] of the result.
+/// Bits [47:40] are written to bits [31:24] of the result.
+/// Bits [55:48] are written to bits [47:40] of the result.
+/// Bits [63:56] are written to bits [63:56] of the result.
+/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
}
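/* A hedged worked example of the high-half interleave documented above. */
#include <stdio.h>
#include <mmintrin.h>

int main(void) {
  __m64 a = _mm_set_pi8(7, 6, 5, 4, 3, 2, 1, 0);         /* a lane i == i    */
  __m64 b = _mm_set_pi8(17, 16, 15, 14, 13, 12, 11, 10); /* b lane i == 10+i */
  __m64 r = _mm_unpackhi_pi8(a, b);  /* a4,b4,a5,b5,a6,b6,a7,b7 */
  unsigned char out[8];
  __builtin_memcpy(out, &r, sizeof(out));
  _mm_empty();
  for (int i = 0; i < 8; ++i)
    printf("%d ", out[i]);           /* 4 14 5 15 6 16 7 17 */
  return 0;
}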
+/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
+/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUNPCKHWD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// Bits [47:32] are written to bits [15:0] of the result.
+/// Bits [63:48] are written to bits [47:32] of the result.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// Bits [47:32] are written to bits [31:16] of the result.
+/// Bits [63:48] are written to bits [63:48] of the result.
+/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
+/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUNPCKHDQ instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
+/// the lower 32 bits of the result.
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
+/// the upper 32 bits of the result.
+/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
}
+/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
+/// and interleaves them into a 64-bit integer vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUNPCKLBW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// Bits [7:0] are written to bits [7:0] of the result.
+/// Bits [15:8] are written to bits [23:16] of the result.
+/// Bits [23:16] are written to bits [39:32] of the result.
+/// Bits [31:24] are written to bits [55:48] of the result.
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// Bits [7:0] are written to bits [15:8] of the result.
+/// Bits [15:8] are written to bits [31:24] of the result.
+/// Bits [23:16] are written to bits [47:40] of the result.
+/// Bits [31:24] are written to bits [63:56] of the result.
+/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
+/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUNPCKLWD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// Bits [15:0] are written to bits [15:0] of the result.
+/// Bits [31:16] are written to bits [47:32] of the result.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// Bits [15:0] are written to bits [31:16] of the result.
+/// Bits [31:16] are written to bits [63:48] of the result.
+/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
+/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PUNPCKLDQ instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
+/// the lower 32 bits of the result.
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
+/// the upper 32 bits of the result.
+/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
}
+/// \brief Adds each 8-bit integer element of the first 64-bit integer vector
+/// of [8 x i8] to the corresponding 8-bit integer element of the second
+/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
+/// packed into a 64-bit integer vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
+/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Adds each 16-bit integer element of the first 64-bit integer vector
+/// of [4 x i16] to the corresponding 16-bit integer element of the second
+/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
+/// packed into a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
+/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Adds each 32-bit integer element of the first 64-bit integer vector
+/// of [2 x i32] to the corresponding 32-bit integer element of the second
+/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
+/// packed into a 64-bit integer vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32].
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32].
+/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
+/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_add_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
}
+/// \brief Adds each 8-bit signed integer element of the first 64-bit integer
+/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
+/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
+/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
+/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDSB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
+/// of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Adds each 16-bit signed integer element of the first 64-bit integer
+/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
+/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
+/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
+/// saturated to 0x8000. The results are packed into a 64-bit integer vector
+/// of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDSW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
+/// of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
+/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
+/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
+/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
+/// [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDUSB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
+/// unsigned sums of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pu8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
+/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
+/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
+/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
+/// integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PADDUSW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
+/// unsigned sums of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_adds_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Subtracts each 8-bit integer element of the second 64-bit integer
+/// vector of [8 x i8] from the corresponding 8-bit integer element of the
+/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
+/// are packed into a 64-bit integer vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
+/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
+/// both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Subtracts each 16-bit integer element of the second 64-bit integer
+/// vector of [4 x i16] from the corresponding 16-bit integer element of the
+/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
+/// results are packed into a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
+/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
+/// both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Subtracts each 32-bit integer element of the second 64-bit integer
+/// vector of [2 x i32] from the corresponding 32-bit integer element of the
+/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
+/// results are packed into a 64-bit integer vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
+/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
+/// both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
}
+/// \brief Subtracts each 8-bit signed integer element of the second 64-bit
+/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
+/// element of the first 64-bit integer vector of [8 x i8]. Positive results
+/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
+/// are saturated to 0x80. The results are packed into a 64-bit integer
+/// vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBSB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
+/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
+/// differences of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Subtracts each 16-bit signed integer element of the second 64-bit
+/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
+/// element of the first 64-bit integer vector of [4 x i16]. Positive results
+/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
+/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
+/// integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBSW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
+/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
+/// differences of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
+/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
+/// element of the first 64-bit integer vector of [8 x i8]. If an element of
+/// the first vector is less than the corresponding element of the second
+/// vector, the result is saturated to 0. The results are packed into a
+/// 64-bit integer vector of [8 x i8].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBUSB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
+/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
+/// differences of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pu8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
+/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
+/// integer element of the first 64-bit integer vector of [4 x i16]. If an
+/// element of the first vector is less than the corresponding element of the
+/// second vector, the result is saturated to 0. The results are packed into
+/// a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSUBUSW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16] containing the minuends.
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
+/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
+/// differences of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_subs_pu16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
+/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
+/// element of the second 64-bit integer vector of [4 x i16] to produce four
+/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
+/// The lower 32 bits of these two sums are packed into a 64-bit integer
+/// vector of [2 x i32]. For example, bits [15:0] of both parameters are
+/// multiplied, bits [31:16] of both parameters are multiplied, and the sum
+/// of both results is written to bits [31:0] of the result.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMADDWD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
+/// products of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_madd_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
}
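/* A hedged worked example of the multiply-add documented above: a dot
   product over adjacent 16-bit lane pairs. */
#include <stdio.h>
#include <mmintrin.h>

int main(void) {
  __m64 a = _mm_set_pi16(4, 3, 2, 1);     /* lanes: 1, 2, 3, 4     */
  __m64 b = _mm_set_pi16(40, 30, 20, 10); /* lanes: 10, 20, 30, 40 */
  __m64 r = _mm_madd_pi16(a, b);          /* [1*10+2*20, 3*30+4*40] */
  int out[2];
  __builtin_memcpy(out, &r, sizeof(out));
  _mm_empty();
  printf("%d %d\n", out[0], out[1]);      /* 50 250 */
  return 0;
}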
+/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
+/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
+/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
+/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMULHW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
+/// of the products of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
+/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
+/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
+/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMULLW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
+/// of the products of both parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Left-shifts each 16-bit signed integer element of the first
+/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
+/// of bits specified by the second parameter, which is a 64-bit integer. The
+/// lower 16 bits of the results are packed into a 64-bit integer vector of
+/// [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSLLW instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [4 x i16].
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
+/// values. If __count is greater than or equal to 16, the result is set to
+/// all 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sll_pi16(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
}
+/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
+/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
+/// The lower 16 bits of the results are packed into a 64-bit integer vector
+/// of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSLLW instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [4 x i16].
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
+/// values. If __count is greater than or equal to 16, the result is set to
+/// all 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_pi16(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
}
+/// \brief Left-shifts each 32-bit signed integer element of the first
+/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
+/// of bits specified by the second parameter, which is a 64-bit integer. The
+/// lower 32 bits of the results are packed into a 64-bit integer vector of
+/// [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSLLD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [2 x i32].
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
+/// values. If __count is greater than or equal to 32, the result is set to
+/// all 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sll_pi32(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
}
+/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
+/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
+/// The lower 32 bits of the results are packed into a 64-bit integer vector
+/// of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSLLD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [2 x i32].
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
+/// values. If __count is greater than or equal to 32, the result is set to
+/// all 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_pi32(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
}
+/// \brief Left-shifts the first 64-bit integer parameter by the number of bits
+/// specified by the second 64-bit integer parameter. The lower 64 bits of
+/// the result are returned.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSLLQ instruction.
+///
+/// \param __m
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector containing the left-shifted value. If
+/// __count is greater than or equal to 64, the result is set to 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sll_si64(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psllq(__m, __count);
+ return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
}
+/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
+/// number of bits specified by the second parameter, which is a 32-bit
+/// integer. The lower 64 bits of the result are returned.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSLLQ instruction.
+///
+/// \param __m
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector containing the left-shifted value. If
+/// __count is greater than or equal to 64, the result is set to 0.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_slli_si64(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psllqi(__m, __count);
-}
-
+ return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
+}
+
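+/* Editor's sketch (not part of the patch): the whole register is treated as
+ * one 64-bit lane. The helper name is hypothetical. */
+static __inline__ long long __DEFAULT_FN_ATTRS
+__example_shift_si64(void)
+{
+  /* 0x12345678 << 8 == 0x1234567800 */
+  return _mm_cvtm64_si64(_mm_slli_si64(_mm_cvtsi32_si64(0x12345678), 8));
+}
+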
+/// \brief Right-shifts each 16-bit integer element of the first parameter,
+/// which is a 64-bit integer vector of [4 x i16], by the number of bits
+/// specified by the second parameter, which is a 64-bit integer. High-order
+/// bits are filled with the sign bit of the initial value of each 16-bit
+/// element. The 16-bit results are packed into a 64-bit integer vector of
+/// [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRAW instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [4 x i16].
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sra_pi16(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
}
+/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
+/// of [4 x i16] by the number of bits specified by a 32-bit integer.
+/// High-order bits are filled with the sign bit of the initial value of each
+/// 16-bit element. The 16-bit results are packed into a 64-bit integer
+/// vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRAW instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [4 x i16].
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srai_pi16(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
}
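
/* Editor's sketch (not part of the patch): the arithmetic shift replicates
 * each element's sign bit. The helper name is hypothetical. */
static __inline__ __m64 __DEFAULT_FN_ATTRS
__example_srai(void)
{
  __m64 __v = (__m64)(__v4hi){-8, 8, -1, 1};
  return _mm_srai_pi16(__v, 2);   /* yields {-2, 2, -1, 0} */
}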
+/// \brief Right-shifts each 32-bit integer element of the first parameter,
+/// which is a 64-bit integer vector of [2 x i32], by the number of bits
+/// specified by the second parameter, which is a 64-bit integer. High-order
+/// bits are filled with the sign bit of the initial value of each 32-bit
+/// element. The 32-bit results are packed into a 64-bit integer vector of
+/// [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRAD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [2 x i32].
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sra_pi32(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
}
+/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
+/// of [2 x i32] by the number of bits specified by a 32-bit integer.
+/// High-order bits are filled with the sign bit of the initial value of each
+/// 32-bit element. The 32-bit results are packed into a 64-bit integer
+/// vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRAD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [2 x i32].
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srai_pi32(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
}
+/// \brief Right-shifts each 16-bit integer element of the first parameter,
+/// which is a 64-bit integer vector of [4 x i16], by the number of bits
+/// specified by the second parameter, which is a 64-bit integer. High-order
+/// bits are cleared. The 16-bit results are packed into a 64-bit integer
+/// vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRLW instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [4 x i16].
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_pi16(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
}
+/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
+/// of [4 x i16] by the number of bits specified by a 32-bit integer.
+/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
+/// integer vector of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRLW instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [4 x i16].
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_pi16(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
}
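
/* Editor's sketch (not part of the patch): unlike _mm_srai_pi16 above, the
 * logical shift fills with zero bits. The helper name is hypothetical. */
static __inline__ __m64 __DEFAULT_FN_ATTRS
__example_srli(void)
{
  __m64 __v = (__m64)(__v4hi){-1, -1, -1, -1};
  return _mm_srli_pi16(__v, 4);   /* each 0xFFFF element becomes 0x0FFF */
}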
+/// \brief Right-shifts each 32-bit integer element of the first parameter,
+/// which is a 64-bit integer vector of [2 x i32], by the number of bits
+/// specified by the second parameter, which is a 64-bit integer. High-order
+/// bits are cleared. The 32-bit results are packed into a 64-bit integer
+/// vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRLD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [2 x i32].
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_pi32(__m64 __m, __m64 __count)
{
return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
}
+/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
+/// of [2 x i32] by the number of bits specified by a 32-bit integer.
+/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
+/// integer vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRLD instruction.
+///
+/// \param __m
+/// A 64-bit integer vector of [2 x i32].
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_pi32(__m64 __m, int __count)
{
return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
}
+/// \brief Right-shifts the first 64-bit integer parameter by the number of bits
+/// specified by the second 64-bit integer parameter. High-order bits are
+/// cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRLQ instruction.
+///
+/// \param __m
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \param __count
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \returns A 64-bit integer vector containing the right-shifted value.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srl_si64(__m64 __m, __m64 __count)
{
- return (__m64)__builtin_ia32_psrlq(__m, __count);
+ return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
}
+/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
+/// number of bits specified by the second parameter, which is a 32-bit
+/// integer. High-order bits are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSRLQ instruction.
+///
+/// \param __m
+/// A 64-bit integer vector interpreted as a single 64-bit integer.
+/// \param __count
+/// A 32-bit integer value.
+/// \returns A 64-bit integer vector containing the right-shifted value.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_srli_si64(__m64 __m, int __count)
{
- return (__m64)__builtin_ia32_psrlqi(__m, __count);
+ return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
}
+/// \brief Performs a bitwise AND of two 64-bit integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PAND instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector.
+/// \param __m2
+/// A 64-bit integer vector.
+/// \returns A 64-bit integer vector containing the bitwise AND of both
+/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_and_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_pand(__m1, __m2);
-}
-
+ return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
+}
+
+/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
+/// performs a bitwise AND of the intermediate result and the second 64-bit
+/// integer vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PANDN instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector. The one's complement of this parameter is used
+/// in the bitwise AND.
+/// \param __m2
+/// A 64-bit integer vector.
+/// \returns A 64-bit integer vector containing the bitwise AND of the second
+/// parameter and the one's complement of the first parameter.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_andnot_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_pandn(__m1, __m2);
+ return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
}
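
/* Editor's sketch (not part of the patch): andnot computes (~__m1) & __m2,
 * the "clear selected bits" half of the usual mask/select idiom. */
static __inline__ __m64 __DEFAULT_FN_ATTRS
__example_clear_masked(__m64 __mask, __m64 __value)
{
  return _mm_andnot_si64(__mask, __value); /* keep bits where __mask is 0 */
}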
+/// \brief Performs a bitwise OR of two 64-bit integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POR instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector.
+/// \param __m2
+/// A 64-bit integer vector.
+/// \returns A 64-bit integer vector containing the bitwise OR of both
+/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_or_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_por(__m1, __m2);
+ return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
}
+/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PXOR instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector.
+/// \param __m2
+/// A 64-bit integer vector.
+/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
+/// parameters.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_xor_si64(__m64 __m1, __m64 __m2)
{
- return __builtin_ia32_pxor(__m1, __m2);
-}
-
+ return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
+}
+
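+/* Editor's sketch (not part of the patch): XORing a value with itself is the
+ * classic way to produce an all-zero vector, cf. _mm_setzero_si64 below. */
+static __inline__ __m64 __DEFAULT_FN_ATTRS
+__example_zero(__m64 __v)
+{
+  return _mm_xor_si64(__v, __v); /* every bit cancels: result is 0 */
+}
+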
+/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
+/// [8 x i8] to determine if the element of the first vector is equal to the
+/// corresponding element of the second vector. The comparison yields 0 for
+/// false, 0xFF for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PCMPEQB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
+/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
+/// [4 x i16] to determine if the element of the first vector is equal to the
+/// corresponding element of the second vector. The comparison yields 0 for
+/// false, 0xFFFF for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PCMPEQW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
+/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
+/// [2 x i32] to determine if the element of the first vector is equal to the
+/// corresponding element of the second vector. The comparison yields 0 for
+/// false, 0xFFFFFFFF for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PCMPEQD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32].
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32].
+/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
+/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
}
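
/* Editor's sketch (not part of the patch): the all-ones/all-zeros masks the
 * comparisons return combine directly with the bitwise operations above. */
static __inline__ __m64 __DEFAULT_FN_ATTRS
__example_keep_equal(__m64 __a, __m64 __b)
{
  __m64 __eq = _mm_cmpeq_pi16(__a, __b); /* 0xFFFF where equal, else 0 */
  return _mm_and_si64(__eq, __a);        /* zero out the unequal elements */
}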
+/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
+/// [8 x i8] to determine if the element of the first vector is greater than
+/// the corresponding element of the second vector. The comparison yields 0
+/// for false, 0xFF for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PCMPGTB instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [8 x i8].
+/// \param __m2
+/// A 64-bit integer vector of [8 x i8].
+/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
+/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
}
+/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
+/// [4 x i16] to determine if the element of the first vector is greater than
+/// the corresponding element of the second vector. The comparison yields 0
+/// for false, 0xFFFF for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PCMPGTW instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [4 x i16].
+/// \param __m2
+/// A 64-bit integer vector of [4 x i16].
+/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
+/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
}
+/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
+/// [2 x i32] to determine if the element of the first vector is greater than
+/// the corresponding element of the second vector. The comparison yields 0
+/// for false, 0xFFFFFFFF for true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PCMPGTD instruction.
+///
+/// \param __m1
+/// A 64-bit integer vector of [2 x i32].
+/// \param __m2
+/// A 64-bit integer vector of [2 x i32].
+/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
+/// results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
}
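
/* Editor's sketch (not part of the patch): a branch-free per-element maximum
 * built from the greater-than mask, a common MMX select idiom. */
static __inline__ __m64 __DEFAULT_FN_ATTRS
__example_max_pi16(__m64 __a, __m64 __b)
{
  __m64 __gt = _mm_cmpgt_pi16(__a, __b);          /* 0xFFFF where a > b */
  return _mm_or_si64(_mm_and_si64(__gt, __a),     /* take a where a > b */
                     _mm_andnot_si64(__gt, __b)); /* take b elsewhere   */
}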
+/// \brief Constructs a 64-bit integer vector initialized to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VXORPS / XORPS instruction.
+///
+/// \returns An initialized 64-bit integer vector with all elements set to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setzero_si64(void)
{
return (__m64){ 0LL };
}
+/// \brief Constructs a 64-bit integer vector initialized with the specified
+/// 32-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __i1
+/// A 32-bit integer value used to initialize the upper 32 bits of the
+/// result.
+/// \param __i0
+/// A 32-bit integer value used to initialize the lower 32 bits of the
+/// result.
+/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set_pi32(int __i1, int __i0)
{
return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
}
+/// \brief Constructs a 64-bit integer vector initialized with the specified
+/// 16-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __s3
+/// A 16-bit integer value used to initialize bits [63:48] of the result.
+/// \param __s2
+/// A 16-bit integer value used to initialize bits [47:32] of the result.
+/// \param __s1
+/// A 16-bit integer value used to initialize bits [31:16] of the result.
+/// \param __s0
+/// A 16-bit integer value used to initialize bits [15:0] of the result.
+/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
{
return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
}
+/// \brief Constructs a 64-bit integer vector initialized with the specified
+/// 8-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __b7
+/// An 8-bit integer value used to initialize bits [63:56] of the result.
+/// \param __b6
+/// An 8-bit integer value used to initialize bits [55:48] of the result.
+/// \param __b5
+/// An 8-bit integer value used to initialize bits [47:40] of the result.
+/// \param __b4
+/// An 8-bit integer value used to initialize bits [39:32] of the result.
+/// \param __b3
+/// An 8-bit integer value used to initialize bits [31:24] of the result.
+/// \param __b2
+/// An 8-bit integer value used to initialize bits [23:16] of the result.
+/// \param __b1
+/// An 8-bit integer value used to initialize bits [15:8] of the result.
+/// \param __b0
+/// An 8-bit integer value used to initialize bits [7:0] of the result.
+/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
char __b1, char __b0)
@@ -401,36 +1350,129 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
__b4, __b5, __b6, __b7);
}
+/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
+/// 32-bit integer vector elements set to the specified 32-bit integer
+/// value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction.
+///
+/// \param __i
+/// A 32-bit integer value used to initialize each vector element of the
+/// result.
+/// \returns An initialized 64-bit integer vector of [2 x i32].
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set1_pi32(int __i)
{
return _mm_set_pi32(__i, __i);
}
+/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
+/// 16-bit integer vector elements set to the specified 16-bit integer
+/// value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction.
+///
+/// \param __w
+/// A 16-bit integer value used to initialize each vector element of the
+/// result.
+/// \returns An initialized 64-bit integer vector of [4 x i16].
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set1_pi16(short __w)
{
return _mm_set_pi16(__w, __w, __w, __w);
}
+/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
+/// 8-bit integer vector elements set to the specified 8-bit integer value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW +
+/// PSHUFLW instructions.
+///
+/// \param __b
+/// An 8-bit integer value used to initialize each vector element of the
+/// result.
+/// \returns An initialized 64-bit integer vector of [8 x i8].
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_set1_pi8(char __b)
{
return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
}
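
/* Editor's sketch (not part of the patch): set1 broadcasts one scalar, so the
 * two constructions below are equivalent. The helper name is hypothetical. */
static __inline__ int __DEFAULT_FN_ATTRS
__example_set1(void)
{
  return _mm_cvtm64_si64(_mm_set1_pi16(7)) ==
         _mm_cvtm64_si64(_mm_set_pi16(7, 7, 7, 7)); /* always 1 */
}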
+/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
+/// the specified 32-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __i0
+/// A 32-bit integer value used to initialize the lower 32 bits of the
+/// result.
+/// \param __i1
+/// A 32-bit integer value used to initialize the upper 32 bits of the
+/// result.
+/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setr_pi32(int __i0, int __i1)
{
return _mm_set_pi32(__i1, __i0);
}
+/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
+/// the specified 16-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __w0
+/// A 16-bit integer value used to initialize bits [15:0] of the result.
+/// \param __w1
+/// A 16-bit integer value used to initialize bits [31:16] of the result.
+/// \param __w2
+/// A 16-bit integer value used to initialize bits [47:32] of the result.
+/// \param __w3
+/// A 16-bit integer value used to initialize bits [63:48] of the result.
+/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16(__w3, __w2, __w1, __w0);
}
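
/* Editor's sketch (not part of the patch): setr lists elements from bits
 * [15:0] upward, i.e. set with the argument order reversed. */
static __inline__ int __DEFAULT_FN_ATTRS
__example_setr(void)
{
  return _mm_cvtm64_si64(_mm_setr_pi16(0, 1, 2, 3)) ==
         _mm_cvtm64_si64(_mm_set_pi16(3, 2, 1, 0)); /* always 1 */
}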
+/// \brief Constructs a 64-bit integer vector, initialized in reverse order with
+/// the specified 8-bit integer values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __b0
+/// An 8-bit integer value used to initialize bits [7:0] of the result.
+/// \param __b1
+/// An 8-bit integer value used to initialize bits [15:8] of the result.
+/// \param __b2
+/// An 8-bit integer value used to initialize bits [23:16] of the result.
+/// \param __b3
+/// An 8-bit integer value used to initialize bits [31:24] of the result.
+/// \param __b4
+/// An 8-bit integer value used to initialize bits [39:32] of the result.
+/// \param __b5
+/// An 8-bit integer value used to initialize bits [47:40] of the result.
+/// \param __b6
+/// An 8-bit integer value used to initialize bits [55:48] of the result.
+/// \param __b7
+/// An 8-bit integer value used to initialize bits [63:56] of the result.
+/// \returns An initialized 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7)
diff --git a/contrib/llvm/tools/clang/lib/Headers/module.modulemap b/contrib/llvm/tools/clang/lib/Headers/module.modulemap
index b147e891dceb..3e40d2c08d8c 100644
--- a/contrib/llvm/tools/clang/lib/Headers/module.modulemap
+++ b/contrib/llvm/tools/clang/lib/Headers/module.modulemap
@@ -1,3 +1,26 @@
+/*===---- module.modulemap - intrinsics module map -------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
module _Builtin_intrinsics [system] [extern_c] {
explicit module altivec {
requires altivec
@@ -24,7 +47,20 @@ module _Builtin_intrinsics [system] [extern_c] {
export *
header "immintrin.h"
+ textual header "f16cintrin.h"
+ textual header "avxintrin.h"
+ textual header "avx2intrin.h"
+ textual header "avx512fintrin.h"
+ textual header "avx512erintrin.h"
+ textual header "fmaintrin.h"
+
header "x86intrin.h"
+ textual header "bmiintrin.h"
+ textual header "bmi2intrin.h"
+ textual header "lzcntintrin.h"
+ textual header "xopintrin.h"
+ textual header "fma4intrin.h"
+ textual header "mwaitxintrin.h"
explicit module mm_malloc {
header "mm_malloc.h"
@@ -39,11 +75,8 @@ module _Builtin_intrinsics [system] [extern_c] {
header "mmintrin.h"
}
- explicit module f16c {
- header "f16cintrin.h"
- }
-
explicit module sse {
+ export mm_malloc
export mmx
export sse2 // note: for hackish <emmintrin.h> dependency
header "xmmintrin.h"
@@ -79,46 +112,6 @@ module _Builtin_intrinsics [system] [extern_c] {
header "ammintrin.h"
}
- explicit module avx {
- export sse4_2
- header "avxintrin.h"
- }
-
- explicit module avx2 {
- export avx
- header "avx2intrin.h"
- }
-
- explicit module avx512f {
- export avx2
- header "avx512fintrin.h"
- }
-
- explicit module avx512er {
- header "avx512erintrin.h"
- }
-
- explicit module bmi {
- header "bmiintrin.h"
- }
-
- explicit module bmi2 {
- header "bmi2intrin.h"
- }
-
- explicit module fma {
- header "fmaintrin.h"
- }
-
- explicit module fma4 {
- export sse3
- header "fma4intrin.h"
- }
-
- explicit module lzcnt {
- header "lzcntintrin.h"
- }
-
explicit module popcnt {
header "popcntintrin.h"
}
@@ -127,11 +120,6 @@ module _Builtin_intrinsics [system] [extern_c] {
header "mm3dnow.h"
}
- explicit module xop {
- export fma4
- header "xopintrin.h"
- }
-
explicit module aes_pclmul {
header "wmmintrin.h"
export aes
@@ -169,3 +157,8 @@ module _Builtin_intrinsics [system] [extern_c] {
module _Builtin_stddef_max_align_t [system] [extern_c] {
header "__stddef_max_align_t.h"
}
+
+module opencl_c {
+ requires opencl
+ header "opencl-c.h"
+}
diff --git a/contrib/llvm/tools/clang/lib/Headers/msa.h b/contrib/llvm/tools/clang/lib/Headers/msa.h
new file mode 100644
index 000000000000..da680f5ca9ee
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/msa.h
@@ -0,0 +1,583 @@
+/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _MSA_H
+#define _MSA_H 1
+
+#if defined(__mips_msa)
+typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
+typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));
+typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));
+typedef short v8i16 __attribute__((vector_size(16), aligned(16)));
+typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));
+typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));
+typedef int v4i32 __attribute__((vector_size(16), aligned(16)));
+typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));
+typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));
+typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));
+typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));
+typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));
+typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));
+typedef float v4f32 __attribute__((vector_size(16), aligned(16)));
+typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));
+typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));
+typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));
+
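+/* Editor's sketch (not part of the new header): these typedefs are ordinary
+ * GCC-style vector types, so element-wise operators work alongside the
+ * builtins aliased below. The function name is hypothetical. */
+static __inline__ v4i32 __example_addv(v4i32 __a, v4i32 __b)
+{
+  return __builtin_msa_addv_w(__a, __b); /* same result as __a + __b */
+}
+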
+#define __msa_sll_b __builtin_msa_sll_b
+#define __msa_sll_h __builtin_msa_sll_h
+#define __msa_sll_w __builtin_msa_sll_w
+#define __msa_sll_d __builtin_msa_sll_d
+#define __msa_slli_b __builtin_msa_slli_b
+#define __msa_slli_h __builtin_msa_slli_h
+#define __msa_slli_w __builtin_msa_slli_w
+#define __msa_slli_d __builtin_msa_slli_d
+#define __msa_sra_b __builtin_msa_sra_b
+#define __msa_sra_h __builtin_msa_sra_h
+#define __msa_sra_w __builtin_msa_sra_w
+#define __msa_sra_d __builtin_msa_sra_d
+#define __msa_srai_b __builtin_msa_srai_b
+#define __msa_srai_h __builtin_msa_srai_h
+#define __msa_srai_w __builtin_msa_srai_w
+#define __msa_srai_d __builtin_msa_srai_d
+#define __msa_srar_b __builtin_msa_srar_b
+#define __msa_srar_h __builtin_msa_srar_h
+#define __msa_srar_w __builtin_msa_srar_w
+#define __msa_srar_d __builtin_msa_srar_d
+#define __msa_srari_b __builtin_msa_srari_b
+#define __msa_srari_h __builtin_msa_srari_h
+#define __msa_srari_w __builtin_msa_srari_w
+#define __msa_srari_d __builtin_msa_srari_d
+#define __msa_srl_b __builtin_msa_srl_b
+#define __msa_srl_h __builtin_msa_srl_h
+#define __msa_srl_w __builtin_msa_srl_w
+#define __msa_srl_d __builtin_msa_srl_d
+#define __msa_srli_b __builtin_msa_srli_b
+#define __msa_srli_h __builtin_msa_srli_h
+#define __msa_srli_w __builtin_msa_srli_w
+#define __msa_srli_d __builtin_msa_srli_d
+#define __msa_srlr_b __builtin_msa_srlr_b
+#define __msa_srlr_h __builtin_msa_srlr_h
+#define __msa_srlr_w __builtin_msa_srlr_w
+#define __msa_srlr_d __builtin_msa_srlr_d
+#define __msa_srlri_b __builtin_msa_srlri_b
+#define __msa_srlri_h __builtin_msa_srlri_h
+#define __msa_srlri_w __builtin_msa_srlri_w
+#define __msa_srlri_d __builtin_msa_srlri_d
+#define __msa_bclr_b __builtin_msa_bclr_b
+#define __msa_bclr_h __builtin_msa_bclr_h
+#define __msa_bclr_w __builtin_msa_bclr_w
+#define __msa_bclr_d __builtin_msa_bclr_d
+#define __msa_bclri_b __builtin_msa_bclri_b
+#define __msa_bclri_h __builtin_msa_bclri_h
+#define __msa_bclri_w __builtin_msa_bclri_w
+#define __msa_bclri_d __builtin_msa_bclri_d
+#define __msa_bset_b __builtin_msa_bset_b
+#define __msa_bset_h __builtin_msa_bset_h
+#define __msa_bset_w __builtin_msa_bset_w
+#define __msa_bset_d __builtin_msa_bset_d
+#define __msa_bseti_b __builtin_msa_bseti_b
+#define __msa_bseti_h __builtin_msa_bseti_h
+#define __msa_bseti_w __builtin_msa_bseti_w
+#define __msa_bseti_d __builtin_msa_bseti_d
+#define __msa_bneg_b __builtin_msa_bneg_b
+#define __msa_bneg_h __builtin_msa_bneg_h
+#define __msa_bneg_w __builtin_msa_bneg_w
+#define __msa_bneg_d __builtin_msa_bneg_d
+#define __msa_bnegi_b __builtin_msa_bnegi_b
+#define __msa_bnegi_h __builtin_msa_bnegi_h
+#define __msa_bnegi_w __builtin_msa_bnegi_w
+#define __msa_bnegi_d __builtin_msa_bnegi_d
+#define __msa_binsl_b __builtin_msa_binsl_b
+#define __msa_binsl_h __builtin_msa_binsl_h
+#define __msa_binsl_w __builtin_msa_binsl_w
+#define __msa_binsl_d __builtin_msa_binsl_d
+#define __msa_binsli_b __builtin_msa_binsli_b
+#define __msa_binsli_h __builtin_msa_binsli_h
+#define __msa_binsli_w __builtin_msa_binsli_w
+#define __msa_binsli_d __builtin_msa_binsli_d
+#define __msa_binsr_b __builtin_msa_binsr_b
+#define __msa_binsr_h __builtin_msa_binsr_h
+#define __msa_binsr_w __builtin_msa_binsr_w
+#define __msa_binsr_d __builtin_msa_binsr_d
+#define __msa_binsri_b __builtin_msa_binsri_b
+#define __msa_binsri_h __builtin_msa_binsri_h
+#define __msa_binsri_w __builtin_msa_binsri_w
+#define __msa_binsri_d __builtin_msa_binsri_d
+#define __msa_addv_b __builtin_msa_addv_b
+#define __msa_addv_h __builtin_msa_addv_h
+#define __msa_addv_w __builtin_msa_addv_w
+#define __msa_addv_d __builtin_msa_addv_d
+#define __msa_addvi_b __builtin_msa_addvi_b
+#define __msa_addvi_h __builtin_msa_addvi_h
+#define __msa_addvi_w __builtin_msa_addvi_w
+#define __msa_addvi_d __builtin_msa_addvi_d
+#define __msa_subv_b __builtin_msa_subv_b
+#define __msa_subv_h __builtin_msa_subv_h
+#define __msa_subv_w __builtin_msa_subv_w
+#define __msa_subv_d __builtin_msa_subv_d
+#define __msa_subvi_b __builtin_msa_subvi_b
+#define __msa_subvi_h __builtin_msa_subvi_h
+#define __msa_subvi_w __builtin_msa_subvi_w
+#define __msa_subvi_d __builtin_msa_subvi_d
+#define __msa_max_s_b __builtin_msa_max_s_b
+#define __msa_max_s_h __builtin_msa_max_s_h
+#define __msa_max_s_w __builtin_msa_max_s_w
+#define __msa_max_s_d __builtin_msa_max_s_d
+#define __msa_maxi_s_b __builtin_msa_maxi_s_b
+#define __msa_maxi_s_h __builtin_msa_maxi_s_h
+#define __msa_maxi_s_w __builtin_msa_maxi_s_w
+#define __msa_maxi_s_d __builtin_msa_maxi_s_d
+#define __msa_max_u_b __builtin_msa_max_u_b
+#define __msa_max_u_h __builtin_msa_max_u_h
+#define __msa_max_u_w __builtin_msa_max_u_w
+#define __msa_max_u_d __builtin_msa_max_u_d
+#define __msa_maxi_u_b __builtin_msa_maxi_u_b
+#define __msa_maxi_u_h __builtin_msa_maxi_u_h
+#define __msa_maxi_u_w __builtin_msa_maxi_u_w
+#define __msa_maxi_u_d __builtin_msa_maxi_u_d
+#define __msa_min_s_b __builtin_msa_min_s_b
+#define __msa_min_s_h __builtin_msa_min_s_h
+#define __msa_min_s_w __builtin_msa_min_s_w
+#define __msa_min_s_d __builtin_msa_min_s_d
+#define __msa_mini_s_b __builtin_msa_mini_s_b
+#define __msa_mini_s_h __builtin_msa_mini_s_h
+#define __msa_mini_s_w __builtin_msa_mini_s_w
+#define __msa_mini_s_d __builtin_msa_mini_s_d
+#define __msa_min_u_b __builtin_msa_min_u_b
+#define __msa_min_u_h __builtin_msa_min_u_h
+#define __msa_min_u_w __builtin_msa_min_u_w
+#define __msa_min_u_d __builtin_msa_min_u_d
+#define __msa_mini_u_b __builtin_msa_mini_u_b
+#define __msa_mini_u_h __builtin_msa_mini_u_h
+#define __msa_mini_u_w __builtin_msa_mini_u_w
+#define __msa_mini_u_d __builtin_msa_mini_u_d
+#define __msa_max_a_b __builtin_msa_max_a_b
+#define __msa_max_a_h __builtin_msa_max_a_h
+#define __msa_max_a_w __builtin_msa_max_a_w
+#define __msa_max_a_d __builtin_msa_max_a_d
+#define __msa_min_a_b __builtin_msa_min_a_b
+#define __msa_min_a_h __builtin_msa_min_a_h
+#define __msa_min_a_w __builtin_msa_min_a_w
+#define __msa_min_a_d __builtin_msa_min_a_d
+#define __msa_ceq_b __builtin_msa_ceq_b
+#define __msa_ceq_h __builtin_msa_ceq_h
+#define __msa_ceq_w __builtin_msa_ceq_w
+#define __msa_ceq_d __builtin_msa_ceq_d
+#define __msa_ceqi_b __builtin_msa_ceqi_b
+#define __msa_ceqi_h __builtin_msa_ceqi_h
+#define __msa_ceqi_w __builtin_msa_ceqi_w
+#define __msa_ceqi_d __builtin_msa_ceqi_d
+#define __msa_clt_s_b __builtin_msa_clt_s_b
+#define __msa_clt_s_h __builtin_msa_clt_s_h
+#define __msa_clt_s_w __builtin_msa_clt_s_w
+#define __msa_clt_s_d __builtin_msa_clt_s_d
+#define __msa_clti_s_b __builtin_msa_clti_s_b
+#define __msa_clti_s_h __builtin_msa_clti_s_h
+#define __msa_clti_s_w __builtin_msa_clti_s_w
+#define __msa_clti_s_d __builtin_msa_clti_s_d
+#define __msa_clt_u_b __builtin_msa_clt_u_b
+#define __msa_clt_u_h __builtin_msa_clt_u_h
+#define __msa_clt_u_w __builtin_msa_clt_u_w
+#define __msa_clt_u_d __builtin_msa_clt_u_d
+#define __msa_clti_u_b __builtin_msa_clti_u_b
+#define __msa_clti_u_h __builtin_msa_clti_u_h
+#define __msa_clti_u_w __builtin_msa_clti_u_w
+#define __msa_clti_u_d __builtin_msa_clti_u_d
+#define __msa_cle_s_b __builtin_msa_cle_s_b
+#define __msa_cle_s_h __builtin_msa_cle_s_h
+#define __msa_cle_s_w __builtin_msa_cle_s_w
+#define __msa_cle_s_d __builtin_msa_cle_s_d
+#define __msa_clei_s_b __builtin_msa_clei_s_b
+#define __msa_clei_s_h __builtin_msa_clei_s_h
+#define __msa_clei_s_w __builtin_msa_clei_s_w
+#define __msa_clei_s_d __builtin_msa_clei_s_d
+#define __msa_cle_u_b __builtin_msa_cle_u_b
+#define __msa_cle_u_h __builtin_msa_cle_u_h
+#define __msa_cle_u_w __builtin_msa_cle_u_w
+#define __msa_cle_u_d __builtin_msa_cle_u_d
+#define __msa_clei_u_b __builtin_msa_clei_u_b
+#define __msa_clei_u_h __builtin_msa_clei_u_h
+#define __msa_clei_u_w __builtin_msa_clei_u_w
+#define __msa_clei_u_d __builtin_msa_clei_u_d
+#define __msa_ld_b __builtin_msa_ld_b
+#define __msa_ld_h __builtin_msa_ld_h
+#define __msa_ld_w __builtin_msa_ld_w
+#define __msa_ld_d __builtin_msa_ld_d
+#define __msa_st_b __builtin_msa_st_b
+#define __msa_st_h __builtin_msa_st_h
+#define __msa_st_w __builtin_msa_st_w
+#define __msa_st_d __builtin_msa_st_d
+#define __msa_sat_s_b __builtin_msa_sat_s_b
+#define __msa_sat_s_h __builtin_msa_sat_s_h
+#define __msa_sat_s_w __builtin_msa_sat_s_w
+#define __msa_sat_s_d __builtin_msa_sat_s_d
+#define __msa_sat_u_b __builtin_msa_sat_u_b
+#define __msa_sat_u_h __builtin_msa_sat_u_h
+#define __msa_sat_u_w __builtin_msa_sat_u_w
+#define __msa_sat_u_d __builtin_msa_sat_u_d
+#define __msa_add_a_b __builtin_msa_add_a_b
+#define __msa_add_a_h __builtin_msa_add_a_h
+#define __msa_add_a_w __builtin_msa_add_a_w
+#define __msa_add_a_d __builtin_msa_add_a_d
+#define __msa_adds_a_b __builtin_msa_adds_a_b
+#define __msa_adds_a_h __builtin_msa_adds_a_h
+#define __msa_adds_a_w __builtin_msa_adds_a_w
+#define __msa_adds_a_d __builtin_msa_adds_a_d
+#define __msa_adds_s_b __builtin_msa_adds_s_b
+#define __msa_adds_s_h __builtin_msa_adds_s_h
+#define __msa_adds_s_w __builtin_msa_adds_s_w
+#define __msa_adds_s_d __builtin_msa_adds_s_d
+#define __msa_adds_u_b __builtin_msa_adds_u_b
+#define __msa_adds_u_h __builtin_msa_adds_u_h
+#define __msa_adds_u_w __builtin_msa_adds_u_w
+#define __msa_adds_u_d __builtin_msa_adds_u_d
+#define __msa_ave_s_b __builtin_msa_ave_s_b
+#define __msa_ave_s_h __builtin_msa_ave_s_h
+#define __msa_ave_s_w __builtin_msa_ave_s_w
+#define __msa_ave_s_d __builtin_msa_ave_s_d
+#define __msa_ave_u_b __builtin_msa_ave_u_b
+#define __msa_ave_u_h __builtin_msa_ave_u_h
+#define __msa_ave_u_w __builtin_msa_ave_u_w
+#define __msa_ave_u_d __builtin_msa_ave_u_d
+#define __msa_aver_s_b __builtin_msa_aver_s_b
+#define __msa_aver_s_h __builtin_msa_aver_s_h
+#define __msa_aver_s_w __builtin_msa_aver_s_w
+#define __msa_aver_s_d __builtin_msa_aver_s_d
+#define __msa_aver_u_b __builtin_msa_aver_u_b
+#define __msa_aver_u_h __builtin_msa_aver_u_h
+#define __msa_aver_u_w __builtin_msa_aver_u_w
+#define __msa_aver_u_d __builtin_msa_aver_u_d
+#define __msa_subs_s_b __builtin_msa_subs_s_b
+#define __msa_subs_s_h __builtin_msa_subs_s_h
+#define __msa_subs_s_w __builtin_msa_subs_s_w
+#define __msa_subs_s_d __builtin_msa_subs_s_d
+#define __msa_subs_u_b __builtin_msa_subs_u_b
+#define __msa_subs_u_h __builtin_msa_subs_u_h
+#define __msa_subs_u_w __builtin_msa_subs_u_w
+#define __msa_subs_u_d __builtin_msa_subs_u_d
+#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b
+#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h
+#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w
+#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d
+#define __msa_subsus_u_b __builtin_msa_subsus_u_b
+#define __msa_subsus_u_h __builtin_msa_subsus_u_h
+#define __msa_subsus_u_w __builtin_msa_subsus_u_w
+#define __msa_subsus_u_d __builtin_msa_subsus_u_d
+#define __msa_asub_s_b __builtin_msa_asub_s_b
+#define __msa_asub_s_h __builtin_msa_asub_s_h
+#define __msa_asub_s_w __builtin_msa_asub_s_w
+#define __msa_asub_s_d __builtin_msa_asub_s_d
+#define __msa_asub_u_b __builtin_msa_asub_u_b
+#define __msa_asub_u_h __builtin_msa_asub_u_h
+#define __msa_asub_u_w __builtin_msa_asub_u_w
+#define __msa_asub_u_d __builtin_msa_asub_u_d
+#define __msa_mulv_b __builtin_msa_mulv_b
+#define __msa_mulv_h __builtin_msa_mulv_h
+#define __msa_mulv_w __builtin_msa_mulv_w
+#define __msa_mulv_d __builtin_msa_mulv_d
+#define __msa_maddv_b __builtin_msa_maddv_b
+#define __msa_maddv_h __builtin_msa_maddv_h
+#define __msa_maddv_w __builtin_msa_maddv_w
+#define __msa_maddv_d __builtin_msa_maddv_d
+#define __msa_msubv_b __builtin_msa_msubv_b
+#define __msa_msubv_h __builtin_msa_msubv_h
+#define __msa_msubv_w __builtin_msa_msubv_w
+#define __msa_msubv_d __builtin_msa_msubv_d
+#define __msa_div_s_b __builtin_msa_div_s_b
+#define __msa_div_s_h __builtin_msa_div_s_h
+#define __msa_div_s_w __builtin_msa_div_s_w
+#define __msa_div_s_d __builtin_msa_div_s_d
+#define __msa_div_u_b __builtin_msa_div_u_b
+#define __msa_div_u_h __builtin_msa_div_u_h
+#define __msa_div_u_w __builtin_msa_div_u_w
+#define __msa_div_u_d __builtin_msa_div_u_d
+#define __msa_hadd_s_h __builtin_msa_hadd_s_h
+#define __msa_hadd_s_w __builtin_msa_hadd_s_w
+#define __msa_hadd_s_d __builtin_msa_hadd_s_d
+#define __msa_hadd_u_h __builtin_msa_hadd_u_h
+#define __msa_hadd_u_w __builtin_msa_hadd_u_w
+#define __msa_hadd_u_d __builtin_msa_hadd_u_d
+#define __msa_hsub_s_h __builtin_msa_hsub_s_h
+#define __msa_hsub_s_w __builtin_msa_hsub_s_w
+#define __msa_hsub_s_d __builtin_msa_hsub_s_d
+#define __msa_hsub_u_h __builtin_msa_hsub_u_h
+#define __msa_hsub_u_w __builtin_msa_hsub_u_w
+#define __msa_hsub_u_d __builtin_msa_hsub_u_d
+#define __msa_mod_s_b __builtin_msa_mod_s_b
+#define __msa_mod_s_h __builtin_msa_mod_s_h
+#define __msa_mod_s_w __builtin_msa_mod_s_w
+#define __msa_mod_s_d __builtin_msa_mod_s_d
+#define __msa_mod_u_b __builtin_msa_mod_u_b
+#define __msa_mod_u_h __builtin_msa_mod_u_h
+#define __msa_mod_u_w __builtin_msa_mod_u_w
+#define __msa_mod_u_d __builtin_msa_mod_u_d
+#define __msa_dotp_s_h __builtin_msa_dotp_s_h
+#define __msa_dotp_s_w __builtin_msa_dotp_s_w
+#define __msa_dotp_s_d __builtin_msa_dotp_s_d
+#define __msa_dotp_u_h __builtin_msa_dotp_u_h
+#define __msa_dotp_u_w __builtin_msa_dotp_u_w
+#define __msa_dotp_u_d __builtin_msa_dotp_u_d
+#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h
+#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w
+#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d
+#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h
+#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w
+#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d
+#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h
+#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w
+#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d
+#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h
+#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w
+#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d
+#define __msa_sld_b __builtin_msa_sld_b
+#define __msa_sld_h __builtin_msa_sld_h
+#define __msa_sld_w __builtin_msa_sld_w
+#define __msa_sld_d __builtin_msa_sld_d
+#define __msa_sldi_b __builtin_msa_sldi_b
+#define __msa_sldi_h __builtin_msa_sldi_h
+#define __msa_sldi_w __builtin_msa_sldi_w
+#define __msa_sldi_d __builtin_msa_sldi_d
+#define __msa_splat_b __builtin_msa_splat_b
+#define __msa_splat_h __builtin_msa_splat_h
+#define __msa_splat_w __builtin_msa_splat_w
+#define __msa_splat_d __builtin_msa_splat_d
+#define __msa_splati_b __builtin_msa_splati_b
+#define __msa_splati_h __builtin_msa_splati_h
+#define __msa_splati_w __builtin_msa_splati_w
+#define __msa_splati_d __builtin_msa_splati_d
+#define __msa_pckev_b __builtin_msa_pckev_b
+#define __msa_pckev_h __builtin_msa_pckev_h
+#define __msa_pckev_w __builtin_msa_pckev_w
+#define __msa_pckev_d __builtin_msa_pckev_d
+#define __msa_pckod_b __builtin_msa_pckod_b
+#define __msa_pckod_h __builtin_msa_pckod_h
+#define __msa_pckod_w __builtin_msa_pckod_w
+#define __msa_pckod_d __builtin_msa_pckod_d
+#define __msa_ilvl_b __builtin_msa_ilvl_b
+#define __msa_ilvl_h __builtin_msa_ilvl_h
+#define __msa_ilvl_w __builtin_msa_ilvl_w
+#define __msa_ilvl_d __builtin_msa_ilvl_d
+#define __msa_ilvr_b __builtin_msa_ilvr_b
+#define __msa_ilvr_h __builtin_msa_ilvr_h
+#define __msa_ilvr_w __builtin_msa_ilvr_w
+#define __msa_ilvr_d __builtin_msa_ilvr_d
+#define __msa_ilvev_b __builtin_msa_ilvev_b
+#define __msa_ilvev_h __builtin_msa_ilvev_h
+#define __msa_ilvev_w __builtin_msa_ilvev_w
+#define __msa_ilvev_d __builtin_msa_ilvev_d
+#define __msa_ilvod_b __builtin_msa_ilvod_b
+#define __msa_ilvod_h __builtin_msa_ilvod_h
+#define __msa_ilvod_w __builtin_msa_ilvod_w
+#define __msa_ilvod_d __builtin_msa_ilvod_d
+#define __msa_vshf_b __builtin_msa_vshf_b
+#define __msa_vshf_h __builtin_msa_vshf_h
+#define __msa_vshf_w __builtin_msa_vshf_w
+#define __msa_vshf_d __builtin_msa_vshf_d
+#define __msa_and_v __builtin_msa_and_v
+#define __msa_andi_b __builtin_msa_andi_b
+#define __msa_or_v __builtin_msa_or_v
+#define __msa_ori_b __builtin_msa_ori_b
+#define __msa_nor_v __builtin_msa_nor_v
+#define __msa_nori_b __builtin_msa_nori_b
+#define __msa_xor_v __builtin_msa_xor_v
+#define __msa_xori_b __builtin_msa_xori_b
+#define __msa_bmnz_v __builtin_msa_bmnz_v
+#define __msa_bmnzi_b __builtin_msa_bmnzi_b
+#define __msa_bmz_v __builtin_msa_bmz_v
+#define __msa_bmzi_b __builtin_msa_bmzi_b
+#define __msa_bsel_v __builtin_msa_bsel_v
+#define __msa_bseli_b __builtin_msa_bseli_b
+#define __msa_shf_b __builtin_msa_shf_b
+#define __msa_shf_h __builtin_msa_shf_h
+#define __msa_shf_w __builtin_msa_shf_w
+#define __msa_test_bnz_v __builtin_msa_bnz_v
+#define __msa_test_bz_v __builtin_msa_bz_v
+#define __msa_fill_b __builtin_msa_fill_b
+#define __msa_fill_h __builtin_msa_fill_h
+#define __msa_fill_w __builtin_msa_fill_w
+#define __msa_fill_d __builtin_msa_fill_d
+#define __msa_pcnt_b __builtin_msa_pcnt_b
+#define __msa_pcnt_h __builtin_msa_pcnt_h
+#define __msa_pcnt_w __builtin_msa_pcnt_w
+#define __msa_pcnt_d __builtin_msa_pcnt_d
+#define __msa_nloc_b __builtin_msa_nloc_b
+#define __msa_nloc_h __builtin_msa_nloc_h
+#define __msa_nloc_w __builtin_msa_nloc_w
+#define __msa_nloc_d __builtin_msa_nloc_d
+#define __msa_nlzc_b __builtin_msa_nlzc_b
+#define __msa_nlzc_h __builtin_msa_nlzc_h
+#define __msa_nlzc_w __builtin_msa_nlzc_w
+#define __msa_nlzc_d __builtin_msa_nlzc_d
+#define __msa_copy_s_b __builtin_msa_copy_s_b
+#define __msa_copy_s_h __builtin_msa_copy_s_h
+#define __msa_copy_s_w __builtin_msa_copy_s_w
+#define __msa_copy_s_d __builtin_msa_copy_s_d
+#define __msa_copy_u_b __builtin_msa_copy_u_b
+#define __msa_copy_u_h __builtin_msa_copy_u_h
+#define __msa_copy_u_w __builtin_msa_copy_u_w
+#define __msa_copy_u_d __builtin_msa_copy_u_d
+#define __msa_insert_b __builtin_msa_insert_b
+#define __msa_insert_h __builtin_msa_insert_h
+#define __msa_insert_w __builtin_msa_insert_w
+#define __msa_insert_d __builtin_msa_insert_d
+#define __msa_insve_b __builtin_msa_insve_b
+#define __msa_insve_h __builtin_msa_insve_h
+#define __msa_insve_w __builtin_msa_insve_w
+#define __msa_insve_d __builtin_msa_insve_d
+#define __msa_test_bnz_b __builtin_msa_bnz_b
+#define __msa_test_bnz_h __builtin_msa_bnz_h
+#define __msa_test_bnz_w __builtin_msa_bnz_w
+#define __msa_test_bnz_d __builtin_msa_bnz_d
+#define __msa_test_bz_b __builtin_msa_bz_b
+#define __msa_test_bz_h __builtin_msa_bz_h
+#define __msa_test_bz_w __builtin_msa_bz_w
+#define __msa_test_bz_d __builtin_msa_bz_d
+#define __msa_ldi_b __builtin_msa_ldi_b
+#define __msa_ldi_h __builtin_msa_ldi_h
+#define __msa_ldi_w __builtin_msa_ldi_w
+#define __msa_ldi_d __builtin_msa_ldi_d
+#define __msa_fcaf_w __builtin_msa_fcaf_w
+#define __msa_fcaf_d __builtin_msa_fcaf_d
+#define __msa_fcor_w __builtin_msa_fcor_w
+#define __msa_fcor_d __builtin_msa_fcor_d
+#define __msa_fcun_w __builtin_msa_fcun_w
+#define __msa_fcun_d __builtin_msa_fcun_d
+#define __msa_fcune_w __builtin_msa_fcune_w
+#define __msa_fcune_d __builtin_msa_fcune_d
+#define __msa_fcueq_w __builtin_msa_fcueq_w
+#define __msa_fcueq_d __builtin_msa_fcueq_d
+#define __msa_fceq_w __builtin_msa_fceq_w
+#define __msa_fceq_d __builtin_msa_fceq_d
+#define __msa_fcne_w __builtin_msa_fcne_w
+#define __msa_fcne_d __builtin_msa_fcne_d
+#define __msa_fclt_w __builtin_msa_fclt_w
+#define __msa_fclt_d __builtin_msa_fclt_d
+#define __msa_fcult_w __builtin_msa_fcult_w
+#define __msa_fcult_d __builtin_msa_fcult_d
+#define __msa_fcle_w __builtin_msa_fcle_w
+#define __msa_fcle_d __builtin_msa_fcle_d
+#define __msa_fcule_w __builtin_msa_fcule_w
+#define __msa_fcule_d __builtin_msa_fcule_d
+#define __msa_fsaf_w __builtin_msa_fsaf_w
+#define __msa_fsaf_d __builtin_msa_fsaf_d
+#define __msa_fsor_w __builtin_msa_fsor_w
+#define __msa_fsor_d __builtin_msa_fsor_d
+#define __msa_fsun_w __builtin_msa_fsun_w
+#define __msa_fsun_d __builtin_msa_fsun_d
+#define __msa_fsune_w __builtin_msa_fsune_w
+#define __msa_fsune_d __builtin_msa_fsune_d
+#define __msa_fsueq_w __builtin_msa_fsueq_w
+#define __msa_fsueq_d __builtin_msa_fsueq_d
+#define __msa_fseq_w __builtin_msa_fseq_w
+#define __msa_fseq_d __builtin_msa_fseq_d
+#define __msa_fsne_w __builtin_msa_fsne_w
+#define __msa_fsne_d __builtin_msa_fsne_d
+#define __msa_fslt_w __builtin_msa_fslt_w
+#define __msa_fslt_d __builtin_msa_fslt_d
+#define __msa_fsult_w __builtin_msa_fsult_w
+#define __msa_fsult_d __builtin_msa_fsult_d
+#define __msa_fsle_w __builtin_msa_fsle_w
+#define __msa_fsle_d __builtin_msa_fsle_d
+#define __msa_fsule_w __builtin_msa_fsule_w
+#define __msa_fsule_d __builtin_msa_fsule_d
+#define __msa_fadd_w __builtin_msa_fadd_w
+#define __msa_fadd_d __builtin_msa_fadd_d
+#define __msa_fsub_w __builtin_msa_fsub_w
+#define __msa_fsub_d __builtin_msa_fsub_d
+#define __msa_fmul_w __builtin_msa_fmul_w
+#define __msa_fmul_d __builtin_msa_fmul_d
+#define __msa_fdiv_w __builtin_msa_fdiv_w
+#define __msa_fdiv_d __builtin_msa_fdiv_d
+#define __msa_fmadd_w __builtin_msa_fmadd_w
+#define __msa_fmadd_d __builtin_msa_fmadd_d
+#define __msa_fmsub_w __builtin_msa_fmsub_w
+#define __msa_fmsub_d __builtin_msa_fmsub_d
+#define __msa_fexp2_w __builtin_msa_fexp2_w
+#define __msa_fexp2_d __builtin_msa_fexp2_d
+#define __msa_fexdo_h __builtin_msa_fexdo_h
+#define __msa_fexdo_w __builtin_msa_fexdo_w
+#define __msa_ftq_h __builtin_msa_ftq_h
+#define __msa_ftq_w __builtin_msa_ftq_w
+#define __msa_fmin_w __builtin_msa_fmin_w
+#define __msa_fmin_d __builtin_msa_fmin_d
+#define __msa_fmin_a_w __builtin_msa_fmin_a_w
+#define __msa_fmin_a_d __builtin_msa_fmin_a_d
+#define __msa_fmax_w __builtin_msa_fmax_w
+#define __msa_fmax_d __builtin_msa_fmax_d
+#define __msa_fmax_a_w __builtin_msa_fmax_a_w
+#define __msa_fmax_a_d __builtin_msa_fmax_a_d
+#define __msa_mul_q_h __builtin_msa_mul_q_h
+#define __msa_mul_q_w __builtin_msa_mul_q_w
+#define __msa_mulr_q_h __builtin_msa_mulr_q_h
+#define __msa_mulr_q_w __builtin_msa_mulr_q_w
+#define __msa_madd_q_h __builtin_msa_madd_q_h
+#define __msa_madd_q_w __builtin_msa_madd_q_w
+#define __msa_maddr_q_h __builtin_msa_maddr_q_h
+#define __msa_maddr_q_w __builtin_msa_maddr_q_w
+#define __msa_msub_q_h __builtin_msa_msub_q_h
+#define __msa_msub_q_w __builtin_msa_msub_q_w
+#define __msa_msubr_q_h __builtin_msa_msubr_q_h
+#define __msa_msubr_q_w __builtin_msa_msubr_q_w
+#define __msa_fclass_w __builtin_msa_fclass_w
+#define __msa_fclass_d __builtin_msa_fclass_d
+#define __msa_fsqrt_w __builtin_msa_fsqrt_w
+#define __msa_fsqrt_d __builtin_msa_fsqrt_d
+#define __msa_frcp_w __builtin_msa_frcp_w
+#define __msa_frcp_d __builtin_msa_frcp_d
+#define __msa_frint_w __builtin_msa_frint_w
+#define __msa_frint_d __builtin_msa_frint_d
+#define __msa_frsqrt_w __builtin_msa_frsqrt_w
+#define __msa_frsqrt_d __builtin_msa_frsqrt_d
+#define __msa_flog2_w __builtin_msa_flog2_w
+#define __msa_flog2_d __builtin_msa_flog2_d
+#define __msa_fexupl_w __builtin_msa_fexupl_w
+#define __msa_fexupl_d __builtin_msa_fexupl_d
+#define __msa_fexupr_w __builtin_msa_fexupr_w
+#define __msa_fexupr_d __builtin_msa_fexupr_d
+#define __msa_ffql_w __builtin_msa_ffql_w
+#define __msa_ffql_d __builtin_msa_ffql_d
+#define __msa_ffqr_w __builtin_msa_ffqr_w
+#define __msa_ffqr_d __builtin_msa_ffqr_d
+#define __msa_ftint_s_w __builtin_msa_ftint_s_w
+#define __msa_ftint_s_d __builtin_msa_ftint_s_d
+#define __msa_ftint_u_w __builtin_msa_ftint_u_w
+#define __msa_ftint_u_d __builtin_msa_ftint_u_d
+#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w
+#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d
+#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w
+#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d
+#define __msa_ffint_s_w __builtin_msa_ffint_s_w
+#define __msa_ffint_s_d __builtin_msa_ffint_s_d
+#define __msa_ffint_u_w __builtin_msa_ffint_u_w
+#define __msa_ffint_u_d __builtin_msa_ffint_u_d
+#define __msa_cfcmsa __builtin_msa_cfcmsa
+#define __msa_move_v __builtin_msa_move_v
+#define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float
+#define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double
+#define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float
+#define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double
+#endif /* defined(__mips_msa) */
+#endif /* _MSA_H */
diff --git a/contrib/llvm/tools/clang/lib/Headers/mwaitxintrin.h b/contrib/llvm/tools/clang/lib/Headers/mwaitxintrin.h
new file mode 100644
index 000000000000..635f2ac6cab5
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/mwaitxintrin.h
@@ -0,0 +1,47 @@
+/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __X86INTRIN_H
+#error "Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _MWAITXINTRIN_H
+#define _MWAITXINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mwaitx")))
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)
+{
+ __builtin_ia32_monitorx((void *)__p, __extensions, __hints);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)
+{
+ __builtin_ia32_mwaitx(__extensions, __hints, __clock);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* _MWAITXINTRIN_H */
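A minimal usage sketch of the two intrinsics above (a hypothetical helper, not part of the header): spin on a flag, arming the monitor and sleeping with MWAITX between checks. It assumes an AMD CPU with the mwaitx feature and compilation with -mmwaitx; the zero extensions/hints/clock arguments request the plain, untimed wait, and the AMD manuals document the other encodings.

    #include <x86intrin.h>

    static void wait_for_flag(volatile int *flag)
    {
        while (!*flag) {
            _mm_monitorx((const void *)flag, 0, 0); /* arm monitor on flag's line */
            if (!*flag)                             /* re-check before sleeping   */
                _mm_mwaitx(0, 0, 0);                /* wait for a write or wakeup */
        }
    }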
diff --git a/contrib/llvm/tools/clang/lib/Headers/opencl-c.h b/contrib/llvm/tools/clang/lib/Headers/opencl-c.h
new file mode 100644
index 000000000000..802927490e7f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Headers/opencl-c.h
@@ -0,0 +1,16962 @@
+//===--- opencl-c.h - OpenCL C language builtin function header -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OPENCL_H_
+#define _OPENCL_H_
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#ifndef cl_khr_depth_images
+#define cl_khr_depth_images
+#endif //cl_khr_depth_images
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+#define __ovld __attribute__((overloadable))
+
+// Optimizations
+#define __purefn __attribute__((pure))
+#define __cnfn __attribute__((const))
+
+// built-in scalar data types:
+
+/**
+ * An unsigned 8-bit integer.
+ */
+typedef unsigned char uchar;
+
+/**
+ * An unsigned 16-bit integer.
+ */
+typedef unsigned short ushort;
+
+/**
+ * An unsigned 32-bit integer.
+ */
+typedef unsigned int uint;
+
+/**
+ * An unsigned 64-bit integer.
+ */
+typedef unsigned long ulong;
+
+/**
+ * The unsigned integer type of the result of the sizeof operator. This
+ * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS (defined in
+ * table 4.3) is 32 bits, and a 64-bit unsigned integer if
+ * CL_DEVICE_ADDRESS_BITS is 64 bits.
+ */
+typedef __SIZE_TYPE__ size_t;
+
+/**
+ * A signed integer type that is the result of subtracting two pointers.
+ * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS (defined in
+ * table 4.3) is 32 bits, and a 64-bit signed integer if
+ * CL_DEVICE_ADDRESS_BITS is 64 bits.
+ */
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+/**
+ * A signed integer type with the property that any valid pointer to
+ * void can be converted to this type, then converted back to pointer
+ * to void, and the result will compare equal to the original pointer.
+ */
+typedef __INTPTR_TYPE__ intptr_t;
+
+/**
+ * An unsigned integer type with the property that any valid pointer to
+ * void can be converted to this type, then converted back to pointer
+ * to void, and the result will compare equal to the original pointer.
+ */
+typedef __UINTPTR_TYPE__ uintptr_t;
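A worked illustration of the round-trip property the two comments above promise (a hypothetical kernel sketch, not part of the header):

    __kernel void roundtrip(__global int *p, __global int *ok)
    {
        uintptr_t bits = (uintptr_t)p;           /* pointer -> integer       */
        __global int *q = (__global int *)bits;  /* integer -> pointer       */
        *ok = (p == q);                          /* guaranteed to compare 1  */
    }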
+
+// built-in vector data types:
+typedef char char2 __attribute__((ext_vector_type(2)));
+typedef char char3 __attribute__((ext_vector_type(3)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+typedef char char8 __attribute__((ext_vector_type(8)));
+typedef char char16 __attribute__((ext_vector_type(16)));
+typedef uchar uchar2 __attribute__((ext_vector_type(2)));
+typedef uchar uchar3 __attribute__((ext_vector_type(3)));
+typedef uchar uchar4 __attribute__((ext_vector_type(4)));
+typedef uchar uchar8 __attribute__((ext_vector_type(8)));
+typedef uchar uchar16 __attribute__((ext_vector_type(16)));
+typedef short short2 __attribute__((ext_vector_type(2)));
+typedef short short3 __attribute__((ext_vector_type(3)));
+typedef short short4 __attribute__((ext_vector_type(4)));
+typedef short short8 __attribute__((ext_vector_type(8)));
+typedef short short16 __attribute__((ext_vector_type(16)));
+typedef ushort ushort2 __attribute__((ext_vector_type(2)));
+typedef ushort ushort3 __attribute__((ext_vector_type(3)));
+typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+typedef ushort ushort8 __attribute__((ext_vector_type(8)));
+typedef ushort ushort16 __attribute__((ext_vector_type(16)));
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef int int16 __attribute__((ext_vector_type(16)));
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint3 __attribute__((ext_vector_type(3)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
+typedef uint uint8 __attribute__((ext_vector_type(8)));
+typedef uint uint16 __attribute__((ext_vector_type(16)));
+typedef long long2 __attribute__((ext_vector_type(2)));
+typedef long long3 __attribute__((ext_vector_type(3)));
+typedef long long4 __attribute__((ext_vector_type(4)));
+typedef long long8 __attribute__((ext_vector_type(8)));
+typedef long long16 __attribute__((ext_vector_type(16)));
+typedef ulong ulong2 __attribute__((ext_vector_type(2)));
+typedef ulong ulong3 __attribute__((ext_vector_type(3)));
+typedef ulong ulong4 __attribute__((ext_vector_type(4)));
+typedef ulong ulong8 __attribute__((ext_vector_type(8)));
+typedef ulong ulong16 __attribute__((ext_vector_type(16)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef float float8 __attribute__((ext_vector_type(8)));
+typedef float float16 __attribute__((ext_vector_type(16)));
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+typedef half half2 __attribute__((ext_vector_type(2)));
+typedef half half3 __attribute__((ext_vector_type(3)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+typedef half half8 __attribute__((ext_vector_type(8)));
+typedef half half16 __attribute__((ext_vector_type(16)));
+#endif
+#ifdef cl_khr_fp64
+#if __OPENCL_C_VERSION__ < CL_VERSION_1_2
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef double double3 __attribute__((ext_vector_type(3)));
+typedef double double4 __attribute__((ext_vector_type(4)));
+typedef double double8 __attribute__((ext_vector_type(8)));
+typedef double double16 __attribute__((ext_vector_type(16)));
+#endif
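What these ext_vector_type typedefs buy in practice: vector literals, component swizzles, and element-wise operators. A short sketch (a hypothetical function, not from the header):

    float4 vec_demo(float4 a)
    {
        float4 b = (float4)(1.0f, 2.0f, 3.0f, 4.0f); /* vector literal      */
        float2 hi = a.zw;                            /* swizzle read        */
        b.xy = hi;                                   /* swizzled assignment */
        return a * b + b.wzyx;                       /* element-wise ops    */
    }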
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#define NULL ((void*)0)
+#endif
+
+/**
+ * The value of the largest finite (non-infinite) single-precision
+ * floating-point number.
+ */
+#define MAXFLOAT 0x1.fffffep127f
+
+/**
+ * A positive float constant expression. HUGE_VALF evaluates
+ * to +infinity. Used as an error value returned by the built-in
+ * math functions.
+ */
+#define HUGE_VALF (__builtin_huge_valf())
+
+/**
+ * A positive double constant expression. HUGE_VAL evaluates
+ * to +infinity. Used as an error value returned by the built-in
+ * math functions.
+ */
+#define HUGE_VAL (__builtin_huge_val())
+
+/**
+ * A constant expression of type float representing positive or
+ * unsigned infinity.
+ */
+#define INFINITY (__builtin_inff())
+
+/**
+ * A constant expression of type float representing a quiet NaN.
+ */
+#define NAN as_float(INT_MAX)
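Why as_float(INT_MAX) is a quiet NaN: INT_MAX is 0x7FFFFFFF, whose binary32 interpretation has sign 0, exponent 0xFF (all ones) and a nonzero mantissa, which IEEE 754 classifies as NaN. The same reinterpretation can be checked on the host (a sketch using memcpy in place of OpenCL's as_float):

    #include <assert.h>
    #include <limits.h>
    #include <math.h>
    #include <string.h>

    int main(void)
    {
        int bits = INT_MAX;           /* 0x7FFFFFFF                         */
        float f;
        memcpy(&f, &bits, sizeof f);  /* host-side stand-in for as_float()  */
        assert(isnan(f));             /* exponent all ones, mantissa != 0   */
        return 0;
    }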
+
+#define FP_ILOGB0 INT_MIN
+#define FP_ILOGBNAN INT_MAX
+
+#define FLT_DIG 6
+#define FLT_MANT_DIG 24
+#define FLT_MAX_10_EXP +38
+#define FLT_MAX_EXP +128
+#define FLT_MIN_10_EXP -37
+#define FLT_MIN_EXP -125
+#define FLT_RADIX 2
+#define FLT_MAX 0x1.fffffep127f
+#define FLT_MIN 0x1.0p-126f
+#define FLT_EPSILON 0x1.0p-23f
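For readers unused to C99 hexadecimal float literals (0xh.hhhpN is the hexadecimal significand times 2^N), the two extremes above decode as:

\[ \mathtt{FLT\_MAX} = \mathtt{0x1.fffffep127} = (2 - 2^{-23}) \cdot 2^{127} \approx 3.4028235 \times 10^{38} \]
\[ \mathtt{FLT\_EPSILON} = \mathtt{0x1.0p{-}23} = 2^{-23} \approx 1.1920929 \times 10^{-7} \]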
+
+#define M_E_F 2.71828182845904523536028747135266250f
+#define M_LOG2E_F 1.44269504088896340735992468100189214f
+#define M_LOG10E_F 0.434294481903251827651128918916605082f
+#define M_LN2_F 0.693147180559945309417232121458176568f
+#define M_LN10_F 2.30258509299404568401799145468436421f
+#define M_PI_F 3.14159265358979323846264338327950288f
+#define M_PI_2_F 1.57079632679489661923132169163975144f
+#define M_PI_4_F 0.785398163397448309615660845819875721f
+#define M_1_PI_F 0.318309886183790671537767526745028724f
+#define M_2_PI_F 0.636619772367581343075535053490057448f
+#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f
+#define M_SQRT2_F 1.41421356237309504880168872420969808f
+#define M_SQRT1_2_F 0.707106781186547524400844362104849039f
+
+#define DBL_DIG 15
+#define DBL_MANT_DIG 53
+#define DBL_MAX_10_EXP +308
+#define DBL_MAX_EXP +1024
+#define DBL_MIN_10_EXP -307
+#define DBL_MIN_EXP -1021
+#define DBL_RADIX 2
+#define DBL_MAX 0x1.fffffffffffffp1023
+#define DBL_MIN 0x1.0p-1022
+#define DBL_EPSILON 0x1.0p-52
+
+#define M_E 0x1.5bf0a8b145769p+1
+#define M_LOG2E 0x1.71547652b82fep+0
+#define M_LOG10E 0x1.bcb7b1526e50ep-2
+#define M_LN2 0x1.62e42fefa39efp-1
+#define M_LN10 0x1.26bb1bbb55516p+1
+#define M_PI 0x1.921fb54442d18p+1
+#define M_PI_2 0x1.921fb54442d18p+0
+#define M_PI_4 0x1.921fb54442d18p-1
+#define M_1_PI 0x1.45f306dc9c883p-2
+#define M_2_PI 0x1.45f306dc9c883p-1
+#define M_2_SQRTPI 0x1.20dd750429b6dp+0
+#define M_SQRT2 0x1.6a09e667f3bcdp+0
+#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
+
+#ifdef cl_khr_fp16
+
+#define HALF_DIG 3
+#define HALF_MANT_DIG 11
+#define HALF_MAX_10_EXP +4
+#define HALF_MAX_EXP +16
+#define HALF_MIN_10_EXP -4
+#define HALF_MIN_EXP -13
+#define HALF_RADIX 2
+#define HALF_MAX ((0x1.ffcp15h))
+#define HALF_MIN ((0x1.0p-14h))
+#define HALF_EPSILON ((0x1.0p-10h))
+
+#define M_E_H 2.71828182845904523536028747135266250h
+#define M_LOG2E_H 1.44269504088896340735992468100189214h
+#define M_LOG10E_H 0.434294481903251827651128918916605082h
+#define M_LN2_H 0.693147180559945309417232121458176568h
+#define M_LN10_H 2.30258509299404568401799145468436421h
+#define M_PI_H 3.14159265358979323846264338327950288h
+#define M_PI_2_H 1.57079632679489661923132169163975144h
+#define M_PI_4_H 0.785398163397448309615660845819875721h
+#define M_1_PI_H 0.318309886183790671537767526745028724h
+#define M_2_PI_H 0.636619772367581343075535053490057448h
+#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h
+#define M_SQRT2_H 1.41421356237309504880168872420969808h
+#define M_SQRT1_2_H 0.707106781186547524400844362104849039h
+
+#endif //cl_khr_fp16
+
+#define CHAR_BIT 8
+#define SCHAR_MAX 127
+#define SCHAR_MIN (-128)
+#define UCHAR_MAX 255
+#define CHAR_MAX SCHAR_MAX
+#define CHAR_MIN SCHAR_MIN
+#define USHRT_MAX 65535
+#define SHRT_MAX 32767
+#define SHRT_MIN (-32768)
+#define UINT_MAX 0xffffffff
+#define INT_MAX 2147483647
+#define INT_MIN (-2147483647-1)
+#define ULONG_MAX 0xffffffffffffffffUL
+#define LONG_MAX 0x7fffffffffffffffL
+#define LONG_MIN (-0x7fffffffffffffffL-1)
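A note on why INT_MIN and LONG_MIN are spelled as expressions: the literals 2147483648 and 0x8000000000000000 do not fit in int and long respectively, so a plain -2147483648 would be unary minus applied to a constant of a wider (or unsigned) type. Subtracting 1 from the largest positive value keeps each expression at its intended type (a host-side C11 check, as a sketch):

    #include <limits.h>

    /* Both sides have type int; -2147483647 and the final -1 each stay
       in range, so the expression never overflows. */
    _Static_assert(-2147483647 - 1 == INT_MIN, "two's complement int");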
+
+// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions
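The thousands of declarations that follow all instantiate one naming scheme, convert_<destType>[_sat][_<rounding>]: _sat clamps out-of-range values to the destination's limits, and the rounding suffixes are _rte (to nearest even), _rtz (toward zero), _rtp (toward +infinity) and _rtn (toward -infinity). With no suffix, conversions to integer types round toward zero and conversions to floating-point types round to nearest even. A few worked calls (a sketch, with results in comments):

    char a = convert_char_sat(300);          /* clamps: 300 -> 127 (CHAR_MAX)  */
    char b = convert_char_sat_rte(-200.5f);  /* rounds to -200, clamps to -128 */
    int  c = convert_int_rtn(-1.5f);         /* round toward -infinity -> -2   */
    int  d = convert_int_rtz(-1.5f);         /* round toward zero      -> -1   */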
+
+char __ovld __cnfn convert_char_rte(char);
+char __ovld __cnfn convert_char_sat_rte(char);
+char __ovld __cnfn convert_char_rtz(char);
+char __ovld __cnfn convert_char_sat_rtz(char);
+char __ovld __cnfn convert_char_rtp(char);
+char __ovld __cnfn convert_char_sat_rtp(char);
+char __ovld __cnfn convert_char_rtn(char);
+char __ovld __cnfn convert_char_sat_rtn(char);
+char __ovld __cnfn convert_char(char);
+char __ovld __cnfn convert_char_sat(char);
+char __ovld __cnfn convert_char_rte(uchar);
+char __ovld __cnfn convert_char_sat_rte(uchar);
+char __ovld __cnfn convert_char_rtz(uchar);
+char __ovld __cnfn convert_char_sat_rtz(uchar);
+char __ovld __cnfn convert_char_rtp(uchar);
+char __ovld __cnfn convert_char_sat_rtp(uchar);
+char __ovld __cnfn convert_char_rtn(uchar);
+char __ovld __cnfn convert_char_sat_rtn(uchar);
+char __ovld __cnfn convert_char(uchar);
+char __ovld __cnfn convert_char_sat(uchar);
+char __ovld __cnfn convert_char_rte(short);
+char __ovld __cnfn convert_char_sat_rte(short);
+char __ovld __cnfn convert_char_rtz(short);
+char __ovld __cnfn convert_char_sat_rtz(short);
+char __ovld __cnfn convert_char_rtp(short);
+char __ovld __cnfn convert_char_sat_rtp(short);
+char __ovld __cnfn convert_char_rtn(short);
+char __ovld __cnfn convert_char_sat_rtn(short);
+char __ovld __cnfn convert_char(short);
+char __ovld __cnfn convert_char_sat(short);
+char __ovld __cnfn convert_char_rte(ushort);
+char __ovld __cnfn convert_char_sat_rte(ushort);
+char __ovld __cnfn convert_char_rtz(ushort);
+char __ovld __cnfn convert_char_sat_rtz(ushort);
+char __ovld __cnfn convert_char_rtp(ushort);
+char __ovld __cnfn convert_char_sat_rtp(ushort);
+char __ovld __cnfn convert_char_rtn(ushort);
+char __ovld __cnfn convert_char_sat_rtn(ushort);
+char __ovld __cnfn convert_char(ushort);
+char __ovld __cnfn convert_char_sat(ushort);
+char __ovld __cnfn convert_char_rte(int);
+char __ovld __cnfn convert_char_sat_rte(int);
+char __ovld __cnfn convert_char_rtz(int);
+char __ovld __cnfn convert_char_sat_rtz(int);
+char __ovld __cnfn convert_char_rtp(int);
+char __ovld __cnfn convert_char_sat_rtp(int);
+char __ovld __cnfn convert_char_rtn(int);
+char __ovld __cnfn convert_char_sat_rtn(int);
+char __ovld __cnfn convert_char(int);
+char __ovld __cnfn convert_char_sat(int);
+char __ovld __cnfn convert_char_rte(uint);
+char __ovld __cnfn convert_char_sat_rte(uint);
+char __ovld __cnfn convert_char_rtz(uint);
+char __ovld __cnfn convert_char_sat_rtz(uint);
+char __ovld __cnfn convert_char_rtp(uint);
+char __ovld __cnfn convert_char_sat_rtp(uint);
+char __ovld __cnfn convert_char_rtn(uint);
+char __ovld __cnfn convert_char_sat_rtn(uint);
+char __ovld __cnfn convert_char(uint);
+char __ovld __cnfn convert_char_sat(uint);
+char __ovld __cnfn convert_char_rte(long);
+char __ovld __cnfn convert_char_sat_rte(long);
+char __ovld __cnfn convert_char_rtz(long);
+char __ovld __cnfn convert_char_sat_rtz(long);
+char __ovld __cnfn convert_char_rtp(long);
+char __ovld __cnfn convert_char_sat_rtp(long);
+char __ovld __cnfn convert_char_rtn(long);
+char __ovld __cnfn convert_char_sat_rtn(long);
+char __ovld __cnfn convert_char(long);
+char __ovld __cnfn convert_char_sat(long);
+char __ovld __cnfn convert_char_rte(ulong);
+char __ovld __cnfn convert_char_sat_rte(ulong);
+char __ovld __cnfn convert_char_rtz(ulong);
+char __ovld __cnfn convert_char_sat_rtz(ulong);
+char __ovld __cnfn convert_char_rtp(ulong);
+char __ovld __cnfn convert_char_sat_rtp(ulong);
+char __ovld __cnfn convert_char_rtn(ulong);
+char __ovld __cnfn convert_char_sat_rtn(ulong);
+char __ovld __cnfn convert_char(ulong);
+char __ovld __cnfn convert_char_sat(ulong);
+char __ovld __cnfn convert_char_rte(float);
+char __ovld __cnfn convert_char_sat_rte(float);
+char __ovld __cnfn convert_char_rtz(float);
+char __ovld __cnfn convert_char_sat_rtz(float);
+char __ovld __cnfn convert_char_rtp(float);
+char __ovld __cnfn convert_char_sat_rtp(float);
+char __ovld __cnfn convert_char_rtn(float);
+char __ovld __cnfn convert_char_sat_rtn(float);
+char __ovld __cnfn convert_char(float);
+char __ovld __cnfn convert_char_sat(float);
+uchar __ovld __cnfn convert_uchar_rte(char);
+uchar __ovld __cnfn convert_uchar_sat_rte(char);
+uchar __ovld __cnfn convert_uchar_rtz(char);
+uchar __ovld __cnfn convert_uchar_sat_rtz(char);
+uchar __ovld __cnfn convert_uchar_rtp(char);
+uchar __ovld __cnfn convert_uchar_sat_rtp(char);
+uchar __ovld __cnfn convert_uchar_rtn(char);
+uchar __ovld __cnfn convert_uchar_sat_rtn(char);
+uchar __ovld __cnfn convert_uchar(char);
+uchar __ovld __cnfn convert_uchar_sat(char);
+uchar __ovld __cnfn convert_uchar_rte(uchar);
+uchar __ovld __cnfn convert_uchar_sat_rte(uchar);
+uchar __ovld __cnfn convert_uchar_rtz(uchar);
+uchar __ovld __cnfn convert_uchar_sat_rtz(uchar);
+uchar __ovld __cnfn convert_uchar_rtp(uchar);
+uchar __ovld __cnfn convert_uchar_sat_rtp(uchar);
+uchar __ovld __cnfn convert_uchar_rtn(uchar);
+uchar __ovld __cnfn convert_uchar_sat_rtn(uchar);
+uchar __ovld __cnfn convert_uchar(uchar);
+uchar __ovld __cnfn convert_uchar_sat(uchar);
+uchar __ovld __cnfn convert_uchar_rte(short);
+uchar __ovld __cnfn convert_uchar_sat_rte(short);
+uchar __ovld __cnfn convert_uchar_rtz(short);
+uchar __ovld __cnfn convert_uchar_sat_rtz(short);
+uchar __ovld __cnfn convert_uchar_rtp(short);
+uchar __ovld __cnfn convert_uchar_sat_rtp(short);
+uchar __ovld __cnfn convert_uchar_rtn(short);
+uchar __ovld __cnfn convert_uchar_sat_rtn(short);
+uchar __ovld __cnfn convert_uchar(short);
+uchar __ovld __cnfn convert_uchar_sat(short);
+uchar __ovld __cnfn convert_uchar_rte(ushort);
+uchar __ovld __cnfn convert_uchar_sat_rte(ushort);
+uchar __ovld __cnfn convert_uchar_rtz(ushort);
+uchar __ovld __cnfn convert_uchar_sat_rtz(ushort);
+uchar __ovld __cnfn convert_uchar_rtp(ushort);
+uchar __ovld __cnfn convert_uchar_sat_rtp(ushort);
+uchar __ovld __cnfn convert_uchar_rtn(ushort);
+uchar __ovld __cnfn convert_uchar_sat_rtn(ushort);
+uchar __ovld __cnfn convert_uchar(ushort);
+uchar __ovld __cnfn convert_uchar_sat(ushort);
+uchar __ovld __cnfn convert_uchar_rte(int);
+uchar __ovld __cnfn convert_uchar_sat_rte(int);
+uchar __ovld __cnfn convert_uchar_rtz(int);
+uchar __ovld __cnfn convert_uchar_sat_rtz(int);
+uchar __ovld __cnfn convert_uchar_rtp(int);
+uchar __ovld __cnfn convert_uchar_sat_rtp(int);
+uchar __ovld __cnfn convert_uchar_rtn(int);
+uchar __ovld __cnfn convert_uchar_sat_rtn(int);
+uchar __ovld __cnfn convert_uchar(int);
+uchar __ovld __cnfn convert_uchar_sat(int);
+uchar __ovld __cnfn convert_uchar_rte(uint);
+uchar __ovld __cnfn convert_uchar_sat_rte(uint);
+uchar __ovld __cnfn convert_uchar_rtz(uint);
+uchar __ovld __cnfn convert_uchar_sat_rtz(uint);
+uchar __ovld __cnfn convert_uchar_rtp(uint);
+uchar __ovld __cnfn convert_uchar_sat_rtp(uint);
+uchar __ovld __cnfn convert_uchar_rtn(uint);
+uchar __ovld __cnfn convert_uchar_sat_rtn(uint);
+uchar __ovld __cnfn convert_uchar(uint);
+uchar __ovld __cnfn convert_uchar_sat(uint);
+uchar __ovld __cnfn convert_uchar_rte(long);
+uchar __ovld __cnfn convert_uchar_sat_rte(long);
+uchar __ovld __cnfn convert_uchar_rtz(long);
+uchar __ovld __cnfn convert_uchar_sat_rtz(long);
+uchar __ovld __cnfn convert_uchar_rtp(long);
+uchar __ovld __cnfn convert_uchar_sat_rtp(long);
+uchar __ovld __cnfn convert_uchar_rtn(long);
+uchar __ovld __cnfn convert_uchar_sat_rtn(long);
+uchar __ovld __cnfn convert_uchar(long);
+uchar __ovld __cnfn convert_uchar_sat(long);
+uchar __ovld __cnfn convert_uchar_rte(ulong);
+uchar __ovld __cnfn convert_uchar_sat_rte(ulong);
+uchar __ovld __cnfn convert_uchar_rtz(ulong);
+uchar __ovld __cnfn convert_uchar_sat_rtz(ulong);
+uchar __ovld __cnfn convert_uchar_rtp(ulong);
+uchar __ovld __cnfn convert_uchar_sat_rtp(ulong);
+uchar __ovld __cnfn convert_uchar_rtn(ulong);
+uchar __ovld __cnfn convert_uchar_sat_rtn(ulong);
+uchar __ovld __cnfn convert_uchar(ulong);
+uchar __ovld __cnfn convert_uchar_sat(ulong);
+uchar __ovld __cnfn convert_uchar_rte(float);
+uchar __ovld __cnfn convert_uchar_sat_rte(float);
+uchar __ovld __cnfn convert_uchar_rtz(float);
+uchar __ovld __cnfn convert_uchar_sat_rtz(float);
+uchar __ovld __cnfn convert_uchar_rtp(float);
+uchar __ovld __cnfn convert_uchar_sat_rtp(float);
+uchar __ovld __cnfn convert_uchar_rtn(float);
+uchar __ovld __cnfn convert_uchar_sat_rtn(float);
+uchar __ovld __cnfn convert_uchar(float);
+uchar __ovld __cnfn convert_uchar_sat(float);
+
+short __ovld __cnfn convert_short_rte(char);
+short __ovld __cnfn convert_short_sat_rte(char);
+short __ovld __cnfn convert_short_rtz(char);
+short __ovld __cnfn convert_short_sat_rtz(char);
+short __ovld __cnfn convert_short_rtp(char);
+short __ovld __cnfn convert_short_sat_rtp(char);
+short __ovld __cnfn convert_short_rtn(char);
+short __ovld __cnfn convert_short_sat_rtn(char);
+short __ovld __cnfn convert_short(char);
+short __ovld __cnfn convert_short_sat(char);
+short __ovld __cnfn convert_short_rte(uchar);
+short __ovld __cnfn convert_short_sat_rte(uchar);
+short __ovld __cnfn convert_short_rtz(uchar);
+short __ovld __cnfn convert_short_sat_rtz(uchar);
+short __ovld __cnfn convert_short_rtp(uchar);
+short __ovld __cnfn convert_short_sat_rtp(uchar);
+short __ovld __cnfn convert_short_rtn(uchar);
+short __ovld __cnfn convert_short_sat_rtn(uchar);
+short __ovld __cnfn convert_short(uchar);
+short __ovld __cnfn convert_short_sat(uchar);
+short __ovld __cnfn convert_short_rte(short);
+short __ovld __cnfn convert_short_sat_rte(short);
+short __ovld __cnfn convert_short_rtz(short);
+short __ovld __cnfn convert_short_sat_rtz(short);
+short __ovld __cnfn convert_short_rtp(short);
+short __ovld __cnfn convert_short_sat_rtp(short);
+short __ovld __cnfn convert_short_rtn(short);
+short __ovld __cnfn convert_short_sat_rtn(short);
+short __ovld __cnfn convert_short(short);
+short __ovld __cnfn convert_short_sat(short);
+short __ovld __cnfn convert_short_rte(ushort);
+short __ovld __cnfn convert_short_sat_rte(ushort);
+short __ovld __cnfn convert_short_rtz(ushort);
+short __ovld __cnfn convert_short_sat_rtz(ushort);
+short __ovld __cnfn convert_short_rtp(ushort);
+short __ovld __cnfn convert_short_sat_rtp(ushort);
+short __ovld __cnfn convert_short_rtn(ushort);
+short __ovld __cnfn convert_short_sat_rtn(ushort);
+short __ovld __cnfn convert_short(ushort);
+short __ovld __cnfn convert_short_sat(ushort);
+short __ovld __cnfn convert_short_rte(int);
+short __ovld __cnfn convert_short_sat_rte(int);
+short __ovld __cnfn convert_short_rtz(int);
+short __ovld __cnfn convert_short_sat_rtz(int);
+short __ovld __cnfn convert_short_rtp(int);
+short __ovld __cnfn convert_short_sat_rtp(int);
+short __ovld __cnfn convert_short_rtn(int);
+short __ovld __cnfn convert_short_sat_rtn(int);
+short __ovld __cnfn convert_short(int);
+short __ovld __cnfn convert_short_sat(int);
+short __ovld __cnfn convert_short_rte(uint);
+short __ovld __cnfn convert_short_sat_rte(uint);
+short __ovld __cnfn convert_short_rtz(uint);
+short __ovld __cnfn convert_short_sat_rtz(uint);
+short __ovld __cnfn convert_short_rtp(uint);
+short __ovld __cnfn convert_short_sat_rtp(uint);
+short __ovld __cnfn convert_short_rtn(uint);
+short __ovld __cnfn convert_short_sat_rtn(uint);
+short __ovld __cnfn convert_short(uint);
+short __ovld __cnfn convert_short_sat(uint);
+short __ovld __cnfn convert_short_rte(long);
+short __ovld __cnfn convert_short_sat_rte(long);
+short __ovld __cnfn convert_short_rtz(long);
+short __ovld __cnfn convert_short_sat_rtz(long);
+short __ovld __cnfn convert_short_rtp(long);
+short __ovld __cnfn convert_short_sat_rtp(long);
+short __ovld __cnfn convert_short_rtn(long);
+short __ovld __cnfn convert_short_sat_rtn(long);
+short __ovld __cnfn convert_short(long);
+short __ovld __cnfn convert_short_sat(long);
+short __ovld __cnfn convert_short_rte(ulong);
+short __ovld __cnfn convert_short_sat_rte(ulong);
+short __ovld __cnfn convert_short_rtz(ulong);
+short __ovld __cnfn convert_short_sat_rtz(ulong);
+short __ovld __cnfn convert_short_rtp(ulong);
+short __ovld __cnfn convert_short_sat_rtp(ulong);
+short __ovld __cnfn convert_short_rtn(ulong);
+short __ovld __cnfn convert_short_sat_rtn(ulong);
+short __ovld __cnfn convert_short(ulong);
+short __ovld __cnfn convert_short_sat(ulong);
+short __ovld __cnfn convert_short_rte(float);
+short __ovld __cnfn convert_short_sat_rte(float);
+short __ovld __cnfn convert_short_rtz(float);
+short __ovld __cnfn convert_short_sat_rtz(float);
+short __ovld __cnfn convert_short_rtp(float);
+short __ovld __cnfn convert_short_sat_rtp(float);
+short __ovld __cnfn convert_short_rtn(float);
+short __ovld __cnfn convert_short_sat_rtn(float);
+short __ovld __cnfn convert_short(float);
+short __ovld __cnfn convert_short_sat(float);
+ushort __ovld __cnfn convert_ushort_rte(char);
+ushort __ovld __cnfn convert_ushort_sat_rte(char);
+ushort __ovld __cnfn convert_ushort_rtz(char);
+ushort __ovld __cnfn convert_ushort_sat_rtz(char);
+ushort __ovld __cnfn convert_ushort_rtp(char);
+ushort __ovld __cnfn convert_ushort_sat_rtp(char);
+ushort __ovld __cnfn convert_ushort_rtn(char);
+ushort __ovld __cnfn convert_ushort_sat_rtn(char);
+ushort __ovld __cnfn convert_ushort(char);
+ushort __ovld __cnfn convert_ushort_sat(char);
+ushort __ovld __cnfn convert_ushort_rte(uchar);
+ushort __ovld __cnfn convert_ushort_sat_rte(uchar);
+ushort __ovld __cnfn convert_ushort_rtz(uchar);
+ushort __ovld __cnfn convert_ushort_sat_rtz(uchar);
+ushort __ovld __cnfn convert_ushort_rtp(uchar);
+ushort __ovld __cnfn convert_ushort_sat_rtp(uchar);
+ushort __ovld __cnfn convert_ushort_rtn(uchar);
+ushort __ovld __cnfn convert_ushort_sat_rtn(uchar);
+ushort __ovld __cnfn convert_ushort(uchar);
+ushort __ovld __cnfn convert_ushort_sat(uchar);
+ushort __ovld __cnfn convert_ushort_rte(short);
+ushort __ovld __cnfn convert_ushort_sat_rte(short);
+ushort __ovld __cnfn convert_ushort_rtz(short);
+ushort __ovld __cnfn convert_ushort_sat_rtz(short);
+ushort __ovld __cnfn convert_ushort_rtp(short);
+ushort __ovld __cnfn convert_ushort_sat_rtp(short);
+ushort __ovld __cnfn convert_ushort_rtn(short);
+ushort __ovld __cnfn convert_ushort_sat_rtn(short);
+ushort __ovld __cnfn convert_ushort(short);
+ushort __ovld __cnfn convert_ushort_sat(short);
+ushort __ovld __cnfn convert_ushort_rte(ushort);
+ushort __ovld __cnfn convert_ushort_sat_rte(ushort);
+ushort __ovld __cnfn convert_ushort_rtz(ushort);
+ushort __ovld __cnfn convert_ushort_sat_rtz(ushort);
+ushort __ovld __cnfn convert_ushort_rtp(ushort);
+ushort __ovld __cnfn convert_ushort_sat_rtp(ushort);
+ushort __ovld __cnfn convert_ushort_rtn(ushort);
+ushort __ovld __cnfn convert_ushort_sat_rtn(ushort);
+ushort __ovld __cnfn convert_ushort(ushort);
+ushort __ovld __cnfn convert_ushort_sat(ushort);
+ushort __ovld __cnfn convert_ushort_rte(int);
+ushort __ovld __cnfn convert_ushort_sat_rte(int);
+ushort __ovld __cnfn convert_ushort_rtz(int);
+ushort __ovld __cnfn convert_ushort_sat_rtz(int);
+ushort __ovld __cnfn convert_ushort_rtp(int);
+ushort __ovld __cnfn convert_ushort_sat_rtp(int);
+ushort __ovld __cnfn convert_ushort_rtn(int);
+ushort __ovld __cnfn convert_ushort_sat_rtn(int);
+ushort __ovld __cnfn convert_ushort(int);
+ushort __ovld __cnfn convert_ushort_sat(int);
+ushort __ovld __cnfn convert_ushort_rte(uint);
+ushort __ovld __cnfn convert_ushort_sat_rte(uint);
+ushort __ovld __cnfn convert_ushort_rtz(uint);
+ushort __ovld __cnfn convert_ushort_sat_rtz(uint);
+ushort __ovld __cnfn convert_ushort_rtp(uint);
+ushort __ovld __cnfn convert_ushort_sat_rtp(uint);
+ushort __ovld __cnfn convert_ushort_rtn(uint);
+ushort __ovld __cnfn convert_ushort_sat_rtn(uint);
+ushort __ovld __cnfn convert_ushort(uint);
+ushort __ovld __cnfn convert_ushort_sat(uint);
+ushort __ovld __cnfn convert_ushort_rte(long);
+ushort __ovld __cnfn convert_ushort_sat_rte(long);
+ushort __ovld __cnfn convert_ushort_rtz(long);
+ushort __ovld __cnfn convert_ushort_sat_rtz(long);
+ushort __ovld __cnfn convert_ushort_rtp(long);
+ushort __ovld __cnfn convert_ushort_sat_rtp(long);
+ushort __ovld __cnfn convert_ushort_rtn(long);
+ushort __ovld __cnfn convert_ushort_sat_rtn(long);
+ushort __ovld __cnfn convert_ushort(long);
+ushort __ovld __cnfn convert_ushort_sat(long);
+ushort __ovld __cnfn convert_ushort_rte(ulong);
+ushort __ovld __cnfn convert_ushort_sat_rte(ulong);
+ushort __ovld __cnfn convert_ushort_rtz(ulong);
+ushort __ovld __cnfn convert_ushort_sat_rtz(ulong);
+ushort __ovld __cnfn convert_ushort_rtp(ulong);
+ushort __ovld __cnfn convert_ushort_sat_rtp(ulong);
+ushort __ovld __cnfn convert_ushort_rtn(ulong);
+ushort __ovld __cnfn convert_ushort_sat_rtn(ulong);
+ushort __ovld __cnfn convert_ushort(ulong);
+ushort __ovld __cnfn convert_ushort_sat(ulong);
+ushort __ovld __cnfn convert_ushort_rte(float);
+ushort __ovld __cnfn convert_ushort_sat_rte(float);
+ushort __ovld __cnfn convert_ushort_rtz(float);
+ushort __ovld __cnfn convert_ushort_sat_rtz(float);
+ushort __ovld __cnfn convert_ushort_rtp(float);
+ushort __ovld __cnfn convert_ushort_sat_rtp(float);
+ushort __ovld __cnfn convert_ushort_rtn(float);
+ushort __ovld __cnfn convert_ushort_sat_rtn(float);
+ushort __ovld __cnfn convert_ushort(float);
+ushort __ovld __cnfn convert_ushort_sat(float);
+int __ovld __cnfn convert_int_rte(char);
+int __ovld __cnfn convert_int_sat_rte(char);
+int __ovld __cnfn convert_int_rtz(char);
+int __ovld __cnfn convert_int_sat_rtz(char);
+int __ovld __cnfn convert_int_rtp(char);
+int __ovld __cnfn convert_int_sat_rtp(char);
+int __ovld __cnfn convert_int_rtn(char);
+int __ovld __cnfn convert_int_sat_rtn(char);
+int __ovld __cnfn convert_int(char);
+int __ovld __cnfn convert_int_sat(char);
+int __ovld __cnfn convert_int_rte(uchar);
+int __ovld __cnfn convert_int_sat_rte(uchar);
+int __ovld __cnfn convert_int_rtz(uchar);
+int __ovld __cnfn convert_int_sat_rtz(uchar);
+int __ovld __cnfn convert_int_rtp(uchar);
+int __ovld __cnfn convert_int_sat_rtp(uchar);
+int __ovld __cnfn convert_int_rtn(uchar);
+int __ovld __cnfn convert_int_sat_rtn(uchar);
+int __ovld __cnfn convert_int(uchar);
+int __ovld __cnfn convert_int_sat(uchar);
+int __ovld __cnfn convert_int_rte(short);
+int __ovld __cnfn convert_int_sat_rte(short);
+int __ovld __cnfn convert_int_rtz(short);
+int __ovld __cnfn convert_int_sat_rtz(short);
+int __ovld __cnfn convert_int_rtp(short);
+int __ovld __cnfn convert_int_sat_rtp(short);
+int __ovld __cnfn convert_int_rtn(short);
+int __ovld __cnfn convert_int_sat_rtn(short);
+int __ovld __cnfn convert_int(short);
+int __ovld __cnfn convert_int_sat(short);
+int __ovld __cnfn convert_int_rte(ushort);
+int __ovld __cnfn convert_int_sat_rte(ushort);
+int __ovld __cnfn convert_int_rtz(ushort);
+int __ovld __cnfn convert_int_sat_rtz(ushort);
+int __ovld __cnfn convert_int_rtp(ushort);
+int __ovld __cnfn convert_int_sat_rtp(ushort);
+int __ovld __cnfn convert_int_rtn(ushort);
+int __ovld __cnfn convert_int_sat_rtn(ushort);
+int __ovld __cnfn convert_int(ushort);
+int __ovld __cnfn convert_int_sat(ushort);
+int __ovld __cnfn convert_int_rte(int);
+int __ovld __cnfn convert_int_sat_rte(int);
+int __ovld __cnfn convert_int_rtz(int);
+int __ovld __cnfn convert_int_sat_rtz(int);
+int __ovld __cnfn convert_int_rtp(int);
+int __ovld __cnfn convert_int_sat_rtp(int);
+int __ovld __cnfn convert_int_rtn(int);
+int __ovld __cnfn convert_int_sat_rtn(int);
+int __ovld __cnfn convert_int(int);
+int __ovld __cnfn convert_int_sat(int);
+int __ovld __cnfn convert_int_rte(uint);
+int __ovld __cnfn convert_int_sat_rte(uint);
+int __ovld __cnfn convert_int_rtz(uint);
+int __ovld __cnfn convert_int_sat_rtz(uint);
+int __ovld __cnfn convert_int_rtp(uint);
+int __ovld __cnfn convert_int_sat_rtp(uint);
+int __ovld __cnfn convert_int_rtn(uint);
+int __ovld __cnfn convert_int_sat_rtn(uint);
+int __ovld __cnfn convert_int(uint);
+int __ovld __cnfn convert_int_sat(uint);
+int __ovld __cnfn convert_int_rte(long);
+int __ovld __cnfn convert_int_sat_rte(long);
+int __ovld __cnfn convert_int_rtz(long);
+int __ovld __cnfn convert_int_sat_rtz(long);
+int __ovld __cnfn convert_int_rtp(long);
+int __ovld __cnfn convert_int_sat_rtp(long);
+int __ovld __cnfn convert_int_rtn(long);
+int __ovld __cnfn convert_int_sat_rtn(long);
+int __ovld __cnfn convert_int(long);
+int __ovld __cnfn convert_int_sat(long);
+int __ovld __cnfn convert_int_rte(ulong);
+int __ovld __cnfn convert_int_sat_rte(ulong);
+int __ovld __cnfn convert_int_rtz(ulong);
+int __ovld __cnfn convert_int_sat_rtz(ulong);
+int __ovld __cnfn convert_int_rtp(ulong);
+int __ovld __cnfn convert_int_sat_rtp(ulong);
+int __ovld __cnfn convert_int_rtn(ulong);
+int __ovld __cnfn convert_int_sat_rtn(ulong);
+int __ovld __cnfn convert_int(ulong);
+int __ovld __cnfn convert_int_sat(ulong);
+int __ovld __cnfn convert_int_rte(float);
+int __ovld __cnfn convert_int_sat_rte(float);
+int __ovld __cnfn convert_int_rtz(float);
+int __ovld __cnfn convert_int_sat_rtz(float);
+int __ovld __cnfn convert_int_rtp(float);
+int __ovld __cnfn convert_int_sat_rtp(float);
+int __ovld __cnfn convert_int_rtn(float);
+int __ovld __cnfn convert_int_sat_rtn(float);
+int __ovld __cnfn convert_int(float);
+int __ovld __cnfn convert_int_sat(float);
+uint __ovld __cnfn convert_uint_rte(char);
+uint __ovld __cnfn convert_uint_sat_rte(char);
+uint __ovld __cnfn convert_uint_rtz(char);
+uint __ovld __cnfn convert_uint_sat_rtz(char);
+uint __ovld __cnfn convert_uint_rtp(char);
+uint __ovld __cnfn convert_uint_sat_rtp(char);
+uint __ovld __cnfn convert_uint_rtn(char);
+uint __ovld __cnfn convert_uint_sat_rtn(char);
+uint __ovld __cnfn convert_uint(char);
+uint __ovld __cnfn convert_uint_sat(char);
+uint __ovld __cnfn convert_uint_rte(uchar);
+uint __ovld __cnfn convert_uint_sat_rte(uchar);
+uint __ovld __cnfn convert_uint_rtz(uchar);
+uint __ovld __cnfn convert_uint_sat_rtz(uchar);
+uint __ovld __cnfn convert_uint_rtp(uchar);
+uint __ovld __cnfn convert_uint_sat_rtp(uchar);
+uint __ovld __cnfn convert_uint_rtn(uchar);
+uint __ovld __cnfn convert_uint_sat_rtn(uchar);
+uint __ovld __cnfn convert_uint(uchar);
+uint __ovld __cnfn convert_uint_sat(uchar);
+uint __ovld __cnfn convert_uint_rte(short);
+uint __ovld __cnfn convert_uint_sat_rte(short);
+uint __ovld __cnfn convert_uint_rtz(short);
+uint __ovld __cnfn convert_uint_sat_rtz(short);
+uint __ovld __cnfn convert_uint_rtp(short);
+uint __ovld __cnfn convert_uint_sat_rtp(short);
+uint __ovld __cnfn convert_uint_rtn(short);
+uint __ovld __cnfn convert_uint_sat_rtn(short);
+uint __ovld __cnfn convert_uint(short);
+uint __ovld __cnfn convert_uint_sat(short);
+uint __ovld __cnfn convert_uint_rte(ushort);
+uint __ovld __cnfn convert_uint_sat_rte(ushort);
+uint __ovld __cnfn convert_uint_rtz(ushort);
+uint __ovld __cnfn convert_uint_sat_rtz(ushort);
+uint __ovld __cnfn convert_uint_rtp(ushort);
+uint __ovld __cnfn convert_uint_sat_rtp(ushort);
+uint __ovld __cnfn convert_uint_rtn(ushort);
+uint __ovld __cnfn convert_uint_sat_rtn(ushort);
+uint __ovld __cnfn convert_uint(ushort);
+uint __ovld __cnfn convert_uint_sat(ushort);
+uint __ovld __cnfn convert_uint_rte(int);
+uint __ovld __cnfn convert_uint_sat_rte(int);
+uint __ovld __cnfn convert_uint_rtz(int);
+uint __ovld __cnfn convert_uint_sat_rtz(int);
+uint __ovld __cnfn convert_uint_rtp(int);
+uint __ovld __cnfn convert_uint_sat_rtp(int);
+uint __ovld __cnfn convert_uint_rtn(int);
+uint __ovld __cnfn convert_uint_sat_rtn(int);
+uint __ovld __cnfn convert_uint(int);
+uint __ovld __cnfn convert_uint_sat(int);
+uint __ovld __cnfn convert_uint_rte(uint);
+uint __ovld __cnfn convert_uint_sat_rte(uint);
+uint __ovld __cnfn convert_uint_rtz(uint);
+uint __ovld __cnfn convert_uint_sat_rtz(uint);
+uint __ovld __cnfn convert_uint_rtp(uint);
+uint __ovld __cnfn convert_uint_sat_rtp(uint);
+uint __ovld __cnfn convert_uint_rtn(uint);
+uint __ovld __cnfn convert_uint_sat_rtn(uint);
+uint __ovld __cnfn convert_uint(uint);
+uint __ovld __cnfn convert_uint_sat(uint);
+uint __ovld __cnfn convert_uint_rte(long);
+uint __ovld __cnfn convert_uint_sat_rte(long);
+uint __ovld __cnfn convert_uint_rtz(long);
+uint __ovld __cnfn convert_uint_sat_rtz(long);
+uint __ovld __cnfn convert_uint_rtp(long);
+uint __ovld __cnfn convert_uint_sat_rtp(long);
+uint __ovld __cnfn convert_uint_rtn(long);
+uint __ovld __cnfn convert_uint_sat_rtn(long);
+uint __ovld __cnfn convert_uint(long);
+uint __ovld __cnfn convert_uint_sat(long);
+uint __ovld __cnfn convert_uint_rte(ulong);
+uint __ovld __cnfn convert_uint_sat_rte(ulong);
+uint __ovld __cnfn convert_uint_rtz(ulong);
+uint __ovld __cnfn convert_uint_sat_rtz(ulong);
+uint __ovld __cnfn convert_uint_rtp(ulong);
+uint __ovld __cnfn convert_uint_sat_rtp(ulong);
+uint __ovld __cnfn convert_uint_rtn(ulong);
+uint __ovld __cnfn convert_uint_sat_rtn(ulong);
+uint __ovld __cnfn convert_uint(ulong);
+uint __ovld __cnfn convert_uint_sat(ulong);
+uint __ovld __cnfn convert_uint_rte(float);
+uint __ovld __cnfn convert_uint_sat_rte(float);
+uint __ovld __cnfn convert_uint_rtz(float);
+uint __ovld __cnfn convert_uint_sat_rtz(float);
+uint __ovld __cnfn convert_uint_rtp(float);
+uint __ovld __cnfn convert_uint_sat_rtp(float);
+uint __ovld __cnfn convert_uint_rtn(float);
+uint __ovld __cnfn convert_uint_sat_rtn(float);
+uint __ovld __cnfn convert_uint(float);
+uint __ovld __cnfn convert_uint_sat(float);
+long __ovld __cnfn convert_long_rte(char);
+long __ovld __cnfn convert_long_sat_rte(char);
+long __ovld __cnfn convert_long_rtz(char);
+long __ovld __cnfn convert_long_sat_rtz(char);
+long __ovld __cnfn convert_long_rtp(char);
+long __ovld __cnfn convert_long_sat_rtp(char);
+long __ovld __cnfn convert_long_rtn(char);
+long __ovld __cnfn convert_long_sat_rtn(char);
+long __ovld __cnfn convert_long(char);
+long __ovld __cnfn convert_long_sat(char);
+long __ovld __cnfn convert_long_rte(uchar);
+long __ovld __cnfn convert_long_sat_rte(uchar);
+long __ovld __cnfn convert_long_rtz(uchar);
+long __ovld __cnfn convert_long_sat_rtz(uchar);
+long __ovld __cnfn convert_long_rtp(uchar);
+long __ovld __cnfn convert_long_sat_rtp(uchar);
+long __ovld __cnfn convert_long_rtn(uchar);
+long __ovld __cnfn convert_long_sat_rtn(uchar);
+long __ovld __cnfn convert_long(uchar);
+long __ovld __cnfn convert_long_sat(uchar);
+long __ovld __cnfn convert_long_rte(short);
+long __ovld __cnfn convert_long_sat_rte(short);
+long __ovld __cnfn convert_long_rtz(short);
+long __ovld __cnfn convert_long_sat_rtz(short);
+long __ovld __cnfn convert_long_rtp(short);
+long __ovld __cnfn convert_long_sat_rtp(short);
+long __ovld __cnfn convert_long_rtn(short);
+long __ovld __cnfn convert_long_sat_rtn(short);
+long __ovld __cnfn convert_long(short);
+long __ovld __cnfn convert_long_sat(short);
+long __ovld __cnfn convert_long_rte(ushort);
+long __ovld __cnfn convert_long_sat_rte(ushort);
+long __ovld __cnfn convert_long_rtz(ushort);
+long __ovld __cnfn convert_long_sat_rtz(ushort);
+long __ovld __cnfn convert_long_rtp(ushort);
+long __ovld __cnfn convert_long_sat_rtp(ushort);
+long __ovld __cnfn convert_long_rtn(ushort);
+long __ovld __cnfn convert_long_sat_rtn(ushort);
+long __ovld __cnfn convert_long(ushort);
+long __ovld __cnfn convert_long_sat(ushort);
+long __ovld __cnfn convert_long_rte(int);
+long __ovld __cnfn convert_long_sat_rte(int);
+long __ovld __cnfn convert_long_rtz(int);
+long __ovld __cnfn convert_long_sat_rtz(int);
+long __ovld __cnfn convert_long_rtp(int);
+long __ovld __cnfn convert_long_sat_rtp(int);
+long __ovld __cnfn convert_long_rtn(int);
+long __ovld __cnfn convert_long_sat_rtn(int);
+long __ovld __cnfn convert_long(int);
+long __ovld __cnfn convert_long_sat(int);
+long __ovld __cnfn convert_long_rte(uint);
+long __ovld __cnfn convert_long_sat_rte(uint);
+long __ovld __cnfn convert_long_rtz(uint);
+long __ovld __cnfn convert_long_sat_rtz(uint);
+long __ovld __cnfn convert_long_rtp(uint);
+long __ovld __cnfn convert_long_sat_rtp(uint);
+long __ovld __cnfn convert_long_rtn(uint);
+long __ovld __cnfn convert_long_sat_rtn(uint);
+long __ovld __cnfn convert_long(uint);
+long __ovld __cnfn convert_long_sat(uint);
+long __ovld __cnfn convert_long_rte(long);
+long __ovld __cnfn convert_long_sat_rte(long);
+long __ovld __cnfn convert_long_rtz(long);
+long __ovld __cnfn convert_long_sat_rtz(long);
+long __ovld __cnfn convert_long_rtp(long);
+long __ovld __cnfn convert_long_sat_rtp(long);
+long __ovld __cnfn convert_long_rtn(long);
+long __ovld __cnfn convert_long_sat_rtn(long);
+long __ovld __cnfn convert_long(long);
+long __ovld __cnfn convert_long_sat(long);
+long __ovld __cnfn convert_long_rte(ulong);
+long __ovld __cnfn convert_long_sat_rte(ulong);
+long __ovld __cnfn convert_long_rtz(ulong);
+long __ovld __cnfn convert_long_sat_rtz(ulong);
+long __ovld __cnfn convert_long_rtp(ulong);
+long __ovld __cnfn convert_long_sat_rtp(ulong);
+long __ovld __cnfn convert_long_rtn(ulong);
+long __ovld __cnfn convert_long_sat_rtn(ulong);
+long __ovld __cnfn convert_long(ulong);
+long __ovld __cnfn convert_long_sat(ulong);
+long __ovld __cnfn convert_long_rte(float);
+long __ovld __cnfn convert_long_sat_rte(float);
+long __ovld __cnfn convert_long_rtz(float);
+long __ovld __cnfn convert_long_sat_rtz(float);
+long __ovld __cnfn convert_long_rtp(float);
+long __ovld __cnfn convert_long_sat_rtp(float);
+long __ovld __cnfn convert_long_rtn(float);
+long __ovld __cnfn convert_long_sat_rtn(float);
+long __ovld __cnfn convert_long(float);
+long __ovld __cnfn convert_long_sat(float);
+ulong __ovld __cnfn convert_ulong_rte(char);
+ulong __ovld __cnfn convert_ulong_sat_rte(char);
+ulong __ovld __cnfn convert_ulong_rtz(char);
+ulong __ovld __cnfn convert_ulong_sat_rtz(char);
+ulong __ovld __cnfn convert_ulong_rtp(char);
+ulong __ovld __cnfn convert_ulong_sat_rtp(char);
+ulong __ovld __cnfn convert_ulong_rtn(char);
+ulong __ovld __cnfn convert_ulong_sat_rtn(char);
+ulong __ovld __cnfn convert_ulong(char);
+ulong __ovld __cnfn convert_ulong_sat(char);
+ulong __ovld __cnfn convert_ulong_rte(uchar);
+ulong __ovld __cnfn convert_ulong_sat_rte(uchar);
+ulong __ovld __cnfn convert_ulong_rtz(uchar);
+ulong __ovld __cnfn convert_ulong_sat_rtz(uchar);
+ulong __ovld __cnfn convert_ulong_rtp(uchar);
+ulong __ovld __cnfn convert_ulong_sat_rtp(uchar);
+ulong __ovld __cnfn convert_ulong_rtn(uchar);
+ulong __ovld __cnfn convert_ulong_sat_rtn(uchar);
+ulong __ovld __cnfn convert_ulong(uchar);
+ulong __ovld __cnfn convert_ulong_sat(uchar);
+ulong __ovld __cnfn convert_ulong_rte(short);
+ulong __ovld __cnfn convert_ulong_sat_rte(short);
+ulong __ovld __cnfn convert_ulong_rtz(short);
+ulong __ovld __cnfn convert_ulong_sat_rtz(short);
+ulong __ovld __cnfn convert_ulong_rtp(short);
+ulong __ovld __cnfn convert_ulong_sat_rtp(short);
+ulong __ovld __cnfn convert_ulong_rtn(short);
+ulong __ovld __cnfn convert_ulong_sat_rtn(short);
+ulong __ovld __cnfn convert_ulong(short);
+ulong __ovld __cnfn convert_ulong_sat(short);
+ulong __ovld __cnfn convert_ulong_rte(ushort);
+ulong __ovld __cnfn convert_ulong_sat_rte(ushort);
+ulong __ovld __cnfn convert_ulong_rtz(ushort);
+ulong __ovld __cnfn convert_ulong_sat_rtz(ushort);
+ulong __ovld __cnfn convert_ulong_rtp(ushort);
+ulong __ovld __cnfn convert_ulong_sat_rtp(ushort);
+ulong __ovld __cnfn convert_ulong_rtn(ushort);
+ulong __ovld __cnfn convert_ulong_sat_rtn(ushort);
+ulong __ovld __cnfn convert_ulong(ushort);
+ulong __ovld __cnfn convert_ulong_sat(ushort);
+ulong __ovld __cnfn convert_ulong_rte(int);
+ulong __ovld __cnfn convert_ulong_sat_rte(int);
+ulong __ovld __cnfn convert_ulong_rtz(int);
+ulong __ovld __cnfn convert_ulong_sat_rtz(int);
+ulong __ovld __cnfn convert_ulong_rtp(int);
+ulong __ovld __cnfn convert_ulong_sat_rtp(int);
+ulong __ovld __cnfn convert_ulong_rtn(int);
+ulong __ovld __cnfn convert_ulong_sat_rtn(int);
+ulong __ovld __cnfn convert_ulong(int);
+ulong __ovld __cnfn convert_ulong_sat(int);
+ulong __ovld __cnfn convert_ulong_rte(uint);
+ulong __ovld __cnfn convert_ulong_sat_rte(uint);
+ulong __ovld __cnfn convert_ulong_rtz(uint);
+ulong __ovld __cnfn convert_ulong_sat_rtz(uint);
+ulong __ovld __cnfn convert_ulong_rtp(uint);
+ulong __ovld __cnfn convert_ulong_sat_rtp(uint);
+ulong __ovld __cnfn convert_ulong_rtn(uint);
+ulong __ovld __cnfn convert_ulong_sat_rtn(uint);
+ulong __ovld __cnfn convert_ulong(uint);
+ulong __ovld __cnfn convert_ulong_sat(uint);
+ulong __ovld __cnfn convert_ulong_rte(long);
+ulong __ovld __cnfn convert_ulong_sat_rte(long);
+ulong __ovld __cnfn convert_ulong_rtz(long);
+ulong __ovld __cnfn convert_ulong_sat_rtz(long);
+ulong __ovld __cnfn convert_ulong_rtp(long);
+ulong __ovld __cnfn convert_ulong_sat_rtp(long);
+ulong __ovld __cnfn convert_ulong_rtn(long);
+ulong __ovld __cnfn convert_ulong_sat_rtn(long);
+ulong __ovld __cnfn convert_ulong(long);
+ulong __ovld __cnfn convert_ulong_sat(long);
+ulong __ovld __cnfn convert_ulong_rte(ulong);
+ulong __ovld __cnfn convert_ulong_sat_rte(ulong);
+ulong __ovld __cnfn convert_ulong_rtz(ulong);
+ulong __ovld __cnfn convert_ulong_sat_rtz(ulong);
+ulong __ovld __cnfn convert_ulong_rtp(ulong);
+ulong __ovld __cnfn convert_ulong_sat_rtp(ulong);
+ulong __ovld __cnfn convert_ulong_rtn(ulong);
+ulong __ovld __cnfn convert_ulong_sat_rtn(ulong);
+ulong __ovld __cnfn convert_ulong(ulong);
+ulong __ovld __cnfn convert_ulong_sat(ulong);
+ulong __ovld __cnfn convert_ulong_rte(float);
+ulong __ovld __cnfn convert_ulong_sat_rte(float);
+ulong __ovld __cnfn convert_ulong_rtz(float);
+ulong __ovld __cnfn convert_ulong_sat_rtz(float);
+ulong __ovld __cnfn convert_ulong_rtp(float);
+ulong __ovld __cnfn convert_ulong_sat_rtp(float);
+ulong __ovld __cnfn convert_ulong_rtn(float);
+ulong __ovld __cnfn convert_ulong_sat_rtn(float);
+ulong __ovld __cnfn convert_ulong(float);
+ulong __ovld __cnfn convert_ulong_sat(float);
+float __ovld __cnfn convert_float_rte(char);
+float __ovld __cnfn convert_float_rtz(char);
+float __ovld __cnfn convert_float_rtp(char);
+float __ovld __cnfn convert_float_rtn(char);
+float __ovld __cnfn convert_float(char);
+float __ovld __cnfn convert_float_rte(uchar);
+float __ovld __cnfn convert_float_rtz(uchar);
+float __ovld __cnfn convert_float_rtp(uchar);
+float __ovld __cnfn convert_float_rtn(uchar);
+float __ovld __cnfn convert_float(uchar);
+float __ovld __cnfn convert_float_rte(short);
+float __ovld __cnfn convert_float_rtz(short);
+float __ovld __cnfn convert_float_rtp(short);
+float __ovld __cnfn convert_float_rtn(short);
+float __ovld __cnfn convert_float(short);
+float __ovld __cnfn convert_float_rte(ushort);
+float __ovld __cnfn convert_float_rtz(ushort);
+float __ovld __cnfn convert_float_rtp(ushort);
+float __ovld __cnfn convert_float_rtn(ushort);
+float __ovld __cnfn convert_float(ushort);
+float __ovld __cnfn convert_float_rte(int);
+float __ovld __cnfn convert_float_rtz(int);
+float __ovld __cnfn convert_float_rtp(int);
+float __ovld __cnfn convert_float_rtn(int);
+float __ovld __cnfn convert_float(int);
+float __ovld __cnfn convert_float_rte(uint);
+float __ovld __cnfn convert_float_rtz(uint);
+float __ovld __cnfn convert_float_rtp(uint);
+float __ovld __cnfn convert_float_rtn(uint);
+float __ovld __cnfn convert_float(uint);
+float __ovld __cnfn convert_float_rte(long);
+float __ovld __cnfn convert_float_rtz(long);
+float __ovld __cnfn convert_float_rtp(long);
+float __ovld __cnfn convert_float_rtn(long);
+float __ovld __cnfn convert_float(long);
+float __ovld __cnfn convert_float_rte(ulong);
+float __ovld __cnfn convert_float_rtz(ulong);
+float __ovld __cnfn convert_float_rtp(ulong);
+float __ovld __cnfn convert_float_rtn(ulong);
+float __ovld __cnfn convert_float(ulong);
+float __ovld __cnfn convert_float_rte(float);
+float __ovld __cnfn convert_float_rtz(float);
+float __ovld __cnfn convert_float_rtp(float);
+float __ovld __cnfn convert_float_rtn(float);
+float __ovld __cnfn convert_float(float);
+char2 __ovld __cnfn convert_char2_rte(char2);
+char2 __ovld __cnfn convert_char2_sat_rte(char2);
+char2 __ovld __cnfn convert_char2_rtz(char2);
+char2 __ovld __cnfn convert_char2_sat_rtz(char2);
+char2 __ovld __cnfn convert_char2_rtp(char2);
+char2 __ovld __cnfn convert_char2_sat_rtp(char2);
+char2 __ovld __cnfn convert_char2_rtn(char2);
+char2 __ovld __cnfn convert_char2_sat_rtn(char2);
+char2 __ovld __cnfn convert_char2(char2);
+char2 __ovld __cnfn convert_char2_sat(char2);
+char2 __ovld __cnfn convert_char2_rte(uchar2);
+char2 __ovld __cnfn convert_char2_sat_rte(uchar2);
+char2 __ovld __cnfn convert_char2_rtz(uchar2);
+char2 __ovld __cnfn convert_char2_sat_rtz(uchar2);
+char2 __ovld __cnfn convert_char2_rtp(uchar2);
+char2 __ovld __cnfn convert_char2_sat_rtp(uchar2);
+char2 __ovld __cnfn convert_char2_rtn(uchar2);
+char2 __ovld __cnfn convert_char2_sat_rtn(uchar2);
+char2 __ovld __cnfn convert_char2(uchar2);
+char2 __ovld __cnfn convert_char2_sat(uchar2);
+char2 __ovld __cnfn convert_char2_rte(short2);
+char2 __ovld __cnfn convert_char2_sat_rte(short2);
+char2 __ovld __cnfn convert_char2_rtz(short2);
+char2 __ovld __cnfn convert_char2_sat_rtz(short2);
+char2 __ovld __cnfn convert_char2_rtp(short2);
+char2 __ovld __cnfn convert_char2_sat_rtp(short2);
+char2 __ovld __cnfn convert_char2_rtn(short2);
+char2 __ovld __cnfn convert_char2_sat_rtn(short2);
+char2 __ovld __cnfn convert_char2(short2);
+char2 __ovld __cnfn convert_char2_sat(short2);
+char2 __ovld __cnfn convert_char2_rte(ushort2);
+char2 __ovld __cnfn convert_char2_sat_rte(ushort2);
+char2 __ovld __cnfn convert_char2_rtz(ushort2);
+char2 __ovld __cnfn convert_char2_sat_rtz(ushort2);
+char2 __ovld __cnfn convert_char2_rtp(ushort2);
+char2 __ovld __cnfn convert_char2_sat_rtp(ushort2);
+char2 __ovld __cnfn convert_char2_rtn(ushort2);
+char2 __ovld __cnfn convert_char2_sat_rtn(ushort2);
+char2 __ovld __cnfn convert_char2(ushort2);
+char2 __ovld __cnfn convert_char2_sat(ushort2);
+char2 __ovld __cnfn convert_char2_rte(int2);
+char2 __ovld __cnfn convert_char2_sat_rte(int2);
+char2 __ovld __cnfn convert_char2_rtz(int2);
+char2 __ovld __cnfn convert_char2_sat_rtz(int2);
+char2 __ovld __cnfn convert_char2_rtp(int2);
+char2 __ovld __cnfn convert_char2_sat_rtp(int2);
+char2 __ovld __cnfn convert_char2_rtn(int2);
+char2 __ovld __cnfn convert_char2_sat_rtn(int2);
+char2 __ovld __cnfn convert_char2(int2);
+char2 __ovld __cnfn convert_char2_sat(int2);
+char2 __ovld __cnfn convert_char2_rte(uint2);
+char2 __ovld __cnfn convert_char2_sat_rte(uint2);
+char2 __ovld __cnfn convert_char2_rtz(uint2);
+char2 __ovld __cnfn convert_char2_sat_rtz(uint2);
+char2 __ovld __cnfn convert_char2_rtp(uint2);
+char2 __ovld __cnfn convert_char2_sat_rtp(uint2);
+char2 __ovld __cnfn convert_char2_rtn(uint2);
+char2 __ovld __cnfn convert_char2_sat_rtn(uint2);
+char2 __ovld __cnfn convert_char2(uint2);
+char2 __ovld __cnfn convert_char2_sat(uint2);
+char2 __ovld __cnfn convert_char2_rte(long2);
+char2 __ovld __cnfn convert_char2_sat_rte(long2);
+char2 __ovld __cnfn convert_char2_rtz(long2);
+char2 __ovld __cnfn convert_char2_sat_rtz(long2);
+char2 __ovld __cnfn convert_char2_rtp(long2);
+char2 __ovld __cnfn convert_char2_sat_rtp(long2);
+char2 __ovld __cnfn convert_char2_rtn(long2);
+char2 __ovld __cnfn convert_char2_sat_rtn(long2);
+char2 __ovld __cnfn convert_char2(long2);
+char2 __ovld __cnfn convert_char2_sat(long2);
+char2 __ovld __cnfn convert_char2_rte(ulong2);
+char2 __ovld __cnfn convert_char2_sat_rte(ulong2);
+char2 __ovld __cnfn convert_char2_rtz(ulong2);
+char2 __ovld __cnfn convert_char2_sat_rtz(ulong2);
+char2 __ovld __cnfn convert_char2_rtp(ulong2);
+char2 __ovld __cnfn convert_char2_sat_rtp(ulong2);
+char2 __ovld __cnfn convert_char2_rtn(ulong2);
+char2 __ovld __cnfn convert_char2_sat_rtn(ulong2);
+char2 __ovld __cnfn convert_char2(ulong2);
+char2 __ovld __cnfn convert_char2_sat(ulong2);
+char2 __ovld __cnfn convert_char2_rte(float2);
+char2 __ovld __cnfn convert_char2_sat_rte(float2);
+char2 __ovld __cnfn convert_char2_rtz(float2);
+char2 __ovld __cnfn convert_char2_sat_rtz(float2);
+char2 __ovld __cnfn convert_char2_rtp(float2);
+char2 __ovld __cnfn convert_char2_sat_rtp(float2);
+char2 __ovld __cnfn convert_char2_rtn(float2);
+char2 __ovld __cnfn convert_char2_sat_rtn(float2);
+char2 __ovld __cnfn convert_char2(float2);
+char2 __ovld __cnfn convert_char2_sat(float2);
+uchar2 __ovld __cnfn convert_uchar2_rte(char2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(char2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(char2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(char2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2);
+uchar2 __ovld __cnfn convert_uchar2(char2);
+uchar2 __ovld __cnfn convert_uchar2_sat(char2);
+uchar2 __ovld __cnfn convert_uchar2_rte(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2);
+uchar2 __ovld __cnfn convert_uchar2(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_sat(uchar2);
+uchar2 __ovld __cnfn convert_uchar2_rte(short2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(short2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(short2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(short2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2);
+uchar2 __ovld __cnfn convert_uchar2(short2);
+uchar2 __ovld __cnfn convert_uchar2_sat(short2);
+uchar2 __ovld __cnfn convert_uchar2_rte(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2);
+uchar2 __ovld __cnfn convert_uchar2(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_sat(ushort2);
+uchar2 __ovld __cnfn convert_uchar2_rte(int2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(int2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(int2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(int2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2);
+uchar2 __ovld __cnfn convert_uchar2(int2);
+uchar2 __ovld __cnfn convert_uchar2_sat(int2);
+uchar2 __ovld __cnfn convert_uchar2_rte(uint2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(uint2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(uint2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(uint2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2);
+uchar2 __ovld __cnfn convert_uchar2(uint2);
+uchar2 __ovld __cnfn convert_uchar2_sat(uint2);
+uchar2 __ovld __cnfn convert_uchar2_rte(long2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(long2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(long2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(long2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2);
+uchar2 __ovld __cnfn convert_uchar2(long2);
+uchar2 __ovld __cnfn convert_uchar2_sat(long2);
+uchar2 __ovld __cnfn convert_uchar2_rte(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2);
+uchar2 __ovld __cnfn convert_uchar2(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_sat(ulong2);
+uchar2 __ovld __cnfn convert_uchar2_rte(float2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(float2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(float2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(float2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2);
+uchar2 __ovld __cnfn convert_uchar2(float2);
+uchar2 __ovld __cnfn convert_uchar2_sat(float2);
+short2 __ovld __cnfn convert_short2_rte(char2);
+short2 __ovld __cnfn convert_short2_sat_rte(char2);
+short2 __ovld __cnfn convert_short2_rtz(char2);
+short2 __ovld __cnfn convert_short2_sat_rtz(char2);
+short2 __ovld __cnfn convert_short2_rtp(char2);
+short2 __ovld __cnfn convert_short2_sat_rtp(char2);
+short2 __ovld __cnfn convert_short2_rtn(char2);
+short2 __ovld __cnfn convert_short2_sat_rtn(char2);
+short2 __ovld __cnfn convert_short2(char2);
+short2 __ovld __cnfn convert_short2_sat(char2);
+short2 __ovld __cnfn convert_short2_rte(uchar2);
+short2 __ovld __cnfn convert_short2_sat_rte(uchar2);
+short2 __ovld __cnfn convert_short2_rtz(uchar2);
+short2 __ovld __cnfn convert_short2_sat_rtz(uchar2);
+short2 __ovld __cnfn convert_short2_rtp(uchar2);
+short2 __ovld __cnfn convert_short2_sat_rtp(uchar2);
+short2 __ovld __cnfn convert_short2_rtn(uchar2);
+short2 __ovld __cnfn convert_short2_sat_rtn(uchar2);
+short2 __ovld __cnfn convert_short2(uchar2);
+short2 __ovld __cnfn convert_short2_sat(uchar2);
+short2 __ovld __cnfn convert_short2_rte(short2);
+short2 __ovld __cnfn convert_short2_sat_rte(short2);
+short2 __ovld __cnfn convert_short2_rtz(short2);
+short2 __ovld __cnfn convert_short2_sat_rtz(short2);
+short2 __ovld __cnfn convert_short2_rtp(short2);
+short2 __ovld __cnfn convert_short2_sat_rtp(short2);
+short2 __ovld __cnfn convert_short2_rtn(short2);
+short2 __ovld __cnfn convert_short2_sat_rtn(short2);
+short2 __ovld __cnfn convert_short2(short2);
+short2 __ovld __cnfn convert_short2_sat(short2);
+short2 __ovld __cnfn convert_short2_rte(ushort2);
+short2 __ovld __cnfn convert_short2_sat_rte(ushort2);
+short2 __ovld __cnfn convert_short2_rtz(ushort2);
+short2 __ovld __cnfn convert_short2_sat_rtz(ushort2);
+short2 __ovld __cnfn convert_short2_rtp(ushort2);
+short2 __ovld __cnfn convert_short2_sat_rtp(ushort2);
+short2 __ovld __cnfn convert_short2_rtn(ushort2);
+short2 __ovld __cnfn convert_short2_sat_rtn(ushort2);
+short2 __ovld __cnfn convert_short2(ushort2);
+short2 __ovld __cnfn convert_short2_sat(ushort2);
+short2 __ovld __cnfn convert_short2_rte(int2);
+short2 __ovld __cnfn convert_short2_sat_rte(int2);
+short2 __ovld __cnfn convert_short2_rtz(int2);
+short2 __ovld __cnfn convert_short2_sat_rtz(int2);
+short2 __ovld __cnfn convert_short2_rtp(int2);
+short2 __ovld __cnfn convert_short2_sat_rtp(int2);
+short2 __ovld __cnfn convert_short2_rtn(int2);
+short2 __ovld __cnfn convert_short2_sat_rtn(int2);
+short2 __ovld __cnfn convert_short2(int2);
+short2 __ovld __cnfn convert_short2_sat(int2);
+short2 __ovld __cnfn convert_short2_rte(uint2);
+short2 __ovld __cnfn convert_short2_sat_rte(uint2);
+short2 __ovld __cnfn convert_short2_rtz(uint2);
+short2 __ovld __cnfn convert_short2_sat_rtz(uint2);
+short2 __ovld __cnfn convert_short2_rtp(uint2);
+short2 __ovld __cnfn convert_short2_sat_rtp(uint2);
+short2 __ovld __cnfn convert_short2_rtn(uint2);
+short2 __ovld __cnfn convert_short2_sat_rtn(uint2);
+short2 __ovld __cnfn convert_short2(uint2);
+short2 __ovld __cnfn convert_short2_sat(uint2);
+short2 __ovld __cnfn convert_short2_rte(long2);
+short2 __ovld __cnfn convert_short2_sat_rte(long2);
+short2 __ovld __cnfn convert_short2_rtz(long2);
+short2 __ovld __cnfn convert_short2_sat_rtz(long2);
+short2 __ovld __cnfn convert_short2_rtp(long2);
+short2 __ovld __cnfn convert_short2_sat_rtp(long2);
+short2 __ovld __cnfn convert_short2_rtn(long2);
+short2 __ovld __cnfn convert_short2_sat_rtn(long2);
+short2 __ovld __cnfn convert_short2(long2);
+short2 __ovld __cnfn convert_short2_sat(long2);
+short2 __ovld __cnfn convert_short2_rte(ulong2);
+short2 __ovld __cnfn convert_short2_sat_rte(ulong2);
+short2 __ovld __cnfn convert_short2_rtz(ulong2);
+short2 __ovld __cnfn convert_short2_sat_rtz(ulong2);
+short2 __ovld __cnfn convert_short2_rtp(ulong2);
+short2 __ovld __cnfn convert_short2_sat_rtp(ulong2);
+short2 __ovld __cnfn convert_short2_rtn(ulong2);
+short2 __ovld __cnfn convert_short2_sat_rtn(ulong2);
+short2 __ovld __cnfn convert_short2(ulong2);
+short2 __ovld __cnfn convert_short2_sat(ulong2);
+short2 __ovld __cnfn convert_short2_rte(float2);
+short2 __ovld __cnfn convert_short2_sat_rte(float2);
+short2 __ovld __cnfn convert_short2_rtz(float2);
+short2 __ovld __cnfn convert_short2_sat_rtz(float2);
+short2 __ovld __cnfn convert_short2_rtp(float2);
+short2 __ovld __cnfn convert_short2_sat_rtp(float2);
+short2 __ovld __cnfn convert_short2_rtn(float2);
+short2 __ovld __cnfn convert_short2_sat_rtn(float2);
+short2 __ovld __cnfn convert_short2(float2);
+short2 __ovld __cnfn convert_short2_sat(float2);
+ushort2 __ovld __cnfn convert_ushort2_rte(char2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(char2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(char2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(char2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2);
+ushort2 __ovld __cnfn convert_ushort2(char2);
+ushort2 __ovld __cnfn convert_ushort2_sat(char2);
+ushort2 __ovld __cnfn convert_ushort2_rte(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2);
+ushort2 __ovld __cnfn convert_ushort2(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_sat(uchar2);
+ushort2 __ovld __cnfn convert_ushort2_rte(short2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(short2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(short2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(short2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2);
+ushort2 __ovld __cnfn convert_ushort2(short2);
+ushort2 __ovld __cnfn convert_ushort2_sat(short2);
+ushort2 __ovld __cnfn convert_ushort2_rte(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2);
+ushort2 __ovld __cnfn convert_ushort2(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_sat(ushort2);
+ushort2 __ovld __cnfn convert_ushort2_rte(int2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(int2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(int2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(int2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2);
+ushort2 __ovld __cnfn convert_ushort2(int2);
+ushort2 __ovld __cnfn convert_ushort2_sat(int2);
+ushort2 __ovld __cnfn convert_ushort2_rte(uint2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(uint2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(uint2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(uint2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2);
+ushort2 __ovld __cnfn convert_ushort2(uint2);
+ushort2 __ovld __cnfn convert_ushort2_sat(uint2);
+ushort2 __ovld __cnfn convert_ushort2_rte(long2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(long2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(long2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(long2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2);
+ushort2 __ovld __cnfn convert_ushort2(long2);
+ushort2 __ovld __cnfn convert_ushort2_sat(long2);
+ushort2 __ovld __cnfn convert_ushort2_rte(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2);
+ushort2 __ovld __cnfn convert_ushort2(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_sat(ulong2);
+ushort2 __ovld __cnfn convert_ushort2_rte(float2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(float2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(float2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(float2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2);
+ushort2 __ovld __cnfn convert_ushort2(float2);
+ushort2 __ovld __cnfn convert_ushort2_sat(float2);
+int2 __ovld __cnfn convert_int2_rte(char2);
+int2 __ovld __cnfn convert_int2_sat_rte(char2);
+int2 __ovld __cnfn convert_int2_rtz(char2);
+int2 __ovld __cnfn convert_int2_sat_rtz(char2);
+int2 __ovld __cnfn convert_int2_rtp(char2);
+int2 __ovld __cnfn convert_int2_sat_rtp(char2);
+int2 __ovld __cnfn convert_int2_rtn(char2);
+int2 __ovld __cnfn convert_int2_sat_rtn(char2);
+int2 __ovld __cnfn convert_int2(char2);
+int2 __ovld __cnfn convert_int2_sat(char2);
+int2 __ovld __cnfn convert_int2_rte(uchar2);
+int2 __ovld __cnfn convert_int2_sat_rte(uchar2);
+int2 __ovld __cnfn convert_int2_rtz(uchar2);
+int2 __ovld __cnfn convert_int2_sat_rtz(uchar2);
+int2 __ovld __cnfn convert_int2_rtp(uchar2);
+int2 __ovld __cnfn convert_int2_sat_rtp(uchar2);
+int2 __ovld __cnfn convert_int2_rtn(uchar2);
+int2 __ovld __cnfn convert_int2_sat_rtn(uchar2);
+int2 __ovld __cnfn convert_int2(uchar2);
+int2 __ovld __cnfn convert_int2_sat(uchar2);
+int2 __ovld __cnfn convert_int2_rte(short2);
+int2 __ovld __cnfn convert_int2_sat_rte(short2);
+int2 __ovld __cnfn convert_int2_rtz(short2);
+int2 __ovld __cnfn convert_int2_sat_rtz(short2);
+int2 __ovld __cnfn convert_int2_rtp(short2);
+int2 __ovld __cnfn convert_int2_sat_rtp(short2);
+int2 __ovld __cnfn convert_int2_rtn(short2);
+int2 __ovld __cnfn convert_int2_sat_rtn(short2);
+int2 __ovld __cnfn convert_int2(short2);
+int2 __ovld __cnfn convert_int2_sat(short2);
+int2 __ovld __cnfn convert_int2_rte(ushort2);
+int2 __ovld __cnfn convert_int2_sat_rte(ushort2);
+int2 __ovld __cnfn convert_int2_rtz(ushort2);
+int2 __ovld __cnfn convert_int2_sat_rtz(ushort2);
+int2 __ovld __cnfn convert_int2_rtp(ushort2);
+int2 __ovld __cnfn convert_int2_sat_rtp(ushort2);
+int2 __ovld __cnfn convert_int2_rtn(ushort2);
+int2 __ovld __cnfn convert_int2_sat_rtn(ushort2);
+int2 __ovld __cnfn convert_int2(ushort2);
+int2 __ovld __cnfn convert_int2_sat(ushort2);
+int2 __ovld __cnfn convert_int2_rte(int2);
+int2 __ovld __cnfn convert_int2_sat_rte(int2);
+int2 __ovld __cnfn convert_int2_rtz(int2);
+int2 __ovld __cnfn convert_int2_sat_rtz(int2);
+int2 __ovld __cnfn convert_int2_rtp(int2);
+int2 __ovld __cnfn convert_int2_sat_rtp(int2);
+int2 __ovld __cnfn convert_int2_rtn(int2);
+int2 __ovld __cnfn convert_int2_sat_rtn(int2);
+int2 __ovld __cnfn convert_int2(int2);
+int2 __ovld __cnfn convert_int2_sat(int2);
+int2 __ovld __cnfn convert_int2_rte(uint2);
+int2 __ovld __cnfn convert_int2_sat_rte(uint2);
+int2 __ovld __cnfn convert_int2_rtz(uint2);
+int2 __ovld __cnfn convert_int2_sat_rtz(uint2);
+int2 __ovld __cnfn convert_int2_rtp(uint2);
+int2 __ovld __cnfn convert_int2_sat_rtp(uint2);
+int2 __ovld __cnfn convert_int2_rtn(uint2);
+int2 __ovld __cnfn convert_int2_sat_rtn(uint2);
+int2 __ovld __cnfn convert_int2(uint2);
+int2 __ovld __cnfn convert_int2_sat(uint2);
+int2 __ovld __cnfn convert_int2_rte(long2);
+int2 __ovld __cnfn convert_int2_sat_rte(long2);
+int2 __ovld __cnfn convert_int2_rtz(long2);
+int2 __ovld __cnfn convert_int2_sat_rtz(long2);
+int2 __ovld __cnfn convert_int2_rtp(long2);
+int2 __ovld __cnfn convert_int2_sat_rtp(long2);
+int2 __ovld __cnfn convert_int2_rtn(long2);
+int2 __ovld __cnfn convert_int2_sat_rtn(long2);
+int2 __ovld __cnfn convert_int2(long2);
+int2 __ovld __cnfn convert_int2_sat(long2);
+int2 __ovld __cnfn convert_int2_rte(ulong2);
+int2 __ovld __cnfn convert_int2_sat_rte(ulong2);
+int2 __ovld __cnfn convert_int2_rtz(ulong2);
+int2 __ovld __cnfn convert_int2_sat_rtz(ulong2);
+int2 __ovld __cnfn convert_int2_rtp(ulong2);
+int2 __ovld __cnfn convert_int2_sat_rtp(ulong2);
+int2 __ovld __cnfn convert_int2_rtn(ulong2);
+int2 __ovld __cnfn convert_int2_sat_rtn(ulong2);
+int2 __ovld __cnfn convert_int2(ulong2);
+int2 __ovld __cnfn convert_int2_sat(ulong2);
+int2 __ovld __cnfn convert_int2_rte(float2);
+int2 __ovld __cnfn convert_int2_sat_rte(float2);
+int2 __ovld __cnfn convert_int2_rtz(float2);
+int2 __ovld __cnfn convert_int2_sat_rtz(float2);
+int2 __ovld __cnfn convert_int2_rtp(float2);
+int2 __ovld __cnfn convert_int2_sat_rtp(float2);
+int2 __ovld __cnfn convert_int2_rtn(float2);
+int2 __ovld __cnfn convert_int2_sat_rtn(float2);
+int2 __ovld __cnfn convert_int2(float2);
+int2 __ovld __cnfn convert_int2_sat(float2);
+uint2 __ovld __cnfn convert_uint2_rte(char2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(char2);
+uint2 __ovld __cnfn convert_uint2_rtz(char2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(char2);
+uint2 __ovld __cnfn convert_uint2_rtp(char2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(char2);
+uint2 __ovld __cnfn convert_uint2_rtn(char2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(char2);
+uint2 __ovld __cnfn convert_uint2(char2);
+uint2 __ovld __cnfn convert_uint2_sat(char2);
+uint2 __ovld __cnfn convert_uint2_rte(uchar2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2);
+uint2 __ovld __cnfn convert_uint2_rtz(uchar2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2);
+uint2 __ovld __cnfn convert_uint2_rtp(uchar2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2);
+uint2 __ovld __cnfn convert_uint2_rtn(uchar2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2);
+uint2 __ovld __cnfn convert_uint2(uchar2);
+uint2 __ovld __cnfn convert_uint2_sat(uchar2);
+uint2 __ovld __cnfn convert_uint2_rte(short2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(short2);
+uint2 __ovld __cnfn convert_uint2_rtz(short2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(short2);
+uint2 __ovld __cnfn convert_uint2_rtp(short2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(short2);
+uint2 __ovld __cnfn convert_uint2_rtn(short2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(short2);
+uint2 __ovld __cnfn convert_uint2(short2);
+uint2 __ovld __cnfn convert_uint2_sat(short2);
+uint2 __ovld __cnfn convert_uint2_rte(ushort2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2);
+uint2 __ovld __cnfn convert_uint2_rtz(ushort2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2);
+uint2 __ovld __cnfn convert_uint2_rtp(ushort2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2);
+uint2 __ovld __cnfn convert_uint2_rtn(ushort2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2);
+uint2 __ovld __cnfn convert_uint2(ushort2);
+uint2 __ovld __cnfn convert_uint2_sat(ushort2);
+uint2 __ovld __cnfn convert_uint2_rte(int2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(int2);
+uint2 __ovld __cnfn convert_uint2_rtz(int2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(int2);
+uint2 __ovld __cnfn convert_uint2_rtp(int2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(int2);
+uint2 __ovld __cnfn convert_uint2_rtn(int2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(int2);
+uint2 __ovld __cnfn convert_uint2(int2);
+uint2 __ovld __cnfn convert_uint2_sat(int2);
+uint2 __ovld __cnfn convert_uint2_rte(uint2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(uint2);
+uint2 __ovld __cnfn convert_uint2_rtz(uint2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2);
+uint2 __ovld __cnfn convert_uint2_rtp(uint2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2);
+uint2 __ovld __cnfn convert_uint2_rtn(uint2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2);
+uint2 __ovld __cnfn convert_uint2(uint2);
+uint2 __ovld __cnfn convert_uint2_sat(uint2);
+uint2 __ovld __cnfn convert_uint2_rte(long2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(long2);
+uint2 __ovld __cnfn convert_uint2_rtz(long2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(long2);
+uint2 __ovld __cnfn convert_uint2_rtp(long2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(long2);
+uint2 __ovld __cnfn convert_uint2_rtn(long2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(long2);
+uint2 __ovld __cnfn convert_uint2(long2);
+uint2 __ovld __cnfn convert_uint2_sat(long2);
+uint2 __ovld __cnfn convert_uint2_rte(ulong2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2);
+uint2 __ovld __cnfn convert_uint2_rtz(ulong2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2);
+uint2 __ovld __cnfn convert_uint2_rtp(ulong2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2);
+uint2 __ovld __cnfn convert_uint2_rtn(ulong2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2);
+uint2 __ovld __cnfn convert_uint2(ulong2);
+uint2 __ovld __cnfn convert_uint2_sat(ulong2);
+uint2 __ovld __cnfn convert_uint2_rte(float2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(float2);
+uint2 __ovld __cnfn convert_uint2_rtz(float2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(float2);
+uint2 __ovld __cnfn convert_uint2_rtp(float2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(float2);
+uint2 __ovld __cnfn convert_uint2_rtn(float2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(float2);
+uint2 __ovld __cnfn convert_uint2(float2);
+uint2 __ovld __cnfn convert_uint2_sat(float2);
+long2 __ovld __cnfn convert_long2_rte(char2);
+long2 __ovld __cnfn convert_long2_sat_rte(char2);
+long2 __ovld __cnfn convert_long2_rtz(char2);
+long2 __ovld __cnfn convert_long2_sat_rtz(char2);
+long2 __ovld __cnfn convert_long2_rtp(char2);
+long2 __ovld __cnfn convert_long2_sat_rtp(char2);
+long2 __ovld __cnfn convert_long2_rtn(char2);
+long2 __ovld __cnfn convert_long2_sat_rtn(char2);
+long2 __ovld __cnfn convert_long2(char2);
+long2 __ovld __cnfn convert_long2_sat(char2);
+long2 __ovld __cnfn convert_long2_rte(uchar2);
+long2 __ovld __cnfn convert_long2_sat_rte(uchar2);
+long2 __ovld __cnfn convert_long2_rtz(uchar2);
+long2 __ovld __cnfn convert_long2_sat_rtz(uchar2);
+long2 __ovld __cnfn convert_long2_rtp(uchar2);
+long2 __ovld __cnfn convert_long2_sat_rtp(uchar2);
+long2 __ovld __cnfn convert_long2_rtn(uchar2);
+long2 __ovld __cnfn convert_long2_sat_rtn(uchar2);
+long2 __ovld __cnfn convert_long2(uchar2);
+long2 __ovld __cnfn convert_long2_sat(uchar2);
+long2 __ovld __cnfn convert_long2_rte(short2);
+long2 __ovld __cnfn convert_long2_sat_rte(short2);
+long2 __ovld __cnfn convert_long2_rtz(short2);
+long2 __ovld __cnfn convert_long2_sat_rtz(short2);
+long2 __ovld __cnfn convert_long2_rtp(short2);
+long2 __ovld __cnfn convert_long2_sat_rtp(short2);
+long2 __ovld __cnfn convert_long2_rtn(short2);
+long2 __ovld __cnfn convert_long2_sat_rtn(short2);
+long2 __ovld __cnfn convert_long2(short2);
+long2 __ovld __cnfn convert_long2_sat(short2);
+long2 __ovld __cnfn convert_long2_rte(ushort2);
+long2 __ovld __cnfn convert_long2_sat_rte(ushort2);
+long2 __ovld __cnfn convert_long2_rtz(ushort2);
+long2 __ovld __cnfn convert_long2_sat_rtz(ushort2);
+long2 __ovld __cnfn convert_long2_rtp(ushort2);
+long2 __ovld __cnfn convert_long2_sat_rtp(ushort2);
+long2 __ovld __cnfn convert_long2_rtn(ushort2);
+long2 __ovld __cnfn convert_long2_sat_rtn(ushort2);
+long2 __ovld __cnfn convert_long2(ushort2);
+long2 __ovld __cnfn convert_long2_sat(ushort2);
+long2 __ovld __cnfn convert_long2_rte(int2);
+long2 __ovld __cnfn convert_long2_sat_rte(int2);
+long2 __ovld __cnfn convert_long2_rtz(int2);
+long2 __ovld __cnfn convert_long2_sat_rtz(int2);
+long2 __ovld __cnfn convert_long2_rtp(int2);
+long2 __ovld __cnfn convert_long2_sat_rtp(int2);
+long2 __ovld __cnfn convert_long2_rtn(int2);
+long2 __ovld __cnfn convert_long2_sat_rtn(int2);
+long2 __ovld __cnfn convert_long2(int2);
+long2 __ovld __cnfn convert_long2_sat(int2);
+long2 __ovld __cnfn convert_long2_rte(uint2);
+long2 __ovld __cnfn convert_long2_sat_rte(uint2);
+long2 __ovld __cnfn convert_long2_rtz(uint2);
+long2 __ovld __cnfn convert_long2_sat_rtz(uint2);
+long2 __ovld __cnfn convert_long2_rtp(uint2);
+long2 __ovld __cnfn convert_long2_sat_rtp(uint2);
+long2 __ovld __cnfn convert_long2_rtn(uint2);
+long2 __ovld __cnfn convert_long2_sat_rtn(uint2);
+long2 __ovld __cnfn convert_long2(uint2);
+long2 __ovld __cnfn convert_long2_sat(uint2);
+long2 __ovld __cnfn convert_long2_rte(long2);
+long2 __ovld __cnfn convert_long2_sat_rte(long2);
+long2 __ovld __cnfn convert_long2_rtz(long2);
+long2 __ovld __cnfn convert_long2_sat_rtz(long2);
+long2 __ovld __cnfn convert_long2_rtp(long2);
+long2 __ovld __cnfn convert_long2_sat_rtp(long2);
+long2 __ovld __cnfn convert_long2_rtn(long2);
+long2 __ovld __cnfn convert_long2_sat_rtn(long2);
+long2 __ovld __cnfn convert_long2(long2);
+long2 __ovld __cnfn convert_long2_sat(long2);
+long2 __ovld __cnfn convert_long2_rte(ulong2);
+long2 __ovld __cnfn convert_long2_sat_rte(ulong2);
+long2 __ovld __cnfn convert_long2_rtz(ulong2);
+long2 __ovld __cnfn convert_long2_sat_rtz(ulong2);
+long2 __ovld __cnfn convert_long2_rtp(ulong2);
+long2 __ovld __cnfn convert_long2_sat_rtp(ulong2);
+long2 __ovld __cnfn convert_long2_rtn(ulong2);
+long2 __ovld __cnfn convert_long2_sat_rtn(ulong2);
+long2 __ovld __cnfn convert_long2(ulong2);
+long2 __ovld __cnfn convert_long2_sat(ulong2);
+long2 __ovld __cnfn convert_long2_rte(float2);
+long2 __ovld __cnfn convert_long2_sat_rte(float2);
+long2 __ovld __cnfn convert_long2_rtz(float2);
+long2 __ovld __cnfn convert_long2_sat_rtz(float2);
+long2 __ovld __cnfn convert_long2_rtp(float2);
+long2 __ovld __cnfn convert_long2_sat_rtp(float2);
+long2 __ovld __cnfn convert_long2_rtn(float2);
+long2 __ovld __cnfn convert_long2_sat_rtn(float2);
+long2 __ovld __cnfn convert_long2(float2);
+long2 __ovld __cnfn convert_long2_sat(float2);
+ulong2 __ovld __cnfn convert_ulong2_rte(char2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(char2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(char2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(char2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2);
+ulong2 __ovld __cnfn convert_ulong2(char2);
+ulong2 __ovld __cnfn convert_ulong2_sat(char2);
+ulong2 __ovld __cnfn convert_ulong2_rte(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2);
+ulong2 __ovld __cnfn convert_ulong2(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_sat(uchar2);
+ulong2 __ovld __cnfn convert_ulong2_rte(short2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(short2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(short2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(short2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2);
+ulong2 __ovld __cnfn convert_ulong2(short2);
+ulong2 __ovld __cnfn convert_ulong2_sat(short2);
+ulong2 __ovld __cnfn convert_ulong2_rte(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2);
+ulong2 __ovld __cnfn convert_ulong2(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_sat(ushort2);
+ulong2 __ovld __cnfn convert_ulong2_rte(int2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(int2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(int2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(int2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2);
+ulong2 __ovld __cnfn convert_ulong2(int2);
+ulong2 __ovld __cnfn convert_ulong2_sat(int2);
+ulong2 __ovld __cnfn convert_ulong2_rte(uint2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(uint2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(uint2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(uint2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2);
+ulong2 __ovld __cnfn convert_ulong2(uint2);
+ulong2 __ovld __cnfn convert_ulong2_sat(uint2);
+ulong2 __ovld __cnfn convert_ulong2_rte(long2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(long2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(long2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(long2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2);
+ulong2 __ovld __cnfn convert_ulong2(long2);
+ulong2 __ovld __cnfn convert_ulong2_sat(long2);
+ulong2 __ovld __cnfn convert_ulong2_rte(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2);
+ulong2 __ovld __cnfn convert_ulong2(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_sat(ulong2);
+ulong2 __ovld __cnfn convert_ulong2_rte(float2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(float2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(float2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(float2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2);
+ulong2 __ovld __cnfn convert_ulong2(float2);
+ulong2 __ovld __cnfn convert_ulong2_sat(float2);
+float2 __ovld __cnfn convert_float2_rte(char2);
+float2 __ovld __cnfn convert_float2_rtz(char2);
+float2 __ovld __cnfn convert_float2_rtp(char2);
+float2 __ovld __cnfn convert_float2_rtn(char2);
+float2 __ovld __cnfn convert_float2(char2);
+float2 __ovld __cnfn convert_float2_rte(uchar2);
+float2 __ovld __cnfn convert_float2_rtz(uchar2);
+float2 __ovld __cnfn convert_float2_rtp(uchar2);
+float2 __ovld __cnfn convert_float2_rtn(uchar2);
+float2 __ovld __cnfn convert_float2(uchar2);
+float2 __ovld __cnfn convert_float2_rte(short2);
+float2 __ovld __cnfn convert_float2_rtz(short2);
+float2 __ovld __cnfn convert_float2_rtp(short2);
+float2 __ovld __cnfn convert_float2_rtn(short2);
+float2 __ovld __cnfn convert_float2(short2);
+float2 __ovld __cnfn convert_float2_rte(ushort2);
+float2 __ovld __cnfn convert_float2_rtz(ushort2);
+float2 __ovld __cnfn convert_float2_rtp(ushort2);
+float2 __ovld __cnfn convert_float2_rtn(ushort2);
+float2 __ovld __cnfn convert_float2(ushort2);
+float2 __ovld __cnfn convert_float2_rte(int2);
+float2 __ovld __cnfn convert_float2_rtz(int2);
+float2 __ovld __cnfn convert_float2_rtp(int2);
+float2 __ovld __cnfn convert_float2_rtn(int2);
+float2 __ovld __cnfn convert_float2(int2);
+float2 __ovld __cnfn convert_float2_rte(uint2);
+float2 __ovld __cnfn convert_float2_rtz(uint2);
+float2 __ovld __cnfn convert_float2_rtp(uint2);
+float2 __ovld __cnfn convert_float2_rtn(uint2);
+float2 __ovld __cnfn convert_float2(uint2);
+float2 __ovld __cnfn convert_float2_rte(long2);
+float2 __ovld __cnfn convert_float2_rtz(long2);
+float2 __ovld __cnfn convert_float2_rtp(long2);
+float2 __ovld __cnfn convert_float2_rtn(long2);
+float2 __ovld __cnfn convert_float2(long2);
+float2 __ovld __cnfn convert_float2_rte(ulong2);
+float2 __ovld __cnfn convert_float2_rtz(ulong2);
+float2 __ovld __cnfn convert_float2_rtp(ulong2);
+float2 __ovld __cnfn convert_float2_rtn(ulong2);
+float2 __ovld __cnfn convert_float2(ulong2);
+float2 __ovld __cnfn convert_float2_rte(float2);
+float2 __ovld __cnfn convert_float2_rtz(float2);
+float2 __ovld __cnfn convert_float2_rtp(float2);
+float2 __ovld __cnfn convert_float2_rtn(float2);
+float2 __ovld __cnfn convert_float2(float2);
+char3 __ovld __cnfn convert_char3_rte(char3);
+char3 __ovld __cnfn convert_char3_sat_rte(char3);
+char3 __ovld __cnfn convert_char3_rtz(char3);
+char3 __ovld __cnfn convert_char3_sat_rtz(char3);
+char3 __ovld __cnfn convert_char3_rtp(char3);
+char3 __ovld __cnfn convert_char3_sat_rtp(char3);
+char3 __ovld __cnfn convert_char3_rtn(char3);
+char3 __ovld __cnfn convert_char3_sat_rtn(char3);
+char3 __ovld __cnfn convert_char3(char3);
+char3 __ovld __cnfn convert_char3_sat(char3);
+char3 __ovld __cnfn convert_char3_rte(uchar3);
+char3 __ovld __cnfn convert_char3_sat_rte(uchar3);
+char3 __ovld __cnfn convert_char3_rtz(uchar3);
+char3 __ovld __cnfn convert_char3_sat_rtz(uchar3);
+char3 __ovld __cnfn convert_char3_rtp(uchar3);
+char3 __ovld __cnfn convert_char3_sat_rtp(uchar3);
+char3 __ovld __cnfn convert_char3_rtn(uchar3);
+char3 __ovld __cnfn convert_char3_sat_rtn(uchar3);
+char3 __ovld __cnfn convert_char3(uchar3);
+char3 __ovld __cnfn convert_char3_sat(uchar3);
+char3 __ovld __cnfn convert_char3_rte(short3);
+char3 __ovld __cnfn convert_char3_sat_rte(short3);
+char3 __ovld __cnfn convert_char3_rtz(short3);
+char3 __ovld __cnfn convert_char3_sat_rtz(short3);
+char3 __ovld __cnfn convert_char3_rtp(short3);
+char3 __ovld __cnfn convert_char3_sat_rtp(short3);
+char3 __ovld __cnfn convert_char3_rtn(short3);
+char3 __ovld __cnfn convert_char3_sat_rtn(short3);
+char3 __ovld __cnfn convert_char3(short3);
+char3 __ovld __cnfn convert_char3_sat(short3);
+char3 __ovld __cnfn convert_char3_rte(ushort3);
+char3 __ovld __cnfn convert_char3_sat_rte(ushort3);
+char3 __ovld __cnfn convert_char3_rtz(ushort3);
+char3 __ovld __cnfn convert_char3_sat_rtz(ushort3);
+char3 __ovld __cnfn convert_char3_rtp(ushort3);
+char3 __ovld __cnfn convert_char3_sat_rtp(ushort3);
+char3 __ovld __cnfn convert_char3_rtn(ushort3);
+char3 __ovld __cnfn convert_char3_sat_rtn(ushort3);
+char3 __ovld __cnfn convert_char3(ushort3);
+char3 __ovld __cnfn convert_char3_sat(ushort3);
+char3 __ovld __cnfn convert_char3_rte(int3);
+char3 __ovld __cnfn convert_char3_sat_rte(int3);
+char3 __ovld __cnfn convert_char3_rtz(int3);
+char3 __ovld __cnfn convert_char3_sat_rtz(int3);
+char3 __ovld __cnfn convert_char3_rtp(int3);
+char3 __ovld __cnfn convert_char3_sat_rtp(int3);
+char3 __ovld __cnfn convert_char3_rtn(int3);
+char3 __ovld __cnfn convert_char3_sat_rtn(int3);
+char3 __ovld __cnfn convert_char3(int3);
+char3 __ovld __cnfn convert_char3_sat(int3);
+char3 __ovld __cnfn convert_char3_rte(uint3);
+char3 __ovld __cnfn convert_char3_sat_rte(uint3);
+char3 __ovld __cnfn convert_char3_rtz(uint3);
+char3 __ovld __cnfn convert_char3_sat_rtz(uint3);
+char3 __ovld __cnfn convert_char3_rtp(uint3);
+char3 __ovld __cnfn convert_char3_sat_rtp(uint3);
+char3 __ovld __cnfn convert_char3_rtn(uint3);
+char3 __ovld __cnfn convert_char3_sat_rtn(uint3);
+char3 __ovld __cnfn convert_char3(uint3);
+char3 __ovld __cnfn convert_char3_sat(uint3);
+char3 __ovld __cnfn convert_char3_rte(long3);
+char3 __ovld __cnfn convert_char3_sat_rte(long3);
+char3 __ovld __cnfn convert_char3_rtz(long3);
+char3 __ovld __cnfn convert_char3_sat_rtz(long3);
+char3 __ovld __cnfn convert_char3_rtp(long3);
+char3 __ovld __cnfn convert_char3_sat_rtp(long3);
+char3 __ovld __cnfn convert_char3_rtn(long3);
+char3 __ovld __cnfn convert_char3_sat_rtn(long3);
+char3 __ovld __cnfn convert_char3(long3);
+char3 __ovld __cnfn convert_char3_sat(long3);
+char3 __ovld __cnfn convert_char3_rte(ulong3);
+char3 __ovld __cnfn convert_char3_sat_rte(ulong3);
+char3 __ovld __cnfn convert_char3_rtz(ulong3);
+char3 __ovld __cnfn convert_char3_sat_rtz(ulong3);
+char3 __ovld __cnfn convert_char3_rtp(ulong3);
+char3 __ovld __cnfn convert_char3_sat_rtp(ulong3);
+char3 __ovld __cnfn convert_char3_rtn(ulong3);
+char3 __ovld __cnfn convert_char3_sat_rtn(ulong3);
+char3 __ovld __cnfn convert_char3(ulong3);
+char3 __ovld __cnfn convert_char3_sat(ulong3);
+char3 __ovld __cnfn convert_char3_rte(float3);
+char3 __ovld __cnfn convert_char3_sat_rte(float3);
+char3 __ovld __cnfn convert_char3_rtz(float3);
+char3 __ovld __cnfn convert_char3_sat_rtz(float3);
+char3 __ovld __cnfn convert_char3_rtp(float3);
+char3 __ovld __cnfn convert_char3_sat_rtp(float3);
+char3 __ovld __cnfn convert_char3_rtn(float3);
+char3 __ovld __cnfn convert_char3_sat_rtn(float3);
+char3 __ovld __cnfn convert_char3(float3);
+char3 __ovld __cnfn convert_char3_sat(float3);
+uchar3 __ovld __cnfn convert_uchar3_rte(char3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(char3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(char3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(char3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3);
+uchar3 __ovld __cnfn convert_uchar3(char3);
+uchar3 __ovld __cnfn convert_uchar3_sat(char3);
+uchar3 __ovld __cnfn convert_uchar3_rte(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3);
+uchar3 __ovld __cnfn convert_uchar3(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_sat(uchar3);
+uchar3 __ovld __cnfn convert_uchar3_rte(short3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(short3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(short3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(short3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3);
+uchar3 __ovld __cnfn convert_uchar3(short3);
+uchar3 __ovld __cnfn convert_uchar3_sat(short3);
+uchar3 __ovld __cnfn convert_uchar3_rte(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3);
+uchar3 __ovld __cnfn convert_uchar3(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_sat(ushort3);
+uchar3 __ovld __cnfn convert_uchar3_rte(int3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(int3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(int3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(int3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3);
+uchar3 __ovld __cnfn convert_uchar3(int3);
+uchar3 __ovld __cnfn convert_uchar3_sat(int3);
+uchar3 __ovld __cnfn convert_uchar3_rte(uint3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(uint3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(uint3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(uint3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3);
+uchar3 __ovld __cnfn convert_uchar3(uint3);
+uchar3 __ovld __cnfn convert_uchar3_sat(uint3);
+uchar3 __ovld __cnfn convert_uchar3_rte(long3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(long3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(long3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(long3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3);
+uchar3 __ovld __cnfn convert_uchar3(long3);
+uchar3 __ovld __cnfn convert_uchar3_sat(long3);
+uchar3 __ovld __cnfn convert_uchar3_rte(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3);
+uchar3 __ovld __cnfn convert_uchar3(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_sat(ulong3);
+uchar3 __ovld __cnfn convert_uchar3_rte(float3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(float3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(float3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(float3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3);
+uchar3 __ovld __cnfn convert_uchar3(float3);
+uchar3 __ovld __cnfn convert_uchar3_sat(float3);
+short3 __ovld __cnfn convert_short3_rte(char3);
+short3 __ovld __cnfn convert_short3_sat_rte(char3);
+short3 __ovld __cnfn convert_short3_rtz(char3);
+short3 __ovld __cnfn convert_short3_sat_rtz(char3);
+short3 __ovld __cnfn convert_short3_rtp(char3);
+short3 __ovld __cnfn convert_short3_sat_rtp(char3);
+short3 __ovld __cnfn convert_short3_rtn(char3);
+short3 __ovld __cnfn convert_short3_sat_rtn(char3);
+short3 __ovld __cnfn convert_short3(char3);
+short3 __ovld __cnfn convert_short3_sat(char3);
+short3 __ovld __cnfn convert_short3_rte(uchar3);
+short3 __ovld __cnfn convert_short3_sat_rte(uchar3);
+short3 __ovld __cnfn convert_short3_rtz(uchar3);
+short3 __ovld __cnfn convert_short3_sat_rtz(uchar3);
+short3 __ovld __cnfn convert_short3_rtp(uchar3);
+short3 __ovld __cnfn convert_short3_sat_rtp(uchar3);
+short3 __ovld __cnfn convert_short3_rtn(uchar3);
+short3 __ovld __cnfn convert_short3_sat_rtn(uchar3);
+short3 __ovld __cnfn convert_short3(uchar3);
+short3 __ovld __cnfn convert_short3_sat(uchar3);
+short3 __ovld __cnfn convert_short3_rte(short3);
+short3 __ovld __cnfn convert_short3_sat_rte(short3);
+short3 __ovld __cnfn convert_short3_rtz(short3);
+short3 __ovld __cnfn convert_short3_sat_rtz(short3);
+short3 __ovld __cnfn convert_short3_rtp(short3);
+short3 __ovld __cnfn convert_short3_sat_rtp(short3);
+short3 __ovld __cnfn convert_short3_rtn(short3);
+short3 __ovld __cnfn convert_short3_sat_rtn(short3);
+short3 __ovld __cnfn convert_short3(short3);
+short3 __ovld __cnfn convert_short3_sat(short3);
+short3 __ovld __cnfn convert_short3_rte(ushort3);
+short3 __ovld __cnfn convert_short3_sat_rte(ushort3);
+short3 __ovld __cnfn convert_short3_rtz(ushort3);
+short3 __ovld __cnfn convert_short3_sat_rtz(ushort3);
+short3 __ovld __cnfn convert_short3_rtp(ushort3);
+short3 __ovld __cnfn convert_short3_sat_rtp(ushort3);
+short3 __ovld __cnfn convert_short3_rtn(ushort3);
+short3 __ovld __cnfn convert_short3_sat_rtn(ushort3);
+short3 __ovld __cnfn convert_short3(ushort3);
+short3 __ovld __cnfn convert_short3_sat(ushort3);
+short3 __ovld __cnfn convert_short3_rte(int3);
+short3 __ovld __cnfn convert_short3_sat_rte(int3);
+short3 __ovld __cnfn convert_short3_rtz(int3);
+short3 __ovld __cnfn convert_short3_sat_rtz(int3);
+short3 __ovld __cnfn convert_short3_rtp(int3);
+short3 __ovld __cnfn convert_short3_sat_rtp(int3);
+short3 __ovld __cnfn convert_short3_rtn(int3);
+short3 __ovld __cnfn convert_short3_sat_rtn(int3);
+short3 __ovld __cnfn convert_short3(int3);
+short3 __ovld __cnfn convert_short3_sat(int3);
+short3 __ovld __cnfn convert_short3_rte(uint3);
+short3 __ovld __cnfn convert_short3_sat_rte(uint3);
+short3 __ovld __cnfn convert_short3_rtz(uint3);
+short3 __ovld __cnfn convert_short3_sat_rtz(uint3);
+short3 __ovld __cnfn convert_short3_rtp(uint3);
+short3 __ovld __cnfn convert_short3_sat_rtp(uint3);
+short3 __ovld __cnfn convert_short3_rtn(uint3);
+short3 __ovld __cnfn convert_short3_sat_rtn(uint3);
+short3 __ovld __cnfn convert_short3(uint3);
+short3 __ovld __cnfn convert_short3_sat(uint3);
+short3 __ovld __cnfn convert_short3_rte(long3);
+short3 __ovld __cnfn convert_short3_sat_rte(long3);
+short3 __ovld __cnfn convert_short3_rtz(long3);
+short3 __ovld __cnfn convert_short3_sat_rtz(long3);
+short3 __ovld __cnfn convert_short3_rtp(long3);
+short3 __ovld __cnfn convert_short3_sat_rtp(long3);
+short3 __ovld __cnfn convert_short3_rtn(long3);
+short3 __ovld __cnfn convert_short3_sat_rtn(long3);
+short3 __ovld __cnfn convert_short3(long3);
+short3 __ovld __cnfn convert_short3_sat(long3);
+short3 __ovld __cnfn convert_short3_rte(ulong3);
+short3 __ovld __cnfn convert_short3_sat_rte(ulong3);
+short3 __ovld __cnfn convert_short3_rtz(ulong3);
+short3 __ovld __cnfn convert_short3_sat_rtz(ulong3);
+short3 __ovld __cnfn convert_short3_rtp(ulong3);
+short3 __ovld __cnfn convert_short3_sat_rtp(ulong3);
+short3 __ovld __cnfn convert_short3_rtn(ulong3);
+short3 __ovld __cnfn convert_short3_sat_rtn(ulong3);
+short3 __ovld __cnfn convert_short3(ulong3);
+short3 __ovld __cnfn convert_short3_sat(ulong3);
+short3 __ovld __cnfn convert_short3_rte(float3);
+short3 __ovld __cnfn convert_short3_sat_rte(float3);
+short3 __ovld __cnfn convert_short3_rtz(float3);
+short3 __ovld __cnfn convert_short3_sat_rtz(float3);
+short3 __ovld __cnfn convert_short3_rtp(float3);
+short3 __ovld __cnfn convert_short3_sat_rtp(float3);
+short3 __ovld __cnfn convert_short3_rtn(float3);
+short3 __ovld __cnfn convert_short3_sat_rtn(float3);
+short3 __ovld __cnfn convert_short3(float3);
+short3 __ovld __cnfn convert_short3_sat(float3);
+ushort3 __ovld __cnfn convert_ushort3_rte(char3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(char3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(char3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(char3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3);
+ushort3 __ovld __cnfn convert_ushort3(char3);
+ushort3 __ovld __cnfn convert_ushort3_sat(char3);
+ushort3 __ovld __cnfn convert_ushort3_rte(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3);
+ushort3 __ovld __cnfn convert_ushort3(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_sat(uchar3);
+ushort3 __ovld __cnfn convert_ushort3_rte(short3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(short3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(short3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(short3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3);
+ushort3 __ovld __cnfn convert_ushort3(short3);
+ushort3 __ovld __cnfn convert_ushort3_sat(short3);
+ushort3 __ovld __cnfn convert_ushort3_rte(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3);
+ushort3 __ovld __cnfn convert_ushort3(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_sat(ushort3);
+ushort3 __ovld __cnfn convert_ushort3_rte(int3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(int3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(int3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(int3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3);
+ushort3 __ovld __cnfn convert_ushort3(int3);
+ushort3 __ovld __cnfn convert_ushort3_sat(int3);
+ushort3 __ovld __cnfn convert_ushort3_rte(uint3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(uint3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(uint3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(uint3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3);
+ushort3 __ovld __cnfn convert_ushort3(uint3);
+ushort3 __ovld __cnfn convert_ushort3_sat(uint3);
+ushort3 __ovld __cnfn convert_ushort3_rte(long3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(long3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(long3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(long3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3);
+ushort3 __ovld __cnfn convert_ushort3(long3);
+ushort3 __ovld __cnfn convert_ushort3_sat(long3);
+ushort3 __ovld __cnfn convert_ushort3_rte(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3);
+ushort3 __ovld __cnfn convert_ushort3(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_sat(ulong3);
+ushort3 __ovld __cnfn convert_ushort3_rte(float3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(float3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(float3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(float3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3);
+ushort3 __ovld __cnfn convert_ushort3(float3);
+ushort3 __ovld __cnfn convert_ushort3_sat(float3);
+int3 __ovld __cnfn convert_int3_rte(char3);
+int3 __ovld __cnfn convert_int3_sat_rte(char3);
+int3 __ovld __cnfn convert_int3_rtz(char3);
+int3 __ovld __cnfn convert_int3_sat_rtz(char3);
+int3 __ovld __cnfn convert_int3_rtp(char3);
+int3 __ovld __cnfn convert_int3_sat_rtp(char3);
+int3 __ovld __cnfn convert_int3_rtn(char3);
+int3 __ovld __cnfn convert_int3_sat_rtn(char3);
+int3 __ovld __cnfn convert_int3(char3);
+int3 __ovld __cnfn convert_int3_sat(char3);
+int3 __ovld __cnfn convert_int3_rte(uchar3);
+int3 __ovld __cnfn convert_int3_sat_rte(uchar3);
+int3 __ovld __cnfn convert_int3_rtz(uchar3);
+int3 __ovld __cnfn convert_int3_sat_rtz(uchar3);
+int3 __ovld __cnfn convert_int3_rtp(uchar3);
+int3 __ovld __cnfn convert_int3_sat_rtp(uchar3);
+int3 __ovld __cnfn convert_int3_rtn(uchar3);
+int3 __ovld __cnfn convert_int3_sat_rtn(uchar3);
+int3 __ovld __cnfn convert_int3(uchar3);
+int3 __ovld __cnfn convert_int3_sat(uchar3);
+int3 __ovld __cnfn convert_int3_rte(short3);
+int3 __ovld __cnfn convert_int3_sat_rte(short3);
+int3 __ovld __cnfn convert_int3_rtz(short3);
+int3 __ovld __cnfn convert_int3_sat_rtz(short3);
+int3 __ovld __cnfn convert_int3_rtp(short3);
+int3 __ovld __cnfn convert_int3_sat_rtp(short3);
+int3 __ovld __cnfn convert_int3_rtn(short3);
+int3 __ovld __cnfn convert_int3_sat_rtn(short3);
+int3 __ovld __cnfn convert_int3(short3);
+int3 __ovld __cnfn convert_int3_sat(short3);
+int3 __ovld __cnfn convert_int3_rte(ushort3);
+int3 __ovld __cnfn convert_int3_sat_rte(ushort3);
+int3 __ovld __cnfn convert_int3_rtz(ushort3);
+int3 __ovld __cnfn convert_int3_sat_rtz(ushort3);
+int3 __ovld __cnfn convert_int3_rtp(ushort3);
+int3 __ovld __cnfn convert_int3_sat_rtp(ushort3);
+int3 __ovld __cnfn convert_int3_rtn(ushort3);
+int3 __ovld __cnfn convert_int3_sat_rtn(ushort3);
+int3 __ovld __cnfn convert_int3(ushort3);
+int3 __ovld __cnfn convert_int3_sat(ushort3);
+int3 __ovld __cnfn convert_int3_rte(int3);
+int3 __ovld __cnfn convert_int3_sat_rte(int3);
+int3 __ovld __cnfn convert_int3_rtz(int3);
+int3 __ovld __cnfn convert_int3_sat_rtz(int3);
+int3 __ovld __cnfn convert_int3_rtp(int3);
+int3 __ovld __cnfn convert_int3_sat_rtp(int3);
+int3 __ovld __cnfn convert_int3_rtn(int3);
+int3 __ovld __cnfn convert_int3_sat_rtn(int3);
+int3 __ovld __cnfn convert_int3(int3);
+int3 __ovld __cnfn convert_int3_sat(int3);
+int3 __ovld __cnfn convert_int3_rte(uint3);
+int3 __ovld __cnfn convert_int3_sat_rte(uint3);
+int3 __ovld __cnfn convert_int3_rtz(uint3);
+int3 __ovld __cnfn convert_int3_sat_rtz(uint3);
+int3 __ovld __cnfn convert_int3_rtp(uint3);
+int3 __ovld __cnfn convert_int3_sat_rtp(uint3);
+int3 __ovld __cnfn convert_int3_rtn(uint3);
+int3 __ovld __cnfn convert_int3_sat_rtn(uint3);
+int3 __ovld __cnfn convert_int3(uint3);
+int3 __ovld __cnfn convert_int3_sat(uint3);
+int3 __ovld __cnfn convert_int3_rte(long3);
+int3 __ovld __cnfn convert_int3_sat_rte(long3);
+int3 __ovld __cnfn convert_int3_rtz(long3);
+int3 __ovld __cnfn convert_int3_sat_rtz(long3);
+int3 __ovld __cnfn convert_int3_rtp(long3);
+int3 __ovld __cnfn convert_int3_sat_rtp(long3);
+int3 __ovld __cnfn convert_int3_rtn(long3);
+int3 __ovld __cnfn convert_int3_sat_rtn(long3);
+int3 __ovld __cnfn convert_int3(long3);
+int3 __ovld __cnfn convert_int3_sat(long3);
+int3 __ovld __cnfn convert_int3_rte(ulong3);
+int3 __ovld __cnfn convert_int3_sat_rte(ulong3);
+int3 __ovld __cnfn convert_int3_rtz(ulong3);
+int3 __ovld __cnfn convert_int3_sat_rtz(ulong3);
+int3 __ovld __cnfn convert_int3_rtp(ulong3);
+int3 __ovld __cnfn convert_int3_sat_rtp(ulong3);
+int3 __ovld __cnfn convert_int3_rtn(ulong3);
+int3 __ovld __cnfn convert_int3_sat_rtn(ulong3);
+int3 __ovld __cnfn convert_int3(ulong3);
+int3 __ovld __cnfn convert_int3_sat(ulong3);
+int3 __ovld __cnfn convert_int3_rte(float3);
+int3 __ovld __cnfn convert_int3_sat_rte(float3);
+int3 __ovld __cnfn convert_int3_rtz(float3);
+int3 __ovld __cnfn convert_int3_sat_rtz(float3);
+int3 __ovld __cnfn convert_int3_rtp(float3);
+int3 __ovld __cnfn convert_int3_sat_rtp(float3);
+int3 __ovld __cnfn convert_int3_rtn(float3);
+int3 __ovld __cnfn convert_int3_sat_rtn(float3);
+int3 __ovld __cnfn convert_int3(float3);
+int3 __ovld __cnfn convert_int3_sat(float3);
+uint3 __ovld __cnfn convert_uint3_rte(char3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(char3);
+uint3 __ovld __cnfn convert_uint3_rtz(char3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(char3);
+uint3 __ovld __cnfn convert_uint3_rtp(char3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(char3);
+uint3 __ovld __cnfn convert_uint3_rtn(char3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(char3);
+uint3 __ovld __cnfn convert_uint3(char3);
+uint3 __ovld __cnfn convert_uint3_sat(char3);
+uint3 __ovld __cnfn convert_uint3_rte(uchar3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3);
+uint3 __ovld __cnfn convert_uint3_rtz(uchar3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3);
+uint3 __ovld __cnfn convert_uint3_rtp(uchar3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3);
+uint3 __ovld __cnfn convert_uint3_rtn(uchar3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3);
+uint3 __ovld __cnfn convert_uint3(uchar3);
+uint3 __ovld __cnfn convert_uint3_sat(uchar3);
+uint3 __ovld __cnfn convert_uint3_rte(short3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(short3);
+uint3 __ovld __cnfn convert_uint3_rtz(short3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(short3);
+uint3 __ovld __cnfn convert_uint3_rtp(short3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(short3);
+uint3 __ovld __cnfn convert_uint3_rtn(short3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(short3);
+uint3 __ovld __cnfn convert_uint3(short3);
+uint3 __ovld __cnfn convert_uint3_sat(short3);
+uint3 __ovld __cnfn convert_uint3_rte(ushort3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3);
+uint3 __ovld __cnfn convert_uint3_rtz(ushort3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3);
+uint3 __ovld __cnfn convert_uint3_rtp(ushort3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3);
+uint3 __ovld __cnfn convert_uint3_rtn(ushort3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3);
+uint3 __ovld __cnfn convert_uint3(ushort3);
+uint3 __ovld __cnfn convert_uint3_sat(ushort3);
+uint3 __ovld __cnfn convert_uint3_rte(int3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(int3);
+uint3 __ovld __cnfn convert_uint3_rtz(int3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(int3);
+uint3 __ovld __cnfn convert_uint3_rtp(int3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(int3);
+uint3 __ovld __cnfn convert_uint3_rtn(int3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(int3);
+uint3 __ovld __cnfn convert_uint3(int3);
+uint3 __ovld __cnfn convert_uint3_sat(int3);
+uint3 __ovld __cnfn convert_uint3_rte(uint3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(uint3);
+uint3 __ovld __cnfn convert_uint3_rtz(uint3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3);
+uint3 __ovld __cnfn convert_uint3_rtp(uint3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3);
+uint3 __ovld __cnfn convert_uint3_rtn(uint3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3);
+uint3 __ovld __cnfn convert_uint3(uint3);
+uint3 __ovld __cnfn convert_uint3_sat(uint3);
+uint3 __ovld __cnfn convert_uint3_rte(long3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(long3);
+uint3 __ovld __cnfn convert_uint3_rtz(long3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(long3);
+uint3 __ovld __cnfn convert_uint3_rtp(long3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(long3);
+uint3 __ovld __cnfn convert_uint3_rtn(long3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(long3);
+uint3 __ovld __cnfn convert_uint3(long3);
+uint3 __ovld __cnfn convert_uint3_sat(long3);
+uint3 __ovld __cnfn convert_uint3_rte(ulong3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3);
+uint3 __ovld __cnfn convert_uint3_rtz(ulong3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3);
+uint3 __ovld __cnfn convert_uint3_rtp(ulong3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3);
+uint3 __ovld __cnfn convert_uint3_rtn(ulong3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3);
+uint3 __ovld __cnfn convert_uint3(ulong3);
+uint3 __ovld __cnfn convert_uint3_sat(ulong3);
+uint3 __ovld __cnfn convert_uint3_rte(float3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(float3);
+uint3 __ovld __cnfn convert_uint3_rtz(float3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(float3);
+uint3 __ovld __cnfn convert_uint3_rtp(float3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(float3);
+uint3 __ovld __cnfn convert_uint3_rtn(float3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(float3);
+uint3 __ovld __cnfn convert_uint3(float3);
+uint3 __ovld __cnfn convert_uint3_sat(float3);
+long3 __ovld __cnfn convert_long3_rte(char3);
+long3 __ovld __cnfn convert_long3_sat_rte(char3);
+long3 __ovld __cnfn convert_long3_rtz(char3);
+long3 __ovld __cnfn convert_long3_sat_rtz(char3);
+long3 __ovld __cnfn convert_long3_rtp(char3);
+long3 __ovld __cnfn convert_long3_sat_rtp(char3);
+long3 __ovld __cnfn convert_long3_rtn(char3);
+long3 __ovld __cnfn convert_long3_sat_rtn(char3);
+long3 __ovld __cnfn convert_long3(char3);
+long3 __ovld __cnfn convert_long3_sat(char3);
+long3 __ovld __cnfn convert_long3_rte(uchar3);
+long3 __ovld __cnfn convert_long3_sat_rte(uchar3);
+long3 __ovld __cnfn convert_long3_rtz(uchar3);
+long3 __ovld __cnfn convert_long3_sat_rtz(uchar3);
+long3 __ovld __cnfn convert_long3_rtp(uchar3);
+long3 __ovld __cnfn convert_long3_sat_rtp(uchar3);
+long3 __ovld __cnfn convert_long3_rtn(uchar3);
+long3 __ovld __cnfn convert_long3_sat_rtn(uchar3);
+long3 __ovld __cnfn convert_long3(uchar3);
+long3 __ovld __cnfn convert_long3_sat(uchar3);
+long3 __ovld __cnfn convert_long3_rte(short3);
+long3 __ovld __cnfn convert_long3_sat_rte(short3);
+long3 __ovld __cnfn convert_long3_rtz(short3);
+long3 __ovld __cnfn convert_long3_sat_rtz(short3);
+long3 __ovld __cnfn convert_long3_rtp(short3);
+long3 __ovld __cnfn convert_long3_sat_rtp(short3);
+long3 __ovld __cnfn convert_long3_rtn(short3);
+long3 __ovld __cnfn convert_long3_sat_rtn(short3);
+long3 __ovld __cnfn convert_long3(short3);
+long3 __ovld __cnfn convert_long3_sat(short3);
+long3 __ovld __cnfn convert_long3_rte(ushort3);
+long3 __ovld __cnfn convert_long3_sat_rte(ushort3);
+long3 __ovld __cnfn convert_long3_rtz(ushort3);
+long3 __ovld __cnfn convert_long3_sat_rtz(ushort3);
+long3 __ovld __cnfn convert_long3_rtp(ushort3);
+long3 __ovld __cnfn convert_long3_sat_rtp(ushort3);
+long3 __ovld __cnfn convert_long3_rtn(ushort3);
+long3 __ovld __cnfn convert_long3_sat_rtn(ushort3);
+long3 __ovld __cnfn convert_long3(ushort3);
+long3 __ovld __cnfn convert_long3_sat(ushort3);
+long3 __ovld __cnfn convert_long3_rte(int3);
+long3 __ovld __cnfn convert_long3_sat_rte(int3);
+long3 __ovld __cnfn convert_long3_rtz(int3);
+long3 __ovld __cnfn convert_long3_sat_rtz(int3);
+long3 __ovld __cnfn convert_long3_rtp(int3);
+long3 __ovld __cnfn convert_long3_sat_rtp(int3);
+long3 __ovld __cnfn convert_long3_rtn(int3);
+long3 __ovld __cnfn convert_long3_sat_rtn(int3);
+long3 __ovld __cnfn convert_long3(int3);
+long3 __ovld __cnfn convert_long3_sat(int3);
+long3 __ovld __cnfn convert_long3_rte(uint3);
+long3 __ovld __cnfn convert_long3_sat_rte(uint3);
+long3 __ovld __cnfn convert_long3_rtz(uint3);
+long3 __ovld __cnfn convert_long3_sat_rtz(uint3);
+long3 __ovld __cnfn convert_long3_rtp(uint3);
+long3 __ovld __cnfn convert_long3_sat_rtp(uint3);
+long3 __ovld __cnfn convert_long3_rtn(uint3);
+long3 __ovld __cnfn convert_long3_sat_rtn(uint3);
+long3 __ovld __cnfn convert_long3(uint3);
+long3 __ovld __cnfn convert_long3_sat(uint3);
+long3 __ovld __cnfn convert_long3_rte(long3);
+long3 __ovld __cnfn convert_long3_sat_rte(long3);
+long3 __ovld __cnfn convert_long3_rtz(long3);
+long3 __ovld __cnfn convert_long3_sat_rtz(long3);
+long3 __ovld __cnfn convert_long3_rtp(long3);
+long3 __ovld __cnfn convert_long3_sat_rtp(long3);
+long3 __ovld __cnfn convert_long3_rtn(long3);
+long3 __ovld __cnfn convert_long3_sat_rtn(long3);
+long3 __ovld __cnfn convert_long3(long3);
+long3 __ovld __cnfn convert_long3_sat(long3);
+long3 __ovld __cnfn convert_long3_rte(ulong3);
+long3 __ovld __cnfn convert_long3_sat_rte(ulong3);
+long3 __ovld __cnfn convert_long3_rtz(ulong3);
+long3 __ovld __cnfn convert_long3_sat_rtz(ulong3);
+long3 __ovld __cnfn convert_long3_rtp(ulong3);
+long3 __ovld __cnfn convert_long3_sat_rtp(ulong3);
+long3 __ovld __cnfn convert_long3_rtn(ulong3);
+long3 __ovld __cnfn convert_long3_sat_rtn(ulong3);
+long3 __ovld __cnfn convert_long3(ulong3);
+long3 __ovld __cnfn convert_long3_sat(ulong3);
+long3 __ovld __cnfn convert_long3_rte(float3);
+long3 __ovld __cnfn convert_long3_sat_rte(float3);
+long3 __ovld __cnfn convert_long3_rtz(float3);
+long3 __ovld __cnfn convert_long3_sat_rtz(float3);
+long3 __ovld __cnfn convert_long3_rtp(float3);
+long3 __ovld __cnfn convert_long3_sat_rtp(float3);
+long3 __ovld __cnfn convert_long3_rtn(float3);
+long3 __ovld __cnfn convert_long3_sat_rtn(float3);
+long3 __ovld __cnfn convert_long3(float3);
+long3 __ovld __cnfn convert_long3_sat(float3);
+ulong3 __ovld __cnfn convert_ulong3_rte(char3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(char3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(char3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(char3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3);
+ulong3 __ovld __cnfn convert_ulong3(char3);
+ulong3 __ovld __cnfn convert_ulong3_sat(char3);
+ulong3 __ovld __cnfn convert_ulong3_rte(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3);
+ulong3 __ovld __cnfn convert_ulong3(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_sat(uchar3);
+ulong3 __ovld __cnfn convert_ulong3_rte(short3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(short3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(short3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(short3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(short3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3);
+ulong3 __ovld __cnfn convert_ulong3(short3);
+ulong3 __ovld __cnfn convert_ulong3_sat(short3);
+ulong3 __ovld __cnfn convert_ulong3_rte(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3);
+ulong3 __ovld __cnfn convert_ulong3(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_sat(ushort3);
+ulong3 __ovld __cnfn convert_ulong3_rte(int3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(int3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(int3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(int3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3);
+ulong3 __ovld __cnfn convert_ulong3(int3);
+ulong3 __ovld __cnfn convert_ulong3_sat(int3);
+ulong3 __ovld __cnfn convert_ulong3_rte(uint3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(uint3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(uint3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(uint3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3);
+ulong3 __ovld __cnfn convert_ulong3(uint3);
+ulong3 __ovld __cnfn convert_ulong3_sat(uint3);
+ulong3 __ovld __cnfn convert_ulong3_rte(long3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(long3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(long3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(long3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3);
+ulong3 __ovld __cnfn convert_ulong3(long3);
+ulong3 __ovld __cnfn convert_ulong3_sat(long3);
+ulong3 __ovld __cnfn convert_ulong3_rte(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3);
+ulong3 __ovld __cnfn convert_ulong3(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_sat(ulong3);
+ulong3 __ovld __cnfn convert_ulong3_rte(float3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(float3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(float3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(float3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3);
+ulong3 __ovld __cnfn convert_ulong3(float3);
+ulong3 __ovld __cnfn convert_ulong3_sat(float3);
+float3 __ovld __cnfn convert_float3_rte(char3);
+float3 __ovld __cnfn convert_float3_rtz(char3);
+float3 __ovld __cnfn convert_float3_rtp(char3);
+float3 __ovld __cnfn convert_float3_rtn(char3);
+float3 __ovld __cnfn convert_float3(char3);
+float3 __ovld __cnfn convert_float3_rte(uchar3);
+float3 __ovld __cnfn convert_float3_rtz(uchar3);
+float3 __ovld __cnfn convert_float3_rtp(uchar3);
+float3 __ovld __cnfn convert_float3_rtn(uchar3);
+float3 __ovld __cnfn convert_float3(uchar3);
+float3 __ovld __cnfn convert_float3_rte(short3);
+float3 __ovld __cnfn convert_float3_rtz(short3);
+float3 __ovld __cnfn convert_float3_rtp(short3);
+float3 __ovld __cnfn convert_float3_rtn(short3);
+float3 __ovld __cnfn convert_float3(short3);
+float3 __ovld __cnfn convert_float3_rte(ushort3);
+float3 __ovld __cnfn convert_float3_rtz(ushort3);
+float3 __ovld __cnfn convert_float3_rtp(ushort3);
+float3 __ovld __cnfn convert_float3_rtn(ushort3);
+float3 __ovld __cnfn convert_float3(ushort3);
+float3 __ovld __cnfn convert_float3_rte(int3);
+float3 __ovld __cnfn convert_float3_rtz(int3);
+float3 __ovld __cnfn convert_float3_rtp(int3);
+float3 __ovld __cnfn convert_float3_rtn(int3);
+float3 __ovld __cnfn convert_float3(int3);
+float3 __ovld __cnfn convert_float3_rte(uint3);
+float3 __ovld __cnfn convert_float3_rtz(uint3);
+float3 __ovld __cnfn convert_float3_rtp(uint3);
+float3 __ovld __cnfn convert_float3_rtn(uint3);
+float3 __ovld __cnfn convert_float3(uint3);
+float3 __ovld __cnfn convert_float3_rte(long3);
+float3 __ovld __cnfn convert_float3_rtz(long3);
+float3 __ovld __cnfn convert_float3_rtp(long3);
+float3 __ovld __cnfn convert_float3_rtn(long3);
+float3 __ovld __cnfn convert_float3(long3);
+float3 __ovld __cnfn convert_float3_rte(ulong3);
+float3 __ovld __cnfn convert_float3_rtz(ulong3);
+float3 __ovld __cnfn convert_float3_rtp(ulong3);
+float3 __ovld __cnfn convert_float3_rtn(ulong3);
+float3 __ovld __cnfn convert_float3(ulong3);
+float3 __ovld __cnfn convert_float3_rte(float3);
+float3 __ovld __cnfn convert_float3_rtz(float3);
+float3 __ovld __cnfn convert_float3_rtp(float3);
+float3 __ovld __cnfn convert_float3_rtn(float3);
+float3 __ovld __cnfn convert_float3(float3);
+char4 __ovld __cnfn convert_char4_rte(char4);
+char4 __ovld __cnfn convert_char4_sat_rte(char4);
+char4 __ovld __cnfn convert_char4_rtz(char4);
+char4 __ovld __cnfn convert_char4_sat_rtz(char4);
+char4 __ovld __cnfn convert_char4_rtp(char4);
+char4 __ovld __cnfn convert_char4_sat_rtp(char4);
+char4 __ovld __cnfn convert_char4_rtn(char4);
+char4 __ovld __cnfn convert_char4_sat_rtn(char4);
+char4 __ovld __cnfn convert_char4(char4);
+char4 __ovld __cnfn convert_char4_sat(char4);
+char4 __ovld __cnfn convert_char4_rte(uchar4);
+char4 __ovld __cnfn convert_char4_sat_rte(uchar4);
+char4 __ovld __cnfn convert_char4_rtz(uchar4);
+char4 __ovld __cnfn convert_char4_sat_rtz(uchar4);
+char4 __ovld __cnfn convert_char4_rtp(uchar4);
+char4 __ovld __cnfn convert_char4_sat_rtp(uchar4);
+char4 __ovld __cnfn convert_char4_rtn(uchar4);
+char4 __ovld __cnfn convert_char4_sat_rtn(uchar4);
+char4 __ovld __cnfn convert_char4(uchar4);
+char4 __ovld __cnfn convert_char4_sat(uchar4);
+char4 __ovld __cnfn convert_char4_rte(short4);
+char4 __ovld __cnfn convert_char4_sat_rte(short4);
+char4 __ovld __cnfn convert_char4_rtz(short4);
+char4 __ovld __cnfn convert_char4_sat_rtz(short4);
+char4 __ovld __cnfn convert_char4_rtp(short4);
+char4 __ovld __cnfn convert_char4_sat_rtp(short4);
+char4 __ovld __cnfn convert_char4_rtn(short4);
+char4 __ovld __cnfn convert_char4_sat_rtn(short4);
+char4 __ovld __cnfn convert_char4(short4);
+char4 __ovld __cnfn convert_char4_sat(short4);
+char4 __ovld __cnfn convert_char4_rte(ushort4);
+char4 __ovld __cnfn convert_char4_sat_rte(ushort4);
+char4 __ovld __cnfn convert_char4_rtz(ushort4);
+char4 __ovld __cnfn convert_char4_sat_rtz(ushort4);
+char4 __ovld __cnfn convert_char4_rtp(ushort4);
+char4 __ovld __cnfn convert_char4_sat_rtp(ushort4);
+char4 __ovld __cnfn convert_char4_rtn(ushort4);
+char4 __ovld __cnfn convert_char4_sat_rtn(ushort4);
+char4 __ovld __cnfn convert_char4(ushort4);
+char4 __ovld __cnfn convert_char4_sat(ushort4);
+char4 __ovld __cnfn convert_char4_rte(int4);
+char4 __ovld __cnfn convert_char4_sat_rte(int4);
+char4 __ovld __cnfn convert_char4_rtz(int4);
+char4 __ovld __cnfn convert_char4_sat_rtz(int4);
+char4 __ovld __cnfn convert_char4_rtp(int4);
+char4 __ovld __cnfn convert_char4_sat_rtp(int4);
+char4 __ovld __cnfn convert_char4_rtn(int4);
+char4 __ovld __cnfn convert_char4_sat_rtn(int4);
+char4 __ovld __cnfn convert_char4(int4);
+char4 __ovld __cnfn convert_char4_sat(int4);
+char4 __ovld __cnfn convert_char4_rte(uint4);
+char4 __ovld __cnfn convert_char4_sat_rte(uint4);
+char4 __ovld __cnfn convert_char4_rtz(uint4);
+char4 __ovld __cnfn convert_char4_sat_rtz(uint4);
+char4 __ovld __cnfn convert_char4_rtp(uint4);
+char4 __ovld __cnfn convert_char4_sat_rtp(uint4);
+char4 __ovld __cnfn convert_char4_rtn(uint4);
+char4 __ovld __cnfn convert_char4_sat_rtn(uint4);
+char4 __ovld __cnfn convert_char4(uint4);
+char4 __ovld __cnfn convert_char4_sat(uint4);
+char4 __ovld __cnfn convert_char4_rte(long4);
+char4 __ovld __cnfn convert_char4_sat_rte(long4);
+char4 __ovld __cnfn convert_char4_rtz(long4);
+char4 __ovld __cnfn convert_char4_sat_rtz(long4);
+char4 __ovld __cnfn convert_char4_rtp(long4);
+char4 __ovld __cnfn convert_char4_sat_rtp(long4);
+char4 __ovld __cnfn convert_char4_rtn(long4);
+char4 __ovld __cnfn convert_char4_sat_rtn(long4);
+char4 __ovld __cnfn convert_char4(long4);
+char4 __ovld __cnfn convert_char4_sat(long4);
+char4 __ovld __cnfn convert_char4_rte(ulong4);
+char4 __ovld __cnfn convert_char4_sat_rte(ulong4);
+char4 __ovld __cnfn convert_char4_rtz(ulong4);
+char4 __ovld __cnfn convert_char4_sat_rtz(ulong4);
+char4 __ovld __cnfn convert_char4_rtp(ulong4);
+char4 __ovld __cnfn convert_char4_sat_rtp(ulong4);
+char4 __ovld __cnfn convert_char4_rtn(ulong4);
+char4 __ovld __cnfn convert_char4_sat_rtn(ulong4);
+char4 __ovld __cnfn convert_char4(ulong4);
+char4 __ovld __cnfn convert_char4_sat(ulong4);
+char4 __ovld __cnfn convert_char4_rte(float4);
+char4 __ovld __cnfn convert_char4_sat_rte(float4);
+char4 __ovld __cnfn convert_char4_rtz(float4);
+char4 __ovld __cnfn convert_char4_sat_rtz(float4);
+char4 __ovld __cnfn convert_char4_rtp(float4);
+char4 __ovld __cnfn convert_char4_sat_rtp(float4);
+char4 __ovld __cnfn convert_char4_rtn(float4);
+char4 __ovld __cnfn convert_char4_sat_rtn(float4);
+char4 __ovld __cnfn convert_char4(float4);
+char4 __ovld __cnfn convert_char4_sat(float4);
+uchar4 __ovld __cnfn convert_uchar4_rte(char4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(char4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(char4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(char4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4);
+uchar4 __ovld __cnfn convert_uchar4(char4);
+uchar4 __ovld __cnfn convert_uchar4_sat(char4);
+uchar4 __ovld __cnfn convert_uchar4_rte(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4);
+uchar4 __ovld __cnfn convert_uchar4(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_sat(uchar4);
+uchar4 __ovld __cnfn convert_uchar4_rte(short4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(short4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(short4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(short4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4);
+uchar4 __ovld __cnfn convert_uchar4(short4);
+uchar4 __ovld __cnfn convert_uchar4_sat(short4);
+uchar4 __ovld __cnfn convert_uchar4_rte(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4);
+uchar4 __ovld __cnfn convert_uchar4(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_sat(ushort4);
+uchar4 __ovld __cnfn convert_uchar4_rte(int4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(int4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(int4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(int4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4);
+uchar4 __ovld __cnfn convert_uchar4(int4);
+uchar4 __ovld __cnfn convert_uchar4_sat(int4);
+uchar4 __ovld __cnfn convert_uchar4_rte(uint4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(uint4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(uint4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(uint4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4);
+uchar4 __ovld __cnfn convert_uchar4(uint4);
+uchar4 __ovld __cnfn convert_uchar4_sat(uint4);
+uchar4 __ovld __cnfn convert_uchar4_rte(long4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(long4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(long4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(long4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4);
+uchar4 __ovld __cnfn convert_uchar4(long4);
+uchar4 __ovld __cnfn convert_uchar4_sat(long4);
+uchar4 __ovld __cnfn convert_uchar4_rte(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4);
+uchar4 __ovld __cnfn convert_uchar4(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_sat(ulong4);
+uchar4 __ovld __cnfn convert_uchar4_rte(float4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(float4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(float4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(float4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4);
+uchar4 __ovld __cnfn convert_uchar4(float4);
+uchar4 __ovld __cnfn convert_uchar4_sat(float4);
+short4 __ovld __cnfn convert_short4_rte(char4);
+short4 __ovld __cnfn convert_short4_sat_rte(char4);
+short4 __ovld __cnfn convert_short4_rtz(char4);
+short4 __ovld __cnfn convert_short4_sat_rtz(char4);
+short4 __ovld __cnfn convert_short4_rtp(char4);
+short4 __ovld __cnfn convert_short4_sat_rtp(char4);
+short4 __ovld __cnfn convert_short4_rtn(char4);
+short4 __ovld __cnfn convert_short4_sat_rtn(char4);
+short4 __ovld __cnfn convert_short4(char4);
+short4 __ovld __cnfn convert_short4_sat(char4);
+short4 __ovld __cnfn convert_short4_rte(uchar4);
+short4 __ovld __cnfn convert_short4_sat_rte(uchar4);
+short4 __ovld __cnfn convert_short4_rtz(uchar4);
+short4 __ovld __cnfn convert_short4_sat_rtz(uchar4);
+short4 __ovld __cnfn convert_short4_rtp(uchar4);
+short4 __ovld __cnfn convert_short4_sat_rtp(uchar4);
+short4 __ovld __cnfn convert_short4_rtn(uchar4);
+short4 __ovld __cnfn convert_short4_sat_rtn(uchar4);
+short4 __ovld __cnfn convert_short4(uchar4);
+short4 __ovld __cnfn convert_short4_sat(uchar4);
+short4 __ovld __cnfn convert_short4_rte(short4);
+short4 __ovld __cnfn convert_short4_sat_rte(short4);
+short4 __ovld __cnfn convert_short4_rtz(short4);
+short4 __ovld __cnfn convert_short4_sat_rtz(short4);
+short4 __ovld __cnfn convert_short4_rtp(short4);
+short4 __ovld __cnfn convert_short4_sat_rtp(short4);
+short4 __ovld __cnfn convert_short4_rtn(short4);
+short4 __ovld __cnfn convert_short4_sat_rtn(short4);
+short4 __ovld __cnfn convert_short4(short4);
+short4 __ovld __cnfn convert_short4_sat(short4);
+short4 __ovld __cnfn convert_short4_rte(ushort4);
+short4 __ovld __cnfn convert_short4_sat_rte(ushort4);
+short4 __ovld __cnfn convert_short4_rtz(ushort4);
+short4 __ovld __cnfn convert_short4_sat_rtz(ushort4);
+short4 __ovld __cnfn convert_short4_rtp(ushort4);
+short4 __ovld __cnfn convert_short4_sat_rtp(ushort4);
+short4 __ovld __cnfn convert_short4_rtn(ushort4);
+short4 __ovld __cnfn convert_short4_sat_rtn(ushort4);
+short4 __ovld __cnfn convert_short4(ushort4);
+short4 __ovld __cnfn convert_short4_sat(ushort4);
+short4 __ovld __cnfn convert_short4_rte(int4);
+short4 __ovld __cnfn convert_short4_sat_rte(int4);
+short4 __ovld __cnfn convert_short4_rtz(int4);
+short4 __ovld __cnfn convert_short4_sat_rtz(int4);
+short4 __ovld __cnfn convert_short4_rtp(int4);
+short4 __ovld __cnfn convert_short4_sat_rtp(int4);
+short4 __ovld __cnfn convert_short4_rtn(int4);
+short4 __ovld __cnfn convert_short4_sat_rtn(int4);
+short4 __ovld __cnfn convert_short4(int4);
+short4 __ovld __cnfn convert_short4_sat(int4);
+short4 __ovld __cnfn convert_short4_rte(uint4);
+short4 __ovld __cnfn convert_short4_sat_rte(uint4);
+short4 __ovld __cnfn convert_short4_rtz(uint4);
+short4 __ovld __cnfn convert_short4_sat_rtz(uint4);
+short4 __ovld __cnfn convert_short4_rtp(uint4);
+short4 __ovld __cnfn convert_short4_sat_rtp(uint4);
+short4 __ovld __cnfn convert_short4_rtn(uint4);
+short4 __ovld __cnfn convert_short4_sat_rtn(uint4);
+short4 __ovld __cnfn convert_short4(uint4);
+short4 __ovld __cnfn convert_short4_sat(uint4);
+short4 __ovld __cnfn convert_short4_rte(long4);
+short4 __ovld __cnfn convert_short4_sat_rte(long4);
+short4 __ovld __cnfn convert_short4_rtz(long4);
+short4 __ovld __cnfn convert_short4_sat_rtz(long4);
+short4 __ovld __cnfn convert_short4_rtp(long4);
+short4 __ovld __cnfn convert_short4_sat_rtp(long4);
+short4 __ovld __cnfn convert_short4_rtn(long4);
+short4 __ovld __cnfn convert_short4_sat_rtn(long4);
+short4 __ovld __cnfn convert_short4(long4);
+short4 __ovld __cnfn convert_short4_sat(long4);
+short4 __ovld __cnfn convert_short4_rte(ulong4);
+short4 __ovld __cnfn convert_short4_sat_rte(ulong4);
+short4 __ovld __cnfn convert_short4_rtz(ulong4);
+short4 __ovld __cnfn convert_short4_sat_rtz(ulong4);
+short4 __ovld __cnfn convert_short4_rtp(ulong4);
+short4 __ovld __cnfn convert_short4_sat_rtp(ulong4);
+short4 __ovld __cnfn convert_short4_rtn(ulong4);
+short4 __ovld __cnfn convert_short4_sat_rtn(ulong4);
+short4 __ovld __cnfn convert_short4(ulong4);
+short4 __ovld __cnfn convert_short4_sat(ulong4);
+short4 __ovld __cnfn convert_short4_rte(float4);
+short4 __ovld __cnfn convert_short4_sat_rte(float4);
+short4 __ovld __cnfn convert_short4_rtz(float4);
+short4 __ovld __cnfn convert_short4_sat_rtz(float4);
+short4 __ovld __cnfn convert_short4_rtp(float4);
+short4 __ovld __cnfn convert_short4_sat_rtp(float4);
+short4 __ovld __cnfn convert_short4_rtn(float4);
+short4 __ovld __cnfn convert_short4_sat_rtn(float4);
+short4 __ovld __cnfn convert_short4(float4);
+short4 __ovld __cnfn convert_short4_sat(float4);
+ushort4 __ovld __cnfn convert_ushort4_rte(char4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(char4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(char4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(char4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4);
+ushort4 __ovld __cnfn convert_ushort4(char4);
+ushort4 __ovld __cnfn convert_ushort4_sat(char4);
+ushort4 __ovld __cnfn convert_ushort4_rte(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4);
+ushort4 __ovld __cnfn convert_ushort4(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_sat(uchar4);
+ushort4 __ovld __cnfn convert_ushort4_rte(short4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(short4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(short4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(short4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4);
+ushort4 __ovld __cnfn convert_ushort4(short4);
+ushort4 __ovld __cnfn convert_ushort4_sat(short4);
+ushort4 __ovld __cnfn convert_ushort4_rte(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4);
+ushort4 __ovld __cnfn convert_ushort4(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_sat(ushort4);
+ushort4 __ovld __cnfn convert_ushort4_rte(int4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(int4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(int4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(int4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4);
+ushort4 __ovld __cnfn convert_ushort4(int4);
+ushort4 __ovld __cnfn convert_ushort4_sat(int4);
+ushort4 __ovld __cnfn convert_ushort4_rte(uint4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(uint4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(uint4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(uint4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4);
+ushort4 __ovld __cnfn convert_ushort4(uint4);
+ushort4 __ovld __cnfn convert_ushort4_sat(uint4);
+ushort4 __ovld __cnfn convert_ushort4_rte(long4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(long4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(long4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(long4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4);
+ushort4 __ovld __cnfn convert_ushort4(long4);
+ushort4 __ovld __cnfn convert_ushort4_sat(long4);
+ushort4 __ovld __cnfn convert_ushort4_rte(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4);
+ushort4 __ovld __cnfn convert_ushort4(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_sat(ulong4);
+ushort4 __ovld __cnfn convert_ushort4_rte(float4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(float4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(float4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(float4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4);
+ushort4 __ovld __cnfn convert_ushort4(float4);
+ushort4 __ovld __cnfn convert_ushort4_sat(float4);
+int4 __ovld __cnfn convert_int4_rte(char4);
+int4 __ovld __cnfn convert_int4_sat_rte(char4);
+int4 __ovld __cnfn convert_int4_rtz(char4);
+int4 __ovld __cnfn convert_int4_sat_rtz(char4);
+int4 __ovld __cnfn convert_int4_rtp(char4);
+int4 __ovld __cnfn convert_int4_sat_rtp(char4);
+int4 __ovld __cnfn convert_int4_rtn(char4);
+int4 __ovld __cnfn convert_int4_sat_rtn(char4);
+int4 __ovld __cnfn convert_int4(char4);
+int4 __ovld __cnfn convert_int4_sat(char4);
+int4 __ovld __cnfn convert_int4_rte(uchar4);
+int4 __ovld __cnfn convert_int4_sat_rte(uchar4);
+int4 __ovld __cnfn convert_int4_rtz(uchar4);
+int4 __ovld __cnfn convert_int4_sat_rtz(uchar4);
+int4 __ovld __cnfn convert_int4_rtp(uchar4);
+int4 __ovld __cnfn convert_int4_sat_rtp(uchar4);
+int4 __ovld __cnfn convert_int4_rtn(uchar4);
+int4 __ovld __cnfn convert_int4_sat_rtn(uchar4);
+int4 __ovld __cnfn convert_int4(uchar4);
+int4 __ovld __cnfn convert_int4_sat(uchar4);
+int4 __ovld __cnfn convert_int4_rte(short4);
+int4 __ovld __cnfn convert_int4_sat_rte(short4);
+int4 __ovld __cnfn convert_int4_rtz(short4);
+int4 __ovld __cnfn convert_int4_sat_rtz(short4);
+int4 __ovld __cnfn convert_int4_rtp(short4);
+int4 __ovld __cnfn convert_int4_sat_rtp(short4);
+int4 __ovld __cnfn convert_int4_rtn(short4);
+int4 __ovld __cnfn convert_int4_sat_rtn(short4);
+int4 __ovld __cnfn convert_int4(short4);
+int4 __ovld __cnfn convert_int4_sat(short4);
+int4 __ovld __cnfn convert_int4_rte(ushort4);
+int4 __ovld __cnfn convert_int4_sat_rte(ushort4);
+int4 __ovld __cnfn convert_int4_rtz(ushort4);
+int4 __ovld __cnfn convert_int4_sat_rtz(ushort4);
+int4 __ovld __cnfn convert_int4_rtp(ushort4);
+int4 __ovld __cnfn convert_int4_sat_rtp(ushort4);
+int4 __ovld __cnfn convert_int4_rtn(ushort4);
+int4 __ovld __cnfn convert_int4_sat_rtn(ushort4);
+int4 __ovld __cnfn convert_int4(ushort4);
+int4 __ovld __cnfn convert_int4_sat(ushort4);
+int4 __ovld __cnfn convert_int4_rte(int4);
+int4 __ovld __cnfn convert_int4_sat_rte(int4);
+int4 __ovld __cnfn convert_int4_rtz(int4);
+int4 __ovld __cnfn convert_int4_sat_rtz(int4);
+int4 __ovld __cnfn convert_int4_rtp(int4);
+int4 __ovld __cnfn convert_int4_sat_rtp(int4);
+int4 __ovld __cnfn convert_int4_rtn(int4);
+int4 __ovld __cnfn convert_int4_sat_rtn(int4);
+int4 __ovld __cnfn convert_int4(int4);
+int4 __ovld __cnfn convert_int4_sat(int4);
+int4 __ovld __cnfn convert_int4_rte(uint4);
+int4 __ovld __cnfn convert_int4_sat_rte(uint4);
+int4 __ovld __cnfn convert_int4_rtz(uint4);
+int4 __ovld __cnfn convert_int4_sat_rtz(uint4);
+int4 __ovld __cnfn convert_int4_rtp(uint4);
+int4 __ovld __cnfn convert_int4_sat_rtp(uint4);
+int4 __ovld __cnfn convert_int4_rtn(uint4);
+int4 __ovld __cnfn convert_int4_sat_rtn(uint4);
+int4 __ovld __cnfn convert_int4(uint4);
+int4 __ovld __cnfn convert_int4_sat(uint4);
+int4 __ovld __cnfn convert_int4_rte(long4);
+int4 __ovld __cnfn convert_int4_sat_rte(long4);
+int4 __ovld __cnfn convert_int4_rtz(long4);
+int4 __ovld __cnfn convert_int4_sat_rtz(long4);
+int4 __ovld __cnfn convert_int4_rtp(long4);
+int4 __ovld __cnfn convert_int4_sat_rtp(long4);
+int4 __ovld __cnfn convert_int4_rtn(long4);
+int4 __ovld __cnfn convert_int4_sat_rtn(long4);
+int4 __ovld __cnfn convert_int4(long4);
+int4 __ovld __cnfn convert_int4_sat(long4);
+int4 __ovld __cnfn convert_int4_rte(ulong4);
+int4 __ovld __cnfn convert_int4_sat_rte(ulong4);
+int4 __ovld __cnfn convert_int4_rtz(ulong4);
+int4 __ovld __cnfn convert_int4_sat_rtz(ulong4);
+int4 __ovld __cnfn convert_int4_rtp(ulong4);
+int4 __ovld __cnfn convert_int4_sat_rtp(ulong4);
+int4 __ovld __cnfn convert_int4_rtn(ulong4);
+int4 __ovld __cnfn convert_int4_sat_rtn(ulong4);
+int4 __ovld __cnfn convert_int4(ulong4);
+int4 __ovld __cnfn convert_int4_sat(ulong4);
+int4 __ovld __cnfn convert_int4_rte(float4);
+int4 __ovld __cnfn convert_int4_sat_rte(float4);
+int4 __ovld __cnfn convert_int4_rtz(float4);
+int4 __ovld __cnfn convert_int4_sat_rtz(float4);
+int4 __ovld __cnfn convert_int4_rtp(float4);
+int4 __ovld __cnfn convert_int4_sat_rtp(float4);
+int4 __ovld __cnfn convert_int4_rtn(float4);
+int4 __ovld __cnfn convert_int4_sat_rtn(float4);
+int4 __ovld __cnfn convert_int4(float4);
+int4 __ovld __cnfn convert_int4_sat(float4);
+uint4 __ovld __cnfn convert_uint4_rte(char4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(char4);
+uint4 __ovld __cnfn convert_uint4_rtz(char4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(char4);
+uint4 __ovld __cnfn convert_uint4_rtp(char4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(char4);
+uint4 __ovld __cnfn convert_uint4_rtn(char4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(char4);
+uint4 __ovld __cnfn convert_uint4(char4);
+uint4 __ovld __cnfn convert_uint4_sat(char4);
+uint4 __ovld __cnfn convert_uint4_rte(uchar4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4);
+uint4 __ovld __cnfn convert_uint4_rtz(uchar4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4);
+uint4 __ovld __cnfn convert_uint4_rtp(uchar4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4);
+uint4 __ovld __cnfn convert_uint4_rtn(uchar4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4);
+uint4 __ovld __cnfn convert_uint4(uchar4);
+uint4 __ovld __cnfn convert_uint4_sat(uchar4);
+uint4 __ovld __cnfn convert_uint4_rte(short4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(short4);
+uint4 __ovld __cnfn convert_uint4_rtz(short4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(short4);
+uint4 __ovld __cnfn convert_uint4_rtp(short4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(short4);
+uint4 __ovld __cnfn convert_uint4_rtn(short4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(short4);
+uint4 __ovld __cnfn convert_uint4(short4);
+uint4 __ovld __cnfn convert_uint4_sat(short4);
+uint4 __ovld __cnfn convert_uint4_rte(ushort4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4);
+uint4 __ovld __cnfn convert_uint4_rtz(ushort4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4);
+uint4 __ovld __cnfn convert_uint4_rtp(ushort4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4);
+uint4 __ovld __cnfn convert_uint4_rtn(ushort4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4);
+uint4 __ovld __cnfn convert_uint4(ushort4);
+uint4 __ovld __cnfn convert_uint4_sat(ushort4);
+uint4 __ovld __cnfn convert_uint4_rte(int4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(int4);
+uint4 __ovld __cnfn convert_uint4_rtz(int4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(int4);
+uint4 __ovld __cnfn convert_uint4_rtp(int4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(int4);
+uint4 __ovld __cnfn convert_uint4_rtn(int4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(int4);
+uint4 __ovld __cnfn convert_uint4(int4);
+uint4 __ovld __cnfn convert_uint4_sat(int4);
+uint4 __ovld __cnfn convert_uint4_rte(uint4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(uint4);
+uint4 __ovld __cnfn convert_uint4_rtz(uint4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4);
+uint4 __ovld __cnfn convert_uint4_rtp(uint4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4);
+uint4 __ovld __cnfn convert_uint4_rtn(uint4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4);
+uint4 __ovld __cnfn convert_uint4(uint4);
+uint4 __ovld __cnfn convert_uint4_sat(uint4);
+uint4 __ovld __cnfn convert_uint4_rte(long4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(long4);
+uint4 __ovld __cnfn convert_uint4_rtz(long4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(long4);
+uint4 __ovld __cnfn convert_uint4_rtp(long4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(long4);
+uint4 __ovld __cnfn convert_uint4_rtn(long4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(long4);
+uint4 __ovld __cnfn convert_uint4(long4);
+uint4 __ovld __cnfn convert_uint4_sat(long4);
+uint4 __ovld __cnfn convert_uint4_rte(ulong4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4);
+uint4 __ovld __cnfn convert_uint4_rtz(ulong4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4);
+uint4 __ovld __cnfn convert_uint4_rtp(ulong4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4);
+uint4 __ovld __cnfn convert_uint4_rtn(ulong4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4);
+uint4 __ovld __cnfn convert_uint4(ulong4);
+uint4 __ovld __cnfn convert_uint4_sat(ulong4);
+uint4 __ovld __cnfn convert_uint4_rte(float4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(float4);
+uint4 __ovld __cnfn convert_uint4_rtz(float4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(float4);
+uint4 __ovld __cnfn convert_uint4_rtp(float4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(float4);
+uint4 __ovld __cnfn convert_uint4_rtn(float4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(float4);
+uint4 __ovld __cnfn convert_uint4(float4);
+uint4 __ovld __cnfn convert_uint4_sat(float4);
+long4 __ovld __cnfn convert_long4_rte(char4);
+long4 __ovld __cnfn convert_long4_sat_rte(char4);
+long4 __ovld __cnfn convert_long4_rtz(char4);
+long4 __ovld __cnfn convert_long4_sat_rtz(char4);
+long4 __ovld __cnfn convert_long4_rtp(char4);
+long4 __ovld __cnfn convert_long4_sat_rtp(char4);
+long4 __ovld __cnfn convert_long4_rtn(char4);
+long4 __ovld __cnfn convert_long4_sat_rtn(char4);
+long4 __ovld __cnfn convert_long4(char4);
+long4 __ovld __cnfn convert_long4_sat(char4);
+long4 __ovld __cnfn convert_long4_rte(uchar4);
+long4 __ovld __cnfn convert_long4_sat_rte(uchar4);
+long4 __ovld __cnfn convert_long4_rtz(uchar4);
+long4 __ovld __cnfn convert_long4_sat_rtz(uchar4);
+long4 __ovld __cnfn convert_long4_rtp(uchar4);
+long4 __ovld __cnfn convert_long4_sat_rtp(uchar4);
+long4 __ovld __cnfn convert_long4_rtn(uchar4);
+long4 __ovld __cnfn convert_long4_sat_rtn(uchar4);
+long4 __ovld __cnfn convert_long4(uchar4);
+long4 __ovld __cnfn convert_long4_sat(uchar4);
+long4 __ovld __cnfn convert_long4_rte(short4);
+long4 __ovld __cnfn convert_long4_sat_rte(short4);
+long4 __ovld __cnfn convert_long4_rtz(short4);
+long4 __ovld __cnfn convert_long4_sat_rtz(short4);
+long4 __ovld __cnfn convert_long4_rtp(short4);
+long4 __ovld __cnfn convert_long4_sat_rtp(short4);
+long4 __ovld __cnfn convert_long4_rtn(short4);
+long4 __ovld __cnfn convert_long4_sat_rtn(short4);
+long4 __ovld __cnfn convert_long4(short4);
+long4 __ovld __cnfn convert_long4_sat(short4);
+long4 __ovld __cnfn convert_long4_rte(ushort4);
+long4 __ovld __cnfn convert_long4_sat_rte(ushort4);
+long4 __ovld __cnfn convert_long4_rtz(ushort4);
+long4 __ovld __cnfn convert_long4_sat_rtz(ushort4);
+long4 __ovld __cnfn convert_long4_rtp(ushort4);
+long4 __ovld __cnfn convert_long4_sat_rtp(ushort4);
+long4 __ovld __cnfn convert_long4_rtn(ushort4);
+long4 __ovld __cnfn convert_long4_sat_rtn(ushort4);
+long4 __ovld __cnfn convert_long4(ushort4);
+long4 __ovld __cnfn convert_long4_sat(ushort4);
+long4 __ovld __cnfn convert_long4_rte(int4);
+long4 __ovld __cnfn convert_long4_sat_rte(int4);
+long4 __ovld __cnfn convert_long4_rtz(int4);
+long4 __ovld __cnfn convert_long4_sat_rtz(int4);
+long4 __ovld __cnfn convert_long4_rtp(int4);
+long4 __ovld __cnfn convert_long4_sat_rtp(int4);
+long4 __ovld __cnfn convert_long4_rtn(int4);
+long4 __ovld __cnfn convert_long4_sat_rtn(int4);
+long4 __ovld __cnfn convert_long4(int4);
+long4 __ovld __cnfn convert_long4_sat(int4);
+long4 __ovld __cnfn convert_long4_rte(uint4);
+long4 __ovld __cnfn convert_long4_sat_rte(uint4);
+long4 __ovld __cnfn convert_long4_rtz(uint4);
+long4 __ovld __cnfn convert_long4_sat_rtz(uint4);
+long4 __ovld __cnfn convert_long4_rtp(uint4);
+long4 __ovld __cnfn convert_long4_sat_rtp(uint4);
+long4 __ovld __cnfn convert_long4_rtn(uint4);
+long4 __ovld __cnfn convert_long4_sat_rtn(uint4);
+long4 __ovld __cnfn convert_long4(uint4);
+long4 __ovld __cnfn convert_long4_sat(uint4);
+long4 __ovld __cnfn convert_long4_rte(long4);
+long4 __ovld __cnfn convert_long4_sat_rte(long4);
+long4 __ovld __cnfn convert_long4_rtz(long4);
+long4 __ovld __cnfn convert_long4_sat_rtz(long4);
+long4 __ovld __cnfn convert_long4_rtp(long4);
+long4 __ovld __cnfn convert_long4_sat_rtp(long4);
+long4 __ovld __cnfn convert_long4_rtn(long4);
+long4 __ovld __cnfn convert_long4_sat_rtn(long4);
+long4 __ovld __cnfn convert_long4(long4);
+long4 __ovld __cnfn convert_long4_sat(long4);
+long4 __ovld __cnfn convert_long4_rte(ulong4);
+long4 __ovld __cnfn convert_long4_sat_rte(ulong4);
+long4 __ovld __cnfn convert_long4_rtz(ulong4);
+long4 __ovld __cnfn convert_long4_sat_rtz(ulong4);
+long4 __ovld __cnfn convert_long4_rtp(ulong4);
+long4 __ovld __cnfn convert_long4_sat_rtp(ulong4);
+long4 __ovld __cnfn convert_long4_rtn(ulong4);
+long4 __ovld __cnfn convert_long4_sat_rtn(ulong4);
+long4 __ovld __cnfn convert_long4(ulong4);
+long4 __ovld __cnfn convert_long4_sat(ulong4);
+long4 __ovld __cnfn convert_long4_rte(float4);
+long4 __ovld __cnfn convert_long4_sat_rte(float4);
+long4 __ovld __cnfn convert_long4_rtz(float4);
+long4 __ovld __cnfn convert_long4_sat_rtz(float4);
+long4 __ovld __cnfn convert_long4_rtp(float4);
+long4 __ovld __cnfn convert_long4_sat_rtp(float4);
+long4 __ovld __cnfn convert_long4_rtn(float4);
+long4 __ovld __cnfn convert_long4_sat_rtn(float4);
+long4 __ovld __cnfn convert_long4(float4);
+long4 __ovld __cnfn convert_long4_sat(float4);
+ulong4 __ovld __cnfn convert_ulong4_rte(char4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(char4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(char4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(char4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4);
+ulong4 __ovld __cnfn convert_ulong4(char4);
+ulong4 __ovld __cnfn convert_ulong4_sat(char4);
+ulong4 __ovld __cnfn convert_ulong4_rte(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4);
+ulong4 __ovld __cnfn convert_ulong4(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_sat(uchar4);
+ulong4 __ovld __cnfn convert_ulong4_rte(short4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(short4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(short4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(short4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4);
+ulong4 __ovld __cnfn convert_ulong4(short4);
+ulong4 __ovld __cnfn convert_ulong4_sat(short4);
+ulong4 __ovld __cnfn convert_ulong4_rte(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4);
+ulong4 __ovld __cnfn convert_ulong4(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_sat(ushort4);
+ulong4 __ovld __cnfn convert_ulong4_rte(int4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(int4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(int4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(int4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4);
+ulong4 __ovld __cnfn convert_ulong4(int4);
+ulong4 __ovld __cnfn convert_ulong4_sat(int4);
+ulong4 __ovld __cnfn convert_ulong4_rte(uint4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(uint4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(uint4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(uint4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4);
+ulong4 __ovld __cnfn convert_ulong4(uint4);
+ulong4 __ovld __cnfn convert_ulong4_sat(uint4);
+ulong4 __ovld __cnfn convert_ulong4_rte(long4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(long4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(long4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(long4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4);
+ulong4 __ovld __cnfn convert_ulong4(long4);
+ulong4 __ovld __cnfn convert_ulong4_sat(long4);
+ulong4 __ovld __cnfn convert_ulong4_rte(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4);
+ulong4 __ovld __cnfn convert_ulong4(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_sat(ulong4);
+ulong4 __ovld __cnfn convert_ulong4_rte(float4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(float4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(float4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(float4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4);
+ulong4 __ovld __cnfn convert_ulong4(float4);
+ulong4 __ovld __cnfn convert_ulong4_sat(float4);
+float4 __ovld __cnfn convert_float4_rte(char4);
+float4 __ovld __cnfn convert_float4_rtz(char4);
+float4 __ovld __cnfn convert_float4_rtp(char4);
+float4 __ovld __cnfn convert_float4_rtn(char4);
+float4 __ovld __cnfn convert_float4(char4);
+float4 __ovld __cnfn convert_float4_rte(uchar4);
+float4 __ovld __cnfn convert_float4_rtz(uchar4);
+float4 __ovld __cnfn convert_float4_rtp(uchar4);
+float4 __ovld __cnfn convert_float4_rtn(uchar4);
+float4 __ovld __cnfn convert_float4(uchar4);
+float4 __ovld __cnfn convert_float4_rte(short4);
+float4 __ovld __cnfn convert_float4_rtz(short4);
+float4 __ovld __cnfn convert_float4_rtp(short4);
+float4 __ovld __cnfn convert_float4_rtn(short4);
+float4 __ovld __cnfn convert_float4(short4);
+float4 __ovld __cnfn convert_float4_rte(ushort4);
+float4 __ovld __cnfn convert_float4_rtz(ushort4);
+float4 __ovld __cnfn convert_float4_rtp(ushort4);
+float4 __ovld __cnfn convert_float4_rtn(ushort4);
+float4 __ovld __cnfn convert_float4(ushort4);
+float4 __ovld __cnfn convert_float4_rte(int4);
+float4 __ovld __cnfn convert_float4_rtz(int4);
+float4 __ovld __cnfn convert_float4_rtp(int4);
+float4 __ovld __cnfn convert_float4_rtn(int4);
+float4 __ovld __cnfn convert_float4(int4);
+float4 __ovld __cnfn convert_float4_rte(uint4);
+float4 __ovld __cnfn convert_float4_rtz(uint4);
+float4 __ovld __cnfn convert_float4_rtp(uint4);
+float4 __ovld __cnfn convert_float4_rtn(uint4);
+float4 __ovld __cnfn convert_float4(uint4);
+float4 __ovld __cnfn convert_float4_rte(long4);
+float4 __ovld __cnfn convert_float4_rtz(long4);
+float4 __ovld __cnfn convert_float4_rtp(long4);
+float4 __ovld __cnfn convert_float4_rtn(long4);
+float4 __ovld __cnfn convert_float4(long4);
+float4 __ovld __cnfn convert_float4_rte(ulong4);
+float4 __ovld __cnfn convert_float4_rtz(ulong4);
+float4 __ovld __cnfn convert_float4_rtp(ulong4);
+float4 __ovld __cnfn convert_float4_rtn(ulong4);
+float4 __ovld __cnfn convert_float4(ulong4);
+float4 __ovld __cnfn convert_float4_rte(float4);
+float4 __ovld __cnfn convert_float4_rtz(float4);
+float4 __ovld __cnfn convert_float4_rtp(float4);
+float4 __ovld __cnfn convert_float4_rtn(float4);
+float4 __ovld __cnfn convert_float4(float4);
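
For orientation, every declaration in this generated block instantiates the
OpenCL C conversion pattern convert_<destType><n>[_sat][_rte|_rtz|_rtp|_rtn]:
the optional _sat suffix clamps out-of-range source values to the destination
type's range, and the rounding suffix selects round-to-nearest-even (_rte),
toward zero (_rtz), toward positive infinity (_rtp), or toward negative
infinity (_rtn). Saturated forms are deliberately absent for floating-point
destinations such as the convert_float4 group above, because saturation is
only defined for integer results. A minimal usage sketch in OpenCL C, assuming
a hypothetical kernel (the kernel name and buffer arguments are illustrative
and not part of this header):

// Quantize normalized float pixels to bytes. convert_uchar4_sat_rte
// clamps each component to [0, 255] and rounds to nearest even.
__kernel void quantize_pixels(__global const float4 *in,
                              __global uchar4 *out) {
    size_t i = get_global_id(0);
    out[i] = convert_uchar4_sat_rte(in[i] * 255.0f);
}

When the source may fall outside the destination range, the _sat form avoids
the undefined behavior of an unsaturated float-to-integer conversion.
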
+char8 __ovld __cnfn convert_char8_rte(char8);
+char8 __ovld __cnfn convert_char8_sat_rte(char8);
+char8 __ovld __cnfn convert_char8_rtz(char8);
+char8 __ovld __cnfn convert_char8_sat_rtz(char8);
+char8 __ovld __cnfn convert_char8_rtp(char8);
+char8 __ovld __cnfn convert_char8_sat_rtp(char8);
+char8 __ovld __cnfn convert_char8_rtn(char8);
+char8 __ovld __cnfn convert_char8_sat_rtn(char8);
+char8 __ovld __cnfn convert_char8(char8);
+char8 __ovld __cnfn convert_char8_sat(char8);
+char8 __ovld __cnfn convert_char8_rte(uchar8);
+char8 __ovld __cnfn convert_char8_sat_rte(uchar8);
+char8 __ovld __cnfn convert_char8_rtz(uchar8);
+char8 __ovld __cnfn convert_char8_sat_rtz(uchar8);
+char8 __ovld __cnfn convert_char8_rtp(uchar8);
+char8 __ovld __cnfn convert_char8_sat_rtp(uchar8);
+char8 __ovld __cnfn convert_char8_rtn(uchar8);
+char8 __ovld __cnfn convert_char8_sat_rtn(uchar8);
+char8 __ovld __cnfn convert_char8(uchar8);
+char8 __ovld __cnfn convert_char8_sat(uchar8);
+char8 __ovld __cnfn convert_char8_rte(short8);
+char8 __ovld __cnfn convert_char8_sat_rte(short8);
+char8 __ovld __cnfn convert_char8_rtz(short8);
+char8 __ovld __cnfn convert_char8_sat_rtz(short8);
+char8 __ovld __cnfn convert_char8_rtp(short8);
+char8 __ovld __cnfn convert_char8_sat_rtp(short8);
+char8 __ovld __cnfn convert_char8_rtn(short8);
+char8 __ovld __cnfn convert_char8_sat_rtn(short8);
+char8 __ovld __cnfn convert_char8(short8);
+char8 __ovld __cnfn convert_char8_sat(short8);
+char8 __ovld __cnfn convert_char8_rte(ushort8);
+char8 __ovld __cnfn convert_char8_sat_rte(ushort8);
+char8 __ovld __cnfn convert_char8_rtz(ushort8);
+char8 __ovld __cnfn convert_char8_sat_rtz(ushort8);
+char8 __ovld __cnfn convert_char8_rtp(ushort8);
+char8 __ovld __cnfn convert_char8_sat_rtp(ushort8);
+char8 __ovld __cnfn convert_char8_rtn(ushort8);
+char8 __ovld __cnfn convert_char8_sat_rtn(ushort8);
+char8 __ovld __cnfn convert_char8(ushort8);
+char8 __ovld __cnfn convert_char8_sat(ushort8);
+char8 __ovld __cnfn convert_char8_rte(int8);
+char8 __ovld __cnfn convert_char8_sat_rte(int8);
+char8 __ovld __cnfn convert_char8_rtz(int8);
+char8 __ovld __cnfn convert_char8_sat_rtz(int8);
+char8 __ovld __cnfn convert_char8_rtp(int8);
+char8 __ovld __cnfn convert_char8_sat_rtp(int8);
+char8 __ovld __cnfn convert_char8_rtn(int8);
+char8 __ovld __cnfn convert_char8_sat_rtn(int8);
+char8 __ovld __cnfn convert_char8(int8);
+char8 __ovld __cnfn convert_char8_sat(int8);
+char8 __ovld __cnfn convert_char8_rte(uint8);
+char8 __ovld __cnfn convert_char8_sat_rte(uint8);
+char8 __ovld __cnfn convert_char8_rtz(uint8);
+char8 __ovld __cnfn convert_char8_sat_rtz(uint8);
+char8 __ovld __cnfn convert_char8_rtp(uint8);
+char8 __ovld __cnfn convert_char8_sat_rtp(uint8);
+char8 __ovld __cnfn convert_char8_rtn(uint8);
+char8 __ovld __cnfn convert_char8_sat_rtn(uint8);
+char8 __ovld __cnfn convert_char8(uint8);
+char8 __ovld __cnfn convert_char8_sat(uint8);
+char8 __ovld __cnfn convert_char8_rte(long8);
+char8 __ovld __cnfn convert_char8_sat_rte(long8);
+char8 __ovld __cnfn convert_char8_rtz(long8);
+char8 __ovld __cnfn convert_char8_sat_rtz(long8);
+char8 __ovld __cnfn convert_char8_rtp(long8);
+char8 __ovld __cnfn convert_char8_sat_rtp(long8);
+char8 __ovld __cnfn convert_char8_rtn(long8);
+char8 __ovld __cnfn convert_char8_sat_rtn(long8);
+char8 __ovld __cnfn convert_char8(long8);
+char8 __ovld __cnfn convert_char8_sat(long8);
+char8 __ovld __cnfn convert_char8_rte(ulong8);
+char8 __ovld __cnfn convert_char8_sat_rte(ulong8);
+char8 __ovld __cnfn convert_char8_rtz(ulong8);
+char8 __ovld __cnfn convert_char8_sat_rtz(ulong8);
+char8 __ovld __cnfn convert_char8_rtp(ulong8);
+char8 __ovld __cnfn convert_char8_sat_rtp(ulong8);
+char8 __ovld __cnfn convert_char8_rtn(ulong8);
+char8 __ovld __cnfn convert_char8_sat_rtn(ulong8);
+char8 __ovld __cnfn convert_char8(ulong8);
+char8 __ovld __cnfn convert_char8_sat(ulong8);
+char8 __ovld __cnfn convert_char8_rte(float8);
+char8 __ovld __cnfn convert_char8_sat_rte(float8);
+char8 __ovld __cnfn convert_char8_rtz(float8);
+char8 __ovld __cnfn convert_char8_sat_rtz(float8);
+char8 __ovld __cnfn convert_char8_rtp(float8);
+char8 __ovld __cnfn convert_char8_sat_rtp(float8);
+char8 __ovld __cnfn convert_char8_rtn(float8);
+char8 __ovld __cnfn convert_char8_sat_rtn(float8);
+char8 __ovld __cnfn convert_char8(float8);
+char8 __ovld __cnfn convert_char8_sat(float8);
+uchar8 __ovld __cnfn convert_uchar8_rte(char8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(char8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(char8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(char8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8);
+uchar8 __ovld __cnfn convert_uchar8(char8);
+uchar8 __ovld __cnfn convert_uchar8_sat(char8);
+uchar8 __ovld __cnfn convert_uchar8_rte(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8);
+uchar8 __ovld __cnfn convert_uchar8(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_sat(uchar8);
+uchar8 __ovld __cnfn convert_uchar8_rte(short8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(short8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(short8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(short8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8);
+uchar8 __ovld __cnfn convert_uchar8(short8);
+uchar8 __ovld __cnfn convert_uchar8_sat(short8);
+uchar8 __ovld __cnfn convert_uchar8_rte(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8);
+uchar8 __ovld __cnfn convert_uchar8(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_sat(ushort8);
+uchar8 __ovld __cnfn convert_uchar8_rte(int8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(int8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(int8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(int8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8);
+uchar8 __ovld __cnfn convert_uchar8(int8);
+uchar8 __ovld __cnfn convert_uchar8_sat(int8);
+uchar8 __ovld __cnfn convert_uchar8_rte(uint8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(uint8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(uint8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(uint8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8);
+uchar8 __ovld __cnfn convert_uchar8(uint8);
+uchar8 __ovld __cnfn convert_uchar8_sat(uint8);
+uchar8 __ovld __cnfn convert_uchar8_rte(long8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(long8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(long8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(long8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8);
+uchar8 __ovld __cnfn convert_uchar8(long8);
+uchar8 __ovld __cnfn convert_uchar8_sat(long8);
+uchar8 __ovld __cnfn convert_uchar8_rte(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8);
+uchar8 __ovld __cnfn convert_uchar8(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_sat(ulong8);
+uchar8 __ovld __cnfn convert_uchar8_rte(float8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(float8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(float8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(float8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8);
+uchar8 __ovld __cnfn convert_uchar8(float8);
+uchar8 __ovld __cnfn convert_uchar8_sat(float8);
+short8 __ovld __cnfn convert_short8_rte(char8);
+short8 __ovld __cnfn convert_short8_sat_rte(char8);
+short8 __ovld __cnfn convert_short8_rtz(char8);
+short8 __ovld __cnfn convert_short8_sat_rtz(char8);
+short8 __ovld __cnfn convert_short8_rtp(char8);
+short8 __ovld __cnfn convert_short8_sat_rtp(char8);
+short8 __ovld __cnfn convert_short8_rtn(char8);
+short8 __ovld __cnfn convert_short8_sat_rtn(char8);
+short8 __ovld __cnfn convert_short8(char8);
+short8 __ovld __cnfn convert_short8_sat(char8);
+short8 __ovld __cnfn convert_short8_rte(uchar8);
+short8 __ovld __cnfn convert_short8_sat_rte(uchar8);
+short8 __ovld __cnfn convert_short8_rtz(uchar8);
+short8 __ovld __cnfn convert_short8_sat_rtz(uchar8);
+short8 __ovld __cnfn convert_short8_rtp(uchar8);
+short8 __ovld __cnfn convert_short8_sat_rtp(uchar8);
+short8 __ovld __cnfn convert_short8_rtn(uchar8);
+short8 __ovld __cnfn convert_short8_sat_rtn(uchar8);
+short8 __ovld __cnfn convert_short8(uchar8);
+short8 __ovld __cnfn convert_short8_sat(uchar8);
+short8 __ovld __cnfn convert_short8_rte(short8);
+short8 __ovld __cnfn convert_short8_sat_rte(short8);
+short8 __ovld __cnfn convert_short8_rtz(short8);
+short8 __ovld __cnfn convert_short8_sat_rtz(short8);
+short8 __ovld __cnfn convert_short8_rtp(short8);
+short8 __ovld __cnfn convert_short8_sat_rtp(short8);
+short8 __ovld __cnfn convert_short8_rtn(short8);
+short8 __ovld __cnfn convert_short8_sat_rtn(short8);
+short8 __ovld __cnfn convert_short8(short8);
+short8 __ovld __cnfn convert_short8_sat(short8);
+short8 __ovld __cnfn convert_short8_rte(ushort8);
+short8 __ovld __cnfn convert_short8_sat_rte(ushort8);
+short8 __ovld __cnfn convert_short8_rtz(ushort8);
+short8 __ovld __cnfn convert_short8_sat_rtz(ushort8);
+short8 __ovld __cnfn convert_short8_rtp(ushort8);
+short8 __ovld __cnfn convert_short8_sat_rtp(ushort8);
+short8 __ovld __cnfn convert_short8_rtn(ushort8);
+short8 __ovld __cnfn convert_short8_sat_rtn(ushort8);
+short8 __ovld __cnfn convert_short8(ushort8);
+short8 __ovld __cnfn convert_short8_sat(ushort8);
+short8 __ovld __cnfn convert_short8_rte(int8);
+short8 __ovld __cnfn convert_short8_sat_rte(int8);
+short8 __ovld __cnfn convert_short8_rtz(int8);
+short8 __ovld __cnfn convert_short8_sat_rtz(int8);
+short8 __ovld __cnfn convert_short8_rtp(int8);
+short8 __ovld __cnfn convert_short8_sat_rtp(int8);
+short8 __ovld __cnfn convert_short8_rtn(int8);
+short8 __ovld __cnfn convert_short8_sat_rtn(int8);
+short8 __ovld __cnfn convert_short8(int8);
+short8 __ovld __cnfn convert_short8_sat(int8);
+short8 __ovld __cnfn convert_short8_rte(uint8);
+short8 __ovld __cnfn convert_short8_sat_rte(uint8);
+short8 __ovld __cnfn convert_short8_rtz(uint8);
+short8 __ovld __cnfn convert_short8_sat_rtz(uint8);
+short8 __ovld __cnfn convert_short8_rtp(uint8);
+short8 __ovld __cnfn convert_short8_sat_rtp(uint8);
+short8 __ovld __cnfn convert_short8_rtn(uint8);
+short8 __ovld __cnfn convert_short8_sat_rtn(uint8);
+short8 __ovld __cnfn convert_short8(uint8);
+short8 __ovld __cnfn convert_short8_sat(uint8);
+short8 __ovld __cnfn convert_short8_rte(long8);
+short8 __ovld __cnfn convert_short8_sat_rte(long8);
+short8 __ovld __cnfn convert_short8_rtz(long8);
+short8 __ovld __cnfn convert_short8_sat_rtz(long8);
+short8 __ovld __cnfn convert_short8_rtp(long8);
+short8 __ovld __cnfn convert_short8_sat_rtp(long8);
+short8 __ovld __cnfn convert_short8_rtn(long8);
+short8 __ovld __cnfn convert_short8_sat_rtn(long8);
+short8 __ovld __cnfn convert_short8(long8);
+short8 __ovld __cnfn convert_short8_sat(long8);
+short8 __ovld __cnfn convert_short8_rte(ulong8);
+short8 __ovld __cnfn convert_short8_sat_rte(ulong8);
+short8 __ovld __cnfn convert_short8_rtz(ulong8);
+short8 __ovld __cnfn convert_short8_sat_rtz(ulong8);
+short8 __ovld __cnfn convert_short8_rtp(ulong8);
+short8 __ovld __cnfn convert_short8_sat_rtp(ulong8);
+short8 __ovld __cnfn convert_short8_rtn(ulong8);
+short8 __ovld __cnfn convert_short8_sat_rtn(ulong8);
+short8 __ovld __cnfn convert_short8(ulong8);
+short8 __ovld __cnfn convert_short8_sat(ulong8);
+short8 __ovld __cnfn convert_short8_rte(float8);
+short8 __ovld __cnfn convert_short8_sat_rte(float8);
+short8 __ovld __cnfn convert_short8_rtz(float8);
+short8 __ovld __cnfn convert_short8_sat_rtz(float8);
+short8 __ovld __cnfn convert_short8_rtp(float8);
+short8 __ovld __cnfn convert_short8_sat_rtp(float8);
+short8 __ovld __cnfn convert_short8_rtn(float8);
+short8 __ovld __cnfn convert_short8_sat_rtn(float8);
+short8 __ovld __cnfn convert_short8(float8);
+short8 __ovld __cnfn convert_short8_sat(float8);
+ushort8 __ovld __cnfn convert_ushort8_rte(char8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(char8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(char8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(char8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8);
+ushort8 __ovld __cnfn convert_ushort8(char8);
+ushort8 __ovld __cnfn convert_ushort8_sat(char8);
+ushort8 __ovld __cnfn convert_ushort8_rte(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8);
+ushort8 __ovld __cnfn convert_ushort8(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_sat(uchar8);
+ushort8 __ovld __cnfn convert_ushort8_rte(short8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(short8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(short8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(short8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8);
+ushort8 __ovld __cnfn convert_ushort8(short8);
+ushort8 __ovld __cnfn convert_ushort8_sat(short8);
+ushort8 __ovld __cnfn convert_ushort8_rte(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8);
+ushort8 __ovld __cnfn convert_ushort8(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_sat(ushort8);
+ushort8 __ovld __cnfn convert_ushort8_rte(int8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(int8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(int8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(int8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8);
+ushort8 __ovld __cnfn convert_ushort8(int8);
+ushort8 __ovld __cnfn convert_ushort8_sat(int8);
+ushort8 __ovld __cnfn convert_ushort8_rte(uint8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(uint8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(uint8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(uint8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8);
+ushort8 __ovld __cnfn convert_ushort8(uint8);
+ushort8 __ovld __cnfn convert_ushort8_sat(uint8);
+ushort8 __ovld __cnfn convert_ushort8_rte(long8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(long8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(long8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(long8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8);
+ushort8 __ovld __cnfn convert_ushort8(long8);
+ushort8 __ovld __cnfn convert_ushort8_sat(long8);
+ushort8 __ovld __cnfn convert_ushort8_rte(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8);
+ushort8 __ovld __cnfn convert_ushort8(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_sat(ulong8);
+ushort8 __ovld __cnfn convert_ushort8_rte(float8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(float8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(float8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(float8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8);
+ushort8 __ovld __cnfn convert_ushort8(float8);
+ushort8 __ovld __cnfn convert_ushort8_sat(float8);
+int8 __ovld __cnfn convert_int8_rte(char8);
+int8 __ovld __cnfn convert_int8_sat_rte(char8);
+int8 __ovld __cnfn convert_int8_rtz(char8);
+int8 __ovld __cnfn convert_int8_sat_rtz(char8);
+int8 __ovld __cnfn convert_int8_rtp(char8);
+int8 __ovld __cnfn convert_int8_sat_rtp(char8);
+int8 __ovld __cnfn convert_int8_rtn(char8);
+int8 __ovld __cnfn convert_int8_sat_rtn(char8);
+int8 __ovld __cnfn convert_int8(char8);
+int8 __ovld __cnfn convert_int8_sat(char8);
+int8 __ovld __cnfn convert_int8_rte(uchar8);
+int8 __ovld __cnfn convert_int8_sat_rte(uchar8);
+int8 __ovld __cnfn convert_int8_rtz(uchar8);
+int8 __ovld __cnfn convert_int8_sat_rtz(uchar8);
+int8 __ovld __cnfn convert_int8_rtp(uchar8);
+int8 __ovld __cnfn convert_int8_sat_rtp(uchar8);
+int8 __ovld __cnfn convert_int8_rtn(uchar8);
+int8 __ovld __cnfn convert_int8_sat_rtn(uchar8);
+int8 __ovld __cnfn convert_int8(uchar8);
+int8 __ovld __cnfn convert_int8_sat(uchar8);
+int8 __ovld __cnfn convert_int8_rte(short8);
+int8 __ovld __cnfn convert_int8_sat_rte(short8);
+int8 __ovld __cnfn convert_int8_rtz(short8);
+int8 __ovld __cnfn convert_int8_sat_rtz(short8);
+int8 __ovld __cnfn convert_int8_rtp(short8);
+int8 __ovld __cnfn convert_int8_sat_rtp(short8);
+int8 __ovld __cnfn convert_int8_rtn(short8);
+int8 __ovld __cnfn convert_int8_sat_rtn(short8);
+int8 __ovld __cnfn convert_int8(short8);
+int8 __ovld __cnfn convert_int8_sat(short8);
+int8 __ovld __cnfn convert_int8_rte(ushort8);
+int8 __ovld __cnfn convert_int8_sat_rte(ushort8);
+int8 __ovld __cnfn convert_int8_rtz(ushort8);
+int8 __ovld __cnfn convert_int8_sat_rtz(ushort8);
+int8 __ovld __cnfn convert_int8_rtp(ushort8);
+int8 __ovld __cnfn convert_int8_sat_rtp(ushort8);
+int8 __ovld __cnfn convert_int8_rtn(ushort8);
+int8 __ovld __cnfn convert_int8_sat_rtn(ushort8);
+int8 __ovld __cnfn convert_int8(ushort8);
+int8 __ovld __cnfn convert_int8_sat(ushort8);
+int8 __ovld __cnfn convert_int8_rte(int8);
+int8 __ovld __cnfn convert_int8_sat_rte(int8);
+int8 __ovld __cnfn convert_int8_rtz(int8);
+int8 __ovld __cnfn convert_int8_sat_rtz(int8);
+int8 __ovld __cnfn convert_int8_rtp(int8);
+int8 __ovld __cnfn convert_int8_sat_rtp(int8);
+int8 __ovld __cnfn convert_int8_rtn(int8);
+int8 __ovld __cnfn convert_int8_sat_rtn(int8);
+int8 __ovld __cnfn convert_int8(int8);
+int8 __ovld __cnfn convert_int8_sat(int8);
+int8 __ovld __cnfn convert_int8_rte(uint8);
+int8 __ovld __cnfn convert_int8_sat_rte(uint8);
+int8 __ovld __cnfn convert_int8_rtz(uint8);
+int8 __ovld __cnfn convert_int8_sat_rtz(uint8);
+int8 __ovld __cnfn convert_int8_rtp(uint8);
+int8 __ovld __cnfn convert_int8_sat_rtp(uint8);
+int8 __ovld __cnfn convert_int8_rtn(uint8);
+int8 __ovld __cnfn convert_int8_sat_rtn(uint8);
+int8 __ovld __cnfn convert_int8(uint8);
+int8 __ovld __cnfn convert_int8_sat(uint8);
+int8 __ovld __cnfn convert_int8_rte(long8);
+int8 __ovld __cnfn convert_int8_sat_rte(long8);
+int8 __ovld __cnfn convert_int8_rtz(long8);
+int8 __ovld __cnfn convert_int8_sat_rtz(long8);
+int8 __ovld __cnfn convert_int8_rtp(long8);
+int8 __ovld __cnfn convert_int8_sat_rtp(long8);
+int8 __ovld __cnfn convert_int8_rtn(long8);
+int8 __ovld __cnfn convert_int8_sat_rtn(long8);
+int8 __ovld __cnfn convert_int8(long8);
+int8 __ovld __cnfn convert_int8_sat(long8);
+int8 __ovld __cnfn convert_int8_rte(ulong8);
+int8 __ovld __cnfn convert_int8_sat_rte(ulong8);
+int8 __ovld __cnfn convert_int8_rtz(ulong8);
+int8 __ovld __cnfn convert_int8_sat_rtz(ulong8);
+int8 __ovld __cnfn convert_int8_rtp(ulong8);
+int8 __ovld __cnfn convert_int8_sat_rtp(ulong8);
+int8 __ovld __cnfn convert_int8_rtn(ulong8);
+int8 __ovld __cnfn convert_int8_sat_rtn(ulong8);
+int8 __ovld __cnfn convert_int8(ulong8);
+int8 __ovld __cnfn convert_int8_sat(ulong8);
+int8 __ovld __cnfn convert_int8_rte(float8);
+int8 __ovld __cnfn convert_int8_sat_rte(float8);
+int8 __ovld __cnfn convert_int8_rtz(float8);
+int8 __ovld __cnfn convert_int8_sat_rtz(float8);
+int8 __ovld __cnfn convert_int8_rtp(float8);
+int8 __ovld __cnfn convert_int8_sat_rtp(float8);
+int8 __ovld __cnfn convert_int8_rtn(float8);
+int8 __ovld __cnfn convert_int8_sat_rtn(float8);
+int8 __ovld __cnfn convert_int8(float8);
+int8 __ovld __cnfn convert_int8_sat(float8);
+uint8 __ovld __cnfn convert_uint8_rte(char8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(char8);
+uint8 __ovld __cnfn convert_uint8_rtz(char8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(char8);
+uint8 __ovld __cnfn convert_uint8_rtp(char8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(char8);
+uint8 __ovld __cnfn convert_uint8_rtn(char8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(char8);
+uint8 __ovld __cnfn convert_uint8(char8);
+uint8 __ovld __cnfn convert_uint8_sat(char8);
+uint8 __ovld __cnfn convert_uint8_rte(uchar8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8);
+uint8 __ovld __cnfn convert_uint8_rtz(uchar8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8);
+uint8 __ovld __cnfn convert_uint8_rtp(uchar8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8);
+uint8 __ovld __cnfn convert_uint8_rtn(uchar8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8);
+uint8 __ovld __cnfn convert_uint8(uchar8);
+uint8 __ovld __cnfn convert_uint8_sat(uchar8);
+uint8 __ovld __cnfn convert_uint8_rte(short8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(short8);
+uint8 __ovld __cnfn convert_uint8_rtz(short8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(short8);
+uint8 __ovld __cnfn convert_uint8_rtp(short8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(short8);
+uint8 __ovld __cnfn convert_uint8_rtn(short8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(short8);
+uint8 __ovld __cnfn convert_uint8(short8);
+uint8 __ovld __cnfn convert_uint8_sat(short8);
+uint8 __ovld __cnfn convert_uint8_rte(ushort8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8);
+uint8 __ovld __cnfn convert_uint8_rtz(ushort8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8);
+uint8 __ovld __cnfn convert_uint8_rtp(ushort8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8);
+uint8 __ovld __cnfn convert_uint8_rtn(ushort8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8);
+uint8 __ovld __cnfn convert_uint8(ushort8);
+uint8 __ovld __cnfn convert_uint8_sat(ushort8);
+uint8 __ovld __cnfn convert_uint8_rte(int8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(int8);
+uint8 __ovld __cnfn convert_uint8_rtz(int8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(int8);
+uint8 __ovld __cnfn convert_uint8_rtp(int8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(int8);
+uint8 __ovld __cnfn convert_uint8_rtn(int8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(int8);
+uint8 __ovld __cnfn convert_uint8(int8);
+uint8 __ovld __cnfn convert_uint8_sat(int8);
+uint8 __ovld __cnfn convert_uint8_rte(uint8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(uint8);
+uint8 __ovld __cnfn convert_uint8_rtz(uint8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8);
+uint8 __ovld __cnfn convert_uint8_rtp(uint8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8);
+uint8 __ovld __cnfn convert_uint8_rtn(uint8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8);
+uint8 __ovld __cnfn convert_uint8(uint8);
+uint8 __ovld __cnfn convert_uint8_sat(uint8);
+uint8 __ovld __cnfn convert_uint8_rte(long8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(long8);
+uint8 __ovld __cnfn convert_uint8_rtz(long8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(long8);
+uint8 __ovld __cnfn convert_uint8_rtp(long8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(long8);
+uint8 __ovld __cnfn convert_uint8_rtn(long8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(long8);
+uint8 __ovld __cnfn convert_uint8(long8);
+uint8 __ovld __cnfn convert_uint8_sat(long8);
+uint8 __ovld __cnfn convert_uint8_rte(ulong8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8);
+uint8 __ovld __cnfn convert_uint8_rtz(ulong8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8);
+uint8 __ovld __cnfn convert_uint8_rtp(ulong8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8);
+uint8 __ovld __cnfn convert_uint8_rtn(ulong8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8);
+uint8 __ovld __cnfn convert_uint8(ulong8);
+uint8 __ovld __cnfn convert_uint8_sat(ulong8);
+uint8 __ovld __cnfn convert_uint8_rte(float8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(float8);
+uint8 __ovld __cnfn convert_uint8_rtz(float8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(float8);
+uint8 __ovld __cnfn convert_uint8_rtp(float8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(float8);
+uint8 __ovld __cnfn convert_uint8_rtn(float8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(float8);
+uint8 __ovld __cnfn convert_uint8(float8);
+uint8 __ovld __cnfn convert_uint8_sat(float8);
+long8 __ovld __cnfn convert_long8_rte(char8);
+long8 __ovld __cnfn convert_long8_sat_rte(char8);
+long8 __ovld __cnfn convert_long8_rtz(char8);
+long8 __ovld __cnfn convert_long8_sat_rtz(char8);
+long8 __ovld __cnfn convert_long8_rtp(char8);
+long8 __ovld __cnfn convert_long8_sat_rtp(char8);
+long8 __ovld __cnfn convert_long8_rtn(char8);
+long8 __ovld __cnfn convert_long8_sat_rtn(char8);
+long8 __ovld __cnfn convert_long8(char8);
+long8 __ovld __cnfn convert_long8_sat(char8);
+long8 __ovld __cnfn convert_long8_rte(uchar8);
+long8 __ovld __cnfn convert_long8_sat_rte(uchar8);
+long8 __ovld __cnfn convert_long8_rtz(uchar8);
+long8 __ovld __cnfn convert_long8_sat_rtz(uchar8);
+long8 __ovld __cnfn convert_long8_rtp(uchar8);
+long8 __ovld __cnfn convert_long8_sat_rtp(uchar8);
+long8 __ovld __cnfn convert_long8_rtn(uchar8);
+long8 __ovld __cnfn convert_long8_sat_rtn(uchar8);
+long8 __ovld __cnfn convert_long8(uchar8);
+long8 __ovld __cnfn convert_long8_sat(uchar8);
+long8 __ovld __cnfn convert_long8_rte(short8);
+long8 __ovld __cnfn convert_long8_sat_rte(short8);
+long8 __ovld __cnfn convert_long8_rtz(short8);
+long8 __ovld __cnfn convert_long8_sat_rtz(short8);
+long8 __ovld __cnfn convert_long8_rtp(short8);
+long8 __ovld __cnfn convert_long8_sat_rtp(short8);
+long8 __ovld __cnfn convert_long8_rtn(short8);
+long8 __ovld __cnfn convert_long8_sat_rtn(short8);
+long8 __ovld __cnfn convert_long8(short8);
+long8 __ovld __cnfn convert_long8_sat(short8);
+long8 __ovld __cnfn convert_long8_rte(ushort8);
+long8 __ovld __cnfn convert_long8_sat_rte(ushort8);
+long8 __ovld __cnfn convert_long8_rtz(ushort8);
+long8 __ovld __cnfn convert_long8_sat_rtz(ushort8);
+long8 __ovld __cnfn convert_long8_rtp(ushort8);
+long8 __ovld __cnfn convert_long8_sat_rtp(ushort8);
+long8 __ovld __cnfn convert_long8_rtn(ushort8);
+long8 __ovld __cnfn convert_long8_sat_rtn(ushort8);
+long8 __ovld __cnfn convert_long8(ushort8);
+long8 __ovld __cnfn convert_long8_sat(ushort8);
+long8 __ovld __cnfn convert_long8_rte(int8);
+long8 __ovld __cnfn convert_long8_sat_rte(int8);
+long8 __ovld __cnfn convert_long8_rtz(int8);
+long8 __ovld __cnfn convert_long8_sat_rtz(int8);
+long8 __ovld __cnfn convert_long8_rtp(int8);
+long8 __ovld __cnfn convert_long8_sat_rtp(int8);
+long8 __ovld __cnfn convert_long8_rtn(int8);
+long8 __ovld __cnfn convert_long8_sat_rtn(int8);
+long8 __ovld __cnfn convert_long8(int8);
+long8 __ovld __cnfn convert_long8_sat(int8);
+long8 __ovld __cnfn convert_long8_rte(uint8);
+long8 __ovld __cnfn convert_long8_sat_rte(uint8);
+long8 __ovld __cnfn convert_long8_rtz(uint8);
+long8 __ovld __cnfn convert_long8_sat_rtz(uint8);
+long8 __ovld __cnfn convert_long8_rtp(uint8);
+long8 __ovld __cnfn convert_long8_sat_rtp(uint8);
+long8 __ovld __cnfn convert_long8_rtn(uint8);
+long8 __ovld __cnfn convert_long8_sat_rtn(uint8);
+long8 __ovld __cnfn convert_long8(uint8);
+long8 __ovld __cnfn convert_long8_sat(uint8);
+long8 __ovld __cnfn convert_long8_rte(long8);
+long8 __ovld __cnfn convert_long8_sat_rte(long8);
+long8 __ovld __cnfn convert_long8_rtz(long8);
+long8 __ovld __cnfn convert_long8_sat_rtz(long8);
+long8 __ovld __cnfn convert_long8_rtp(long8);
+long8 __ovld __cnfn convert_long8_sat_rtp(long8);
+long8 __ovld __cnfn convert_long8_rtn(long8);
+long8 __ovld __cnfn convert_long8_sat_rtn(long8);
+long8 __ovld __cnfn convert_long8(long8);
+long8 __ovld __cnfn convert_long8_sat(long8);
+long8 __ovld __cnfn convert_long8_rte(ulong8);
+long8 __ovld __cnfn convert_long8_sat_rte(ulong8);
+long8 __ovld __cnfn convert_long8_rtz(ulong8);
+long8 __ovld __cnfn convert_long8_sat_rtz(ulong8);
+long8 __ovld __cnfn convert_long8_rtp(ulong8);
+long8 __ovld __cnfn convert_long8_sat_rtp(ulong8);
+long8 __ovld __cnfn convert_long8_rtn(ulong8);
+long8 __ovld __cnfn convert_long8_sat_rtn(ulong8);
+long8 __ovld __cnfn convert_long8(ulong8);
+long8 __ovld __cnfn convert_long8_sat(ulong8);
+long8 __ovld __cnfn convert_long8_rte(float8);
+long8 __ovld __cnfn convert_long8_sat_rte(float8);
+long8 __ovld __cnfn convert_long8_rtz(float8);
+long8 __ovld __cnfn convert_long8_sat_rtz(float8);
+long8 __ovld __cnfn convert_long8_rtp(float8);
+long8 __ovld __cnfn convert_long8_sat_rtp(float8);
+long8 __ovld __cnfn convert_long8_rtn(float8);
+long8 __ovld __cnfn convert_long8_sat_rtn(float8);
+long8 __ovld __cnfn convert_long8(float8);
+long8 __ovld __cnfn convert_long8_sat(float8);
+ulong8 __ovld __cnfn convert_ulong8_rte(char8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(char8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(char8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(char8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8);
+ulong8 __ovld __cnfn convert_ulong8(char8);
+ulong8 __ovld __cnfn convert_ulong8_sat(char8);
+ulong8 __ovld __cnfn convert_ulong8_rte(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8);
+ulong8 __ovld __cnfn convert_ulong8(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_sat(uchar8);
+ulong8 __ovld __cnfn convert_ulong8_rte(short8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(short8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(short8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(short8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8);
+ulong8 __ovld __cnfn convert_ulong8(short8);
+ulong8 __ovld __cnfn convert_ulong8_sat(short8);
+ulong8 __ovld __cnfn convert_ulong8_rte(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8);
+ulong8 __ovld __cnfn convert_ulong8(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_sat(ushort8);
+ulong8 __ovld __cnfn convert_ulong8_rte(int8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(int8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(int8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(int8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8);
+ulong8 __ovld __cnfn convert_ulong8(int8);
+ulong8 __ovld __cnfn convert_ulong8_sat(int8);
+ulong8 __ovld __cnfn convert_ulong8_rte(uint8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(uint8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(uint8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(uint8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8);
+ulong8 __ovld __cnfn convert_ulong8(uint8);
+ulong8 __ovld __cnfn convert_ulong8_sat(uint8);
+ulong8 __ovld __cnfn convert_ulong8_rte(long8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(long8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(long8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(long8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8);
+ulong8 __ovld __cnfn convert_ulong8(long8);
+ulong8 __ovld __cnfn convert_ulong8_sat(long8);
+ulong8 __ovld __cnfn convert_ulong8_rte(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8);
+ulong8 __ovld __cnfn convert_ulong8(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_sat(ulong8);
+ulong8 __ovld __cnfn convert_ulong8_rte(float8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(float8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(float8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(float8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8);
+ulong8 __ovld __cnfn convert_ulong8(float8);
+ulong8 __ovld __cnfn convert_ulong8_sat(float8);
+float8 __ovld __cnfn convert_float8_rte(char8);
+float8 __ovld __cnfn convert_float8_rtz(char8);
+float8 __ovld __cnfn convert_float8_rtp(char8);
+float8 __ovld __cnfn convert_float8_rtn(char8);
+float8 __ovld __cnfn convert_float8(char8);
+float8 __ovld __cnfn convert_float8_rte(uchar8);
+float8 __ovld __cnfn convert_float8_rtz(uchar8);
+float8 __ovld __cnfn convert_float8_rtp(uchar8);
+float8 __ovld __cnfn convert_float8_rtn(uchar8);
+float8 __ovld __cnfn convert_float8(uchar8);
+float8 __ovld __cnfn convert_float8_rte(short8);
+float8 __ovld __cnfn convert_float8_rtz(short8);
+float8 __ovld __cnfn convert_float8_rtp(short8);
+float8 __ovld __cnfn convert_float8_rtn(short8);
+float8 __ovld __cnfn convert_float8(short8);
+float8 __ovld __cnfn convert_float8_rte(ushort8);
+float8 __ovld __cnfn convert_float8_rtz(ushort8);
+float8 __ovld __cnfn convert_float8_rtp(ushort8);
+float8 __ovld __cnfn convert_float8_rtn(ushort8);
+float8 __ovld __cnfn convert_float8(ushort8);
+float8 __ovld __cnfn convert_float8_rte(int8);
+float8 __ovld __cnfn convert_float8_rtz(int8);
+float8 __ovld __cnfn convert_float8_rtp(int8);
+float8 __ovld __cnfn convert_float8_rtn(int8);
+float8 __ovld __cnfn convert_float8(int8);
+float8 __ovld __cnfn convert_float8_rte(uint8);
+float8 __ovld __cnfn convert_float8_rtz(uint8);
+float8 __ovld __cnfn convert_float8_rtp(uint8);
+float8 __ovld __cnfn convert_float8_rtn(uint8);
+float8 __ovld __cnfn convert_float8(uint8);
+float8 __ovld __cnfn convert_float8_rte(long8);
+float8 __ovld __cnfn convert_float8_rtz(long8);
+float8 __ovld __cnfn convert_float8_rtp(long8);
+float8 __ovld __cnfn convert_float8_rtn(long8);
+float8 __ovld __cnfn convert_float8(long8);
+float8 __ovld __cnfn convert_float8_rte(ulong8);
+float8 __ovld __cnfn convert_float8_rtz(ulong8);
+float8 __ovld __cnfn convert_float8_rtp(ulong8);
+float8 __ovld __cnfn convert_float8_rtn(ulong8);
+float8 __ovld __cnfn convert_float8(ulong8);
+float8 __ovld __cnfn convert_float8_rte(float8);
+float8 __ovld __cnfn convert_float8_rtz(float8);
+float8 __ovld __cnfn convert_float8_rtp(float8);
+float8 __ovld __cnfn convert_float8_rtn(float8);
+float8 __ovld __cnfn convert_float8(float8);
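[The declarations above complete the 8-element conversion set: every integer destination gets explicit, saturated, and rounding-mode variants, while the float8 destination omits the _sat forms, since saturation is defined only for integer results. A minimal usage sketch follows, not part of the header itself; the kernel name and buffers are hypothetical, but the convert_uchar8_sat_rte overload it calls is declared above.]

__kernel void pack_pixels(__global const float8 *in, __global uchar8 *out) {
    size_t gid = get_global_id(0);
    /* Round each lane to nearest-even, then clamp into [0, 255];
       a plain cast would instead wrap out-of-range values. */
    out[gid] = convert_uchar8_sat_rte(in[gid] * 255.0f);
}
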
+char16 __ovld __cnfn convert_char16_rte(char16);
+char16 __ovld __cnfn convert_char16_sat_rte(char16);
+char16 __ovld __cnfn convert_char16_rtz(char16);
+char16 __ovld __cnfn convert_char16_sat_rtz(char16);
+char16 __ovld __cnfn convert_char16_rtp(char16);
+char16 __ovld __cnfn convert_char16_sat_rtp(char16);
+char16 __ovld __cnfn convert_char16_rtn(char16);
+char16 __ovld __cnfn convert_char16_sat_rtn(char16);
+char16 __ovld __cnfn convert_char16(char16);
+char16 __ovld __cnfn convert_char16_sat(char16);
+char16 __ovld __cnfn convert_char16_rte(uchar16);
+char16 __ovld __cnfn convert_char16_sat_rte(uchar16);
+char16 __ovld __cnfn convert_char16_rtz(uchar16);
+char16 __ovld __cnfn convert_char16_sat_rtz(uchar16);
+char16 __ovld __cnfn convert_char16_rtp(uchar16);
+char16 __ovld __cnfn convert_char16_sat_rtp(uchar16);
+char16 __ovld __cnfn convert_char16_rtn(uchar16);
+char16 __ovld __cnfn convert_char16_sat_rtn(uchar16);
+char16 __ovld __cnfn convert_char16(uchar16);
+char16 __ovld __cnfn convert_char16_sat(uchar16);
+char16 __ovld __cnfn convert_char16_rte(short16);
+char16 __ovld __cnfn convert_char16_sat_rte(short16);
+char16 __ovld __cnfn convert_char16_rtz(short16);
+char16 __ovld __cnfn convert_char16_sat_rtz(short16);
+char16 __ovld __cnfn convert_char16_rtp(short16);
+char16 __ovld __cnfn convert_char16_sat_rtp(short16);
+char16 __ovld __cnfn convert_char16_rtn(short16);
+char16 __ovld __cnfn convert_char16_sat_rtn(short16);
+char16 __ovld __cnfn convert_char16(short16);
+char16 __ovld __cnfn convert_char16_sat(short16);
+char16 __ovld __cnfn convert_char16_rte(ushort16);
+char16 __ovld __cnfn convert_char16_sat_rte(ushort16);
+char16 __ovld __cnfn convert_char16_rtz(ushort16);
+char16 __ovld __cnfn convert_char16_sat_rtz(ushort16);
+char16 __ovld __cnfn convert_char16_rtp(ushort16);
+char16 __ovld __cnfn convert_char16_sat_rtp(ushort16);
+char16 __ovld __cnfn convert_char16_rtn(ushort16);
+char16 __ovld __cnfn convert_char16_sat_rtn(ushort16);
+char16 __ovld __cnfn convert_char16(ushort16);
+char16 __ovld __cnfn convert_char16_sat(ushort16);
+char16 __ovld __cnfn convert_char16_rte(int16);
+char16 __ovld __cnfn convert_char16_sat_rte(int16);
+char16 __ovld __cnfn convert_char16_rtz(int16);
+char16 __ovld __cnfn convert_char16_sat_rtz(int16);
+char16 __ovld __cnfn convert_char16_rtp(int16);
+char16 __ovld __cnfn convert_char16_sat_rtp(int16);
+char16 __ovld __cnfn convert_char16_rtn(int16);
+char16 __ovld __cnfn convert_char16_sat_rtn(int16);
+char16 __ovld __cnfn convert_char16(int16);
+char16 __ovld __cnfn convert_char16_sat(int16);
+char16 __ovld __cnfn convert_char16_rte(uint16);
+char16 __ovld __cnfn convert_char16_sat_rte(uint16);
+char16 __ovld __cnfn convert_char16_rtz(uint16);
+char16 __ovld __cnfn convert_char16_sat_rtz(uint16);
+char16 __ovld __cnfn convert_char16_rtp(uint16);
+char16 __ovld __cnfn convert_char16_sat_rtp(uint16);
+char16 __ovld __cnfn convert_char16_rtn(uint16);
+char16 __ovld __cnfn convert_char16_sat_rtn(uint16);
+char16 __ovld __cnfn convert_char16(uint16);
+char16 __ovld __cnfn convert_char16_sat(uint16);
+char16 __ovld __cnfn convert_char16_rte(long16);
+char16 __ovld __cnfn convert_char16_sat_rte(long16);
+char16 __ovld __cnfn convert_char16_rtz(long16);
+char16 __ovld __cnfn convert_char16_sat_rtz(long16);
+char16 __ovld __cnfn convert_char16_rtp(long16);
+char16 __ovld __cnfn convert_char16_sat_rtp(long16);
+char16 __ovld __cnfn convert_char16_rtn(long16);
+char16 __ovld __cnfn convert_char16_sat_rtn(long16);
+char16 __ovld __cnfn convert_char16(long16);
+char16 __ovld __cnfn convert_char16_sat(long16);
+char16 __ovld __cnfn convert_char16_rte(ulong16);
+char16 __ovld __cnfn convert_char16_sat_rte(ulong16);
+char16 __ovld __cnfn convert_char16_rtz(ulong16);
+char16 __ovld __cnfn convert_char16_sat_rtz(ulong16);
+char16 __ovld __cnfn convert_char16_rtp(ulong16);
+char16 __ovld __cnfn convert_char16_sat_rtp(ulong16);
+char16 __ovld __cnfn convert_char16_rtn(ulong16);
+char16 __ovld __cnfn convert_char16_sat_rtn(ulong16);
+char16 __ovld __cnfn convert_char16(ulong16);
+char16 __ovld __cnfn convert_char16_sat(ulong16);
+char16 __ovld __cnfn convert_char16_rte(float16);
+char16 __ovld __cnfn convert_char16_sat_rte(float16);
+char16 __ovld __cnfn convert_char16_rtz(float16);
+char16 __ovld __cnfn convert_char16_sat_rtz(float16);
+char16 __ovld __cnfn convert_char16_rtp(float16);
+char16 __ovld __cnfn convert_char16_sat_rtp(float16);
+char16 __ovld __cnfn convert_char16_rtn(float16);
+char16 __ovld __cnfn convert_char16_sat_rtn(float16);
+char16 __ovld __cnfn convert_char16(float16);
+char16 __ovld __cnfn convert_char16_sat(float16);
+uchar16 __ovld __cnfn convert_uchar16_rte(char16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(char16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(char16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(char16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16);
+uchar16 __ovld __cnfn convert_uchar16(char16);
+uchar16 __ovld __cnfn convert_uchar16_sat(char16);
+uchar16 __ovld __cnfn convert_uchar16_rte(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16);
+uchar16 __ovld __cnfn convert_uchar16(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_sat(uchar16);
+uchar16 __ovld __cnfn convert_uchar16_rte(short16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(short16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(short16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(short16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16);
+uchar16 __ovld __cnfn convert_uchar16(short16);
+uchar16 __ovld __cnfn convert_uchar16_sat(short16);
+uchar16 __ovld __cnfn convert_uchar16_rte(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16);
+uchar16 __ovld __cnfn convert_uchar16(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_sat(ushort16);
+uchar16 __ovld __cnfn convert_uchar16_rte(int16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(int16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(int16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(int16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16);
+uchar16 __ovld __cnfn convert_uchar16(int16);
+uchar16 __ovld __cnfn convert_uchar16_sat(int16);
+uchar16 __ovld __cnfn convert_uchar16_rte(uint16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(uint16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(uint16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(uint16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16);
+uchar16 __ovld __cnfn convert_uchar16(uint16);
+uchar16 __ovld __cnfn convert_uchar16_sat(uint16);
+uchar16 __ovld __cnfn convert_uchar16_rte(long16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(long16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(long16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(long16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16);
+uchar16 __ovld __cnfn convert_uchar16(long16);
+uchar16 __ovld __cnfn convert_uchar16_sat(long16);
+uchar16 __ovld __cnfn convert_uchar16_rte(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16);
+uchar16 __ovld __cnfn convert_uchar16(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_sat(ulong16);
+uchar16 __ovld __cnfn convert_uchar16_rte(float16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(float16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(float16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(float16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16);
+uchar16 __ovld __cnfn convert_uchar16(float16);
+uchar16 __ovld __cnfn convert_uchar16_sat(float16);
+short16 __ovld __cnfn convert_short16_rte(char16);
+short16 __ovld __cnfn convert_short16_sat_rte(char16);
+short16 __ovld __cnfn convert_short16_rtz(char16);
+short16 __ovld __cnfn convert_short16_sat_rtz(char16);
+short16 __ovld __cnfn convert_short16_rtp(char16);
+short16 __ovld __cnfn convert_short16_sat_rtp(char16);
+short16 __ovld __cnfn convert_short16_rtn(char16);
+short16 __ovld __cnfn convert_short16_sat_rtn(char16);
+short16 __ovld __cnfn convert_short16(char16);
+short16 __ovld __cnfn convert_short16_sat(char16);
+short16 __ovld __cnfn convert_short16_rte(uchar16);
+short16 __ovld __cnfn convert_short16_sat_rte(uchar16);
+short16 __ovld __cnfn convert_short16_rtz(uchar16);
+short16 __ovld __cnfn convert_short16_sat_rtz(uchar16);
+short16 __ovld __cnfn convert_short16_rtp(uchar16);
+short16 __ovld __cnfn convert_short16_sat_rtp(uchar16);
+short16 __ovld __cnfn convert_short16_rtn(uchar16);
+short16 __ovld __cnfn convert_short16_sat_rtn(uchar16);
+short16 __ovld __cnfn convert_short16(uchar16);
+short16 __ovld __cnfn convert_short16_sat(uchar16);
+short16 __ovld __cnfn convert_short16_rte(short16);
+short16 __ovld __cnfn convert_short16_sat_rte(short16);
+short16 __ovld __cnfn convert_short16_rtz(short16);
+short16 __ovld __cnfn convert_short16_sat_rtz(short16);
+short16 __ovld __cnfn convert_short16_rtp(short16);
+short16 __ovld __cnfn convert_short16_sat_rtp(short16);
+short16 __ovld __cnfn convert_short16_rtn(short16);
+short16 __ovld __cnfn convert_short16_sat_rtn(short16);
+short16 __ovld __cnfn convert_short16(short16);
+short16 __ovld __cnfn convert_short16_sat(short16);
+short16 __ovld __cnfn convert_short16_rte(ushort16);
+short16 __ovld __cnfn convert_short16_sat_rte(ushort16);
+short16 __ovld __cnfn convert_short16_rtz(ushort16);
+short16 __ovld __cnfn convert_short16_sat_rtz(ushort16);
+short16 __ovld __cnfn convert_short16_rtp(ushort16);
+short16 __ovld __cnfn convert_short16_sat_rtp(ushort16);
+short16 __ovld __cnfn convert_short16_rtn(ushort16);
+short16 __ovld __cnfn convert_short16_sat_rtn(ushort16);
+short16 __ovld __cnfn convert_short16(ushort16);
+short16 __ovld __cnfn convert_short16_sat(ushort16);
+short16 __ovld __cnfn convert_short16_rte(int16);
+short16 __ovld __cnfn convert_short16_sat_rte(int16);
+short16 __ovld __cnfn convert_short16_rtz(int16);
+short16 __ovld __cnfn convert_short16_sat_rtz(int16);
+short16 __ovld __cnfn convert_short16_rtp(int16);
+short16 __ovld __cnfn convert_short16_sat_rtp(int16);
+short16 __ovld __cnfn convert_short16_rtn(int16);
+short16 __ovld __cnfn convert_short16_sat_rtn(int16);
+short16 __ovld __cnfn convert_short16(int16);
+short16 __ovld __cnfn convert_short16_sat(int16);
+short16 __ovld __cnfn convert_short16_rte(uint16);
+short16 __ovld __cnfn convert_short16_sat_rte(uint16);
+short16 __ovld __cnfn convert_short16_rtz(uint16);
+short16 __ovld __cnfn convert_short16_sat_rtz(uint16);
+short16 __ovld __cnfn convert_short16_rtp(uint16);
+short16 __ovld __cnfn convert_short16_sat_rtp(uint16);
+short16 __ovld __cnfn convert_short16_rtn(uint16);
+short16 __ovld __cnfn convert_short16_sat_rtn(uint16);
+short16 __ovld __cnfn convert_short16(uint16);
+short16 __ovld __cnfn convert_short16_sat(uint16);
+short16 __ovld __cnfn convert_short16_rte(long16);
+short16 __ovld __cnfn convert_short16_sat_rte(long16);
+short16 __ovld __cnfn convert_short16_rtz(long16);
+short16 __ovld __cnfn convert_short16_sat_rtz(long16);
+short16 __ovld __cnfn convert_short16_rtp(long16);
+short16 __ovld __cnfn convert_short16_sat_rtp(long16);
+short16 __ovld __cnfn convert_short16_rtn(long16);
+short16 __ovld __cnfn convert_short16_sat_rtn(long16);
+short16 __ovld __cnfn convert_short16(long16);
+short16 __ovld __cnfn convert_short16_sat(long16);
+short16 __ovld __cnfn convert_short16_rte(ulong16);
+short16 __ovld __cnfn convert_short16_sat_rte(ulong16);
+short16 __ovld __cnfn convert_short16_rtz(ulong16);
+short16 __ovld __cnfn convert_short16_sat_rtz(ulong16);
+short16 __ovld __cnfn convert_short16_rtp(ulong16);
+short16 __ovld __cnfn convert_short16_sat_rtp(ulong16);
+short16 __ovld __cnfn convert_short16_rtn(ulong16);
+short16 __ovld __cnfn convert_short16_sat_rtn(ulong16);
+short16 __ovld __cnfn convert_short16(ulong16);
+short16 __ovld __cnfn convert_short16_sat(ulong16);
+short16 __ovld __cnfn convert_short16_rte(float16);
+short16 __ovld __cnfn convert_short16_sat_rte(float16);
+short16 __ovld __cnfn convert_short16_rtz(float16);
+short16 __ovld __cnfn convert_short16_sat_rtz(float16);
+short16 __ovld __cnfn convert_short16_rtp(float16);
+short16 __ovld __cnfn convert_short16_sat_rtp(float16);
+short16 __ovld __cnfn convert_short16_rtn(float16);
+short16 __ovld __cnfn convert_short16_sat_rtn(float16);
+short16 __ovld __cnfn convert_short16(float16);
+short16 __ovld __cnfn convert_short16_sat(float16);
+ushort16 __ovld __cnfn convert_ushort16_rte(char16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(char16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(char16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(char16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16);
+ushort16 __ovld __cnfn convert_ushort16(char16);
+ushort16 __ovld __cnfn convert_ushort16_sat(char16);
+ushort16 __ovld __cnfn convert_ushort16_rte(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16);
+ushort16 __ovld __cnfn convert_ushort16(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_sat(uchar16);
+ushort16 __ovld __cnfn convert_ushort16_rte(short16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(short16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(short16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(short16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16);
+ushort16 __ovld __cnfn convert_ushort16(short16);
+ushort16 __ovld __cnfn convert_ushort16_sat(short16);
+ushort16 __ovld __cnfn convert_ushort16_rte(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16);
+ushort16 __ovld __cnfn convert_ushort16(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_sat(ushort16);
+ushort16 __ovld __cnfn convert_ushort16_rte(int16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(int16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(int16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(int16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16);
+ushort16 __ovld __cnfn convert_ushort16(int16);
+ushort16 __ovld __cnfn convert_ushort16_sat(int16);
+ushort16 __ovld __cnfn convert_ushort16_rte(uint16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(uint16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(uint16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(uint16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16);
+ushort16 __ovld __cnfn convert_ushort16(uint16);
+ushort16 __ovld __cnfn convert_ushort16_sat(uint16);
+ushort16 __ovld __cnfn convert_ushort16_rte(long16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(long16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(long16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(long16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16);
+ushort16 __ovld __cnfn convert_ushort16(long16);
+ushort16 __ovld __cnfn convert_ushort16_sat(long16);
+ushort16 __ovld __cnfn convert_ushort16_rte(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16);
+ushort16 __ovld __cnfn convert_ushort16(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_sat(ulong16);
+ushort16 __ovld __cnfn convert_ushort16_rte(float16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(float16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(float16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(float16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16);
+ushort16 __ovld __cnfn convert_ushort16(float16);
+ushort16 __ovld __cnfn convert_ushort16_sat(float16);
+int16 __ovld __cnfn convert_int16_rte(char16);
+int16 __ovld __cnfn convert_int16_sat_rte(char16);
+int16 __ovld __cnfn convert_int16_rtz(char16);
+int16 __ovld __cnfn convert_int16_sat_rtz(char16);
+int16 __ovld __cnfn convert_int16_rtp(char16);
+int16 __ovld __cnfn convert_int16_sat_rtp(char16);
+int16 __ovld __cnfn convert_int16_rtn(char16);
+int16 __ovld __cnfn convert_int16_sat_rtn(char16);
+int16 __ovld __cnfn convert_int16(char16);
+int16 __ovld __cnfn convert_int16_sat(char16);
+int16 __ovld __cnfn convert_int16_rte(uchar16);
+int16 __ovld __cnfn convert_int16_sat_rte(uchar16);
+int16 __ovld __cnfn convert_int16_rtz(uchar16);
+int16 __ovld __cnfn convert_int16_sat_rtz(uchar16);
+int16 __ovld __cnfn convert_int16_rtp(uchar16);
+int16 __ovld __cnfn convert_int16_sat_rtp(uchar16);
+int16 __ovld __cnfn convert_int16_rtn(uchar16);
+int16 __ovld __cnfn convert_int16_sat_rtn(uchar16);
+int16 __ovld __cnfn convert_int16(uchar16);
+int16 __ovld __cnfn convert_int16_sat(uchar16);
+int16 __ovld __cnfn convert_int16_rte(short16);
+int16 __ovld __cnfn convert_int16_sat_rte(short16);
+int16 __ovld __cnfn convert_int16_rtz(short16);
+int16 __ovld __cnfn convert_int16_sat_rtz(short16);
+int16 __ovld __cnfn convert_int16_rtp(short16);
+int16 __ovld __cnfn convert_int16_sat_rtp(short16);
+int16 __ovld __cnfn convert_int16_rtn(short16);
+int16 __ovld __cnfn convert_int16_sat_rtn(short16);
+int16 __ovld __cnfn convert_int16(short16);
+int16 __ovld __cnfn convert_int16_sat(short16);
+int16 __ovld __cnfn convert_int16_rte(ushort16);
+int16 __ovld __cnfn convert_int16_sat_rte(ushort16);
+int16 __ovld __cnfn convert_int16_rtz(ushort16);
+int16 __ovld __cnfn convert_int16_sat_rtz(ushort16);
+int16 __ovld __cnfn convert_int16_rtp(ushort16);
+int16 __ovld __cnfn convert_int16_sat_rtp(ushort16);
+int16 __ovld __cnfn convert_int16_rtn(ushort16);
+int16 __ovld __cnfn convert_int16_sat_rtn(ushort16);
+int16 __ovld __cnfn convert_int16(ushort16);
+int16 __ovld __cnfn convert_int16_sat(ushort16);
+int16 __ovld __cnfn convert_int16_rte(int16);
+int16 __ovld __cnfn convert_int16_sat_rte(int16);
+int16 __ovld __cnfn convert_int16_rtz(int16);
+int16 __ovld __cnfn convert_int16_sat_rtz(int16);
+int16 __ovld __cnfn convert_int16_rtp(int16);
+int16 __ovld __cnfn convert_int16_sat_rtp(int16);
+int16 __ovld __cnfn convert_int16_rtn(int16);
+int16 __ovld __cnfn convert_int16_sat_rtn(int16);
+int16 __ovld __cnfn convert_int16(int16);
+int16 __ovld __cnfn convert_int16_sat(int16);
+int16 __ovld __cnfn convert_int16_rte(uint16);
+int16 __ovld __cnfn convert_int16_sat_rte(uint16);
+int16 __ovld __cnfn convert_int16_rtz(uint16);
+int16 __ovld __cnfn convert_int16_sat_rtz(uint16);
+int16 __ovld __cnfn convert_int16_rtp(uint16);
+int16 __ovld __cnfn convert_int16_sat_rtp(uint16);
+int16 __ovld __cnfn convert_int16_rtn(uint16);
+int16 __ovld __cnfn convert_int16_sat_rtn(uint16);
+int16 __ovld __cnfn convert_int16(uint16);
+int16 __ovld __cnfn convert_int16_sat(uint16);
+int16 __ovld __cnfn convert_int16_rte(long16);
+int16 __ovld __cnfn convert_int16_sat_rte(long16);
+int16 __ovld __cnfn convert_int16_rtz(long16);
+int16 __ovld __cnfn convert_int16_sat_rtz(long16);
+int16 __ovld __cnfn convert_int16_rtp(long16);
+int16 __ovld __cnfn convert_int16_sat_rtp(long16);
+int16 __ovld __cnfn convert_int16_rtn(long16);
+int16 __ovld __cnfn convert_int16_sat_rtn(long16);
+int16 __ovld __cnfn convert_int16(long16);
+int16 __ovld __cnfn convert_int16_sat(long16);
+int16 __ovld __cnfn convert_int16_rte(ulong16);
+int16 __ovld __cnfn convert_int16_sat_rte(ulong16);
+int16 __ovld __cnfn convert_int16_rtz(ulong16);
+int16 __ovld __cnfn convert_int16_sat_rtz(ulong16);
+int16 __ovld __cnfn convert_int16_rtp(ulong16);
+int16 __ovld __cnfn convert_int16_sat_rtp(ulong16);
+int16 __ovld __cnfn convert_int16_rtn(ulong16);
+int16 __ovld __cnfn convert_int16_sat_rtn(ulong16);
+int16 __ovld __cnfn convert_int16(ulong16);
+int16 __ovld __cnfn convert_int16_sat(ulong16);
+int16 __ovld __cnfn convert_int16_rte(float16);
+int16 __ovld __cnfn convert_int16_sat_rte(float16);
+int16 __ovld __cnfn convert_int16_rtz(float16);
+int16 __ovld __cnfn convert_int16_sat_rtz(float16);
+int16 __ovld __cnfn convert_int16_rtp(float16);
+int16 __ovld __cnfn convert_int16_sat_rtp(float16);
+int16 __ovld __cnfn convert_int16_rtn(float16);
+int16 __ovld __cnfn convert_int16_sat_rtn(float16);
+int16 __ovld __cnfn convert_int16(float16);
+int16 __ovld __cnfn convert_int16_sat(float16);
+uint16 __ovld __cnfn convert_uint16_rte(char16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(char16);
+uint16 __ovld __cnfn convert_uint16_rtz(char16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(char16);
+uint16 __ovld __cnfn convert_uint16_rtp(char16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(char16);
+uint16 __ovld __cnfn convert_uint16_rtn(char16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(char16);
+uint16 __ovld __cnfn convert_uint16(char16);
+uint16 __ovld __cnfn convert_uint16_sat(char16);
+uint16 __ovld __cnfn convert_uint16_rte(uchar16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16);
+uint16 __ovld __cnfn convert_uint16_rtz(uchar16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16);
+uint16 __ovld __cnfn convert_uint16_rtp(uchar16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16);
+uint16 __ovld __cnfn convert_uint16_rtn(uchar16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16);
+uint16 __ovld __cnfn convert_uint16(uchar16);
+uint16 __ovld __cnfn convert_uint16_sat(uchar16);
+uint16 __ovld __cnfn convert_uint16_rte(short16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(short16);
+uint16 __ovld __cnfn convert_uint16_rtz(short16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(short16);
+uint16 __ovld __cnfn convert_uint16_rtp(short16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(short16);
+uint16 __ovld __cnfn convert_uint16_rtn(short16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(short16);
+uint16 __ovld __cnfn convert_uint16(short16);
+uint16 __ovld __cnfn convert_uint16_sat(short16);
+uint16 __ovld __cnfn convert_uint16_rte(ushort16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16);
+uint16 __ovld __cnfn convert_uint16_rtz(ushort16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16);
+uint16 __ovld __cnfn convert_uint16_rtp(ushort16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16);
+uint16 __ovld __cnfn convert_uint16_rtn(ushort16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16);
+uint16 __ovld __cnfn convert_uint16(ushort16);
+uint16 __ovld __cnfn convert_uint16_sat(ushort16);
+uint16 __ovld __cnfn convert_uint16_rte(int16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(int16);
+uint16 __ovld __cnfn convert_uint16_rtz(int16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(int16);
+uint16 __ovld __cnfn convert_uint16_rtp(int16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(int16);
+uint16 __ovld __cnfn convert_uint16_rtn(int16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(int16);
+uint16 __ovld __cnfn convert_uint16(int16);
+uint16 __ovld __cnfn convert_uint16_sat(int16);
+uint16 __ovld __cnfn convert_uint16_rte(uint16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(uint16);
+uint16 __ovld __cnfn convert_uint16_rtz(uint16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16);
+uint16 __ovld __cnfn convert_uint16_rtp(uint16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16);
+uint16 __ovld __cnfn convert_uint16_rtn(uint16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16);
+uint16 __ovld __cnfn convert_uint16(uint16);
+uint16 __ovld __cnfn convert_uint16_sat(uint16);
+uint16 __ovld __cnfn convert_uint16_rte(long16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(long16);
+uint16 __ovld __cnfn convert_uint16_rtz(long16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(long16);
+uint16 __ovld __cnfn convert_uint16_rtp(long16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(long16);
+uint16 __ovld __cnfn convert_uint16_rtn(long16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(long16);
+uint16 __ovld __cnfn convert_uint16(long16);
+uint16 __ovld __cnfn convert_uint16_sat(long16);
+uint16 __ovld __cnfn convert_uint16_rte(ulong16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16);
+uint16 __ovld __cnfn convert_uint16_rtz(ulong16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16);
+uint16 __ovld __cnfn convert_uint16_rtp(ulong16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16);
+uint16 __ovld __cnfn convert_uint16_rtn(ulong16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16);
+uint16 __ovld __cnfn convert_uint16(ulong16);
+uint16 __ovld __cnfn convert_uint16_sat(ulong16);
+uint16 __ovld __cnfn convert_uint16_rte(float16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(float16);
+uint16 __ovld __cnfn convert_uint16_rtz(float16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(float16);
+uint16 __ovld __cnfn convert_uint16_rtp(float16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(float16);
+uint16 __ovld __cnfn convert_uint16_rtn(float16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(float16);
+uint16 __ovld __cnfn convert_uint16(float16);
+uint16 __ovld __cnfn convert_uint16_sat(float16);
+long16 __ovld __cnfn convert_long16_rte(char16);
+long16 __ovld __cnfn convert_long16_sat_rte(char16);
+long16 __ovld __cnfn convert_long16_rtz(char16);
+long16 __ovld __cnfn convert_long16_sat_rtz(char16);
+long16 __ovld __cnfn convert_long16_rtp(char16);
+long16 __ovld __cnfn convert_long16_sat_rtp(char16);
+long16 __ovld __cnfn convert_long16_rtn(char16);
+long16 __ovld __cnfn convert_long16_sat_rtn(char16);
+long16 __ovld __cnfn convert_long16(char16);
+long16 __ovld __cnfn convert_long16_sat(char16);
+long16 __ovld __cnfn convert_long16_rte(uchar16);
+long16 __ovld __cnfn convert_long16_sat_rte(uchar16);
+long16 __ovld __cnfn convert_long16_rtz(uchar16);
+long16 __ovld __cnfn convert_long16_sat_rtz(uchar16);
+long16 __ovld __cnfn convert_long16_rtp(uchar16);
+long16 __ovld __cnfn convert_long16_sat_rtp(uchar16);
+long16 __ovld __cnfn convert_long16_rtn(uchar16);
+long16 __ovld __cnfn convert_long16_sat_rtn(uchar16);
+long16 __ovld __cnfn convert_long16(uchar16);
+long16 __ovld __cnfn convert_long16_sat(uchar16);
+long16 __ovld __cnfn convert_long16_rte(short16);
+long16 __ovld __cnfn convert_long16_sat_rte(short16);
+long16 __ovld __cnfn convert_long16_rtz(short16);
+long16 __ovld __cnfn convert_long16_sat_rtz(short16);
+long16 __ovld __cnfn convert_long16_rtp(short16);
+long16 __ovld __cnfn convert_long16_sat_rtp(short16);
+long16 __ovld __cnfn convert_long16_rtn(short16);
+long16 __ovld __cnfn convert_long16_sat_rtn(short16);
+long16 __ovld __cnfn convert_long16(short16);
+long16 __ovld __cnfn convert_long16_sat(short16);
+long16 __ovld __cnfn convert_long16_rte(ushort16);
+long16 __ovld __cnfn convert_long16_sat_rte(ushort16);
+long16 __ovld __cnfn convert_long16_rtz(ushort16);
+long16 __ovld __cnfn convert_long16_sat_rtz(ushort16);
+long16 __ovld __cnfn convert_long16_rtp(ushort16);
+long16 __ovld __cnfn convert_long16_sat_rtp(ushort16);
+long16 __ovld __cnfn convert_long16_rtn(ushort16);
+long16 __ovld __cnfn convert_long16_sat_rtn(ushort16);
+long16 __ovld __cnfn convert_long16(ushort16);
+long16 __ovld __cnfn convert_long16_sat(ushort16);
+long16 __ovld __cnfn convert_long16_rte(int16);
+long16 __ovld __cnfn convert_long16_sat_rte(int16);
+long16 __ovld __cnfn convert_long16_rtz(int16);
+long16 __ovld __cnfn convert_long16_sat_rtz(int16);
+long16 __ovld __cnfn convert_long16_rtp(int16);
+long16 __ovld __cnfn convert_long16_sat_rtp(int16);
+long16 __ovld __cnfn convert_long16_rtn(int16);
+long16 __ovld __cnfn convert_long16_sat_rtn(int16);
+long16 __ovld __cnfn convert_long16(int16);
+long16 __ovld __cnfn convert_long16_sat(int16);
+long16 __ovld __cnfn convert_long16_rte(uint16);
+long16 __ovld __cnfn convert_long16_sat_rte(uint16);
+long16 __ovld __cnfn convert_long16_rtz(uint16);
+long16 __ovld __cnfn convert_long16_sat_rtz(uint16);
+long16 __ovld __cnfn convert_long16_rtp(uint16);
+long16 __ovld __cnfn convert_long16_sat_rtp(uint16);
+long16 __ovld __cnfn convert_long16_rtn(uint16);
+long16 __ovld __cnfn convert_long16_sat_rtn(uint16);
+long16 __ovld __cnfn convert_long16(uint16);
+long16 __ovld __cnfn convert_long16_sat(uint16);
+long16 __ovld __cnfn convert_long16_rte(long16);
+long16 __ovld __cnfn convert_long16_sat_rte(long16);
+long16 __ovld __cnfn convert_long16_rtz(long16);
+long16 __ovld __cnfn convert_long16_sat_rtz(long16);
+long16 __ovld __cnfn convert_long16_rtp(long16);
+long16 __ovld __cnfn convert_long16_sat_rtp(long16);
+long16 __ovld __cnfn convert_long16_rtn(long16);
+long16 __ovld __cnfn convert_long16_sat_rtn(long16);
+long16 __ovld __cnfn convert_long16(long16);
+long16 __ovld __cnfn convert_long16_sat(long16);
+long16 __ovld __cnfn convert_long16_rte(ulong16);
+long16 __ovld __cnfn convert_long16_sat_rte(ulong16);
+long16 __ovld __cnfn convert_long16_rtz(ulong16);
+long16 __ovld __cnfn convert_long16_sat_rtz(ulong16);
+long16 __ovld __cnfn convert_long16_rtp(ulong16);
+long16 __ovld __cnfn convert_long16_sat_rtp(ulong16);
+long16 __ovld __cnfn convert_long16_rtn(ulong16);
+long16 __ovld __cnfn convert_long16_sat_rtn(ulong16);
+long16 __ovld __cnfn convert_long16(ulong16);
+long16 __ovld __cnfn convert_long16_sat(ulong16);
+long16 __ovld __cnfn convert_long16_rte(float16);
+long16 __ovld __cnfn convert_long16_sat_rte(float16);
+long16 __ovld __cnfn convert_long16_rtz(float16);
+long16 __ovld __cnfn convert_long16_sat_rtz(float16);
+long16 __ovld __cnfn convert_long16_rtp(float16);
+long16 __ovld __cnfn convert_long16_sat_rtp(float16);
+long16 __ovld __cnfn convert_long16_rtn(float16);
+long16 __ovld __cnfn convert_long16_sat_rtn(float16);
+long16 __ovld __cnfn convert_long16(float16);
+long16 __ovld __cnfn convert_long16_sat(float16);
+ulong16 __ovld __cnfn convert_ulong16_rte(char16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(char16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(char16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(char16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16);
+ulong16 __ovld __cnfn convert_ulong16(char16);
+ulong16 __ovld __cnfn convert_ulong16_sat(char16);
+ulong16 __ovld __cnfn convert_ulong16_rte(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16);
+ulong16 __ovld __cnfn convert_ulong16(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_sat(uchar16);
+ulong16 __ovld __cnfn convert_ulong16_rte(short16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(short16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(short16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(short16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16);
+ulong16 __ovld __cnfn convert_ulong16(short16);
+ulong16 __ovld __cnfn convert_ulong16_sat(short16);
+ulong16 __ovld __cnfn convert_ulong16_rte(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16);
+ulong16 __ovld __cnfn convert_ulong16(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_sat(ushort16);
+ulong16 __ovld __cnfn convert_ulong16_rte(int16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(int16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(int16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(int16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16);
+ulong16 __ovld __cnfn convert_ulong16(int16);
+ulong16 __ovld __cnfn convert_ulong16_sat(int16);
+ulong16 __ovld __cnfn convert_ulong16_rte(uint16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(uint16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(uint16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(uint16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16);
+ulong16 __ovld __cnfn convert_ulong16(uint16);
+ulong16 __ovld __cnfn convert_ulong16_sat(uint16);
+ulong16 __ovld __cnfn convert_ulong16_rte(long16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(long16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(long16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(long16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16);
+ulong16 __ovld __cnfn convert_ulong16(long16);
+ulong16 __ovld __cnfn convert_ulong16_sat(long16);
+ulong16 __ovld __cnfn convert_ulong16_rte(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16);
+ulong16 __ovld __cnfn convert_ulong16(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_sat(ulong16);
+ulong16 __ovld __cnfn convert_ulong16_rte(float16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(float16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(float16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(float16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16);
+ulong16 __ovld __cnfn convert_ulong16(float16);
+ulong16 __ovld __cnfn convert_ulong16_sat(float16);
+float16 __ovld __cnfn convert_float16_rte(char16);
+float16 __ovld __cnfn convert_float16_rtz(char16);
+float16 __ovld __cnfn convert_float16_rtp(char16);
+float16 __ovld __cnfn convert_float16_rtn(char16);
+float16 __ovld __cnfn convert_float16(char16);
+float16 __ovld __cnfn convert_float16_rte(uchar16);
+float16 __ovld __cnfn convert_float16_rtz(uchar16);
+float16 __ovld __cnfn convert_float16_rtp(uchar16);
+float16 __ovld __cnfn convert_float16_rtn(uchar16);
+float16 __ovld __cnfn convert_float16(uchar16);
+float16 __ovld __cnfn convert_float16_rte(short16);
+float16 __ovld __cnfn convert_float16_rtz(short16);
+float16 __ovld __cnfn convert_float16_rtp(short16);
+float16 __ovld __cnfn convert_float16_rtn(short16);
+float16 __ovld __cnfn convert_float16(short16);
+float16 __ovld __cnfn convert_float16_rte(ushort16);
+float16 __ovld __cnfn convert_float16_rtz(ushort16);
+float16 __ovld __cnfn convert_float16_rtp(ushort16);
+float16 __ovld __cnfn convert_float16_rtn(ushort16);
+float16 __ovld __cnfn convert_float16(ushort16);
+float16 __ovld __cnfn convert_float16_rte(int16);
+float16 __ovld __cnfn convert_float16_rtz(int16);
+float16 __ovld __cnfn convert_float16_rtp(int16);
+float16 __ovld __cnfn convert_float16_rtn(int16);
+float16 __ovld __cnfn convert_float16(int16);
+float16 __ovld __cnfn convert_float16_rte(uint16);
+float16 __ovld __cnfn convert_float16_rtz(uint16);
+float16 __ovld __cnfn convert_float16_rtp(uint16);
+float16 __ovld __cnfn convert_float16_rtn(uint16);
+float16 __ovld __cnfn convert_float16(uint16);
+float16 __ovld __cnfn convert_float16_rte(long16);
+float16 __ovld __cnfn convert_float16_rtz(long16);
+float16 __ovld __cnfn convert_float16_rtp(long16);
+float16 __ovld __cnfn convert_float16_rtn(long16);
+float16 __ovld __cnfn convert_float16(long16);
+float16 __ovld __cnfn convert_float16_rte(ulong16);
+float16 __ovld __cnfn convert_float16_rtz(ulong16);
+float16 __ovld __cnfn convert_float16_rtp(ulong16);
+float16 __ovld __cnfn convert_float16_rtn(ulong16);
+float16 __ovld __cnfn convert_float16(ulong16);
+float16 __ovld __cnfn convert_float16_rte(float16);
+float16 __ovld __cnfn convert_float16_rtz(float16);
+float16 __ovld __cnfn convert_float16_rtp(float16);
+float16 __ovld __cnfn convert_float16_rtn(float16);
+float16 __ovld __cnfn convert_float16(float16);
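+
+// Illustrative usage sketch (not part of the header): the builtins above follow
+// the naming scheme convert_<destType>[_sat][_<rounding>], where the optional
+// _sat suffix clamps out-of-range values to the destination type's range, and
+// the rounding suffixes select round-to-nearest-even (_rte), toward zero
+// (_rtz), toward +infinity (_rtp), or toward -infinity (_rtn). A hypothetical
+// kernel using one of the declared overloads:
+//
+//   __kernel void scale(__global const float4 *in, __global uchar4 *out) {
+//     size_t i = get_global_id(0);
+//     // Saturating conversion: values outside [0, 255] clamp instead of
+//     // wrapping, and _rte rounds to the nearest even integer.
+//     out[i] = convert_uchar4_sat_rte(in[i] * 255.0f);
+//   }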
+
+// Conversions with double data type parameters or return values.
+
+#ifdef cl_khr_fp64
+char __ovld __cnfn convert_char(double);
+char __ovld __cnfn convert_char_rte(double);
+char __ovld __cnfn convert_char_rtn(double);
+char __ovld __cnfn convert_char_rtp(double);
+char __ovld __cnfn convert_char_rtz(double);
+char __ovld __cnfn convert_char_sat(double);
+char __ovld __cnfn convert_char_sat_rte(double);
+char __ovld __cnfn convert_char_sat_rtn(double);
+char __ovld __cnfn convert_char_sat_rtp(double);
+char __ovld __cnfn convert_char_sat_rtz(double);
+char2 __ovld __cnfn convert_char2(double2);
+char2 __ovld __cnfn convert_char2_rte(double2);
+char2 __ovld __cnfn convert_char2_rtn(double2);
+char2 __ovld __cnfn convert_char2_rtp(double2);
+char2 __ovld __cnfn convert_char2_rtz(double2);
+char2 __ovld __cnfn convert_char2_sat(double2);
+char2 __ovld __cnfn convert_char2_sat_rte(double2);
+char2 __ovld __cnfn convert_char2_sat_rtn(double2);
+char2 __ovld __cnfn convert_char2_sat_rtp(double2);
+char2 __ovld __cnfn convert_char2_sat_rtz(double2);
+char3 __ovld __cnfn convert_char3(double3);
+char3 __ovld __cnfn convert_char3_rte(double3);
+char3 __ovld __cnfn convert_char3_rtn(double3);
+char3 __ovld __cnfn convert_char3_rtp(double3);
+char3 __ovld __cnfn convert_char3_rtz(double3);
+char3 __ovld __cnfn convert_char3_sat(double3);
+char3 __ovld __cnfn convert_char3_sat_rte(double3);
+char3 __ovld __cnfn convert_char3_sat_rtn(double3);
+char3 __ovld __cnfn convert_char3_sat_rtp(double3);
+char3 __ovld __cnfn convert_char3_sat_rtz(double3);
+char4 __ovld __cnfn convert_char4(double4);
+char4 __ovld __cnfn convert_char4_rte(double4);
+char4 __ovld __cnfn convert_char4_rtn(double4);
+char4 __ovld __cnfn convert_char4_rtp(double4);
+char4 __ovld __cnfn convert_char4_rtz(double4);
+char4 __ovld __cnfn convert_char4_sat(double4);
+char4 __ovld __cnfn convert_char4_sat_rte(double4);
+char4 __ovld __cnfn convert_char4_sat_rtn(double4);
+char4 __ovld __cnfn convert_char4_sat_rtp(double4);
+char4 __ovld __cnfn convert_char4_sat_rtz(double4);
+char8 __ovld __cnfn convert_char8(double8);
+char8 __ovld __cnfn convert_char8_rte(double8);
+char8 __ovld __cnfn convert_char8_rtn(double8);
+char8 __ovld __cnfn convert_char8_rtp(double8);
+char8 __ovld __cnfn convert_char8_rtz(double8);
+char8 __ovld __cnfn convert_char8_sat(double8);
+char8 __ovld __cnfn convert_char8_sat_rte(double8);
+char8 __ovld __cnfn convert_char8_sat_rtn(double8);
+char8 __ovld __cnfn convert_char8_sat_rtp(double8);
+char8 __ovld __cnfn convert_char8_sat_rtz(double8);
+char16 __ovld __cnfn convert_char16(double16);
+char16 __ovld __cnfn convert_char16_rte(double16);
+char16 __ovld __cnfn convert_char16_rtn(double16);
+char16 __ovld __cnfn convert_char16_rtp(double16);
+char16 __ovld __cnfn convert_char16_rtz(double16);
+char16 __ovld __cnfn convert_char16_sat(double16);
+char16 __ovld __cnfn convert_char16_sat_rte(double16);
+char16 __ovld __cnfn convert_char16_sat_rtn(double16);
+char16 __ovld __cnfn convert_char16_sat_rtp(double16);
+char16 __ovld __cnfn convert_char16_sat_rtz(double16);
+
+uchar __ovld __cnfn convert_uchar(double);
+uchar __ovld __cnfn convert_uchar_rte(double);
+uchar __ovld __cnfn convert_uchar_rtn(double);
+uchar __ovld __cnfn convert_uchar_rtp(double);
+uchar __ovld __cnfn convert_uchar_rtz(double);
+uchar __ovld __cnfn convert_uchar_sat(double);
+uchar __ovld __cnfn convert_uchar_sat_rte(double);
+uchar __ovld __cnfn convert_uchar_sat_rtn(double);
+uchar __ovld __cnfn convert_uchar_sat_rtp(double);
+uchar __ovld __cnfn convert_uchar_sat_rtz(double);
+uchar2 __ovld __cnfn convert_uchar2(double2);
+uchar2 __ovld __cnfn convert_uchar2_rte(double2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(double2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(double2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(double2);
+uchar2 __ovld __cnfn convert_uchar2_sat(double2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2);
+uchar3 __ovld __cnfn convert_uchar3(double3);
+uchar3 __ovld __cnfn convert_uchar3_rte(double3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(double3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(double3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(double3);
+uchar3 __ovld __cnfn convert_uchar3_sat(double3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3);
+uchar4 __ovld __cnfn convert_uchar4(double4);
+uchar4 __ovld __cnfn convert_uchar4_rte(double4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(double4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(double4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(double4);
+uchar4 __ovld __cnfn convert_uchar4_sat(double4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4);
+uchar8 __ovld __cnfn convert_uchar8(double8);
+uchar8 __ovld __cnfn convert_uchar8_rte(double8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(double8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(double8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(double8);
+uchar8 __ovld __cnfn convert_uchar8_sat(double8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8);
+uchar16 __ovld __cnfn convert_uchar16(double16);
+uchar16 __ovld __cnfn convert_uchar16_rte(double16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(double16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(double16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(double16);
+uchar16 __ovld __cnfn convert_uchar16_sat(double16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16);
+
+short __ovld __cnfn convert_short(double);
+short __ovld __cnfn convert_short_rte(double);
+short __ovld __cnfn convert_short_rtn(double);
+short __ovld __cnfn convert_short_rtp(double);
+short __ovld __cnfn convert_short_rtz(double);
+short __ovld __cnfn convert_short_sat(double);
+short __ovld __cnfn convert_short_sat_rte(double);
+short __ovld __cnfn convert_short_sat_rtn(double);
+short __ovld __cnfn convert_short_sat_rtp(double);
+short __ovld __cnfn convert_short_sat_rtz(double);
+short2 __ovld __cnfn convert_short2(double2);
+short2 __ovld __cnfn convert_short2_rte(double2);
+short2 __ovld __cnfn convert_short2_rtn(double2);
+short2 __ovld __cnfn convert_short2_rtp(double2);
+short2 __ovld __cnfn convert_short2_rtz(double2);
+short2 __ovld __cnfn convert_short2_sat(double2);
+short2 __ovld __cnfn convert_short2_sat_rte(double2);
+short2 __ovld __cnfn convert_short2_sat_rtn(double2);
+short2 __ovld __cnfn convert_short2_sat_rtp(double2);
+short2 __ovld __cnfn convert_short2_sat_rtz(double2);
+short3 __ovld __cnfn convert_short3(double3);
+short3 __ovld __cnfn convert_short3_rte(double3);
+short3 __ovld __cnfn convert_short3_rtn(double3);
+short3 __ovld __cnfn convert_short3_rtp(double3);
+short3 __ovld __cnfn convert_short3_rtz(double3);
+short3 __ovld __cnfn convert_short3_sat(double3);
+short3 __ovld __cnfn convert_short3_sat_rte(double3);
+short3 __ovld __cnfn convert_short3_sat_rtn(double3);
+short3 __ovld __cnfn convert_short3_sat_rtp(double3);
+short3 __ovld __cnfn convert_short3_sat_rtz(double3);
+short4 __ovld __cnfn convert_short4(double4);
+short4 __ovld __cnfn convert_short4_rte(double4);
+short4 __ovld __cnfn convert_short4_rtn(double4);
+short4 __ovld __cnfn convert_short4_rtp(double4);
+short4 __ovld __cnfn convert_short4_rtz(double4);
+short4 __ovld __cnfn convert_short4_sat(double4);
+short4 __ovld __cnfn convert_short4_sat_rte(double4);
+short4 __ovld __cnfn convert_short4_sat_rtn(double4);
+short4 __ovld __cnfn convert_short4_sat_rtp(double4);
+short4 __ovld __cnfn convert_short4_sat_rtz(double4);
+short8 __ovld __cnfn convert_short8(double8);
+short8 __ovld __cnfn convert_short8_rte(double8);
+short8 __ovld __cnfn convert_short8_rtn(double8);
+short8 __ovld __cnfn convert_short8_rtp(double8);
+short8 __ovld __cnfn convert_short8_rtz(double8);
+short8 __ovld __cnfn convert_short8_sat(double8);
+short8 __ovld __cnfn convert_short8_sat_rte(double8);
+short8 __ovld __cnfn convert_short8_sat_rtn(double8);
+short8 __ovld __cnfn convert_short8_sat_rtp(double8);
+short8 __ovld __cnfn convert_short8_sat_rtz(double8);
+short16 __ovld __cnfn convert_short16(double16);
+short16 __ovld __cnfn convert_short16_rte(double16);
+short16 __ovld __cnfn convert_short16_rtn(double16);
+short16 __ovld __cnfn convert_short16_rtp(double16);
+short16 __ovld __cnfn convert_short16_rtz(double16);
+short16 __ovld __cnfn convert_short16_sat(double16);
+short16 __ovld __cnfn convert_short16_sat_rte(double16);
+short16 __ovld __cnfn convert_short16_sat_rtn(double16);
+short16 __ovld __cnfn convert_short16_sat_rtp(double16);
+short16 __ovld __cnfn convert_short16_sat_rtz(double16);
+
+ushort __ovld __cnfn convert_ushort(double);
+ushort __ovld __cnfn convert_ushort_rte(double);
+ushort __ovld __cnfn convert_ushort_rtn(double);
+ushort __ovld __cnfn convert_ushort_rtp(double);
+ushort __ovld __cnfn convert_ushort_rtz(double);
+ushort __ovld __cnfn convert_ushort_sat(double);
+ushort __ovld __cnfn convert_ushort_sat_rte(double);
+ushort __ovld __cnfn convert_ushort_sat_rtn(double);
+ushort __ovld __cnfn convert_ushort_sat_rtp(double);
+ushort __ovld __cnfn convert_ushort_sat_rtz(double);
+ushort2 __ovld __cnfn convert_ushort2(double2);
+ushort2 __ovld __cnfn convert_ushort2_rte(double2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(double2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(double2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(double2);
+ushort2 __ovld __cnfn convert_ushort2_sat(double2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2);
+ushort3 __ovld __cnfn convert_ushort3(double3);
+ushort3 __ovld __cnfn convert_ushort3_rte(double3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(double3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(double3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(double3);
+ushort3 __ovld __cnfn convert_ushort3_sat(double3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3);
+ushort4 __ovld __cnfn convert_ushort4(double4);
+ushort4 __ovld __cnfn convert_ushort4_rte(double4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(double4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(double4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(double4);
+ushort4 __ovld __cnfn convert_ushort4_sat(double4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4);
+ushort8 __ovld __cnfn convert_ushort8(double8);
+ushort8 __ovld __cnfn convert_ushort8_rte(double8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(double8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(double8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(double8);
+ushort8 __ovld __cnfn convert_ushort8_sat(double8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8);
+ushort16 __ovld __cnfn convert_ushort16(double16);
+ushort16 __ovld __cnfn convert_ushort16_rte(double16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(double16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(double16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(double16);
+ushort16 __ovld __cnfn convert_ushort16_sat(double16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16);
+
+int __ovld __cnfn convert_int(double);
+int __ovld __cnfn convert_int_rte(double);
+int __ovld __cnfn convert_int_rtn(double);
+int __ovld __cnfn convert_int_rtp(double);
+int __ovld __cnfn convert_int_rtz(double);
+int __ovld __cnfn convert_int_sat(double);
+int __ovld __cnfn convert_int_sat_rte(double);
+int __ovld __cnfn convert_int_sat_rtn(double);
+int __ovld __cnfn convert_int_sat_rtp(double);
+int __ovld __cnfn convert_int_sat_rtz(double);
+int2 __ovld __cnfn convert_int2(double2);
+int2 __ovld __cnfn convert_int2_rte(double2);
+int2 __ovld __cnfn convert_int2_rtn(double2);
+int2 __ovld __cnfn convert_int2_rtp(double2);
+int2 __ovld __cnfn convert_int2_rtz(double2);
+int2 __ovld __cnfn convert_int2_sat(double2);
+int2 __ovld __cnfn convert_int2_sat_rte(double2);
+int2 __ovld __cnfn convert_int2_sat_rtn(double2);
+int2 __ovld __cnfn convert_int2_sat_rtp(double2);
+int2 __ovld __cnfn convert_int2_sat_rtz(double2);
+int3 __ovld __cnfn convert_int3(double3);
+int3 __ovld __cnfn convert_int3_rte(double3);
+int3 __ovld __cnfn convert_int3_rtn(double3);
+int3 __ovld __cnfn convert_int3_rtp(double3);
+int3 __ovld __cnfn convert_int3_rtz(double3);
+int3 __ovld __cnfn convert_int3_sat(double3);
+int3 __ovld __cnfn convert_int3_sat_rte(double3);
+int3 __ovld __cnfn convert_int3_sat_rtn(double3);
+int3 __ovld __cnfn convert_int3_sat_rtp(double3);
+int3 __ovld __cnfn convert_int3_sat_rtz(double3);
+int4 __ovld __cnfn convert_int4(double4);
+int4 __ovld __cnfn convert_int4_rte(double4);
+int4 __ovld __cnfn convert_int4_rtn(double4);
+int4 __ovld __cnfn convert_int4_rtp(double4);
+int4 __ovld __cnfn convert_int4_rtz(double4);
+int4 __ovld __cnfn convert_int4_sat(double4);
+int4 __ovld __cnfn convert_int4_sat_rte(double4);
+int4 __ovld __cnfn convert_int4_sat_rtn(double4);
+int4 __ovld __cnfn convert_int4_sat_rtp(double4);
+int4 __ovld __cnfn convert_int4_sat_rtz(double4);
+int8 __ovld __cnfn convert_int8(double8);
+int8 __ovld __cnfn convert_int8_rte(double8);
+int8 __ovld __cnfn convert_int8_rtn(double8);
+int8 __ovld __cnfn convert_int8_rtp(double8);
+int8 __ovld __cnfn convert_int8_rtz(double8);
+int8 __ovld __cnfn convert_int8_sat(double8);
+int8 __ovld __cnfn convert_int8_sat_rte(double8);
+int8 __ovld __cnfn convert_int8_sat_rtn(double8);
+int8 __ovld __cnfn convert_int8_sat_rtp(double8);
+int8 __ovld __cnfn convert_int8_sat_rtz(double8);
+int16 __ovld __cnfn convert_int16(double16);
+int16 __ovld __cnfn convert_int16_rte(double16);
+int16 __ovld __cnfn convert_int16_rtn(double16);
+int16 __ovld __cnfn convert_int16_rtp(double16);
+int16 __ovld __cnfn convert_int16_rtz(double16);
+int16 __ovld __cnfn convert_int16_sat(double16);
+int16 __ovld __cnfn convert_int16_sat_rte(double16);
+int16 __ovld __cnfn convert_int16_sat_rtn(double16);
+int16 __ovld __cnfn convert_int16_sat_rtp(double16);
+int16 __ovld __cnfn convert_int16_sat_rtz(double16);
+
+uint __ovld __cnfn convert_uint(double);
+uint __ovld __cnfn convert_uint_rte(double);
+uint __ovld __cnfn convert_uint_rtn(double);
+uint __ovld __cnfn convert_uint_rtp(double);
+uint __ovld __cnfn convert_uint_rtz(double);
+uint __ovld __cnfn convert_uint_sat(double);
+uint __ovld __cnfn convert_uint_sat_rte(double);
+uint __ovld __cnfn convert_uint_sat_rtn(double);
+uint __ovld __cnfn convert_uint_sat_rtp(double);
+uint __ovld __cnfn convert_uint_sat_rtz(double);
+uint2 __ovld __cnfn convert_uint2(double2);
+uint2 __ovld __cnfn convert_uint2_rte(double2);
+uint2 __ovld __cnfn convert_uint2_rtn(double2);
+uint2 __ovld __cnfn convert_uint2_rtp(double2);
+uint2 __ovld __cnfn convert_uint2_rtz(double2);
+uint2 __ovld __cnfn convert_uint2_sat(double2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(double2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(double2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(double2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(double2);
+uint3 __ovld __cnfn convert_uint3(double3);
+uint3 __ovld __cnfn convert_uint3_rte(double3);
+uint3 __ovld __cnfn convert_uint3_rtn(double3);
+uint3 __ovld __cnfn convert_uint3_rtp(double3);
+uint3 __ovld __cnfn convert_uint3_rtz(double3);
+uint3 __ovld __cnfn convert_uint3_sat(double3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(double3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(double3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(double3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(double3);
+uint4 __ovld __cnfn convert_uint4(double4);
+uint4 __ovld __cnfn convert_uint4_rte(double4);
+uint4 __ovld __cnfn convert_uint4_rtn(double4);
+uint4 __ovld __cnfn convert_uint4_rtp(double4);
+uint4 __ovld __cnfn convert_uint4_rtz(double4);
+uint4 __ovld __cnfn convert_uint4_sat(double4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(double4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(double4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(double4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(double4);
+uint8 __ovld __cnfn convert_uint8(double8);
+uint8 __ovld __cnfn convert_uint8_rte(double8);
+uint8 __ovld __cnfn convert_uint8_rtn(double8);
+uint8 __ovld __cnfn convert_uint8_rtp(double8);
+uint8 __ovld __cnfn convert_uint8_rtz(double8);
+uint8 __ovld __cnfn convert_uint8_sat(double8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(double8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(double8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(double8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(double8);
+uint16 __ovld __cnfn convert_uint16(double16);
+uint16 __ovld __cnfn convert_uint16_rte(double16);
+uint16 __ovld __cnfn convert_uint16_rtn(double16);
+uint16 __ovld __cnfn convert_uint16_rtp(double16);
+uint16 __ovld __cnfn convert_uint16_rtz(double16);
+uint16 __ovld __cnfn convert_uint16_sat(double16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(double16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(double16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(double16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(double16);
+
+long __ovld __cnfn convert_long(double);
+long __ovld __cnfn convert_long_rte(double);
+long __ovld __cnfn convert_long_rtn(double);
+long __ovld __cnfn convert_long_rtp(double);
+long __ovld __cnfn convert_long_rtz(double);
+long __ovld __cnfn convert_long_sat(double);
+long __ovld __cnfn convert_long_sat_rte(double);
+long __ovld __cnfn convert_long_sat_rtn(double);
+long __ovld __cnfn convert_long_sat_rtp(double);
+long __ovld __cnfn convert_long_sat_rtz(double);
+long2 __ovld __cnfn convert_long2(double2);
+long2 __ovld __cnfn convert_long2_rte(double2);
+long2 __ovld __cnfn convert_long2_rtn(double2);
+long2 __ovld __cnfn convert_long2_rtp(double2);
+long2 __ovld __cnfn convert_long2_rtz(double2);
+long2 __ovld __cnfn convert_long2_sat(double2);
+long2 __ovld __cnfn convert_long2_sat_rte(double2);
+long2 __ovld __cnfn convert_long2_sat_rtn(double2);
+long2 __ovld __cnfn convert_long2_sat_rtp(double2);
+long2 __ovld __cnfn convert_long2_sat_rtz(double2);
+long3 __ovld __cnfn convert_long3(double3);
+long3 __ovld __cnfn convert_long3_rte(double3);
+long3 __ovld __cnfn convert_long3_rtn(double3);
+long3 __ovld __cnfn convert_long3_rtp(double3);
+long3 __ovld __cnfn convert_long3_rtz(double3);
+long3 __ovld __cnfn convert_long3_sat(double3);
+long3 __ovld __cnfn convert_long3_sat_rte(double3);
+long3 __ovld __cnfn convert_long3_sat_rtn(double3);
+long3 __ovld __cnfn convert_long3_sat_rtp(double3);
+long3 __ovld __cnfn convert_long3_sat_rtz(double3);
+long4 __ovld __cnfn convert_long4(double4);
+long4 __ovld __cnfn convert_long4_rte(double4);
+long4 __ovld __cnfn convert_long4_rtn(double4);
+long4 __ovld __cnfn convert_long4_rtp(double4);
+long4 __ovld __cnfn convert_long4_rtz(double4);
+long4 __ovld __cnfn convert_long4_sat(double4);
+long4 __ovld __cnfn convert_long4_sat_rte(double4);
+long4 __ovld __cnfn convert_long4_sat_rtn(double4);
+long4 __ovld __cnfn convert_long4_sat_rtp(double4);
+long4 __ovld __cnfn convert_long4_sat_rtz(double4);
+long8 __ovld __cnfn convert_long8(double8);
+long8 __ovld __cnfn convert_long8_rte(double8);
+long8 __ovld __cnfn convert_long8_rtn(double8);
+long8 __ovld __cnfn convert_long8_rtp(double8);
+long8 __ovld __cnfn convert_long8_rtz(double8);
+long8 __ovld __cnfn convert_long8_sat(double8);
+long8 __ovld __cnfn convert_long8_sat_rte(double8);
+long8 __ovld __cnfn convert_long8_sat_rtn(double8);
+long8 __ovld __cnfn convert_long8_sat_rtp(double8);
+long8 __ovld __cnfn convert_long8_sat_rtz(double8);
+long16 __ovld __cnfn convert_long16(double16);
+long16 __ovld __cnfn convert_long16_rte(double16);
+long16 __ovld __cnfn convert_long16_rtn(double16);
+long16 __ovld __cnfn convert_long16_rtp(double16);
+long16 __ovld __cnfn convert_long16_rtz(double16);
+long16 __ovld __cnfn convert_long16_sat(double16);
+long16 __ovld __cnfn convert_long16_sat_rte(double16);
+long16 __ovld __cnfn convert_long16_sat_rtn(double16);
+long16 __ovld __cnfn convert_long16_sat_rtp(double16);
+long16 __ovld __cnfn convert_long16_sat_rtz(double16);
+
+ulong __ovld __cnfn convert_ulong(double);
+ulong __ovld __cnfn convert_ulong_rte(double);
+ulong __ovld __cnfn convert_ulong_rtn(double);
+ulong __ovld __cnfn convert_ulong_rtp(double);
+ulong __ovld __cnfn convert_ulong_rtz(double);
+ulong __ovld __cnfn convert_ulong_sat(double);
+ulong __ovld __cnfn convert_ulong_sat_rte(double);
+ulong __ovld __cnfn convert_ulong_sat_rtn(double);
+ulong __ovld __cnfn convert_ulong_sat_rtp(double);
+ulong __ovld __cnfn convert_ulong_sat_rtz(double);
+ulong2 __ovld __cnfn convert_ulong2(double2);
+ulong2 __ovld __cnfn convert_ulong2_rte(double2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(double2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(double2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(double2);
+ulong2 __ovld __cnfn convert_ulong2_sat(double2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2);
+ulong3 __ovld __cnfn convert_ulong3(double3);
+ulong3 __ovld __cnfn convert_ulong3_rte(double3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(double3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(double3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(double3);
+ulong3 __ovld __cnfn convert_ulong3_sat(double3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3);
+ulong4 __ovld __cnfn convert_ulong4(double4);
+ulong4 __ovld __cnfn convert_ulong4_rte(double4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(double4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(double4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(double4);
+ulong4 __ovld __cnfn convert_ulong4_sat(double4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4);
+ulong8 __ovld __cnfn convert_ulong8(double8);
+ulong8 __ovld __cnfn convert_ulong8_rte(double8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(double8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(double8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(double8);
+ulong8 __ovld __cnfn convert_ulong8_sat(double8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8);
+ulong16 __ovld __cnfn convert_ulong16(double16);
+ulong16 __ovld __cnfn convert_ulong16_rte(double16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(double16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(double16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(double16);
+ulong16 __ovld __cnfn convert_ulong16_sat(double16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16);
+
+float __ovld __cnfn convert_float(double);
+float __ovld __cnfn convert_float_rte(double);
+float __ovld __cnfn convert_float_rtn(double);
+float __ovld __cnfn convert_float_rtp(double);
+float __ovld __cnfn convert_float_rtz(double);
+float2 __ovld __cnfn convert_float2(double2);
+float2 __ovld __cnfn convert_float2_rte(double2);
+float2 __ovld __cnfn convert_float2_rtn(double2);
+float2 __ovld __cnfn convert_float2_rtp(double2);
+float2 __ovld __cnfn convert_float2_rtz(double2);
+float3 __ovld __cnfn convert_float3(double3);
+float3 __ovld __cnfn convert_float3_rte(double3);
+float3 __ovld __cnfn convert_float3_rtn(double3);
+float3 __ovld __cnfn convert_float3_rtp(double3);
+float3 __ovld __cnfn convert_float3_rtz(double3);
+float4 __ovld __cnfn convert_float4(double4);
+float4 __ovld __cnfn convert_float4_rte(double4);
+float4 __ovld __cnfn convert_float4_rtn(double4);
+float4 __ovld __cnfn convert_float4_rtp(double4);
+float4 __ovld __cnfn convert_float4_rtz(double4);
+float8 __ovld __cnfn convert_float8(double8);
+float8 __ovld __cnfn convert_float8_rte(double8);
+float8 __ovld __cnfn convert_float8_rtn(double8);
+float8 __ovld __cnfn convert_float8_rtp(double8);
+float8 __ovld __cnfn convert_float8_rtz(double8);
+float16 __ovld __cnfn convert_float16(double16);
+float16 __ovld __cnfn convert_float16_rte(double16);
+float16 __ovld __cnfn convert_float16_rtn(double16);
+float16 __ovld __cnfn convert_float16_rtp(double16);
+float16 __ovld __cnfn convert_float16_rtz(double16);
+
+double __ovld __cnfn convert_double(char);
+double __ovld __cnfn convert_double(double);
+double __ovld __cnfn convert_double(float);
+double __ovld __cnfn convert_double(int);
+double __ovld __cnfn convert_double(long);
+double __ovld __cnfn convert_double(short);
+double __ovld __cnfn convert_double(uchar);
+double __ovld __cnfn convert_double(uint);
+double __ovld __cnfn convert_double(ulong);
+double __ovld __cnfn convert_double(ushort);
+double __ovld __cnfn convert_double_rte(char);
+double __ovld __cnfn convert_double_rte(double);
+double __ovld __cnfn convert_double_rte(float);
+double __ovld __cnfn convert_double_rte(int);
+double __ovld __cnfn convert_double_rte(long);
+double __ovld __cnfn convert_double_rte(short);
+double __ovld __cnfn convert_double_rte(uchar);
+double __ovld __cnfn convert_double_rte(uint);
+double __ovld __cnfn convert_double_rte(ulong);
+double __ovld __cnfn convert_double_rte(ushort);
+double __ovld __cnfn convert_double_rtn(char);
+double __ovld __cnfn convert_double_rtn(double);
+double __ovld __cnfn convert_double_rtn(float);
+double __ovld __cnfn convert_double_rtn(int);
+double __ovld __cnfn convert_double_rtn(long);
+double __ovld __cnfn convert_double_rtn(short);
+double __ovld __cnfn convert_double_rtn(uchar);
+double __ovld __cnfn convert_double_rtn(uint);
+double __ovld __cnfn convert_double_rtn(ulong);
+double __ovld __cnfn convert_double_rtn(ushort);
+double __ovld __cnfn convert_double_rtp(char);
+double __ovld __cnfn convert_double_rtp(double);
+double __ovld __cnfn convert_double_rtp(float);
+double __ovld __cnfn convert_double_rtp(int);
+double __ovld __cnfn convert_double_rtp(long);
+double __ovld __cnfn convert_double_rtp(short);
+double __ovld __cnfn convert_double_rtp(uchar);
+double __ovld __cnfn convert_double_rtp(uint);
+double __ovld __cnfn convert_double_rtp(ulong);
+double __ovld __cnfn convert_double_rtp(ushort);
+double __ovld __cnfn convert_double_rtz(char);
+double __ovld __cnfn convert_double_rtz(double);
+double __ovld __cnfn convert_double_rtz(float);
+double __ovld __cnfn convert_double_rtz(int);
+double __ovld __cnfn convert_double_rtz(long);
+double __ovld __cnfn convert_double_rtz(short);
+double __ovld __cnfn convert_double_rtz(uchar);
+double __ovld __cnfn convert_double_rtz(uint);
+double __ovld __cnfn convert_double_rtz(ulong);
+double __ovld __cnfn convert_double_rtz(ushort);
+double2 __ovld __cnfn convert_double2(char2);
+double2 __ovld __cnfn convert_double2(double2);
+double2 __ovld __cnfn convert_double2(float2);
+double2 __ovld __cnfn convert_double2(int2);
+double2 __ovld __cnfn convert_double2(long2);
+double2 __ovld __cnfn convert_double2(short2);
+double2 __ovld __cnfn convert_double2(uchar2);
+double2 __ovld __cnfn convert_double2(uint2);
+double2 __ovld __cnfn convert_double2(ulong2);
+double2 __ovld __cnfn convert_double2(ushort2);
+double2 __ovld __cnfn convert_double2_rte(char2);
+double2 __ovld __cnfn convert_double2_rte(double2);
+double2 __ovld __cnfn convert_double2_rte(float2);
+double2 __ovld __cnfn convert_double2_rte(int2);
+double2 __ovld __cnfn convert_double2_rte(long2);
+double2 __ovld __cnfn convert_double2_rte(short2);
+double2 __ovld __cnfn convert_double2_rte(uchar2);
+double2 __ovld __cnfn convert_double2_rte(uint2);
+double2 __ovld __cnfn convert_double2_rte(ulong2);
+double2 __ovld __cnfn convert_double2_rte(ushort2);
+double2 __ovld __cnfn convert_double2_rtn(char2);
+double2 __ovld __cnfn convert_double2_rtn(double2);
+double2 __ovld __cnfn convert_double2_rtn(float2);
+double2 __ovld __cnfn convert_double2_rtn(int2);
+double2 __ovld __cnfn convert_double2_rtn(long2);
+double2 __ovld __cnfn convert_double2_rtn(short2);
+double2 __ovld __cnfn convert_double2_rtn(uchar2);
+double2 __ovld __cnfn convert_double2_rtn(uint2);
+double2 __ovld __cnfn convert_double2_rtn(ulong2);
+double2 __ovld __cnfn convert_double2_rtn(ushort2);
+double2 __ovld __cnfn convert_double2_rtp(char2);
+double2 __ovld __cnfn convert_double2_rtp(double2);
+double2 __ovld __cnfn convert_double2_rtp(float2);
+double2 __ovld __cnfn convert_double2_rtp(int2);
+double2 __ovld __cnfn convert_double2_rtp(long2);
+double2 __ovld __cnfn convert_double2_rtp(short2);
+double2 __ovld __cnfn convert_double2_rtp(uchar2);
+double2 __ovld __cnfn convert_double2_rtp(uint2);
+double2 __ovld __cnfn convert_double2_rtp(ulong2);
+double2 __ovld __cnfn convert_double2_rtp(ushort2);
+double2 __ovld __cnfn convert_double2_rtz(char2);
+double2 __ovld __cnfn convert_double2_rtz(double2);
+double2 __ovld __cnfn convert_double2_rtz(float2);
+double2 __ovld __cnfn convert_double2_rtz(int2);
+double2 __ovld __cnfn convert_double2_rtz(long2);
+double2 __ovld __cnfn convert_double2_rtz(short2);
+double2 __ovld __cnfn convert_double2_rtz(uchar2);
+double2 __ovld __cnfn convert_double2_rtz(uint2);
+double2 __ovld __cnfn convert_double2_rtz(ulong2);
+double2 __ovld __cnfn convert_double2_rtz(ushort2);
+double3 __ovld __cnfn convert_double3(char3);
+double3 __ovld __cnfn convert_double3(double3);
+double3 __ovld __cnfn convert_double3(float3);
+double3 __ovld __cnfn convert_double3(int3);
+double3 __ovld __cnfn convert_double3(long3);
+double3 __ovld __cnfn convert_double3(short3);
+double3 __ovld __cnfn convert_double3(uchar3);
+double3 __ovld __cnfn convert_double3(uint3);
+double3 __ovld __cnfn convert_double3(ulong3);
+double3 __ovld __cnfn convert_double3(ushort3);
+double3 __ovld __cnfn convert_double3_rte(char3);
+double3 __ovld __cnfn convert_double3_rte(double3);
+double3 __ovld __cnfn convert_double3_rte(float3);
+double3 __ovld __cnfn convert_double3_rte(int3);
+double3 __ovld __cnfn convert_double3_rte(long3);
+double3 __ovld __cnfn convert_double3_rte(short3);
+double3 __ovld __cnfn convert_double3_rte(uchar3);
+double3 __ovld __cnfn convert_double3_rte(uint3);
+double3 __ovld __cnfn convert_double3_rte(ulong3);
+double3 __ovld __cnfn convert_double3_rte(ushort3);
+double3 __ovld __cnfn convert_double3_rtn(char3);
+double3 __ovld __cnfn convert_double3_rtn(double3);
+double3 __ovld __cnfn convert_double3_rtn(float3);
+double3 __ovld __cnfn convert_double3_rtn(int3);
+double3 __ovld __cnfn convert_double3_rtn(long3);
+double3 __ovld __cnfn convert_double3_rtn(short3);
+double3 __ovld __cnfn convert_double3_rtn(uchar3);
+double3 __ovld __cnfn convert_double3_rtn(uint3);
+double3 __ovld __cnfn convert_double3_rtn(ulong3);
+double3 __ovld __cnfn convert_double3_rtn(ushort3);
+double3 __ovld __cnfn convert_double3_rtp(char3);
+double3 __ovld __cnfn convert_double3_rtp(double3);
+double3 __ovld __cnfn convert_double3_rtp(float3);
+double3 __ovld __cnfn convert_double3_rtp(int3);
+double3 __ovld __cnfn convert_double3_rtp(long3);
+double3 __ovld __cnfn convert_double3_rtp(short3);
+double3 __ovld __cnfn convert_double3_rtp(uchar3);
+double3 __ovld __cnfn convert_double3_rtp(uint3);
+double3 __ovld __cnfn convert_double3_rtp(ulong3);
+double3 __ovld __cnfn convert_double3_rtp(ushort3);
+double3 __ovld __cnfn convert_double3_rtz(char3);
+double3 __ovld __cnfn convert_double3_rtz(double3);
+double3 __ovld __cnfn convert_double3_rtz(float3);
+double3 __ovld __cnfn convert_double3_rtz(int3);
+double3 __ovld __cnfn convert_double3_rtz(long3);
+double3 __ovld __cnfn convert_double3_rtz(short3);
+double3 __ovld __cnfn convert_double3_rtz(uchar3);
+double3 __ovld __cnfn convert_double3_rtz(uint3);
+double3 __ovld __cnfn convert_double3_rtz(ulong3);
+double3 __ovld __cnfn convert_double3_rtz(ushort3);
+double4 __ovld __cnfn convert_double4(char4);
+double4 __ovld __cnfn convert_double4(double4);
+double4 __ovld __cnfn convert_double4(float4);
+double4 __ovld __cnfn convert_double4(int4);
+double4 __ovld __cnfn convert_double4(long4);
+double4 __ovld __cnfn convert_double4(short4);
+double4 __ovld __cnfn convert_double4(uchar4);
+double4 __ovld __cnfn convert_double4(uint4);
+double4 __ovld __cnfn convert_double4(ulong4);
+double4 __ovld __cnfn convert_double4(ushort4);
+double4 __ovld __cnfn convert_double4_rte(char4);
+double4 __ovld __cnfn convert_double4_rte(double4);
+double4 __ovld __cnfn convert_double4_rte(float4);
+double4 __ovld __cnfn convert_double4_rte(int4);
+double4 __ovld __cnfn convert_double4_rte(long4);
+double4 __ovld __cnfn convert_double4_rte(short4);
+double4 __ovld __cnfn convert_double4_rte(uchar4);
+double4 __ovld __cnfn convert_double4_rte(uint4);
+double4 __ovld __cnfn convert_double4_rte(ulong4);
+double4 __ovld __cnfn convert_double4_rte(ushort4);
+double4 __ovld __cnfn convert_double4_rtn(char4);
+double4 __ovld __cnfn convert_double4_rtn(double4);
+double4 __ovld __cnfn convert_double4_rtn(float4);
+double4 __ovld __cnfn convert_double4_rtn(int4);
+double4 __ovld __cnfn convert_double4_rtn(long4);
+double4 __ovld __cnfn convert_double4_rtn(short4);
+double4 __ovld __cnfn convert_double4_rtn(uchar4);
+double4 __ovld __cnfn convert_double4_rtn(uint4);
+double4 __ovld __cnfn convert_double4_rtn(ulong4);
+double4 __ovld __cnfn convert_double4_rtn(ushort4);
+double4 __ovld __cnfn convert_double4_rtp(char4);
+double4 __ovld __cnfn convert_double4_rtp(double4);
+double4 __ovld __cnfn convert_double4_rtp(float4);
+double4 __ovld __cnfn convert_double4_rtp(int4);
+double4 __ovld __cnfn convert_double4_rtp(long4);
+double4 __ovld __cnfn convert_double4_rtp(short4);
+double4 __ovld __cnfn convert_double4_rtp(uchar4);
+double4 __ovld __cnfn convert_double4_rtp(uint4);
+double4 __ovld __cnfn convert_double4_rtp(ulong4);
+double4 __ovld __cnfn convert_double4_rtp(ushort4);
+double4 __ovld __cnfn convert_double4_rtz(char4);
+double4 __ovld __cnfn convert_double4_rtz(double4);
+double4 __ovld __cnfn convert_double4_rtz(float4);
+double4 __ovld __cnfn convert_double4_rtz(int4);
+double4 __ovld __cnfn convert_double4_rtz(long4);
+double4 __ovld __cnfn convert_double4_rtz(short4);
+double4 __ovld __cnfn convert_double4_rtz(uchar4);
+double4 __ovld __cnfn convert_double4_rtz(uint4);
+double4 __ovld __cnfn convert_double4_rtz(ulong4);
+double4 __ovld __cnfn convert_double4_rtz(ushort4);
+double8 __ovld __cnfn convert_double8(char8);
+double8 __ovld __cnfn convert_double8(double8);
+double8 __ovld __cnfn convert_double8(float8);
+double8 __ovld __cnfn convert_double8(int8);
+double8 __ovld __cnfn convert_double8(long8);
+double8 __ovld __cnfn convert_double8(short8);
+double8 __ovld __cnfn convert_double8(uchar8);
+double8 __ovld __cnfn convert_double8(uint8);
+double8 __ovld __cnfn convert_double8(ulong8);
+double8 __ovld __cnfn convert_double8(ushort8);
+double8 __ovld __cnfn convert_double8_rte(char8);
+double8 __ovld __cnfn convert_double8_rte(double8);
+double8 __ovld __cnfn convert_double8_rte(float8);
+double8 __ovld __cnfn convert_double8_rte(int8);
+double8 __ovld __cnfn convert_double8_rte(long8);
+double8 __ovld __cnfn convert_double8_rte(short8);
+double8 __ovld __cnfn convert_double8_rte(uchar8);
+double8 __ovld __cnfn convert_double8_rte(uint8);
+double8 __ovld __cnfn convert_double8_rte(ulong8);
+double8 __ovld __cnfn convert_double8_rte(ushort8);
+double8 __ovld __cnfn convert_double8_rtn(char8);
+double8 __ovld __cnfn convert_double8_rtn(double8);
+double8 __ovld __cnfn convert_double8_rtn(float8);
+double8 __ovld __cnfn convert_double8_rtn(int8);
+double8 __ovld __cnfn convert_double8_rtn(long8);
+double8 __ovld __cnfn convert_double8_rtn(short8);
+double8 __ovld __cnfn convert_double8_rtn(uchar8);
+double8 __ovld __cnfn convert_double8_rtn(uint8);
+double8 __ovld __cnfn convert_double8_rtn(ulong8);
+double8 __ovld __cnfn convert_double8_rtn(ushort8);
+double8 __ovld __cnfn convert_double8_rtp(char8);
+double8 __ovld __cnfn convert_double8_rtp(double8);
+double8 __ovld __cnfn convert_double8_rtp(float8);
+double8 __ovld __cnfn convert_double8_rtp(int8);
+double8 __ovld __cnfn convert_double8_rtp(long8);
+double8 __ovld __cnfn convert_double8_rtp(short8);
+double8 __ovld __cnfn convert_double8_rtp(uchar8);
+double8 __ovld __cnfn convert_double8_rtp(uint8);
+double8 __ovld __cnfn convert_double8_rtp(ulong8);
+double8 __ovld __cnfn convert_double8_rtp(ushort8);
+double8 __ovld __cnfn convert_double8_rtz(char8);
+double8 __ovld __cnfn convert_double8_rtz(double8);
+double8 __ovld __cnfn convert_double8_rtz(float8);
+double8 __ovld __cnfn convert_double8_rtz(int8);
+double8 __ovld __cnfn convert_double8_rtz(long8);
+double8 __ovld __cnfn convert_double8_rtz(short8);
+double8 __ovld __cnfn convert_double8_rtz(uchar8);
+double8 __ovld __cnfn convert_double8_rtz(uint8);
+double8 __ovld __cnfn convert_double8_rtz(ulong8);
+double8 __ovld __cnfn convert_double8_rtz(ushort8);
+double16 __ovld __cnfn convert_double16(char16);
+double16 __ovld __cnfn convert_double16(double16);
+double16 __ovld __cnfn convert_double16(float16);
+double16 __ovld __cnfn convert_double16(int16);
+double16 __ovld __cnfn convert_double16(long16);
+double16 __ovld __cnfn convert_double16(short16);
+double16 __ovld __cnfn convert_double16(uchar16);
+double16 __ovld __cnfn convert_double16(uint16);
+double16 __ovld __cnfn convert_double16(ulong16);
+double16 __ovld __cnfn convert_double16(ushort16);
+double16 __ovld __cnfn convert_double16_rte(char16);
+double16 __ovld __cnfn convert_double16_rte(double16);
+double16 __ovld __cnfn convert_double16_rte(float16);
+double16 __ovld __cnfn convert_double16_rte(int16);
+double16 __ovld __cnfn convert_double16_rte(long16);
+double16 __ovld __cnfn convert_double16_rte(short16);
+double16 __ovld __cnfn convert_double16_rte(uchar16);
+double16 __ovld __cnfn convert_double16_rte(uint16);
+double16 __ovld __cnfn convert_double16_rte(ulong16);
+double16 __ovld __cnfn convert_double16_rte(ushort16);
+double16 __ovld __cnfn convert_double16_rtn(char16);
+double16 __ovld __cnfn convert_double16_rtn(double16);
+double16 __ovld __cnfn convert_double16_rtn(float16);
+double16 __ovld __cnfn convert_double16_rtn(int16);
+double16 __ovld __cnfn convert_double16_rtn(long16);
+double16 __ovld __cnfn convert_double16_rtn(short16);
+double16 __ovld __cnfn convert_double16_rtn(uchar16);
+double16 __ovld __cnfn convert_double16_rtn(uint16);
+double16 __ovld __cnfn convert_double16_rtn(ulong16);
+double16 __ovld __cnfn convert_double16_rtn(ushort16);
+double16 __ovld __cnfn convert_double16_rtp(char16);
+double16 __ovld __cnfn convert_double16_rtp(double16);
+double16 __ovld __cnfn convert_double16_rtp(float16);
+double16 __ovld __cnfn convert_double16_rtp(int16);
+double16 __ovld __cnfn convert_double16_rtp(long16);
+double16 __ovld __cnfn convert_double16_rtp(short16);
+double16 __ovld __cnfn convert_double16_rtp(uchar16);
+double16 __ovld __cnfn convert_double16_rtp(uint16);
+double16 __ovld __cnfn convert_double16_rtp(ulong16);
+double16 __ovld __cnfn convert_double16_rtp(ushort16);
+double16 __ovld __cnfn convert_double16_rtz(char16);
+double16 __ovld __cnfn convert_double16_rtz(double16);
+double16 __ovld __cnfn convert_double16_rtz(float16);
+double16 __ovld __cnfn convert_double16_rtz(int16);
+double16 __ovld __cnfn convert_double16_rtz(long16);
+double16 __ovld __cnfn convert_double16_rtz(short16);
+double16 __ovld __cnfn convert_double16_rtz(uchar16);
+double16 __ovld __cnfn convert_double16_rtz(uint16);
+double16 __ovld __cnfn convert_double16_rtz(ulong16);
+double16 __ovld __cnfn convert_double16_rtz(ushort16);
+#endif //cl_khr_fp64
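+
+// Illustrative note (not part of the header): the convert_double* overloads
+// above are declared only when the cl_khr_fp64 extension is available, so
+// kernel code that uses them is typically guarded the same way. A minimal,
+// hypothetical sketch (the helper name "widen" is made up for illustration):
+//
+//   #ifdef cl_khr_fp64
+//   double4 widen(float4 v) {
+//     // Round-to-nearest-even widening conversion, declared above.
+//     return convert_double4_rte(v);
+//   }
+//   #endif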
+
+#ifdef cl_khr_fp16
+// Convert half types to non-double types.
+uchar __ovld __cnfn convert_uchar(half);
+uchar __ovld __cnfn convert_uchar_rte(half);
+uchar __ovld __cnfn convert_uchar_rtp(half);
+uchar __ovld __cnfn convert_uchar_rtn(half);
+uchar __ovld __cnfn convert_uchar_rtz(half);
+uchar __ovld __cnfn convert_uchar_sat(half);
+uchar __ovld __cnfn convert_uchar_sat_rte(half);
+uchar __ovld __cnfn convert_uchar_sat_rtp(half);
+uchar __ovld __cnfn convert_uchar_sat_rtn(half);
+uchar __ovld __cnfn convert_uchar_sat_rtz(half);
+uchar2 __ovld __cnfn convert_uchar2(half2);
+uchar2 __ovld __cnfn convert_uchar2_rte(half2);
+uchar2 __ovld __cnfn convert_uchar2_rtp(half2);
+uchar2 __ovld __cnfn convert_uchar2_rtn(half2);
+uchar2 __ovld __cnfn convert_uchar2_rtz(half2);
+uchar2 __ovld __cnfn convert_uchar2_sat(half2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2);
+uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2);
+uchar3 __ovld __cnfn convert_uchar3(half3);
+uchar3 __ovld __cnfn convert_uchar3_rte(half3);
+uchar3 __ovld __cnfn convert_uchar3_rtp(half3);
+uchar3 __ovld __cnfn convert_uchar3_rtn(half3);
+uchar3 __ovld __cnfn convert_uchar3_rtz(half3);
+uchar3 __ovld __cnfn convert_uchar3_sat(half3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3);
+uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3);
+uchar4 __ovld __cnfn convert_uchar4(half4);
+uchar4 __ovld __cnfn convert_uchar4_rte(half4);
+uchar4 __ovld __cnfn convert_uchar4_rtp(half4);
+uchar4 __ovld __cnfn convert_uchar4_rtn(half4);
+uchar4 __ovld __cnfn convert_uchar4_rtz(half4);
+uchar4 __ovld __cnfn convert_uchar4_sat(half4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4);
+uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4);
+uchar8 __ovld __cnfn convert_uchar8(half8);
+uchar8 __ovld __cnfn convert_uchar8_rte(half8);
+uchar8 __ovld __cnfn convert_uchar8_rtp(half8);
+uchar8 __ovld __cnfn convert_uchar8_rtn(half8);
+uchar8 __ovld __cnfn convert_uchar8_rtz(half8);
+uchar8 __ovld __cnfn convert_uchar8_sat(half8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8);
+uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8);
+uchar16 __ovld __cnfn convert_uchar16(half16);
+uchar16 __ovld __cnfn convert_uchar16_rte(half16);
+uchar16 __ovld __cnfn convert_uchar16_rtp(half16);
+uchar16 __ovld __cnfn convert_uchar16_rtn(half16);
+uchar16 __ovld __cnfn convert_uchar16_rtz(half16);
+uchar16 __ovld __cnfn convert_uchar16_sat(half16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16);
+uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16);
+ushort __ovld __cnfn convert_ushort(half);
+ushort __ovld __cnfn convert_ushort_rte(half);
+ushort __ovld __cnfn convert_ushort_rtp(half);
+ushort __ovld __cnfn convert_ushort_rtn(half);
+ushort __ovld __cnfn convert_ushort_rtz(half);
+ushort __ovld __cnfn convert_ushort_sat(half);
+ushort __ovld __cnfn convert_ushort_sat_rte(half);
+ushort __ovld __cnfn convert_ushort_sat_rtp(half);
+ushort __ovld __cnfn convert_ushort_sat_rtn(half);
+ushort __ovld __cnfn convert_ushort_sat_rtz(half);
+ushort2 __ovld __cnfn convert_ushort2(half2);
+ushort2 __ovld __cnfn convert_ushort2_rte(half2);
+ushort2 __ovld __cnfn convert_ushort2_rtp(half2);
+ushort2 __ovld __cnfn convert_ushort2_rtn(half2);
+ushort2 __ovld __cnfn convert_ushort2_rtz(half2);
+ushort2 __ovld __cnfn convert_ushort2_sat(half2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2);
+ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2);
+ushort3 __ovld __cnfn convert_ushort3(half3);
+ushort3 __ovld __cnfn convert_ushort3_rte(half3);
+ushort3 __ovld __cnfn convert_ushort3_rtp(half3);
+ushort3 __ovld __cnfn convert_ushort3_rtn(half3);
+ushort3 __ovld __cnfn convert_ushort3_rtz(half3);
+ushort3 __ovld __cnfn convert_ushort3_sat(half3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3);
+ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3);
+ushort4 __ovld __cnfn convert_ushort4(half4);
+ushort4 __ovld __cnfn convert_ushort4_rte(half4);
+ushort4 __ovld __cnfn convert_ushort4_rtp(half4);
+ushort4 __ovld __cnfn convert_ushort4_rtn(half4);
+ushort4 __ovld __cnfn convert_ushort4_rtz(half4);
+ushort4 __ovld __cnfn convert_ushort4_sat(half4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4);
+ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4);
+ushort8 __ovld __cnfn convert_ushort8(half8);
+ushort8 __ovld __cnfn convert_ushort8_rte(half8);
+ushort8 __ovld __cnfn convert_ushort8_rtp(half8);
+ushort8 __ovld __cnfn convert_ushort8_rtn(half8);
+ushort8 __ovld __cnfn convert_ushort8_rtz(half8);
+ushort8 __ovld __cnfn convert_ushort8_sat(half8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8);
+ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8);
+ushort16 __ovld __cnfn convert_ushort16(half16);
+ushort16 __ovld __cnfn convert_ushort16_rte(half16);
+ushort16 __ovld __cnfn convert_ushort16_rtp(half16);
+ushort16 __ovld __cnfn convert_ushort16_rtn(half16);
+ushort16 __ovld __cnfn convert_ushort16_rtz(half16);
+ushort16 __ovld __cnfn convert_ushort16_sat(half16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16);
+ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16);
+uint __ovld __cnfn convert_uint(half);
+uint __ovld __cnfn convert_uint_rte(half);
+uint __ovld __cnfn convert_uint_rtp(half);
+uint __ovld __cnfn convert_uint_rtn(half);
+uint __ovld __cnfn convert_uint_rtz(half);
+uint __ovld __cnfn convert_uint_sat(half);
+uint __ovld __cnfn convert_uint_sat_rte(half);
+uint __ovld __cnfn convert_uint_sat_rtp(half);
+uint __ovld __cnfn convert_uint_sat_rtn(half);
+uint __ovld __cnfn convert_uint_sat_rtz(half);
+uint2 __ovld __cnfn convert_uint2(half2);
+uint2 __ovld __cnfn convert_uint2_rte(half2);
+uint2 __ovld __cnfn convert_uint2_rtp(half2);
+uint2 __ovld __cnfn convert_uint2_rtn(half2);
+uint2 __ovld __cnfn convert_uint2_rtz(half2);
+uint2 __ovld __cnfn convert_uint2_sat(half2);
+uint2 __ovld __cnfn convert_uint2_sat_rte(half2);
+uint2 __ovld __cnfn convert_uint2_sat_rtp(half2);
+uint2 __ovld __cnfn convert_uint2_sat_rtn(half2);
+uint2 __ovld __cnfn convert_uint2_sat_rtz(half2);
+uint3 __ovld __cnfn convert_uint3(half3);
+uint3 __ovld __cnfn convert_uint3_rte(half3);
+uint3 __ovld __cnfn convert_uint3_rtp(half3);
+uint3 __ovld __cnfn convert_uint3_rtn(half3);
+uint3 __ovld __cnfn convert_uint3_rtz(half3);
+uint3 __ovld __cnfn convert_uint3_sat(half3);
+uint3 __ovld __cnfn convert_uint3_sat_rte(half3);
+uint3 __ovld __cnfn convert_uint3_sat_rtp(half3);
+uint3 __ovld __cnfn convert_uint3_sat_rtn(half3);
+uint3 __ovld __cnfn convert_uint3_sat_rtz(half3);
+uint4 __ovld __cnfn convert_uint4(half4);
+uint4 __ovld __cnfn convert_uint4_rte(half4);
+uint4 __ovld __cnfn convert_uint4_rtp(half4);
+uint4 __ovld __cnfn convert_uint4_rtn(half4);
+uint4 __ovld __cnfn convert_uint4_rtz(half4);
+uint4 __ovld __cnfn convert_uint4_sat(half4);
+uint4 __ovld __cnfn convert_uint4_sat_rte(half4);
+uint4 __ovld __cnfn convert_uint4_sat_rtp(half4);
+uint4 __ovld __cnfn convert_uint4_sat_rtn(half4);
+uint4 __ovld __cnfn convert_uint4_sat_rtz(half4);
+uint8 __ovld __cnfn convert_uint8(half8);
+uint8 __ovld __cnfn convert_uint8_rte(half8);
+uint8 __ovld __cnfn convert_uint8_rtp(half8);
+uint8 __ovld __cnfn convert_uint8_rtn(half8);
+uint8 __ovld __cnfn convert_uint8_rtz(half8);
+uint8 __ovld __cnfn convert_uint8_sat(half8);
+uint8 __ovld __cnfn convert_uint8_sat_rte(half8);
+uint8 __ovld __cnfn convert_uint8_sat_rtp(half8);
+uint8 __ovld __cnfn convert_uint8_sat_rtn(half8);
+uint8 __ovld __cnfn convert_uint8_sat_rtz(half8);
+uint16 __ovld __cnfn convert_uint16(half16);
+uint16 __ovld __cnfn convert_uint16_rte(half16);
+uint16 __ovld __cnfn convert_uint16_rtp(half16);
+uint16 __ovld __cnfn convert_uint16_rtn(half16);
+uint16 __ovld __cnfn convert_uint16_rtz(half16);
+uint16 __ovld __cnfn convert_uint16_sat(half16);
+uint16 __ovld __cnfn convert_uint16_sat_rte(half16);
+uint16 __ovld __cnfn convert_uint16_sat_rtp(half16);
+uint16 __ovld __cnfn convert_uint16_sat_rtn(half16);
+uint16 __ovld __cnfn convert_uint16_sat_rtz(half16);
+ulong __ovld __cnfn convert_ulong(half);
+ulong __ovld __cnfn convert_ulong_rte(half);
+ulong __ovld __cnfn convert_ulong_rtp(half);
+ulong __ovld __cnfn convert_ulong_rtn(half);
+ulong __ovld __cnfn convert_ulong_rtz(half);
+ulong __ovld __cnfn convert_ulong_sat(half);
+ulong __ovld __cnfn convert_ulong_sat_rte(half);
+ulong __ovld __cnfn convert_ulong_sat_rtp(half);
+ulong __ovld __cnfn convert_ulong_sat_rtn(half);
+ulong __ovld __cnfn convert_ulong_sat_rtz(half);
+ulong2 __ovld __cnfn convert_ulong2(half2);
+ulong2 __ovld __cnfn convert_ulong2_rte(half2);
+ulong2 __ovld __cnfn convert_ulong2_rtp(half2);
+ulong2 __ovld __cnfn convert_ulong2_rtn(half2);
+ulong2 __ovld __cnfn convert_ulong2_rtz(half2);
+ulong2 __ovld __cnfn convert_ulong2_sat(half2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2);
+ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2);
+ulong3 __ovld __cnfn convert_ulong3(half3);
+ulong3 __ovld __cnfn convert_ulong3_rte(half3);
+ulong3 __ovld __cnfn convert_ulong3_rtp(half3);
+ulong3 __ovld __cnfn convert_ulong3_rtn(half3);
+ulong3 __ovld __cnfn convert_ulong3_rtz(half3);
+ulong3 __ovld __cnfn convert_ulong3_sat(half3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3);
+ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3);
+ulong4 __ovld __cnfn convert_ulong4(half4);
+ulong4 __ovld __cnfn convert_ulong4_rte(half4);
+ulong4 __ovld __cnfn convert_ulong4_rtp(half4);
+ulong4 __ovld __cnfn convert_ulong4_rtn(half4);
+ulong4 __ovld __cnfn convert_ulong4_rtz(half4);
+ulong4 __ovld __cnfn convert_ulong4_sat(half4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4);
+ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4);
+ulong8 __ovld __cnfn convert_ulong8(half8);
+ulong8 __ovld __cnfn convert_ulong8_rte(half8);
+ulong8 __ovld __cnfn convert_ulong8_rtp(half8);
+ulong8 __ovld __cnfn convert_ulong8_rtn(half8);
+ulong8 __ovld __cnfn convert_ulong8_rtz(half8);
+ulong8 __ovld __cnfn convert_ulong8_sat(half8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8);
+ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8);
+ulong16 __ovld __cnfn convert_ulong16(half16);
+ulong16 __ovld __cnfn convert_ulong16_rte(half16);
+ulong16 __ovld __cnfn convert_ulong16_rtp(half16);
+ulong16 __ovld __cnfn convert_ulong16_rtn(half16);
+ulong16 __ovld __cnfn convert_ulong16_rtz(half16);
+ulong16 __ovld __cnfn convert_ulong16_sat(half16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16);
+ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16);
+char __ovld __cnfn convert_char(half);
+char __ovld __cnfn convert_char_rte(half);
+char __ovld __cnfn convert_char_rtp(half);
+char __ovld __cnfn convert_char_rtn(half);
+char __ovld __cnfn convert_char_rtz(half);
+char __ovld __cnfn convert_char_sat(half);
+char __ovld __cnfn convert_char_sat_rte(half);
+char __ovld __cnfn convert_char_sat_rtp(half);
+char __ovld __cnfn convert_char_sat_rtn(half);
+char __ovld __cnfn convert_char_sat_rtz(half);
+char2 __ovld __cnfn convert_char2(half2);
+char2 __ovld __cnfn convert_char2_rte(half2);
+char2 __ovld __cnfn convert_char2_rtp(half2);
+char2 __ovld __cnfn convert_char2_rtn(half2);
+char2 __ovld __cnfn convert_char2_rtz(half2);
+char2 __ovld __cnfn convert_char2_sat(half2);
+char2 __ovld __cnfn convert_char2_sat_rte(half2);
+char2 __ovld __cnfn convert_char2_sat_rtp(half2);
+char2 __ovld __cnfn convert_char2_sat_rtn(half2);
+char2 __ovld __cnfn convert_char2_sat_rtz(half2);
+char3 __ovld __cnfn convert_char3(half3);
+char3 __ovld __cnfn convert_char3_rte(half3);
+char3 __ovld __cnfn convert_char3_rtp(half3);
+char3 __ovld __cnfn convert_char3_rtn(half3);
+char3 __ovld __cnfn convert_char3_rtz(half3);
+char3 __ovld __cnfn convert_char3_sat(half3);
+char3 __ovld __cnfn convert_char3_sat_rte(half3);
+char3 __ovld __cnfn convert_char3_sat_rtp(half3);
+char3 __ovld __cnfn convert_char3_sat_rtn(half3);
+char3 __ovld __cnfn convert_char3_sat_rtz(half3);
+char4 __ovld __cnfn convert_char4(half4);
+char4 __ovld __cnfn convert_char4_rte(half4);
+char4 __ovld __cnfn convert_char4_rtp(half4);
+char4 __ovld __cnfn convert_char4_rtn(half4);
+char4 __ovld __cnfn convert_char4_rtz(half4);
+char4 __ovld __cnfn convert_char4_sat(half4);
+char4 __ovld __cnfn convert_char4_sat_rte(half4);
+char4 __ovld __cnfn convert_char4_sat_rtp(half4);
+char4 __ovld __cnfn convert_char4_sat_rtn(half4);
+char4 __ovld __cnfn convert_char4_sat_rtz(half4);
+char8 __ovld __cnfn convert_char8(half8);
+char8 __ovld __cnfn convert_char8_rte(half8);
+char8 __ovld __cnfn convert_char8_rtp(half8);
+char8 __ovld __cnfn convert_char8_rtn(half8);
+char8 __ovld __cnfn convert_char8_rtz(half8);
+char8 __ovld __cnfn convert_char8_sat(half8);
+char8 __ovld __cnfn convert_char8_sat_rte(half8);
+char8 __ovld __cnfn convert_char8_sat_rtp(half8);
+char8 __ovld __cnfn convert_char8_sat_rtn(half8);
+char8 __ovld __cnfn convert_char8_sat_rtz(half8);
+char16 __ovld __cnfn convert_char16(half16);
+char16 __ovld __cnfn convert_char16_rte(half16);
+char16 __ovld __cnfn convert_char16_rtp(half16);
+char16 __ovld __cnfn convert_char16_rtn(half16);
+char16 __ovld __cnfn convert_char16_rtz(half16);
+char16 __ovld __cnfn convert_char16_sat(half16);
+char16 __ovld __cnfn convert_char16_sat_rte(half16);
+char16 __ovld __cnfn convert_char16_sat_rtp(half16);
+char16 __ovld __cnfn convert_char16_sat_rtn(half16);
+char16 __ovld __cnfn convert_char16_sat_rtz(half16);
+short __ovld __cnfn convert_short(half);
+short __ovld __cnfn convert_short_rte(half);
+short __ovld __cnfn convert_short_rtp(half);
+short __ovld __cnfn convert_short_rtn(half);
+short __ovld __cnfn convert_short_rtz(half);
+short __ovld __cnfn convert_short_sat(half);
+short __ovld __cnfn convert_short_sat_rte(half);
+short __ovld __cnfn convert_short_sat_rtp(half);
+short __ovld __cnfn convert_short_sat_rtn(half);
+short __ovld __cnfn convert_short_sat_rtz(half);
+short2 __ovld __cnfn convert_short2(half2);
+short2 __ovld __cnfn convert_short2_rte(half2);
+short2 __ovld __cnfn convert_short2_rtp(half2);
+short2 __ovld __cnfn convert_short2_rtn(half2);
+short2 __ovld __cnfn convert_short2_rtz(half2);
+short2 __ovld __cnfn convert_short2_sat(half2);
+short2 __ovld __cnfn convert_short2_sat_rte(half2);
+short2 __ovld __cnfn convert_short2_sat_rtp(half2);
+short2 __ovld __cnfn convert_short2_sat_rtn(half2);
+short2 __ovld __cnfn convert_short2_sat_rtz(half2);
+short3 __ovld __cnfn convert_short3(half3);
+short3 __ovld __cnfn convert_short3_rte(half3);
+short3 __ovld __cnfn convert_short3_rtp(half3);
+short3 __ovld __cnfn convert_short3_rtn(half3);
+short3 __ovld __cnfn convert_short3_rtz(half3);
+short3 __ovld __cnfn convert_short3_sat(half3);
+short3 __ovld __cnfn convert_short3_sat_rte(half3);
+short3 __ovld __cnfn convert_short3_sat_rtp(half3);
+short3 __ovld __cnfn convert_short3_sat_rtn(half3);
+short3 __ovld __cnfn convert_short3_sat_rtz(half3);
+short4 __ovld __cnfn convert_short4(half4);
+short4 __ovld __cnfn convert_short4_rte(half4);
+short4 __ovld __cnfn convert_short4_rtp(half4);
+short4 __ovld __cnfn convert_short4_rtn(half4);
+short4 __ovld __cnfn convert_short4_rtz(half4);
+short4 __ovld __cnfn convert_short4_sat(half4);
+short4 __ovld __cnfn convert_short4_sat_rte(half4);
+short4 __ovld __cnfn convert_short4_sat_rtp(half4);
+short4 __ovld __cnfn convert_short4_sat_rtn(half4);
+short4 __ovld __cnfn convert_short4_sat_rtz(half4);
+short8 __ovld __cnfn convert_short8(half8);
+short8 __ovld __cnfn convert_short8_rte(half8);
+short8 __ovld __cnfn convert_short8_rtp(half8);
+short8 __ovld __cnfn convert_short8_rtn(half8);
+short8 __ovld __cnfn convert_short8_rtz(half8);
+short8 __ovld __cnfn convert_short8_sat(half8);
+short8 __ovld __cnfn convert_short8_sat_rte(half8);
+short8 __ovld __cnfn convert_short8_sat_rtp(half8);
+short8 __ovld __cnfn convert_short8_sat_rtn(half8);
+short8 __ovld __cnfn convert_short8_sat_rtz(half8);
+short16 __ovld __cnfn convert_short16(half16);
+short16 __ovld __cnfn convert_short16_rte(half16);
+short16 __ovld __cnfn convert_short16_rtp(half16);
+short16 __ovld __cnfn convert_short16_rtn(half16);
+short16 __ovld __cnfn convert_short16_rtz(half16);
+short16 __ovld __cnfn convert_short16_sat(half16);
+short16 __ovld __cnfn convert_short16_sat_rte(half16);
+short16 __ovld __cnfn convert_short16_sat_rtp(half16);
+short16 __ovld __cnfn convert_short16_sat_rtn(half16);
+short16 __ovld __cnfn convert_short16_sat_rtz(half16);
+int __ovld __cnfn convert_int(half);
+int __ovld __cnfn convert_int_rte(half);
+int __ovld __cnfn convert_int_rtp(half);
+int __ovld __cnfn convert_int_rtn(half);
+int __ovld __cnfn convert_int_rtz(half);
+int __ovld __cnfn convert_int_sat(half);
+int __ovld __cnfn convert_int_sat_rte(half);
+int __ovld __cnfn convert_int_sat_rtp(half);
+int __ovld __cnfn convert_int_sat_rtn(half);
+int __ovld __cnfn convert_int_sat_rtz(half);
+int2 __ovld __cnfn convert_int2(half2);
+int2 __ovld __cnfn convert_int2_rte(half2);
+int2 __ovld __cnfn convert_int2_rtp(half2);
+int2 __ovld __cnfn convert_int2_rtn(half2);
+int2 __ovld __cnfn convert_int2_rtz(half2);
+int2 __ovld __cnfn convert_int2_sat(half2);
+int2 __ovld __cnfn convert_int2_sat_rte(half2);
+int2 __ovld __cnfn convert_int2_sat_rtp(half2);
+int2 __ovld __cnfn convert_int2_sat_rtn(half2);
+int2 __ovld __cnfn convert_int2_sat_rtz(half2);
+int3 __ovld __cnfn convert_int3(half3);
+int3 __ovld __cnfn convert_int3_rte(half3);
+int3 __ovld __cnfn convert_int3_rtp(half3);
+int3 __ovld __cnfn convert_int3_rtn(half3);
+int3 __ovld __cnfn convert_int3_rtz(half3);
+int3 __ovld __cnfn convert_int3_sat(half3);
+int3 __ovld __cnfn convert_int3_sat_rte(half3);
+int3 __ovld __cnfn convert_int3_sat_rtp(half3);
+int3 __ovld __cnfn convert_int3_sat_rtn(half3);
+int3 __ovld __cnfn convert_int3_sat_rtz(half3);
+int4 __ovld __cnfn convert_int4(half4);
+int4 __ovld __cnfn convert_int4_rte(half4);
+int4 __ovld __cnfn convert_int4_rtp(half4);
+int4 __ovld __cnfn convert_int4_rtn(half4);
+int4 __ovld __cnfn convert_int4_rtz(half4);
+int4 __ovld __cnfn convert_int4_sat(half4);
+int4 __ovld __cnfn convert_int4_sat_rte(half4);
+int4 __ovld __cnfn convert_int4_sat_rtp(half4);
+int4 __ovld __cnfn convert_int4_sat_rtn(half4);
+int4 __ovld __cnfn convert_int4_sat_rtz(half4);
+int8 __ovld __cnfn convert_int8(half8);
+int8 __ovld __cnfn convert_int8_rte(half8);
+int8 __ovld __cnfn convert_int8_rtp(half8);
+int8 __ovld __cnfn convert_int8_rtn(half8);
+int8 __ovld __cnfn convert_int8_rtz(half8);
+int8 __ovld __cnfn convert_int8_sat(half8);
+int8 __ovld __cnfn convert_int8_sat_rte(half8);
+int8 __ovld __cnfn convert_int8_sat_rtp(half8);
+int8 __ovld __cnfn convert_int8_sat_rtn(half8);
+int8 __ovld __cnfn convert_int8_sat_rtz(half8);
+int16 __ovld __cnfn convert_int16(half16);
+int16 __ovld __cnfn convert_int16_rte(half16);
+int16 __ovld __cnfn convert_int16_rtp(half16);
+int16 __ovld __cnfn convert_int16_rtn(half16);
+int16 __ovld __cnfn convert_int16_rtz(half16);
+int16 __ovld __cnfn convert_int16_sat(half16);
+int16 __ovld __cnfn convert_int16_sat_rte(half16);
+int16 __ovld __cnfn convert_int16_sat_rtp(half16);
+int16 __ovld __cnfn convert_int16_sat_rtn(half16);
+int16 __ovld __cnfn convert_int16_sat_rtz(half16);
+long __ovld __cnfn convert_long(half);
+long __ovld __cnfn convert_long_rte(half);
+long __ovld __cnfn convert_long_rtp(half);
+long __ovld __cnfn convert_long_rtn(half);
+long __ovld __cnfn convert_long_rtz(half);
+long __ovld __cnfn convert_long_sat(half);
+long __ovld __cnfn convert_long_sat_rte(half);
+long __ovld __cnfn convert_long_sat_rtp(half);
+long __ovld __cnfn convert_long_sat_rtn(half);
+long __ovld __cnfn convert_long_sat_rtz(half);
+long2 __ovld __cnfn convert_long2(half2);
+long2 __ovld __cnfn convert_long2_rte(half2);
+long2 __ovld __cnfn convert_long2_rtp(half2);
+long2 __ovld __cnfn convert_long2_rtn(half2);
+long2 __ovld __cnfn convert_long2_rtz(half2);
+long2 __ovld __cnfn convert_long2_sat(half2);
+long2 __ovld __cnfn convert_long2_sat_rte(half2);
+long2 __ovld __cnfn convert_long2_sat_rtp(half2);
+long2 __ovld __cnfn convert_long2_sat_rtn(half2);
+long2 __ovld __cnfn convert_long2_sat_rtz(half2);
+long3 __ovld __cnfn convert_long3(half3);
+long3 __ovld __cnfn convert_long3_rte(half3);
+long3 __ovld __cnfn convert_long3_rtp(half3);
+long3 __ovld __cnfn convert_long3_rtn(half3);
+long3 __ovld __cnfn convert_long3_rtz(half3);
+long3 __ovld __cnfn convert_long3_sat(half3);
+long3 __ovld __cnfn convert_long3_sat_rte(half3);
+long3 __ovld __cnfn convert_long3_sat_rtp(half3);
+long3 __ovld __cnfn convert_long3_sat_rtn(half3);
+long3 __ovld __cnfn convert_long3_sat_rtz(half3);
+long4 __ovld __cnfn convert_long4(half4);
+long4 __ovld __cnfn convert_long4_rte(half4);
+long4 __ovld __cnfn convert_long4_rtp(half4);
+long4 __ovld __cnfn convert_long4_rtn(half4);
+long4 __ovld __cnfn convert_long4_rtz(half4);
+long4 __ovld __cnfn convert_long4_sat(half4);
+long4 __ovld __cnfn convert_long4_sat_rte(half4);
+long4 __ovld __cnfn convert_long4_sat_rtp(half4);
+long4 __ovld __cnfn convert_long4_sat_rtn(half4);
+long4 __ovld __cnfn convert_long4_sat_rtz(half4);
+long8 __ovld __cnfn convert_long8(half8);
+long8 __ovld __cnfn convert_long8_rte(half8);
+long8 __ovld __cnfn convert_long8_rtp(half8);
+long8 __ovld __cnfn convert_long8_rtn(half8);
+long8 __ovld __cnfn convert_long8_rtz(half8);
+long8 __ovld __cnfn convert_long8_sat(half8);
+long8 __ovld __cnfn convert_long8_sat_rte(half8);
+long8 __ovld __cnfn convert_long8_sat_rtp(half8);
+long8 __ovld __cnfn convert_long8_sat_rtn(half8);
+long8 __ovld __cnfn convert_long8_sat_rtz(half8);
+long16 __ovld __cnfn convert_long16(half16);
+long16 __ovld __cnfn convert_long16_rte(half16);
+long16 __ovld __cnfn convert_long16_rtp(half16);
+long16 __ovld __cnfn convert_long16_rtn(half16);
+long16 __ovld __cnfn convert_long16_rtz(half16);
+long16 __ovld __cnfn convert_long16_sat(half16);
+long16 __ovld __cnfn convert_long16_sat_rte(half16);
+long16 __ovld __cnfn convert_long16_sat_rtp(half16);
+long16 __ovld __cnfn convert_long16_sat_rtn(half16);
+long16 __ovld __cnfn convert_long16_sat_rtz(half16);
+float __ovld __cnfn convert_float(half);
+float __ovld __cnfn convert_float_rte(half);
+float __ovld __cnfn convert_float_rtp(half);
+float __ovld __cnfn convert_float_rtn(half);
+float __ovld __cnfn convert_float_rtz(half);
+float2 __ovld __cnfn convert_float2(half2);
+float2 __ovld __cnfn convert_float2_rte(half2);
+float2 __ovld __cnfn convert_float2_rtp(half2);
+float2 __ovld __cnfn convert_float2_rtn(half2);
+float2 __ovld __cnfn convert_float2_rtz(half2);
+float3 __ovld __cnfn convert_float3(half3);
+float3 __ovld __cnfn convert_float3_rte(half3);
+float3 __ovld __cnfn convert_float3_rtp(half3);
+float3 __ovld __cnfn convert_float3_rtn(half3);
+float3 __ovld __cnfn convert_float3_rtz(half3);
+float4 __ovld __cnfn convert_float4(half4);
+float4 __ovld __cnfn convert_float4_rte(half4);
+float4 __ovld __cnfn convert_float4_rtp(half4);
+float4 __ovld __cnfn convert_float4_rtn(half4);
+float4 __ovld __cnfn convert_float4_rtz(half4);
+float8 __ovld __cnfn convert_float8(half8);
+float8 __ovld __cnfn convert_float8_rte(half8);
+float8 __ovld __cnfn convert_float8_rtp(half8);
+float8 __ovld __cnfn convert_float8_rtn(half8);
+float8 __ovld __cnfn convert_float8_rtz(half8);
+float16 __ovld __cnfn convert_float16(half16);
+float16 __ovld __cnfn convert_float16_rte(half16);
+float16 __ovld __cnfn convert_float16_rtp(half16);
+float16 __ovld __cnfn convert_float16_rtn(half16);
+float16 __ovld __cnfn convert_float16_rtz(half16);
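+
+// Example (illustrative): the names above compose as
+// convert_<destType>[_sat][_<roundingMode>]. _sat clamps out-of-range inputs
+// to the destination range, and the rounding suffix is one of
+// _rte/_rtp/_rtn/_rtz; conversions to integer types default to _rtz.
+//   half4  h = (half4)(0.25h, 1.5h, -2.0h, 300.0h);
+//   uchar4 u = convert_uchar4_sat_rte(h);   // yields (0, 2, 0, 255)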
+
+// Convert non-double types to half types.
+half __ovld __cnfn convert_half(uchar);
+half __ovld __cnfn convert_half(ushort);
+half __ovld __cnfn convert_half(uint);
+half __ovld __cnfn convert_half(ulong);
+half __ovld __cnfn convert_half(char);
+half __ovld __cnfn convert_half(short);
+half __ovld __cnfn convert_half(int);
+half __ovld __cnfn convert_half(long);
+half __ovld __cnfn convert_half(float);
+half __ovld __cnfn convert_half(half);
+half __ovld __cnfn convert_half_rte(uchar);
+half __ovld __cnfn convert_half_rte(ushort);
+half __ovld __cnfn convert_half_rte(uint);
+half __ovld __cnfn convert_half_rte(ulong);
+half __ovld __cnfn convert_half_rte(char);
+half __ovld __cnfn convert_half_rte(short);
+half __ovld __cnfn convert_half_rte(int);
+half __ovld __cnfn convert_half_rte(long);
+half __ovld __cnfn convert_half_rte(float);
+half __ovld __cnfn convert_half_rte(half);
+half __ovld __cnfn convert_half_rtp(uchar);
+half __ovld __cnfn convert_half_rtp(ushort);
+half __ovld __cnfn convert_half_rtp(uint);
+half __ovld __cnfn convert_half_rtp(ulong);
+half __ovld __cnfn convert_half_rtp(char);
+half __ovld __cnfn convert_half_rtp(short);
+half __ovld __cnfn convert_half_rtp(int);
+half __ovld __cnfn convert_half_rtp(long);
+half __ovld __cnfn convert_half_rtp(float);
+half __ovld __cnfn convert_half_rtp(half);
+half __ovld __cnfn convert_half_rtn(uchar);
+half __ovld __cnfn convert_half_rtn(ushort);
+half __ovld __cnfn convert_half_rtn(uint);
+half __ovld __cnfn convert_half_rtn(ulong);
+half __ovld __cnfn convert_half_rtn(char);
+half __ovld __cnfn convert_half_rtn(short);
+half __ovld __cnfn convert_half_rtn(int);
+half __ovld __cnfn convert_half_rtn(long);
+half __ovld __cnfn convert_half_rtn(float);
+half __ovld __cnfn convert_half_rtn(half);
+half __ovld __cnfn convert_half_rtz(uchar);
+half __ovld __cnfn convert_half_rtz(ushort);
+half __ovld __cnfn convert_half_rtz(uint);
+half __ovld __cnfn convert_half_rtz(ulong);
+half __ovld __cnfn convert_half_rtz(char);
+half __ovld __cnfn convert_half_rtz(short);
+half __ovld __cnfn convert_half_rtz(int);
+half __ovld __cnfn convert_half_rtz(long);
+half __ovld __cnfn convert_half_rtz(float);
+half __ovld __cnfn convert_half_rtz(half);
+half2 __ovld __cnfn convert_half2(char2);
+half2 __ovld __cnfn convert_half2(uchar2);
+half2 __ovld __cnfn convert_half2(short2);
+half2 __ovld __cnfn convert_half2(ushort2);
+half2 __ovld __cnfn convert_half2(int2);
+half2 __ovld __cnfn convert_half2(uint2);
+half2 __ovld __cnfn convert_half2(long2);
+half2 __ovld __cnfn convert_half2(ulong2);
+half2 __ovld __cnfn convert_half2(float2);
+half2 __ovld __cnfn convert_half2(half2);
+half2 __ovld __cnfn convert_half2_rte(char2);
+half2 __ovld __cnfn convert_half2_rte(uchar2);
+half2 __ovld __cnfn convert_half2_rte(short2);
+half2 __ovld __cnfn convert_half2_rte(ushort2);
+half2 __ovld __cnfn convert_half2_rte(int2);
+half2 __ovld __cnfn convert_half2_rte(uint2);
+half2 __ovld __cnfn convert_half2_rte(long2);
+half2 __ovld __cnfn convert_half2_rte(ulong2);
+half2 __ovld __cnfn convert_half2_rte(float2);
+half2 __ovld __cnfn convert_half2_rte(half2);
+half2 __ovld __cnfn convert_half2_rtp(char2);
+half2 __ovld __cnfn convert_half2_rtp(uchar2);
+half2 __ovld __cnfn convert_half2_rtp(short2);
+half2 __ovld __cnfn convert_half2_rtp(ushort2);
+half2 __ovld __cnfn convert_half2_rtp(int2);
+half2 __ovld __cnfn convert_half2_rtp(uint2);
+half2 __ovld __cnfn convert_half2_rtp(long2);
+half2 __ovld __cnfn convert_half2_rtp(ulong2);
+half2 __ovld __cnfn convert_half2_rtp(float2);
+half2 __ovld __cnfn convert_half2_rtp(half2);
+half2 __ovld __cnfn convert_half2_rtn(char2);
+half2 __ovld __cnfn convert_half2_rtn(uchar2);
+half2 __ovld __cnfn convert_half2_rtn(short2);
+half2 __ovld __cnfn convert_half2_rtn(ushort2);
+half2 __ovld __cnfn convert_half2_rtn(int2);
+half2 __ovld __cnfn convert_half2_rtn(uint2);
+half2 __ovld __cnfn convert_half2_rtn(long2);
+half2 __ovld __cnfn convert_half2_rtn(ulong2);
+half2 __ovld __cnfn convert_half2_rtn(float2);
+half2 __ovld __cnfn convert_half2_rtn(half2);
+half2 __ovld __cnfn convert_half2_rtz(char2);
+half2 __ovld __cnfn convert_half2_rtz(uchar2);
+half2 __ovld __cnfn convert_half2_rtz(short2);
+half2 __ovld __cnfn convert_half2_rtz(ushort2);
+half2 __ovld __cnfn convert_half2_rtz(int2);
+half2 __ovld __cnfn convert_half2_rtz(uint2);
+half2 __ovld __cnfn convert_half2_rtz(long2);
+half2 __ovld __cnfn convert_half2_rtz(ulong2);
+half2 __ovld __cnfn convert_half2_rtz(float2);
+half2 __ovld __cnfn convert_half2_rtz(half2);
+half3 __ovld __cnfn convert_half3(char3);
+half3 __ovld __cnfn convert_half3(uchar3);
+half3 __ovld __cnfn convert_half3(short3);
+half3 __ovld __cnfn convert_half3(ushort3);
+half3 __ovld __cnfn convert_half3(int3);
+half3 __ovld __cnfn convert_half3(uint3);
+half3 __ovld __cnfn convert_half3(long3);
+half3 __ovld __cnfn convert_half3(ulong3);
+half3 __ovld __cnfn convert_half3(float3);
+half3 __ovld __cnfn convert_half3(half3);
+half3 __ovld __cnfn convert_half3_rte(char3);
+half3 __ovld __cnfn convert_half3_rte(uchar3);
+half3 __ovld __cnfn convert_half3_rte(short3);
+half3 __ovld __cnfn convert_half3_rte(ushort3);
+half3 __ovld __cnfn convert_half3_rte(int3);
+half3 __ovld __cnfn convert_half3_rte(uint3);
+half3 __ovld __cnfn convert_half3_rte(long3);
+half3 __ovld __cnfn convert_half3_rte(ulong3);
+half3 __ovld __cnfn convert_half3_rte(float3);
+half3 __ovld __cnfn convert_half3_rte(half3);
+half3 __ovld __cnfn convert_half3_rtp(char3);
+half3 __ovld __cnfn convert_half3_rtp(uchar3);
+half3 __ovld __cnfn convert_half3_rtp(short3);
+half3 __ovld __cnfn convert_half3_rtp(ushort3);
+half3 __ovld __cnfn convert_half3_rtp(int3);
+half3 __ovld __cnfn convert_half3_rtp(uint3);
+half3 __ovld __cnfn convert_half3_rtp(long3);
+half3 __ovld __cnfn convert_half3_rtp(ulong3);
+half3 __ovld __cnfn convert_half3_rtp(float3);
+half3 __ovld __cnfn convert_half3_rtp(half3);
+half3 __ovld __cnfn convert_half3_rtn(char3);
+half3 __ovld __cnfn convert_half3_rtn(uchar3);
+half3 __ovld __cnfn convert_half3_rtn(short3);
+half3 __ovld __cnfn convert_half3_rtn(ushort3);
+half3 __ovld __cnfn convert_half3_rtn(int3);
+half3 __ovld __cnfn convert_half3_rtn(uint3);
+half3 __ovld __cnfn convert_half3_rtn(long3);
+half3 __ovld __cnfn convert_half3_rtn(ulong3);
+half3 __ovld __cnfn convert_half3_rtn(float3);
+half3 __ovld __cnfn convert_half3_rtn(half3);
+half3 __ovld __cnfn convert_half3_rtz(char3);
+half3 __ovld __cnfn convert_half3_rtz(uchar3);
+half3 __ovld __cnfn convert_half3_rtz(short3);
+half3 __ovld __cnfn convert_half3_rtz(ushort3);
+half3 __ovld __cnfn convert_half3_rtz(int3);
+half3 __ovld __cnfn convert_half3_rtz(uint3);
+half3 __ovld __cnfn convert_half3_rtz(long3);
+half3 __ovld __cnfn convert_half3_rtz(ulong3);
+half3 __ovld __cnfn convert_half3_rtz(float3);
+half3 __ovld __cnfn convert_half3_rtz(half3);
+half4 __ovld __cnfn convert_half4(char4);
+half4 __ovld __cnfn convert_half4(uchar4);
+half4 __ovld __cnfn convert_half4(short4);
+half4 __ovld __cnfn convert_half4(ushort4);
+half4 __ovld __cnfn convert_half4(int4);
+half4 __ovld __cnfn convert_half4(uint4);
+half4 __ovld __cnfn convert_half4(long4);
+half4 __ovld __cnfn convert_half4(ulong4);
+half4 __ovld __cnfn convert_half4(float4);
+half4 __ovld __cnfn convert_half4(half4);
+half4 __ovld __cnfn convert_half4_rte(char4);
+half4 __ovld __cnfn convert_half4_rte(uchar4);
+half4 __ovld __cnfn convert_half4_rte(short4);
+half4 __ovld __cnfn convert_half4_rte(ushort4);
+half4 __ovld __cnfn convert_half4_rte(int4);
+half4 __ovld __cnfn convert_half4_rte(uint4);
+half4 __ovld __cnfn convert_half4_rte(long4);
+half4 __ovld __cnfn convert_half4_rte(ulong4);
+half4 __ovld __cnfn convert_half4_rte(float4);
+half4 __ovld __cnfn convert_half4_rte(half4);
+half4 __ovld __cnfn convert_half4_rtp(char4);
+half4 __ovld __cnfn convert_half4_rtp(uchar4);
+half4 __ovld __cnfn convert_half4_rtp(short4);
+half4 __ovld __cnfn convert_half4_rtp(ushort4);
+half4 __ovld __cnfn convert_half4_rtp(int4);
+half4 __ovld __cnfn convert_half4_rtp(uint4);
+half4 __ovld __cnfn convert_half4_rtp(long4);
+half4 __ovld __cnfn convert_half4_rtp(ulong4);
+half4 __ovld __cnfn convert_half4_rtp(float4);
+half4 __ovld __cnfn convert_half4_rtp(half4);
+half4 __ovld __cnfn convert_half4_rtn(char4);
+half4 __ovld __cnfn convert_half4_rtn(uchar4);
+half4 __ovld __cnfn convert_half4_rtn(short4);
+half4 __ovld __cnfn convert_half4_rtn(ushort4);
+half4 __ovld __cnfn convert_half4_rtn(int4);
+half4 __ovld __cnfn convert_half4_rtn(uint4);
+half4 __ovld __cnfn convert_half4_rtn(long4);
+half4 __ovld __cnfn convert_half4_rtn(ulong4);
+half4 __ovld __cnfn convert_half4_rtn(float4);
+half4 __ovld __cnfn convert_half4_rtn(half4);
+half4 __ovld __cnfn convert_half4_rtz(char4);
+half4 __ovld __cnfn convert_half4_rtz(uchar4);
+half4 __ovld __cnfn convert_half4_rtz(short4);
+half4 __ovld __cnfn convert_half4_rtz(ushort4);
+half4 __ovld __cnfn convert_half4_rtz(int4);
+half4 __ovld __cnfn convert_half4_rtz(uint4);
+half4 __ovld __cnfn convert_half4_rtz(long4);
+half4 __ovld __cnfn convert_half4_rtz(ulong4);
+half4 __ovld __cnfn convert_half4_rtz(float4);
+half4 __ovld __cnfn convert_half4_rtz(half4);
+half8 __ovld __cnfn convert_half8(char8);
+half8 __ovld __cnfn convert_half8(uchar8);
+half8 __ovld __cnfn convert_half8(short8);
+half8 __ovld __cnfn convert_half8(ushort8);
+half8 __ovld __cnfn convert_half8(int8);
+half8 __ovld __cnfn convert_half8(uint8);
+half8 __ovld __cnfn convert_half8(long8);
+half8 __ovld __cnfn convert_half8(ulong8);
+half8 __ovld __cnfn convert_half8(float8);
+half8 __ovld __cnfn convert_half8(half8);
+half8 __ovld __cnfn convert_half8_rte(char8);
+half8 __ovld __cnfn convert_half8_rte(uchar8);
+half8 __ovld __cnfn convert_half8_rte(short8);
+half8 __ovld __cnfn convert_half8_rte(ushort8);
+half8 __ovld __cnfn convert_half8_rte(int8);
+half8 __ovld __cnfn convert_half8_rte(uint8);
+half8 __ovld __cnfn convert_half8_rte(long8);
+half8 __ovld __cnfn convert_half8_rte(ulong8);
+half8 __ovld __cnfn convert_half8_rte(float8);
+half8 __ovld __cnfn convert_half8_rte(half8);
+half8 __ovld __cnfn convert_half8_rtp(char8);
+half8 __ovld __cnfn convert_half8_rtp(uchar8);
+half8 __ovld __cnfn convert_half8_rtp(short8);
+half8 __ovld __cnfn convert_half8_rtp(ushort8);
+half8 __ovld __cnfn convert_half8_rtp(int8);
+half8 __ovld __cnfn convert_half8_rtp(uint8);
+half8 __ovld __cnfn convert_half8_rtp(long8);
+half8 __ovld __cnfn convert_half8_rtp(ulong8);
+half8 __ovld __cnfn convert_half8_rtp(float8);
+half8 __ovld __cnfn convert_half8_rtp(half8);
+half8 __ovld __cnfn convert_half8_rtn(char8);
+half8 __ovld __cnfn convert_half8_rtn(uchar8);
+half8 __ovld __cnfn convert_half8_rtn(short8);
+half8 __ovld __cnfn convert_half8_rtn(ushort8);
+half8 __ovld __cnfn convert_half8_rtn(int8);
+half8 __ovld __cnfn convert_half8_rtn(uint8);
+half8 __ovld __cnfn convert_half8_rtn(long8);
+half8 __ovld __cnfn convert_half8_rtn(ulong8);
+half8 __ovld __cnfn convert_half8_rtn(float8);
+half8 __ovld __cnfn convert_half8_rtn(half8);
+half8 __ovld __cnfn convert_half8_rtz(char8);
+half8 __ovld __cnfn convert_half8_rtz(uchar8);
+half8 __ovld __cnfn convert_half8_rtz(short8);
+half8 __ovld __cnfn convert_half8_rtz(ushort8);
+half8 __ovld __cnfn convert_half8_rtz(int8);
+half8 __ovld __cnfn convert_half8_rtz(uint8);
+half8 __ovld __cnfn convert_half8_rtz(long8);
+half8 __ovld __cnfn convert_half8_rtz(ulong8);
+half8 __ovld __cnfn convert_half8_rtz(float8);
+half8 __ovld __cnfn convert_half8_rtz(half8);
+half16 __ovld __cnfn convert_half16(char16);
+half16 __ovld __cnfn convert_half16(uchar16);
+half16 __ovld __cnfn convert_half16(short16);
+half16 __ovld __cnfn convert_half16(ushort16);
+half16 __ovld __cnfn convert_half16(int16);
+half16 __ovld __cnfn convert_half16(uint16);
+half16 __ovld __cnfn convert_half16(long16);
+half16 __ovld __cnfn convert_half16(ulong16);
+half16 __ovld __cnfn convert_half16(float16);
+half16 __ovld __cnfn convert_half16(half16);
+half16 __ovld __cnfn convert_half16_rte(char16);
+half16 __ovld __cnfn convert_half16_rte(uchar16);
+half16 __ovld __cnfn convert_half16_rte(short16);
+half16 __ovld __cnfn convert_half16_rte(ushort16);
+half16 __ovld __cnfn convert_half16_rte(int16);
+half16 __ovld __cnfn convert_half16_rte(uint16);
+half16 __ovld __cnfn convert_half16_rte(long16);
+half16 __ovld __cnfn convert_half16_rte(ulong16);
+half16 __ovld __cnfn convert_half16_rte(float16);
+half16 __ovld __cnfn convert_half16_rte(half16);
+half16 __ovld __cnfn convert_half16_rtp(char16);
+half16 __ovld __cnfn convert_half16_rtp(uchar16);
+half16 __ovld __cnfn convert_half16_rtp(short16);
+half16 __ovld __cnfn convert_half16_rtp(ushort16);
+half16 __ovld __cnfn convert_half16_rtp(int16);
+half16 __ovld __cnfn convert_half16_rtp(uint16);
+half16 __ovld __cnfn convert_half16_rtp(long16);
+half16 __ovld __cnfn convert_half16_rtp(ulong16);
+half16 __ovld __cnfn convert_half16_rtp(float16);
+half16 __ovld __cnfn convert_half16_rtp(half16);
+half16 __ovld __cnfn convert_half16_rtn(char16);
+half16 __ovld __cnfn convert_half16_rtn(uchar16);
+half16 __ovld __cnfn convert_half16_rtn(short16);
+half16 __ovld __cnfn convert_half16_rtn(ushort16);
+half16 __ovld __cnfn convert_half16_rtn(int16);
+half16 __ovld __cnfn convert_half16_rtn(uint16);
+half16 __ovld __cnfn convert_half16_rtn(long16);
+half16 __ovld __cnfn convert_half16_rtn(ulong16);
+half16 __ovld __cnfn convert_half16_rtn(float16);
+half16 __ovld __cnfn convert_half16_rtn(half16);
+half16 __ovld __cnfn convert_half16_rtz(char16);
+half16 __ovld __cnfn convert_half16_rtz(uchar16);
+half16 __ovld __cnfn convert_half16_rtz(short16);
+half16 __ovld __cnfn convert_half16_rtz(ushort16);
+half16 __ovld __cnfn convert_half16_rtz(int16);
+half16 __ovld __cnfn convert_half16_rtz(uint16);
+half16 __ovld __cnfn convert_half16_rtz(long16);
+half16 __ovld __cnfn convert_half16_rtz(ulong16);
+half16 __ovld __cnfn convert_half16_rtz(float16);
+half16 __ovld __cnfn convert_half16_rtz(half16);
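+
+// Example (illustrative): narrowing float data to half with an explicit
+// rounding mode; conversions between floating-point types default to _rte.
+//   float4 f = (float4)(0.1f, 1.0f, 2.5f, 65504.0f);
+//   half4  h = convert_half4_rtz(f);   // each lane rounded toward zero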
+
+// Convert half types to double types.
+#ifdef cl_khr_fp64
+double __ovld __cnfn convert_double(half);
+double __ovld __cnfn convert_double_rte(half);
+double __ovld __cnfn convert_double_rtp(half);
+double __ovld __cnfn convert_double_rtn(half);
+double __ovld __cnfn convert_double_rtz(half);
+double2 __ovld __cnfn convert_double2(half2);
+double2 __ovld __cnfn convert_double2_rte(half2);
+double2 __ovld __cnfn convert_double2_rtp(half2);
+double2 __ovld __cnfn convert_double2_rtn(half2);
+double2 __ovld __cnfn convert_double2_rtz(half2);
+double3 __ovld __cnfn convert_double3(half3);
+double3 __ovld __cnfn convert_double3_rte(half3);
+double3 __ovld __cnfn convert_double3_rtp(half3);
+double3 __ovld __cnfn convert_double3_rtn(half3);
+double3 __ovld __cnfn convert_double3_rtz(half3);
+double4 __ovld __cnfn convert_double4(half4);
+double4 __ovld __cnfn convert_double4_rte(half4);
+double4 __ovld __cnfn convert_double4_rtp(half4);
+double4 __ovld __cnfn convert_double4_rtn(half4);
+double4 __ovld __cnfn convert_double4_rtz(half4);
+double8 __ovld __cnfn convert_double8(half8);
+double8 __ovld __cnfn convert_double8_rte(half8);
+double8 __ovld __cnfn convert_double8_rtp(half8);
+double8 __ovld __cnfn convert_double8_rtn(half8);
+double8 __ovld __cnfn convert_double8_rtz(half8);
+double16 __ovld __cnfn convert_double16(half16);
+double16 __ovld __cnfn convert_double16_rte(half16);
+double16 __ovld __cnfn convert_double16_rtp(half16);
+double16 __ovld __cnfn convert_double16_rtn(half16);
+double16 __ovld __cnfn convert_double16_rtz(half16);
+
+// Convert double types to half types.
+half __ovld __cnfn convert_half(double);
+half __ovld __cnfn convert_half_rte(double);
+half __ovld __cnfn convert_half_rtp(double);
+half __ovld __cnfn convert_half_rtn(double);
+half __ovld __cnfn convert_half_rtz(double);
+half2 __ovld __cnfn convert_half2(double2);
+half2 __ovld __cnfn convert_half2_rte(double2);
+half2 __ovld __cnfn convert_half2_rtp(double2);
+half2 __ovld __cnfn convert_half2_rtn(double2);
+half2 __ovld __cnfn convert_half2_rtz(double2);
+half3 __ovld __cnfn convert_half3(double3);
+half3 __ovld __cnfn convert_half3_rte(double3);
+half3 __ovld __cnfn convert_half3_rtp(double3);
+half3 __ovld __cnfn convert_half3_rtn(double3);
+half3 __ovld __cnfn convert_half3_rtz(double3);
+half4 __ovld __cnfn convert_half4(double4);
+half4 __ovld __cnfn convert_half4_rte(double4);
+half4 __ovld __cnfn convert_half4_rtp(double4);
+half4 __ovld __cnfn convert_half4_rtn(double4);
+half4 __ovld __cnfn convert_half4_rtz(double4);
+half8 __ovld __cnfn convert_half8(double8);
+half8 __ovld __cnfn convert_half8_rte(double8);
+half8 __ovld __cnfn convert_half8_rtp(double8);
+half8 __ovld __cnfn convert_half8_rtn(double8);
+half8 __ovld __cnfn convert_half8_rtz(double8);
+half16 __ovld __cnfn convert_half16(double16);
+half16 __ovld __cnfn convert_half16_rte(double16);
+half16 __ovld __cnfn convert_half16_rtp(double16);
+half16 __ovld __cnfn convert_half16_rtn(double16);
+half16 __ovld __cnfn convert_half16_rtz(double16);
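+
+// Example (illustrative; needs both cl_khr_fp16 and cl_khr_fp64):
+//   double2 d = (double2)(3.14159, -1.0);
+//   half2   h = convert_half2_rtn(d);   // round toward negative infinity
+//   double2 r = convert_double2(h);     // widening back to double is exact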
+#endif // cl_khr_fp64
+
+#endif // cl_khr_fp16
+
+/**
+ * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators
+ * Reinterprets a data type as another data type of the same size
+ */
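+// Example (illustrative): as_<type> returns the operand's bit pattern
+// unchanged, so e.g. the IEEE-754 encoding of a float can be inspected
+// directly:
+//   uint bits = as_uint(1.0f);    // 0x3F800000
+//   uint sign = bits >> 31;       // sign bit, 0 here
+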
+char __ovld __cnfn as_char(char);
+char __ovld __cnfn as_char(uchar);
+
+char2 __ovld __cnfn as_char2(char2);
+char2 __ovld __cnfn as_char2(uchar2);
+char2 __ovld __cnfn as_char2(short);
+char2 __ovld __cnfn as_char2(ushort);
+
+char3 __ovld __cnfn as_char3(char3);
+char3 __ovld __cnfn as_char3(char4);
+char3 __ovld __cnfn as_char3(uchar3);
+char3 __ovld __cnfn as_char3(uchar4);
+char3 __ovld __cnfn as_char3(short2);
+char3 __ovld __cnfn as_char3(ushort2);
+char3 __ovld __cnfn as_char3(int);
+char3 __ovld __cnfn as_char3(uint);
+char3 __ovld __cnfn as_char3(float);
+
+char4 __ovld __cnfn as_char4(char3);
+char4 __ovld __cnfn as_char4(char4);
+char4 __ovld __cnfn as_char4(uchar3);
+char4 __ovld __cnfn as_char4(uchar4);
+char4 __ovld __cnfn as_char4(short2);
+char4 __ovld __cnfn as_char4(ushort2);
+char4 __ovld __cnfn as_char4(int);
+char4 __ovld __cnfn as_char4(uint);
+char4 __ovld __cnfn as_char4(float);
+
+char8 __ovld __cnfn as_char8(char8);
+char8 __ovld __cnfn as_char8(uchar8);
+char8 __ovld __cnfn as_char8(short3);
+char8 __ovld __cnfn as_char8(short4);
+char8 __ovld __cnfn as_char8(ushort3);
+char8 __ovld __cnfn as_char8(ushort4);
+char8 __ovld __cnfn as_char8(int2);
+char8 __ovld __cnfn as_char8(uint2);
+char8 __ovld __cnfn as_char8(long);
+char8 __ovld __cnfn as_char8(ulong);
+char8 __ovld __cnfn as_char8(float2);
+
+char16 __ovld __cnfn as_char16(char16);
+char16 __ovld __cnfn as_char16(uchar16);
+char16 __ovld __cnfn as_char16(short8);
+char16 __ovld __cnfn as_char16(ushort8);
+char16 __ovld __cnfn as_char16(int3);
+char16 __ovld __cnfn as_char16(int4);
+char16 __ovld __cnfn as_char16(uint3);
+char16 __ovld __cnfn as_char16(uint4);
+char16 __ovld __cnfn as_char16(long2);
+char16 __ovld __cnfn as_char16(ulong2);
+char16 __ovld __cnfn as_char16(float3);
+char16 __ovld __cnfn as_char16(float4);
+
+uchar __ovld __cnfn as_uchar(char);
+uchar __ovld __cnfn as_uchar(uchar);
+
+uchar2 __ovld __cnfn as_uchar2(char2);
+uchar2 __ovld __cnfn as_uchar2(uchar2);
+uchar2 __ovld __cnfn as_uchar2(short);
+uchar2 __ovld __cnfn as_uchar2(ushort);
+
+uchar3 __ovld __cnfn as_uchar3(char3);
+uchar3 __ovld __cnfn as_uchar3(char4);
+uchar3 __ovld __cnfn as_uchar3(uchar3);
+uchar3 __ovld __cnfn as_uchar3(uchar4);
+uchar3 __ovld __cnfn as_uchar3(short2);
+uchar3 __ovld __cnfn as_uchar3(ushort2);
+uchar3 __ovld __cnfn as_uchar3(int);
+uchar3 __ovld __cnfn as_uchar3(uint);
+uchar3 __ovld __cnfn as_uchar3(float);
+
+uchar4 __ovld __cnfn as_uchar4(char3);
+uchar4 __ovld __cnfn as_uchar4(char4);
+uchar4 __ovld __cnfn as_uchar4(uchar3);
+uchar4 __ovld __cnfn as_uchar4(uchar4);
+uchar4 __ovld __cnfn as_uchar4(short2);
+uchar4 __ovld __cnfn as_uchar4(ushort2);
+uchar4 __ovld __cnfn as_uchar4(int);
+uchar4 __ovld __cnfn as_uchar4(uint);
+uchar4 __ovld __cnfn as_uchar4(float);
+
+uchar8 __ovld __cnfn as_uchar8(char8);
+uchar8 __ovld __cnfn as_uchar8(uchar8);
+uchar8 __ovld __cnfn as_uchar8(short3);
+uchar8 __ovld __cnfn as_uchar8(short4);
+uchar8 __ovld __cnfn as_uchar8(ushort3);
+uchar8 __ovld __cnfn as_uchar8(ushort4);
+uchar8 __ovld __cnfn as_uchar8(int2);
+uchar8 __ovld __cnfn as_uchar8(uint2);
+uchar8 __ovld __cnfn as_uchar8(long);
+uchar8 __ovld __cnfn as_uchar8(ulong);
+uchar8 __ovld __cnfn as_uchar8(float2);
+
+uchar16 __ovld __cnfn as_uchar16(char16);
+uchar16 __ovld __cnfn as_uchar16(uchar16);
+uchar16 __ovld __cnfn as_uchar16(short8);
+uchar16 __ovld __cnfn as_uchar16(ushort8);
+uchar16 __ovld __cnfn as_uchar16(int3);
+uchar16 __ovld __cnfn as_uchar16(int4);
+uchar16 __ovld __cnfn as_uchar16(uint3);
+uchar16 __ovld __cnfn as_uchar16(uint4);
+uchar16 __ovld __cnfn as_uchar16(long2);
+uchar16 __ovld __cnfn as_uchar16(ulong2);
+uchar16 __ovld __cnfn as_uchar16(float3);
+uchar16 __ovld __cnfn as_uchar16(float4);
+
+short __ovld __cnfn as_short(char2);
+short __ovld __cnfn as_short(uchar2);
+short __ovld __cnfn as_short(short);
+short __ovld __cnfn as_short(ushort);
+
+short2 __ovld __cnfn as_short2(char3);
+short2 __ovld __cnfn as_short2(char4);
+short2 __ovld __cnfn as_short2(uchar3);
+short2 __ovld __cnfn as_short2(uchar4);
+short2 __ovld __cnfn as_short2(short2);
+short2 __ovld __cnfn as_short2(ushort2);
+short2 __ovld __cnfn as_short2(int);
+short2 __ovld __cnfn as_short2(uint);
+short2 __ovld __cnfn as_short2(float);
+
+short3 __ovld __cnfn as_short3(char8);
+short3 __ovld __cnfn as_short3(uchar8);
+short3 __ovld __cnfn as_short3(short3);
+short3 __ovld __cnfn as_short3(short4);
+short3 __ovld __cnfn as_short3(ushort3);
+short3 __ovld __cnfn as_short3(ushort4);
+short3 __ovld __cnfn as_short3(int2);
+short3 __ovld __cnfn as_short3(uint2);
+short3 __ovld __cnfn as_short3(long);
+short3 __ovld __cnfn as_short3(ulong);
+short3 __ovld __cnfn as_short3(float2);
+
+short4 __ovld __cnfn as_short4(char8);
+short4 __ovld __cnfn as_short4(uchar8);
+short4 __ovld __cnfn as_short4(short3);
+short4 __ovld __cnfn as_short4(short4);
+short4 __ovld __cnfn as_short4(ushort3);
+short4 __ovld __cnfn as_short4(ushort4);
+short4 __ovld __cnfn as_short4(int2);
+short4 __ovld __cnfn as_short4(uint2);
+short4 __ovld __cnfn as_short4(long);
+short4 __ovld __cnfn as_short4(ulong);
+short4 __ovld __cnfn as_short4(float2);
+
+short8 __ovld __cnfn as_short8(char16);
+short8 __ovld __cnfn as_short8(uchar16);
+short8 __ovld __cnfn as_short8(short8);
+short8 __ovld __cnfn as_short8(ushort8);
+short8 __ovld __cnfn as_short8(int3);
+short8 __ovld __cnfn as_short8(int4);
+short8 __ovld __cnfn as_short8(uint3);
+short8 __ovld __cnfn as_short8(uint4);
+short8 __ovld __cnfn as_short8(long2);
+short8 __ovld __cnfn as_short8(ulong2);
+short8 __ovld __cnfn as_short8(float3);
+short8 __ovld __cnfn as_short8(float4);
+
+short16 __ovld __cnfn as_short16(short16);
+short16 __ovld __cnfn as_short16(ushort16);
+short16 __ovld __cnfn as_short16(int8);
+short16 __ovld __cnfn as_short16(uint8);
+short16 __ovld __cnfn as_short16(long3);
+short16 __ovld __cnfn as_short16(long4);
+short16 __ovld __cnfn as_short16(ulong3);
+short16 __ovld __cnfn as_short16(ulong4);
+short16 __ovld __cnfn as_short16(float8);
+
+ushort __ovld __cnfn as_ushort(char2);
+ushort __ovld __cnfn as_ushort(uchar2);
+ushort __ovld __cnfn as_ushort(short);
+ushort __ovld __cnfn as_ushort(ushort);
+
+ushort2 __ovld __cnfn as_ushort2(char3);
+ushort2 __ovld __cnfn as_ushort2(char4);
+ushort2 __ovld __cnfn as_ushort2(uchar3);
+ushort2 __ovld __cnfn as_ushort2(uchar4);
+ushort2 __ovld __cnfn as_ushort2(short2);
+ushort2 __ovld __cnfn as_ushort2(ushort2);
+ushort2 __ovld __cnfn as_ushort2(int);
+ushort2 __ovld __cnfn as_ushort2(uint);
+ushort2 __ovld __cnfn as_ushort2(float);
+
+ushort3 __ovld __cnfn as_ushort3(char8);
+ushort3 __ovld __cnfn as_ushort3(uchar8);
+ushort3 __ovld __cnfn as_ushort3(short3);
+ushort3 __ovld __cnfn as_ushort3(short4);
+ushort3 __ovld __cnfn as_ushort3(ushort3);
+ushort3 __ovld __cnfn as_ushort3(ushort4);
+ushort3 __ovld __cnfn as_ushort3(int2);
+ushort3 __ovld __cnfn as_ushort3(uint2);
+ushort3 __ovld __cnfn as_ushort3(long);
+ushort3 __ovld __cnfn as_ushort3(ulong);
+ushort3 __ovld __cnfn as_ushort3(float2);
+
+ushort4 __ovld __cnfn as_ushort4(char8);
+ushort4 __ovld __cnfn as_ushort4(uchar8);
+ushort4 __ovld __cnfn as_ushort4(short3);
+ushort4 __ovld __cnfn as_ushort4(short4);
+ushort4 __ovld __cnfn as_ushort4(ushort3);
+ushort4 __ovld __cnfn as_ushort4(ushort4);
+ushort4 __ovld __cnfn as_ushort4(int2);
+ushort4 __ovld __cnfn as_ushort4(uint2);
+ushort4 __ovld __cnfn as_ushort4(long);
+ushort4 __ovld __cnfn as_ushort4(ulong);
+ushort4 __ovld __cnfn as_ushort4(float2);
+
+ushort8 __ovld __cnfn as_ushort8(char16);
+ushort8 __ovld __cnfn as_ushort8(uchar16);
+ushort8 __ovld __cnfn as_ushort8(short8);
+ushort8 __ovld __cnfn as_ushort8(ushort8);
+ushort8 __ovld __cnfn as_ushort8(int3);
+ushort8 __ovld __cnfn as_ushort8(int4);
+ushort8 __ovld __cnfn as_ushort8(uint3);
+ushort8 __ovld __cnfn as_ushort8(uint4);
+ushort8 __ovld __cnfn as_ushort8(long2);
+ushort8 __ovld __cnfn as_ushort8(ulong2);
+ushort8 __ovld __cnfn as_ushort8(float3);
+ushort8 __ovld __cnfn as_ushort8(float4);
+
+ushort16 __ovld __cnfn as_ushort16(short16);
+ushort16 __ovld __cnfn as_ushort16(ushort16);
+ushort16 __ovld __cnfn as_ushort16(int8);
+ushort16 __ovld __cnfn as_ushort16(uint8);
+ushort16 __ovld __cnfn as_ushort16(long3);
+ushort16 __ovld __cnfn as_ushort16(long4);
+ushort16 __ovld __cnfn as_ushort16(ulong3);
+ushort16 __ovld __cnfn as_ushort16(ulong4);
+ushort16 __ovld __cnfn as_ushort16(float8);
+
+int __ovld __cnfn as_int(char3);
+int __ovld __cnfn as_int(char4);
+int __ovld __cnfn as_int(uchar3);
+int __ovld __cnfn as_int(uchar4);
+int __ovld __cnfn as_int(short2);
+int __ovld __cnfn as_int(ushort2);
+int __ovld __cnfn as_int(int);
+int __ovld __cnfn as_int(uint);
+int __ovld __cnfn as_int(float);
+
+int2 __ovld __cnfn as_int2(char8);
+int2 __ovld __cnfn as_int2(uchar8);
+int2 __ovld __cnfn as_int2(short3);
+int2 __ovld __cnfn as_int2(short4);
+int2 __ovld __cnfn as_int2(ushort3);
+int2 __ovld __cnfn as_int2(ushort4);
+int2 __ovld __cnfn as_int2(int2);
+int2 __ovld __cnfn as_int2(uint2);
+int2 __ovld __cnfn as_int2(long);
+int2 __ovld __cnfn as_int2(ulong);
+int2 __ovld __cnfn as_int2(float2);
+
+int3 __ovld __cnfn as_int3(char16);
+int3 __ovld __cnfn as_int3(uchar16);
+int3 __ovld __cnfn as_int3(short8);
+int3 __ovld __cnfn as_int3(ushort8);
+int3 __ovld __cnfn as_int3(int3);
+int3 __ovld __cnfn as_int3(int4);
+int3 __ovld __cnfn as_int3(uint3);
+int3 __ovld __cnfn as_int3(uint4);
+int3 __ovld __cnfn as_int3(long2);
+int3 __ovld __cnfn as_int3(ulong2);
+int3 __ovld __cnfn as_int3(float3);
+int3 __ovld __cnfn as_int3(float4);
+
+int4 __ovld __cnfn as_int4(char16);
+int4 __ovld __cnfn as_int4(uchar16);
+int4 __ovld __cnfn as_int4(short8);
+int4 __ovld __cnfn as_int4(ushort8);
+int4 __ovld __cnfn as_int4(int3);
+int4 __ovld __cnfn as_int4(int4);
+int4 __ovld __cnfn as_int4(uint3);
+int4 __ovld __cnfn as_int4(uint4);
+int4 __ovld __cnfn as_int4(long2);
+int4 __ovld __cnfn as_int4(ulong2);
+int4 __ovld __cnfn as_int4(float3);
+int4 __ovld __cnfn as_int4(float4);
+
+int8 __ovld __cnfn as_int8(short16);
+int8 __ovld __cnfn as_int8(ushort16);
+int8 __ovld __cnfn as_int8(int8);
+int8 __ovld __cnfn as_int8(uint8);
+int8 __ovld __cnfn as_int8(long3);
+int8 __ovld __cnfn as_int8(long4);
+int8 __ovld __cnfn as_int8(ulong3);
+int8 __ovld __cnfn as_int8(ulong4);
+int8 __ovld __cnfn as_int8(float8);
+
+int16 __ovld __cnfn as_int16(int16);
+int16 __ovld __cnfn as_int16(uint16);
+int16 __ovld __cnfn as_int16(long8);
+int16 __ovld __cnfn as_int16(ulong8);
+int16 __ovld __cnfn as_int16(float16);
+
+uint __ovld __cnfn as_uint(char3);
+uint __ovld __cnfn as_uint(char4);
+uint __ovld __cnfn as_uint(uchar3);
+uint __ovld __cnfn as_uint(uchar4);
+uint __ovld __cnfn as_uint(short2);
+uint __ovld __cnfn as_uint(ushort2);
+uint __ovld __cnfn as_uint(int);
+uint __ovld __cnfn as_uint(uint);
+uint __ovld __cnfn as_uint(float);
+
+uint2 __ovld __cnfn as_uint2(char8);
+uint2 __ovld __cnfn as_uint2(uchar8);
+uint2 __ovld __cnfn as_uint2(short3);
+uint2 __ovld __cnfn as_uint2(short4);
+uint2 __ovld __cnfn as_uint2(ushort3);
+uint2 __ovld __cnfn as_uint2(ushort4);
+uint2 __ovld __cnfn as_uint2(int2);
+uint2 __ovld __cnfn as_uint2(uint2);
+uint2 __ovld __cnfn as_uint2(long);
+uint2 __ovld __cnfn as_uint2(ulong);
+uint2 __ovld __cnfn as_uint2(float2);
+
+uint3 __ovld __cnfn as_uint3(char16);
+uint3 __ovld __cnfn as_uint3(uchar16);
+uint3 __ovld __cnfn as_uint3(short8);
+uint3 __ovld __cnfn as_uint3(ushort8);
+uint3 __ovld __cnfn as_uint3(int3);
+uint3 __ovld __cnfn as_uint3(int4);
+uint3 __ovld __cnfn as_uint3(uint3);
+uint3 __ovld __cnfn as_uint3(uint4);
+uint3 __ovld __cnfn as_uint3(long2);
+uint3 __ovld __cnfn as_uint3(ulong2);
+uint3 __ovld __cnfn as_uint3(float3);
+uint3 __ovld __cnfn as_uint3(float4);
+
+uint4 __ovld __cnfn as_uint4(char16);
+uint4 __ovld __cnfn as_uint4(uchar16);
+uint4 __ovld __cnfn as_uint4(short8);
+uint4 __ovld __cnfn as_uint4(ushort8);
+uint4 __ovld __cnfn as_uint4(int3);
+uint4 __ovld __cnfn as_uint4(int4);
+uint4 __ovld __cnfn as_uint4(uint3);
+uint4 __ovld __cnfn as_uint4(uint4);
+uint4 __ovld __cnfn as_uint4(long2);
+uint4 __ovld __cnfn as_uint4(ulong2);
+uint4 __ovld __cnfn as_uint4(float3);
+uint4 __ovld __cnfn as_uint4(float4);
+
+uint8 __ovld __cnfn as_uint8(short16);
+uint8 __ovld __cnfn as_uint8(ushort16);
+uint8 __ovld __cnfn as_uint8(int8);
+uint8 __ovld __cnfn as_uint8(uint8);
+uint8 __ovld __cnfn as_uint8(long3);
+uint8 __ovld __cnfn as_uint8(long4);
+uint8 __ovld __cnfn as_uint8(ulong3);
+uint8 __ovld __cnfn as_uint8(ulong4);
+uint8 __ovld __cnfn as_uint8(float8);
+
+uint16 __ovld __cnfn as_uint16(int16);
+uint16 __ovld __cnfn as_uint16(uint16);
+uint16 __ovld __cnfn as_uint16(long8);
+uint16 __ovld __cnfn as_uint16(ulong8);
+uint16 __ovld __cnfn as_uint16(float16);
+
+long __ovld __cnfn as_long(char8);
+long __ovld __cnfn as_long(uchar8);
+long __ovld __cnfn as_long(short3);
+long __ovld __cnfn as_long(short4);
+long __ovld __cnfn as_long(ushort3);
+long __ovld __cnfn as_long(ushort4);
+long __ovld __cnfn as_long(int2);
+long __ovld __cnfn as_long(uint2);
+long __ovld __cnfn as_long(long);
+long __ovld __cnfn as_long(ulong);
+long __ovld __cnfn as_long(float2);
+
+long2 __ovld __cnfn as_long2(char16);
+long2 __ovld __cnfn as_long2(uchar16);
+long2 __ovld __cnfn as_long2(short8);
+long2 __ovld __cnfn as_long2(ushort8);
+long2 __ovld __cnfn as_long2(int3);
+long2 __ovld __cnfn as_long2(int4);
+long2 __ovld __cnfn as_long2(uint3);
+long2 __ovld __cnfn as_long2(uint4);
+long2 __ovld __cnfn as_long2(long2);
+long2 __ovld __cnfn as_long2(ulong2);
+long2 __ovld __cnfn as_long2(float3);
+long2 __ovld __cnfn as_long2(float4);
+
+long3 __ovld __cnfn as_long3(short16);
+long3 __ovld __cnfn as_long3(ushort16);
+long3 __ovld __cnfn as_long3(int8);
+long3 __ovld __cnfn as_long3(uint8);
+long3 __ovld __cnfn as_long3(long3);
+long3 __ovld __cnfn as_long3(long4);
+long3 __ovld __cnfn as_long3(ulong3);
+long3 __ovld __cnfn as_long3(ulong4);
+long3 __ovld __cnfn as_long3(float8);
+
+long4 __ovld __cnfn as_long4(short16);
+long4 __ovld __cnfn as_long4(ushort16);
+long4 __ovld __cnfn as_long4(int8);
+long4 __ovld __cnfn as_long4(uint8);
+long4 __ovld __cnfn as_long4(long3);
+long4 __ovld __cnfn as_long4(long4);
+long4 __ovld __cnfn as_long4(ulong3);
+long4 __ovld __cnfn as_long4(ulong4);
+long4 __ovld __cnfn as_long4(float8);
+
+long8 __ovld __cnfn as_long8(int16);
+long8 __ovld __cnfn as_long8(uint16);
+long8 __ovld __cnfn as_long8(long8);
+long8 __ovld __cnfn as_long8(ulong8);
+long8 __ovld __cnfn as_long8(float16);
+
+long16 __ovld __cnfn as_long16(long16);
+long16 __ovld __cnfn as_long16(ulong16);
+
+ulong __ovld __cnfn as_ulong(char8);
+ulong __ovld __cnfn as_ulong(uchar8);
+ulong __ovld __cnfn as_ulong(short3);
+ulong __ovld __cnfn as_ulong(short4);
+ulong __ovld __cnfn as_ulong(ushort3);
+ulong __ovld __cnfn as_ulong(ushort4);
+ulong __ovld __cnfn as_ulong(int2);
+ulong __ovld __cnfn as_ulong(uint2);
+ulong __ovld __cnfn as_ulong(long);
+ulong __ovld __cnfn as_ulong(ulong);
+ulong __ovld __cnfn as_ulong(float2);
+
+ulong2 __ovld __cnfn as_ulong2(char16);
+ulong2 __ovld __cnfn as_ulong2(uchar16);
+ulong2 __ovld __cnfn as_ulong2(short8);
+ulong2 __ovld __cnfn as_ulong2(ushort8);
+ulong2 __ovld __cnfn as_ulong2(int3);
+ulong2 __ovld __cnfn as_ulong2(int4);
+ulong2 __ovld __cnfn as_ulong2(uint3);
+ulong2 __ovld __cnfn as_ulong2(uint4);
+ulong2 __ovld __cnfn as_ulong2(long2);
+ulong2 __ovld __cnfn as_ulong2(ulong2);
+ulong2 __ovld __cnfn as_ulong2(float3);
+ulong2 __ovld __cnfn as_ulong2(float4);
+
+ulong3 __ovld __cnfn as_ulong3(short16);
+ulong3 __ovld __cnfn as_ulong3(ushort16);
+ulong3 __ovld __cnfn as_ulong3(int8);
+ulong3 __ovld __cnfn as_ulong3(uint8);
+ulong3 __ovld __cnfn as_ulong3(long3);
+ulong3 __ovld __cnfn as_ulong3(long4);
+ulong3 __ovld __cnfn as_ulong3(ulong3);
+ulong3 __ovld __cnfn as_ulong3(ulong4);
+ulong3 __ovld __cnfn as_ulong3(float8);
+
+ulong4 __ovld __cnfn as_ulong4(short16);
+ulong4 __ovld __cnfn as_ulong4(ushort16);
+ulong4 __ovld __cnfn as_ulong4(int8);
+ulong4 __ovld __cnfn as_ulong4(uint8);
+ulong4 __ovld __cnfn as_ulong4(long3);
+ulong4 __ovld __cnfn as_ulong4(long4);
+ulong4 __ovld __cnfn as_ulong4(ulong3);
+ulong4 __ovld __cnfn as_ulong4(ulong4);
+ulong4 __ovld __cnfn as_ulong4(float8);
+
+ulong8 __ovld __cnfn as_ulong8(int16);
+ulong8 __ovld __cnfn as_ulong8(uint16);
+ulong8 __ovld __cnfn as_ulong8(long8);
+ulong8 __ovld __cnfn as_ulong8(ulong8);
+ulong8 __ovld __cnfn as_ulong8(float16);
+
+ulong16 __ovld __cnfn as_ulong16(long16);
+ulong16 __ovld __cnfn as_ulong16(ulong16);
+
+float __ovld __cnfn as_float(char3);
+float __ovld __cnfn as_float(char4);
+float __ovld __cnfn as_float(uchar3);
+float __ovld __cnfn as_float(uchar4);
+float __ovld __cnfn as_float(short2);
+float __ovld __cnfn as_float(ushort2);
+float __ovld __cnfn as_float(int);
+float __ovld __cnfn as_float(uint);
+float __ovld __cnfn as_float(float);
+
+float2 __ovld __cnfn as_float2(char8);
+float2 __ovld __cnfn as_float2(uchar8);
+float2 __ovld __cnfn as_float2(short3);
+float2 __ovld __cnfn as_float2(short4);
+float2 __ovld __cnfn as_float2(ushort3);
+float2 __ovld __cnfn as_float2(ushort4);
+float2 __ovld __cnfn as_float2(int2);
+float2 __ovld __cnfn as_float2(uint2);
+float2 __ovld __cnfn as_float2(long);
+float2 __ovld __cnfn as_float2(ulong);
+float2 __ovld __cnfn as_float2(float2);
+
+float3 __ovld __cnfn as_float3(char16);
+float3 __ovld __cnfn as_float3(uchar16);
+float3 __ovld __cnfn as_float3(short8);
+float3 __ovld __cnfn as_float3(ushort8);
+float3 __ovld __cnfn as_float3(int3);
+float3 __ovld __cnfn as_float3(int4);
+float3 __ovld __cnfn as_float3(uint3);
+float3 __ovld __cnfn as_float3(uint4);
+float3 __ovld __cnfn as_float3(long2);
+float3 __ovld __cnfn as_float3(ulong2);
+float3 __ovld __cnfn as_float3(float3);
+float3 __ovld __cnfn as_float3(float4);
+
+float4 __ovld __cnfn as_float4(char16);
+float4 __ovld __cnfn as_float4(uchar16);
+float4 __ovld __cnfn as_float4(short8);
+float4 __ovld __cnfn as_float4(ushort8);
+float4 __ovld __cnfn as_float4(int3);
+float4 __ovld __cnfn as_float4(int4);
+float4 __ovld __cnfn as_float4(uint3);
+float4 __ovld __cnfn as_float4(uint4);
+float4 __ovld __cnfn as_float4(long2);
+float4 __ovld __cnfn as_float4(ulong2);
+float4 __ovld __cnfn as_float4(float3);
+float4 __ovld __cnfn as_float4(float4);
+
+float8 __ovld __cnfn as_float8(short16);
+float8 __ovld __cnfn as_float8(ushort16);
+float8 __ovld __cnfn as_float8(int8);
+float8 __ovld __cnfn as_float8(uint8);
+float8 __ovld __cnfn as_float8(long3);
+float8 __ovld __cnfn as_float8(long4);
+float8 __ovld __cnfn as_float8(ulong3);
+float8 __ovld __cnfn as_float8(ulong4);
+float8 __ovld __cnfn as_float8(float8);
+
+float16 __ovld __cnfn as_float16(int16);
+float16 __ovld __cnfn as_float16(uint16);
+float16 __ovld __cnfn as_float16(long8);
+float16 __ovld __cnfn as_float16(ulong8);
+float16 __ovld __cnfn as_float16(float16);
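+
+// Note (illustrative): 3-component vectors occupy the same storage as the
+// corresponding 4-component vectors, which is why reinterpretations such as
+// as_int4(float3) and as_float3(int4) are declared above; the value of the
+// extra (w) component after such a cast should not be relied on.
+//   float3 v = (float3)(1.0f, 2.0f, 3.0f);
+//   int4   i = as_int4(v);   // i.xyz hold the bit patterns of v.xyz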
+
+#ifdef cl_khr_fp64
+char8 __ovld __cnfn as_char8(double);
+char16 __ovld __cnfn as_char16(double2);
+uchar8 __ovld __cnfn as_uchar8(double);
+uchar16 __ovld __cnfn as_uchar16(double2);
+short3 __ovld __cnfn as_short3(double);
+short4 __ovld __cnfn as_short4(double);
+short8 __ovld __cnfn as_short8(double2);
+short16 __ovld __cnfn as_short16(double3);
+short16 __ovld __cnfn as_short16(double4);
+ushort3 __ovld __cnfn as_ushort3(double);
+ushort4 __ovld __cnfn as_ushort4(double);
+ushort8 __ovld __cnfn as_ushort8(double2);
+ushort16 __ovld __cnfn as_ushort16(double3);
+ushort16 __ovld __cnfn as_ushort16(double4);
+int2 __ovld __cnfn as_int2(double);
+int3 __ovld __cnfn as_int3(double2);
+int4 __ovld __cnfn as_int4(double2);
+int8 __ovld __cnfn as_int8(double3);
+int8 __ovld __cnfn as_int8(double4);
+int16 __ovld __cnfn as_int16(double8);
+uint2 __ovld __cnfn as_uint2(double);
+uint3 __ovld __cnfn as_uint3(double2);
+uint4 __ovld __cnfn as_uint4(double2);
+uint8 __ovld __cnfn as_uint8(double3);
+uint8 __ovld __cnfn as_uint8(double4);
+uint16 __ovld __cnfn as_uint16(double8);
+long __ovld __cnfn as_long(double);
+long2 __ovld __cnfn as_long2(double2);
+long3 __ovld __cnfn as_long3(double3);
+long3 __ovld __cnfn as_long3(double4);
+long4 __ovld __cnfn as_long4(double3);
+long4 __ovld __cnfn as_long4(double4);
+long8 __ovld __cnfn as_long8(double8);
+long16 __ovld __cnfn as_long16(double16);
+ulong __ovld __cnfn as_ulong(double);
+ulong2 __ovld __cnfn as_ulong2(double2);
+ulong3 __ovld __cnfn as_ulong3(double3);
+ulong3 __ovld __cnfn as_ulong3(double4);
+ulong4 __ovld __cnfn as_ulong4(double3);
+ulong4 __ovld __cnfn as_ulong4(double4);
+ulong8 __ovld __cnfn as_ulong8(double8);
+ulong16 __ovld __cnfn as_ulong16(double16);
+float2 __ovld __cnfn as_float2(double);
+float3 __ovld __cnfn as_float3(double2);
+float4 __ovld __cnfn as_float4(double2);
+float8 __ovld __cnfn as_float8(double3);
+float8 __ovld __cnfn as_float8(double4);
+float16 __ovld __cnfn as_float16(double8);
+double __ovld __cnfn as_double(char8);
+double __ovld __cnfn as_double(uchar8);
+double __ovld __cnfn as_double(short3);
+double __ovld __cnfn as_double(short4);
+double __ovld __cnfn as_double(ushort3);
+double __ovld __cnfn as_double(ushort4);
+double __ovld __cnfn as_double(int2);
+double __ovld __cnfn as_double(uint2);
+double __ovld __cnfn as_double(long);
+double __ovld __cnfn as_double(ulong);
+double __ovld __cnfn as_double(float2);
+double __ovld __cnfn as_double(double);
+double2 __ovld __cnfn as_double2(char16);
+double2 __ovld __cnfn as_double2(uchar16);
+double2 __ovld __cnfn as_double2(short8);
+double2 __ovld __cnfn as_double2(ushort8);
+double2 __ovld __cnfn as_double2(int3);
+double2 __ovld __cnfn as_double2(int4);
+double2 __ovld __cnfn as_double2(uint3);
+double2 __ovld __cnfn as_double2(uint4);
+double2 __ovld __cnfn as_double2(long2);
+double2 __ovld __cnfn as_double2(ulong2);
+double2 __ovld __cnfn as_double2(float3);
+double2 __ovld __cnfn as_double2(float4);
+double2 __ovld __cnfn as_double2(double2);
+double3 __ovld __cnfn as_double3(short16);
+double3 __ovld __cnfn as_double3(ushort16);
+double3 __ovld __cnfn as_double3(int8);
+double3 __ovld __cnfn as_double3(uint8);
+double3 __ovld __cnfn as_double3(long3);
+double3 __ovld __cnfn as_double3(long4);
+double3 __ovld __cnfn as_double3(ulong3);
+double3 __ovld __cnfn as_double3(ulong4);
+double3 __ovld __cnfn as_double3(float8);
+double3 __ovld __cnfn as_double3(double3);
+double3 __ovld __cnfn as_double3(double4);
+double4 __ovld __cnfn as_double4(short16);
+double4 __ovld __cnfn as_double4(ushort16);
+double4 __ovld __cnfn as_double4(int8);
+double4 __ovld __cnfn as_double4(uint8);
+double4 __ovld __cnfn as_double4(long3);
+double4 __ovld __cnfn as_double4(long4);
+double4 __ovld __cnfn as_double4(ulong3);
+double4 __ovld __cnfn as_double4(ulong4);
+double4 __ovld __cnfn as_double4(float8);
+double4 __ovld __cnfn as_double4(double3);
+double4 __ovld __cnfn as_double4(double4);
+double8 __ovld __cnfn as_double8(int16);
+double8 __ovld __cnfn as_double8(uint16);
+double8 __ovld __cnfn as_double8(long8);
+double8 __ovld __cnfn as_double8(ulong8);
+double8 __ovld __cnfn as_double8(float16);
+double8 __ovld __cnfn as_double8(double8);
+double16 __ovld __cnfn as_double16(long16);
+double16 __ovld __cnfn as_double16(ulong16);
+double16 __ovld __cnfn as_double16(double16);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+char2 __ovld __cnfn as_char2(half);
+char3 __ovld __cnfn as_char3(half2);
+char4 __ovld __cnfn as_char4(half2);
+char8 __ovld __cnfn as_char8(half3);
+char8 __ovld __cnfn as_char8(half4);
+char16 __ovld __cnfn as_char16(half8);
+uchar2 __ovld __cnfn as_uchar2(half);
+uchar3 __ovld __cnfn as_uchar3(half2);
+uchar4 __ovld __cnfn as_uchar4(half2);
+uchar8 __ovld __cnfn as_uchar8(half3);
+uchar8 __ovld __cnfn as_uchar8(half4);
+uchar16 __ovld __cnfn as_uchar16(half8);
+short __ovld __cnfn as_short(half);
+short2 __ovld __cnfn as_short2(half2);
+short3 __ovld __cnfn as_short3(half3);
+short3 __ovld __cnfn as_short3(half4);
+short4 __ovld __cnfn as_short4(half3);
+short4 __ovld __cnfn as_short4(half4);
+short8 __ovld __cnfn as_short8(half8);
+short16 __ovld __cnfn as_short16(half16);
+ushort __ovld __cnfn as_ushort(half);
+ushort2 __ovld __cnfn as_ushort2(half2);
+ushort3 __ovld __cnfn as_ushort3(half3);
+ushort3 __ovld __cnfn as_ushort3(half4);
+ushort4 __ovld __cnfn as_ushort4(half3);
+ushort4 __ovld __cnfn as_ushort4(half4);
+ushort8 __ovld __cnfn as_ushort8(half8);
+ushort16 __ovld __cnfn as_ushort16(half16);
+int __ovld __cnfn as_int(half2);
+int2 __ovld __cnfn as_int2(half3);
+int2 __ovld __cnfn as_int2(half4);
+int3 __ovld __cnfn as_int3(half8);
+int4 __ovld __cnfn as_int4(half8);
+int8 __ovld __cnfn as_int8(half16);
+uint __ovld __cnfn as_uint(half2);
+uint2 __ovld __cnfn as_uint2(half3);
+uint2 __ovld __cnfn as_uint2(half4);
+uint3 __ovld __cnfn as_uint3(half8);
+uint4 __ovld __cnfn as_uint4(half8);
+uint8 __ovld __cnfn as_uint8(half16);
+long __ovld __cnfn as_long(half3);
+long __ovld __cnfn as_long(half4);
+long2 __ovld __cnfn as_long2(half8);
+long3 __ovld __cnfn as_long3(half16);
+long4 __ovld __cnfn as_long4(half16);
+ulong __ovld __cnfn as_ulong(half3);
+ulong __ovld __cnfn as_ulong(half4);
+ulong2 __ovld __cnfn as_ulong2(half8);
+ulong3 __ovld __cnfn as_ulong3(half16);
+ulong4 __ovld __cnfn as_ulong4(half16);
+half __ovld __cnfn as_half(char2);
+half __ovld __cnfn as_half(uchar2);
+half __ovld __cnfn as_half(short);
+half __ovld __cnfn as_half(ushort);
+half __ovld __cnfn as_half(half);
+half2 __ovld __cnfn as_half2(char3);
+half2 __ovld __cnfn as_half2(char4);
+half2 __ovld __cnfn as_half2(uchar3);
+half2 __ovld __cnfn as_half2(uchar4);
+half2 __ovld __cnfn as_half2(short2);
+half2 __ovld __cnfn as_half2(ushort2);
+half2 __ovld __cnfn as_half2(int);
+half2 __ovld __cnfn as_half2(uint);
+half2 __ovld __cnfn as_half2(half2);
+half2 __ovld __cnfn as_half2(float);
+half3 __ovld __cnfn as_half3(char8);
+half3 __ovld __cnfn as_half3(uchar8);
+half3 __ovld __cnfn as_half3(short3);
+half3 __ovld __cnfn as_half3(short4);
+half3 __ovld __cnfn as_half3(ushort3);
+half3 __ovld __cnfn as_half3(ushort4);
+half3 __ovld __cnfn as_half3(int2);
+half3 __ovld __cnfn as_half3(uint2);
+half3 __ovld __cnfn as_half3(long);
+half3 __ovld __cnfn as_half3(ulong);
+half3 __ovld __cnfn as_half3(half3);
+half3 __ovld __cnfn as_half3(half4);
+half3 __ovld __cnfn as_half3(float2);
+half4 __ovld __cnfn as_half4(char8);
+half4 __ovld __cnfn as_half4(uchar8);
+half4 __ovld __cnfn as_half4(short3);
+half4 __ovld __cnfn as_half4(short4);
+half4 __ovld __cnfn as_half4(ushort3);
+half4 __ovld __cnfn as_half4(ushort4);
+half4 __ovld __cnfn as_half4(int2);
+half4 __ovld __cnfn as_half4(uint2);
+half4 __ovld __cnfn as_half4(long);
+half4 __ovld __cnfn as_half4(ulong);
+half4 __ovld __cnfn as_half4(half3);
+half4 __ovld __cnfn as_half4(half4);
+half4 __ovld __cnfn as_half4(float2);
+half8 __ovld __cnfn as_half8(char16);
+half8 __ovld __cnfn as_half8(uchar16);
+half8 __ovld __cnfn as_half8(short8);
+half8 __ovld __cnfn as_half8(ushort8);
+half8 __ovld __cnfn as_half8(int3);
+half8 __ovld __cnfn as_half8(int4);
+half8 __ovld __cnfn as_half8(uint3);
+half8 __ovld __cnfn as_half8(uint4);
+half8 __ovld __cnfn as_half8(long2);
+half8 __ovld __cnfn as_half8(ulong2);
+half8 __ovld __cnfn as_half8(half8);
+half8 __ovld __cnfn as_half8(float3);
+half8 __ovld __cnfn as_half8(float4);
+half16 __ovld __cnfn as_half16(short16);
+half16 __ovld __cnfn as_half16(ushort16);
+half16 __ovld __cnfn as_half16(int8);
+half16 __ovld __cnfn as_half16(uint8);
+half16 __ovld __cnfn as_half16(long3);
+half16 __ovld __cnfn as_half16(long4);
+half16 __ovld __cnfn as_half16(ulong3);
+half16 __ovld __cnfn as_half16(ulong4);
+half16 __ovld __cnfn as_half16(half16);
+half16 __ovld __cnfn as_half16(float8);
+float __ovld __cnfn as_float(half2);
+float2 __ovld __cnfn as_float2(half3);
+float2 __ovld __cnfn as_float2(half4);
+float3 __ovld __cnfn as_float3(half8);
+float4 __ovld __cnfn as_float4(half8);
+float8 __ovld __cnfn as_float8(half16);
+
+#ifdef cl_khr_fp64
+half3 __ovld __cnfn as_half3(double);
+half4 __ovld __cnfn as_half4(double);
+half8 __ovld __cnfn as_half8(double2);
+half16 __ovld __cnfn as_half16(double3);
+half16 __ovld __cnfn as_half16(double4);
+double __ovld __cnfn as_double(half3);
+double __ovld __cnfn as_double(half4);
+double2 __ovld __cnfn as_double2(half8);
+double3 __ovld __cnfn as_double3(half16);
+double4 __ovld __cnfn as_double4(half16);
+#endif //cl_khr_fp64
+#endif //cl_khr_fp16
+
+// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers
+
+#define __kernel_exec(X, typen) __kernel \
+ __attribute__((work_group_size_hint(X, 1, 1))) \
+ __attribute__((vec_type_hint(typen)))
+
+#define kernel_exec(X, typen) __kernel \
+ __attribute__((work_group_size_hint(X, 1, 1))) \
+ __attribute__((vec_type_hint(typen)))
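+
+// A minimal usage sketch (illustrative only, not part of this header): a
+// kernel declared through the __kernel_exec convenience macro above, hinting
+// a work-group size of 64 in dimension 0 and a float4 vector type to the
+// compiler. The kernel name and body are hypothetical.
+//
+//   __kernel_exec(64, float4)
+//   void scale(__global float4 *buf, float s) {
+//     buf[get_global_id(0)] *= s;
+//   }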
+
+// OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions
+
+/**
+ * Returns the number of dimensions in use. This is the
+ * value given to the work_dim argument specified in
+ * clEnqueueNDRangeKernel.
+ * For clEnqueueTask, this returns 1.
+ */
+uint __ovld __cnfn get_work_dim(void);
+
+/**
+ * Returns the number of global work-items specified for
+ * the dimension identified by dimindx. This value is given by
+ * the global_work_size argument to
+ * clEnqueueNDRangeKernel. Valid values of dimindx
+ * are 0 to get_work_dim() - 1. For other values of
+ * dimindx, get_global_size() returns 1.
+ * For clEnqueueTask, this always returns 1.
+ */
+size_t __ovld __cnfn get_global_size(uint dimindx);
+
+/**
+ * Returns the unique global work-item ID value for
+ * the dimension identified by dimindx. The global work-item
+ * ID identifies a work-item within the range of global
+ * work-items specified to execute the kernel. Valid
+ * values of dimindx are 0 to get_work_dim() - 1. For
+ * other values of dimindx, get_global_id() returns 0.
+ * For clEnqueueTask, this returns 0.
+ */
+size_t __ovld __cnfn get_global_id(uint dimindx);
+
+/**
+ * Returns the number of local work-items specified in
+ * the dimension identified by dimindx. This value is given by
+ * the local_work_size argument to
+ * clEnqueueNDRangeKernel if local_work_size is not
+ * NULL; otherwise the OpenCL implementation chooses
+ * an appropriate local_work_size value which is returned
+ * by this function. Valid values of dimindx are 0 to
+ * get_work_dim() - 1. For other values of dimindx,
+ * get_local_size() returns 1.
+ * For clEnqueueTask, this always returns 1.
+ */
+size_t __ovld __cnfn get_local_size(uint dimindx);
+
+/**
+ * Returns the unique local work-item ID, i.e. the ID of a
+ * work-item within a specific work-group, for the dimension
+ * identified by dimindx. Valid values of dimindx are 0 to
+ * get_work_dim() - 1. For other values of dimindx,
+ * get_local_id() returns 0.
+ * For clEnqueueTask, this returns 0.
+ */
+size_t __ovld __cnfn get_local_id(uint dimindx);
+
+/**
+ * Returns the number of work-groups that will execute a
+ * kernel for the dimension identified by dimindx.
+ * Valid values of dimindx are 0 to get_work_dim() - 1.
+ * For other values of dimindx, get_num_groups() returns 1.
+ * For clEnqueueTask, this always returns 1.
+ */
+size_t __ovld __cnfn get_num_groups(uint dimindx);
+
+/**
+ * get_group_id returns the work-group ID, a number in the
+ * range 0 to get_num_groups(dimindx) - 1.
+ * Valid values of dimindx are 0 to get_work_dim() - 1.
+ * For other values, get_group_id() returns 0.
+ * For clEnqueueTask, this returns 0.
+ */
+size_t __ovld __cnfn get_group_id(uint dimindx);
+
+/**
+ * get_global_offset returns the offset values specified in
+ * the global_work_offset argument to
+ * clEnqueueNDRangeKernel.
+ * Valid values of dimindx are 0 to get_work_dim() - 1.
+ * For other values, get_global_offset() returns 0.
+ * For clEnqueueTask, this returns 0.
+ */
+size_t __ovld __cnfn get_global_offset(uint dimindx);
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+size_t __ovld get_enqueued_local_size(uint dimindx);
+size_t __ovld get_global_linear_id(void);
+size_t __ovld get_local_linear_id(void);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
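+
+// A minimal usage sketch (illustrative only, not part of this header):
+// combining the work-item functions above to compute a row-major 2-D
+// global index. Kernel and buffer names are hypothetical.
+//
+//   __kernel void copy2d(__global const float *src, __global float *dst) {
+//     size_t x = get_global_id(0);            // column within the NDRange
+//     size_t y = get_global_id(1);            // row within the NDRange
+//     size_t i = y * get_global_size(0) + x;  // flattened element index
+//     dst[i] = src[i];
+//   }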
+
+// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions
+
+/**
+ * Arc cosine function.
+ */
+float __ovld __cnfn acos(float);
+float2 __ovld __cnfn acos(float2);
+float3 __ovld __cnfn acos(float3);
+float4 __ovld __cnfn acos(float4);
+float8 __ovld __cnfn acos(float8);
+float16 __ovld __cnfn acos(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn acos(double);
+double2 __ovld __cnfn acos(double2);
+double3 __ovld __cnfn acos(double3);
+double4 __ovld __cnfn acos(double4);
+double8 __ovld __cnfn acos(double8);
+double16 __ovld __cnfn acos(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn acos(half);
+half2 __ovld __cnfn acos(half2);
+half3 __ovld __cnfn acos(half3);
+half4 __ovld __cnfn acos(half4);
+half8 __ovld __cnfn acos(half8);
+half16 __ovld __cnfn acos(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Inverse hyperbolic cosine.
+ */
+float __ovld __cnfn acosh(float);
+float2 __ovld __cnfn acosh(float2);
+float3 __ovld __cnfn acosh(float3);
+float4 __ovld __cnfn acosh(float4);
+float8 __ovld __cnfn acosh(float8);
+float16 __ovld __cnfn acosh(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn acosh(double);
+double2 __ovld __cnfn acosh(double2);
+double3 __ovld __cnfn acosh(double3);
+double4 __ovld __cnfn acosh(double4);
+double8 __ovld __cnfn acosh(double8);
+double16 __ovld __cnfn acosh(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn acosh(half);
+half2 __ovld __cnfn acosh(half2);
+half3 __ovld __cnfn acosh(half3);
+half4 __ovld __cnfn acosh(half4);
+half8 __ovld __cnfn acosh(half8);
+half16 __ovld __cnfn acosh(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute acos (x) / PI.
+ */
+float __ovld __cnfn acospi(float x);
+float2 __ovld __cnfn acospi(float2 x);
+float3 __ovld __cnfn acospi(float3 x);
+float4 __ovld __cnfn acospi(float4 x);
+float8 __ovld __cnfn acospi(float8 x);
+float16 __ovld __cnfn acospi(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn acospi(double x);
+double2 __ovld __cnfn acospi(double2 x);
+double3 __ovld __cnfn acospi(double3 x);
+double4 __ovld __cnfn acospi(double4 x);
+double8 __ovld __cnfn acospi(double8 x);
+double16 __ovld __cnfn acospi(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn acospi(half x);
+half2 __ovld __cnfn acospi(half2 x);
+half3 __ovld __cnfn acospi(half3 x);
+half4 __ovld __cnfn acospi(half4 x);
+half8 __ovld __cnfn acospi(half8 x);
+half16 __ovld __cnfn acospi(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Arc sine function.
+ */
+float __ovld __cnfn asin(float);
+float2 __ovld __cnfn asin(float2);
+float3 __ovld __cnfn asin(float3);
+float4 __ovld __cnfn asin(float4);
+float8 __ovld __cnfn asin(float8);
+float16 __ovld __cnfn asin(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn asin(double);
+double2 __ovld __cnfn asin(double2);
+double3 __ovld __cnfn asin(double3);
+double4 __ovld __cnfn asin(double4);
+double8 __ovld __cnfn asin(double8);
+double16 __ovld __cnfn asin(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn asin(half);
+half2 __ovld __cnfn asin(half2);
+half3 __ovld __cnfn asin(half3);
+half4 __ovld __cnfn asin(half4);
+half8 __ovld __cnfn asin(half8);
+half16 __ovld __cnfn asin(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Inverse hyperbolic sine.
+ */
+float __ovld __cnfn asinh(float);
+float2 __ovld __cnfn asinh(float2);
+float3 __ovld __cnfn asinh(float3);
+float4 __ovld __cnfn asinh(float4);
+float8 __ovld __cnfn asinh(float8);
+float16 __ovld __cnfn asinh(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn asinh(double);
+double2 __ovld __cnfn asinh(double2);
+double3 __ovld __cnfn asinh(double3);
+double4 __ovld __cnfn asinh(double4);
+double8 __ovld __cnfn asinh(double8);
+double16 __ovld __cnfn asinh(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn asinh(half);
+half2 __ovld __cnfn asinh(half2);
+half3 __ovld __cnfn asinh(half3);
+half4 __ovld __cnfn asinh(half4);
+half8 __ovld __cnfn asinh(half8);
+half16 __ovld __cnfn asinh(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute asin (x) / PI.
+ */
+float __ovld __cnfn asinpi(float x);
+float2 __ovld __cnfn asinpi(float2 x);
+float3 __ovld __cnfn asinpi(float3 x);
+float4 __ovld __cnfn asinpi(float4 x);
+float8 __ovld __cnfn asinpi(float8 x);
+float16 __ovld __cnfn asinpi(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn asinpi(double x);
+double2 __ovld __cnfn asinpi(double2 x);
+double3 __ovld __cnfn asinpi(double3 x);
+double4 __ovld __cnfn asinpi(double4 x);
+double8 __ovld __cnfn asinpi(double8 x);
+double16 __ovld __cnfn asinpi(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn asinpi(half x);
+half2 __ovld __cnfn asinpi(half2 x);
+half3 __ovld __cnfn asinpi(half3 x);
+half4 __ovld __cnfn asinpi(half4 x);
+half8 __ovld __cnfn asinpi(half8 x);
+half16 __ovld __cnfn asinpi(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Arc tangent function.
+ */
+float __ovld __cnfn atan(float y_over_x);
+float2 __ovld __cnfn atan(float2 y_over_x);
+float3 __ovld __cnfn atan(float3 y_over_x);
+float4 __ovld __cnfn atan(float4 y_over_x);
+float8 __ovld __cnfn atan(float8 y_over_x);
+float16 __ovld __cnfn atan(float16 y_over_x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn atan(double y_over_x);
+double2 __ovld __cnfn atan(double2 y_over_x);
+double3 __ovld __cnfn atan(double3 y_over_x);
+double4 __ovld __cnfn atan(double4 y_over_x);
+double8 __ovld __cnfn atan(double8 y_over_x);
+double16 __ovld __cnfn atan(double16 y_over_x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn atan(half y_over_x);
+half2 __ovld __cnfn atan(half2 y_over_x);
+half3 __ovld __cnfn atan(half3 y_over_x);
+half4 __ovld __cnfn atan(half4 y_over_x);
+half8 __ovld __cnfn atan(half8 y_over_x);
+half16 __ovld __cnfn atan(half16 y_over_x);
+#endif //cl_khr_fp16
+
+/**
+ * Arc tangent of y / x.
+ */
+float __ovld __cnfn atan2(float y, float x);
+float2 __ovld __cnfn atan2(float2 y, float2 x);
+float3 __ovld __cnfn atan2(float3 y, float3 x);
+float4 __ovld __cnfn atan2(float4 y, float4 x);
+float8 __ovld __cnfn atan2(float8 y, float8 x);
+float16 __ovld __cnfn atan2(float16 y, float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn atan2(double y, double x);
+double2 __ovld __cnfn atan2(double2 y, double2 x);
+double3 __ovld __cnfn atan2(double3 y, double3 x);
+double4 __ovld __cnfn atan2(double4 y, double4 x);
+double8 __ovld __cnfn atan2(double8 y, double8 x);
+double16 __ovld __cnfn atan2(double16 y, double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn atan2(half y, half x);
+half2 __ovld __cnfn atan2(half2 y, half2 x);
+half3 __ovld __cnfn atan2(half3 y, half3 x);
+half4 __ovld __cnfn atan2(half4 y, half4 x);
+half8 __ovld __cnfn atan2(half8 y, half8 x);
+half16 __ovld __cnfn atan2(half16 y, half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Hyperbolic arc tangent.
+ */
+float __ovld __cnfn atanh(float);
+float2 __ovld __cnfn atanh(float2);
+float3 __ovld __cnfn atanh(float3);
+float4 __ovld __cnfn atanh(float4);
+float8 __ovld __cnfn atanh(float8);
+float16 __ovld __cnfn atanh(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn atanh(double);
+double2 __ovld __cnfn atanh(double2);
+double3 __ovld __cnfn atanh(double3);
+double4 __ovld __cnfn atanh(double4);
+double8 __ovld __cnfn atanh(double8);
+double16 __ovld __cnfn atanh(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn atanh(half);
+half2 __ovld __cnfn atanh(half2);
+half3 __ovld __cnfn atanh(half3);
+half4 __ovld __cnfn atanh(half4);
+half8 __ovld __cnfn atanh(half8);
+half16 __ovld __cnfn atanh(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute atan (x) / PI.
+ */
+float __ovld __cnfn atanpi(float x);
+float2 __ovld __cnfn atanpi(float2 x);
+float3 __ovld __cnfn atanpi(float3 x);
+float4 __ovld __cnfn atanpi(float4 x);
+float8 __ovld __cnfn atanpi(float8 x);
+float16 __ovld __cnfn atanpi(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn atanpi(double x);
+double2 __ovld __cnfn atanpi(double2 x);
+double3 __ovld __cnfn atanpi(double3 x);
+double4 __ovld __cnfn atanpi(double4 x);
+double8 __ovld __cnfn atanpi(double8 x);
+double16 __ovld __cnfn atanpi(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn atanpi(half x);
+half2 __ovld __cnfn atanpi(half2 x);
+half3 __ovld __cnfn atanpi(half3 x);
+half4 __ovld __cnfn atanpi(half4 x);
+half8 __ovld __cnfn atanpi(half8 x);
+half16 __ovld __cnfn atanpi(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Compute atan2 (y, x) / PI.
+ */
+float __ovld __cnfn atan2pi(float y, float x);
+float2 __ovld __cnfn atan2pi(float2 y, float2 x);
+float3 __ovld __cnfn atan2pi(float3 y, float3 x);
+float4 __ovld __cnfn atan2pi(float4 y, float4 x);
+float8 __ovld __cnfn atan2pi(float8 y, float8 x);
+float16 __ovld __cnfn atan2pi(float16 y, float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn atan2pi(double y, double x);
+double2 __ovld __cnfn atan2pi(double2 y, double2 x);
+double3 __ovld __cnfn atan2pi(double3 y, double3 x);
+double4 __ovld __cnfn atan2pi(double4 y, double4 x);
+double8 __ovld __cnfn atan2pi(double8 y, double8 x);
+double16 __ovld __cnfn atan2pi(double16 y, double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn atan2pi(half y, half x);
+half2 __ovld __cnfn atan2pi(half2 y, half2 x);
+half3 __ovld __cnfn atan2pi(half3 y, half3 x);
+half4 __ovld __cnfn atan2pi(half4 y, half4 x);
+half8 __ovld __cnfn atan2pi(half8 y, half8 x);
+half16 __ovld __cnfn atan2pi(half16 y, half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Compute cube root.
+ */
+float __ovld __cnfn cbrt(float);
+float2 __ovld __cnfn cbrt(float2);
+float3 __ovld __cnfn cbrt(float3);
+float4 __ovld __cnfn cbrt(float4);
+float8 __ovld __cnfn cbrt(float8);
+float16 __ovld __cnfn cbrt(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn cbrt(double);
+double2 __ovld __cnfn cbrt(double2);
+double3 __ovld __cnfn cbrt(double3);
+double4 __ovld __cnfn cbrt(double4);
+double8 __ovld __cnfn cbrt(double8);
+double16 __ovld __cnfn cbrt(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn cbrt(half);
+half2 __ovld __cnfn cbrt(half2);
+half3 __ovld __cnfn cbrt(half3);
+half4 __ovld __cnfn cbrt(half4);
+half8 __ovld __cnfn cbrt(half8);
+half16 __ovld __cnfn cbrt(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Round to integral value using the round to positive
+ * infinity rounding mode.
+ */
+float __ovld __cnfn ceil(float);
+float2 __ovld __cnfn ceil(float2);
+float3 __ovld __cnfn ceil(float3);
+float4 __ovld __cnfn ceil(float4);
+float8 __ovld __cnfn ceil(float8);
+float16 __ovld __cnfn ceil(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn ceil(double);
+double2 __ovld __cnfn ceil(double2);
+double3 __ovld __cnfn ceil(double3);
+double4 __ovld __cnfn ceil(double4);
+double8 __ovld __cnfn ceil(double8);
+double16 __ovld __cnfn ceil(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn ceil(half);
+half2 __ovld __cnfn ceil(half2);
+half3 __ovld __cnfn ceil(half3);
+half4 __ovld __cnfn ceil(half4);
+half8 __ovld __cnfn ceil(half8);
+half16 __ovld __cnfn ceil(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Returns x with its sign changed to match the sign of y.
+ */
+float __ovld __cnfn copysign(float x, float y);
+float2 __ovld __cnfn copysign(float2 x, float2 y);
+float3 __ovld __cnfn copysign(float3 x, float3 y);
+float4 __ovld __cnfn copysign(float4 x, float4 y);
+float8 __ovld __cnfn copysign(float8 x, float8 y);
+float16 __ovld __cnfn copysign(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn copysign(double x, double y);
+double2 __ovld __cnfn copysign(double2 x, double2 y);
+double3 __ovld __cnfn copysign(double3 x, double3 y);
+double4 __ovld __cnfn copysign(double4 x, double4 y);
+double8 __ovld __cnfn copysign(double8 x, double8 y);
+double16 __ovld __cnfn copysign(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn copysign(half x, half y);
+half2 __ovld __cnfn copysign(half2 x, half2 y);
+half3 __ovld __cnfn copysign(half3 x, half3 y);
+half4 __ovld __cnfn copysign(half4 x, half4 y);
+half8 __ovld __cnfn copysign(half8 x, half8 y);
+half16 __ovld __cnfn copysign(half16 x, half16 y);
+#endif //cl_khr_fp16
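+
+// Illustrative example (not part of this header): copysign keeps the
+// magnitude of x and takes only the sign of y, including the sign of zero:
+//
+//   float a = copysign(3.0f, -0.0f);  // -3.0f
+//   float b = copysign(-3.0f, 1.0f);  //  3.0f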
+
+/**
+ * Compute cosine.
+ */
+float __ovld __cnfn cos(float);
+float2 __ovld __cnfn cos(float2);
+float3 __ovld __cnfn cos(float3);
+float4 __ovld __cnfn cos(float4);
+float8 __ovld __cnfn cos(float8);
+float16 __ovld __cnfn cos(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn cos(double);
+double2 __ovld __cnfn cos(double2);
+double3 __ovld __cnfn cos(double3);
+double4 __ovld __cnfn cos(double4);
+double8 __ovld __cnfn cos(double8);
+double16 __ovld __cnfn cos(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn cos(half);
+half2 __ovld __cnfn cos(half2);
+half3 __ovld __cnfn cos(half3);
+half4 __ovld __cnfn cos(half4);
+half8 __ovld __cnfn cos(half8);
+half16 __ovld __cnfn cos(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute hyperbolic cosine.
+ */
+float __ovld __cnfn cosh(float);
+float2 __ovld __cnfn cosh(float2);
+float3 __ovld __cnfn cosh(float3);
+float4 __ovld __cnfn cosh(float4);
+float8 __ovld __cnfn cosh(float8);
+float16 __ovld __cnfn cosh(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn cosh(double);
+double2 __ovld __cnfn cosh(double2);
+double3 __ovld __cnfn cosh(double3);
+double4 __ovld __cnfn cosh(double4);
+double8 __ovld __cnfn cosh(double8);
+double16 __ovld __cnfn cosh(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn cosh(half);
+half2 __ovld __cnfn cosh(half2);
+half3 __ovld __cnfn cosh(half3);
+half4 __ovld __cnfn cosh(half4);
+half8 __ovld __cnfn cosh(half8);
+half16 __ovld __cnfn cosh(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute cos (PI * x).
+ */
+float __ovld __cnfn cospi(float x);
+float2 __ovld __cnfn cospi(float2 x);
+float3 __ovld __cnfn cospi(float3 x);
+float4 __ovld __cnfn cospi(float4 x);
+float8 __ovld __cnfn cospi(float8 x);
+float16 __ovld __cnfn cospi(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn cospi(double x);
+double2 __ovld __cnfn cospi(double2 x);
+double3 __ovld __cnfn cospi(double3 x);
+double4 __ovld __cnfn cospi(double4 x);
+double8 __ovld __cnfn cospi(double8 x);
+double16 __ovld __cnfn cospi(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn cospi(half x);
+half2 __ovld __cnfn cospi(half2 x);
+half3 __ovld __cnfn cospi(half3 x);
+half4 __ovld __cnfn cospi(half4 x);
+half8 __ovld __cnfn cospi(half8 x);
+half16 __ovld __cnfn cospi(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Complementary error function.
+ */
+float __ovld __cnfn erfc(float);
+float2 __ovld __cnfn erfc(float2);
+float3 __ovld __cnfn erfc(float3);
+float4 __ovld __cnfn erfc(float4);
+float8 __ovld __cnfn erfc(float8);
+float16 __ovld __cnfn erfc(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn erfc(double);
+double2 __ovld __cnfn erfc(double2);
+double3 __ovld __cnfn erfc(double3);
+double4 __ovld __cnfn erfc(double4);
+double8 __ovld __cnfn erfc(double8);
+double16 __ovld __cnfn erfc(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn erfc(half);
+half2 __ovld __cnfn erfc(half2);
+half3 __ovld __cnfn erfc(half3);
+half4 __ovld __cnfn erfc(half4);
+half8 __ovld __cnfn erfc(half8);
+half16 __ovld __cnfn erfc(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Error function encountered in integrating the
+ * normal distribution.
+ */
+float __ovld __cnfn erf(float);
+float2 __ovld __cnfn erf(float2);
+float3 __ovld __cnfn erf(float3);
+float4 __ovld __cnfn erf(float4);
+float8 __ovld __cnfn erf(float8);
+float16 __ovld __cnfn erf(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn erf(double);
+double2 __ovld __cnfn erf(double2);
+double3 __ovld __cnfn erf(double3);
+double4 __ovld __cnfn erf(double4);
+double8 __ovld __cnfn erf(double8);
+double16 __ovld __cnfn erf(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn erf(half);
+half2 __ovld __cnfn erf(half2);
+half3 __ovld __cnfn erf(half3);
+half4 __ovld __cnfn erf(half4);
+half8 __ovld __cnfn erf(half8);
+half16 __ovld __cnfn erf(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute the base e exponential function of x.
+ */
+float __ovld __cnfn exp(float x);
+float2 __ovld __cnfn exp(float2 x);
+float3 __ovld __cnfn exp(float3 x);
+float4 __ovld __cnfn exp(float4 x);
+float8 __ovld __cnfn exp(float8 x);
+float16 __ovld __cnfn exp(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn exp(double x);
+double2 __ovld __cnfn exp(double2 x);
+double3 __ovld __cnfn exp(double3 x);
+double4 __ovld __cnfn exp(double4 x);
+double8 __ovld __cnfn exp(double8 x);
+double16 __ovld __cnfn exp(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn exp(half x);
+half2 __ovld __cnfn exp(half2 x);
+half3 __ovld __cnfn exp(half3 x);
+half4 __ovld __cnfn exp(half4 x);
+half8 __ovld __cnfn exp(half8 x);
+half16 __ovld __cnfn exp(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Exponential base 2 function.
+ */
+float __ovld __cnfn exp2(float);
+float2 __ovld __cnfn exp2(float2);
+float3 __ovld __cnfn exp2(float3);
+float4 __ovld __cnfn exp2(float4);
+float8 __ovld __cnfn exp2(float8);
+float16 __ovld __cnfn exp2(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn exp2(double);
+double2 __ovld __cnfn exp2(double2);
+double3 __ovld __cnfn exp2(double3);
+double4 __ovld __cnfn exp2(double4);
+double8 __ovld __cnfn exp2(double8);
+double16 __ovld __cnfn exp2(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn exp2(half);
+half2 __ovld __cnfn exp2(half2);
+half3 __ovld __cnfn exp2(half3);
+half4 __ovld __cnfn exp2(half4);
+half8 __ovld __cnfn exp2(half8);
+half16 __ovld __cnfn exp2(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Exponential base 10 function.
+ */
+float __ovld __cnfn exp10(float);
+float2 __ovld __cnfn exp10(float2);
+float3 __ovld __cnfn exp10(float3);
+float4 __ovld __cnfn exp10(float4);
+float8 __ovld __cnfn exp10(float8);
+float16 __ovld __cnfn exp10(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn exp10(double);
+double2 __ovld __cnfn exp10(double2);
+double3 __ovld __cnfn exp10(double3);
+double4 __ovld __cnfn exp10(double4);
+double8 __ovld __cnfn exp10(double8);
+double16 __ovld __cnfn exp10(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn exp10(half);
+half2 __ovld __cnfn exp10(half2);
+half3 __ovld __cnfn exp10(half3);
+half4 __ovld __cnfn exp10(half4);
+half8 __ovld __cnfn exp10(half8);
+half16 __ovld __cnfn exp10(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute e^x - 1.0.
+ */
+float __ovld __cnfn expm1(float x);
+float2 __ovld __cnfn expm1(float2 x);
+float3 __ovld __cnfn expm1(float3 x);
+float4 __ovld __cnfn expm1(float4 x);
+float8 __ovld __cnfn expm1(float8 x);
+float16 __ovld __cnfn expm1(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn expm1(double x);
+double2 __ovld __cnfn expm1(double2 x);
+double3 __ovld __cnfn expm1(double3 x);
+double4 __ovld __cnfn expm1(double4 x);
+double8 __ovld __cnfn expm1(double8 x);
+double16 __ovld __cnfn expm1(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn expm1(half x);
+half2 __ovld __cnfn expm1(half2 x);
+half3 __ovld __cnfn expm1(half3 x);
+half4 __ovld __cnfn expm1(half4 x);
+half8 __ovld __cnfn expm1(half8 x);
+half16 __ovld __cnfn expm1(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Compute absolute value of a floating-point number.
+ */
+float __ovld __cnfn fabs(float);
+float2 __ovld __cnfn fabs(float2);
+float3 __ovld __cnfn fabs(float3);
+float4 __ovld __cnfn fabs(float4);
+float8 __ovld __cnfn fabs(float8);
+float16 __ovld __cnfn fabs(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn fabs(double);
+double2 __ovld __cnfn fabs(double2);
+double3 __ovld __cnfn fabs(double3);
+double4 __ovld __cnfn fabs(double4);
+double8 __ovld __cnfn fabs(double8);
+double16 __ovld __cnfn fabs(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn fabs(half);
+half2 __ovld __cnfn fabs(half2);
+half3 __ovld __cnfn fabs(half3);
+half4 __ovld __cnfn fabs(half4);
+half8 __ovld __cnfn fabs(half8);
+half16 __ovld __cnfn fabs(half16);
+#endif //cl_khr_fp16
+
+/**
+ * x - y if x > y, +0 if x is less than or equal to y.
+ */
+float __ovld __cnfn fdim(float x, float y);
+float2 __ovld __cnfn fdim(float2 x, float2 y);
+float3 __ovld __cnfn fdim(float3 x, float3 y);
+float4 __ovld __cnfn fdim(float4 x, float4 y);
+float8 __ovld __cnfn fdim(float8 x, float8 y);
+float16 __ovld __cnfn fdim(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn fdim(double x, double y);
+double2 __ovld __cnfn fdim(double2 x, double2 y);
+double3 __ovld __cnfn fdim(double3 x, double3 y);
+double4 __ovld __cnfn fdim(double4 x, double4 y);
+double8 __ovld __cnfn fdim(double8 x, double8 y);
+double16 __ovld __cnfn fdim(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn fdim(half x, half y);
+half2 __ovld __cnfn fdim(half2 x, half2 y);
+half3 __ovld __cnfn fdim(half3 x, half3 y);
+half4 __ovld __cnfn fdim(half4 x, half4 y);
+half8 __ovld __cnfn fdim(half8 x, half8 y);
+half16 __ovld __cnfn fdim(half16 x, half16 y);
+#endif //cl_khr_fp16
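+
+// Illustrative example (not part of this header): fdim is the positive
+// difference, clamping results that would be negative to +0:
+//
+//   float a = fdim(5.0f, 3.0f);  // 2.0f
+//   float b = fdim(3.0f, 5.0f);  // +0.0f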
+
+/**
+ * Round to integral value using the round to negative
+ * infinity rounding mode.
+ */
+float __ovld __cnfn floor(float);
+float2 __ovld __cnfn floor(float2);
+float3 __ovld __cnfn floor(float3);
+float4 __ovld __cnfn floor(float4);
+float8 __ovld __cnfn floor(float8);
+float16 __ovld __cnfn floor(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn floor(double);
+double2 __ovld __cnfn floor(double2);
+double3 __ovld __cnfn floor(double3);
+double4 __ovld __cnfn floor(double4);
+double8 __ovld __cnfn floor(double8);
+double16 __ovld __cnfn floor(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn floor(half);
+half2 __ovld __cnfn floor(half2);
+half3 __ovld __cnfn floor(half3);
+half4 __ovld __cnfn floor(half4);
+half8 __ovld __cnfn floor(half8);
+half16 __ovld __cnfn floor(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the correctly rounded floating-point
+ * representation of the sum of c with the infinitely
+ * precise product of a and b. Rounding of
+ * intermediate products shall not occur. Edge case
+ * behavior is per the IEEE 754-2008 standard.
+ */
+float __ovld __cnfn fma(float a, float b, float c);
+float2 __ovld __cnfn fma(float2 a, float2 b, float2 c);
+float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);
+float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);
+float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);
+float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);
+#ifdef cl_khr_fp64
+double __ovld __cnfn fma(double a, double b, double c);
+double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);
+double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);
+double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);
+double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);
+double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn fma(half a, half b, half c);
+half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);
+half3 __ovld __cnfn fma(half3 a, half3 b, half3 c);
+half4 __ovld __cnfn fma(half4 a, half4 b, half4 c);
+half8 __ovld __cnfn fma(half8 a, half8 b, half8 c);
+half16 __ovld __cnfn fma(half16 a, half16 b, half16 c);
+#endif //cl_khr_fp16
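+
+// Illustrative note (not part of this header): fma rounds once, after the
+// exact product, so it can differ from the same expression evaluated with
+// a separate multiply and add:
+//
+//   float r = fma(a, b, c);  // round(a * b + c): a single rounding step
+//   float s = a * b + c;     // may round a * b first, then round the sum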
+
+/**
+ * Returns y if x < y, otherwise it returns x. If one
+ * argument is a NaN, fmax() returns the other
+ * argument. If both arguments are NaNs, fmax()
+ * returns a NaN.
+ */
+float __ovld __cnfn fmax(float x, float y);
+float2 __ovld __cnfn fmax(float2 x, float2 y);
+float3 __ovld __cnfn fmax(float3 x, float3 y);
+float4 __ovld __cnfn fmax(float4 x, float4 y);
+float8 __ovld __cnfn fmax(float8 x, float8 y);
+float16 __ovld __cnfn fmax(float16 x, float16 y);
+float2 __ovld __cnfn fmax(float2 x, float y);
+float3 __ovld __cnfn fmax(float3 x, float y);
+float4 __ovld __cnfn fmax(float4 x, float y);
+float8 __ovld __cnfn fmax(float8 x, float y);
+float16 __ovld __cnfn fmax(float16 x, float y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn fmax(double x, double y);
+double2 __ovld __cnfn fmax(double2 x, double2 y);
+double3 __ovld __cnfn fmax(double3 x, double3 y);
+double4 __ovld __cnfn fmax(double4 x, double4 y);
+double8 __ovld __cnfn fmax(double8 x, double8 y);
+double16 __ovld __cnfn fmax(double16 x, double16 y);
+double2 __ovld __cnfn fmax(double2 x, double y);
+double3 __ovld __cnfn fmax(double3 x, double y);
+double4 __ovld __cnfn fmax(double4 x, double y);
+double8 __ovld __cnfn fmax(double8 x, double y);
+double16 __ovld __cnfn fmax(double16 x, double y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn fmax(half x, half y);
+half2 __ovld __cnfn fmax(half2 x, half2 y);
+half3 __ovld __cnfn fmax(half3 x, half3 y);
+half4 __ovld __cnfn fmax(half4 x, half4 y);
+half8 __ovld __cnfn fmax(half8 x, half8 y);
+half16 __ovld __cnfn fmax(half16 x, half16 y);
+half2 __ovld __cnfn fmax(half2 x, half y);
+half3 __ovld __cnfn fmax(half3 x, half y);
+half4 __ovld __cnfn fmax(half4 x, half y);
+half8 __ovld __cnfn fmax(half8 x, half y);
+half16 __ovld __cnfn fmax(half16 x, half y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns y if y < x, otherwise it returns x. If one
+ * argument is a NaN, fmin() returns the other
+ * argument. If both arguments are NaNs, fmin()
+ * returns a NaN.
+ */
+float __ovld __cnfn fmin(float x, float y);
+float2 __ovld __cnfn fmin(float2 x, float2 y);
+float3 __ovld __cnfn fmin(float3 x, float3 y);
+float4 __ovld __cnfn fmin(float4 x, float4 y);
+float8 __ovld __cnfn fmin(float8 x, float8 y);
+float16 __ovld __cnfn fmin(float16 x, float16 y);
+float2 __ovld __cnfn fmin(float2 x, float y);
+float3 __ovld __cnfn fmin(float3 x, float y);
+float4 __ovld __cnfn fmin(float4 x, float y);
+float8 __ovld __cnfn fmin(float8 x, float y);
+float16 __ovld __cnfn fmin(float16 x, float y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn fmin(double x, double y);
+double2 __ovld __cnfn fmin(double2 x, double2 y);
+double3 __ovld __cnfn fmin(double3 x, double3 y);
+double4 __ovld __cnfn fmin(double4 x, double4 y);
+double8 __ovld __cnfn fmin(double8 x, double8 y);
+double16 __ovld __cnfn fmin(double16 x, double16 y);
+double2 __ovld __cnfn fmin(double2 x, double y);
+double3 __ovld __cnfn fmin(double3 x, double y);
+double4 __ovld __cnfn fmin(double4 x, double y);
+double8 __ovld __cnfn fmin(double8 x, double y);
+double16 __ovld __cnfn fmin(double16 x, double y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn fmin(half x, half y);
+half2 __ovld __cnfn fmin(half2 x, half2 y);
+half3 __ovld __cnfn fmin(half3 x, half3 y);
+half4 __ovld __cnfn fmin(half4 x, half4 y);
+half8 __ovld __cnfn fmin(half8 x, half8 y);
+half16 __ovld __cnfn fmin(half16 x, half16 y);
+half2 __ovld __cnfn fmin(half2 x, half y);
+half3 __ovld __cnfn fmin(half3 x, half y);
+half4 __ovld __cnfn fmin(half4 x, half y);
+half8 __ovld __cnfn fmin(half8 x, half y);
+half16 __ovld __cnfn fmin(half16 x, half y);
+#endif //cl_khr_fp16
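+
+// Illustrative note (not part of this header): fmax and fmin above suppress
+// a single quiet NaN instead of propagating it:
+//
+//   float a = fmax(NAN, 1.0f);  // 1.0f: the non-NaN operand is returned
+//   float b = fmin(NAN, NAN);   // NaN: both operands are NaN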
+
+/**
+ * Modulus. Returns x - y * trunc (x/y).
+ */
+float __ovld __cnfn fmod(float x, float y);
+float2 __ovld __cnfn fmod(float2 x, float2 y);
+float3 __ovld __cnfn fmod(float3 x, float3 y);
+float4 __ovld __cnfn fmod(float4 x, float4 y);
+float8 __ovld __cnfn fmod(float8 x, float8 y);
+float16 __ovld __cnfn fmod(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn fmod(double x, double y);
+double2 __ovld __cnfn fmod(double2 x, double2 y);
+double3 __ovld __cnfn fmod(double3 x, double3 y);
+double4 __ovld __cnfn fmod(double4 x, double4 y);
+double8 __ovld __cnfn fmod(double8 x, double8 y);
+double16 __ovld __cnfn fmod(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn fmod(half x, half y);
+half2 __ovld __cnfn fmod(half2 x, half2 y);
+half3 __ovld __cnfn fmod(half3 x, half3 y);
+half4 __ovld __cnfn fmod(half4 x, half4 y);
+half8 __ovld __cnfn fmod(half8 x, half8 y);
+half16 __ovld __cnfn fmod(half16 x, half16 y);
+#endif //cl_khr_fp16
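+
+// Illustrative example (not part of this header): because fmod truncates
+// x/y toward zero, the result keeps the sign of x:
+//
+//   float r = fmod(5.5f, 2.0f);   //  1.5f: 5.5 - 2.0 * trunc(2.75)
+//   float s = fmod(-5.5f, 2.0f);  // -1.5f: sign follows x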
+
+/**
+ * Returns fmin(x - floor(x), 0x1.fffffep-1f).
+ * floor(x) is returned in iptr.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld fract(float x, float *iptr);
+float2 __ovld fract(float2 x, float2 *iptr);
+float3 __ovld fract(float3 x, float3 *iptr);
+float4 __ovld fract(float4 x, float4 *iptr);
+float8 __ovld fract(float8 x, float8 *iptr);
+float16 __ovld fract(float16 x, float16 *iptr);
+#ifdef cl_khr_fp64
+double __ovld fract(double x, double *iptr);
+double2 __ovld fract(double2 x, double2 *iptr);
+double3 __ovld fract(double3 x, double3 *iptr);
+double4 __ovld fract(double4 x, double4 *iptr);
+double8 __ovld fract(double8 x, double8 *iptr);
+double16 __ovld fract(double16 x, double16 *iptr);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld fract(half x, half *iptr);
+half2 __ovld fract(half2 x, half2 *iptr);
+half3 __ovld fract(half3 x, half3 *iptr);
+half4 __ovld fract(half4 x, half4 *iptr);
+half8 __ovld fract(half8 x, half8 *iptr);
+half16 __ovld fract(half16 x, half16 *iptr);
+#endif //cl_khr_fp16
+#else
+float __ovld fract(float x, __global float *iptr);
+float2 __ovld fract(float2 x, __global float2 *iptr);
+float3 __ovld fract(float3 x, __global float3 *iptr);
+float4 __ovld fract(float4 x, __global float4 *iptr);
+float8 __ovld fract(float8 x, __global float8 *iptr);
+float16 __ovld fract(float16 x, __global float16 *iptr);
+float __ovld fract(float x, __local float *iptr);
+float2 __ovld fract(float2 x, __local float2 *iptr);
+float3 __ovld fract(float3 x, __local float3 *iptr);
+float4 __ovld fract(float4 x, __local float4 *iptr);
+float8 __ovld fract(float8 x, __local float8 *iptr);
+float16 __ovld fract(float16 x, __local float16 *iptr);
+float __ovld fract(float x, __private float *iptr);
+float2 __ovld fract(float2 x, __private float2 *iptr);
+float3 __ovld fract(float3 x, __private float3 *iptr);
+float4 __ovld fract(float4 x, __private float4 *iptr);
+float8 __ovld fract(float8 x, __private float8 *iptr);
+float16 __ovld fract(float16 x, __private float16 *iptr);
+#ifdef cl_khr_fp64
+double __ovld fract(double x, __global double *iptr);
+double2 __ovld fract(double2 x, __global double2 *iptr);
+double3 __ovld fract(double3 x, __global double3 *iptr);
+double4 __ovld fract(double4 x, __global double4 *iptr);
+double8 __ovld fract(double8 x, __global double8 *iptr);
+double16 __ovld fract(double16 x, __global double16 *iptr);
+double __ovld fract(double x, __local double *iptr);
+double2 __ovld fract(double2 x, __local double2 *iptr);
+double3 __ovld fract(double3 x, __local double3 *iptr);
+double4 __ovld fract(double4 x, __local double4 *iptr);
+double8 __ovld fract(double8 x, __local double8 *iptr);
+double16 __ovld fract(double16 x, __local double16 *iptr);
+double __ovld fract(double x, __private double *iptr);
+double2 __ovld fract(double2 x, __private double2 *iptr);
+double3 __ovld fract(double3 x, __private double3 *iptr);
+double4 __ovld fract(double4 x, __private double4 *iptr);
+double8 __ovld fract(double8 x, __private double8 *iptr);
+double16 __ovld fract(double16 x, __private double16 *iptr);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld fract(half x, __global half *iptr);
+half2 __ovld fract(half2 x, __global half2 *iptr);
+half3 __ovld fract(half3 x, __global half3 *iptr);
+half4 __ovld fract(half4 x, __global half4 *iptr);
+half8 __ovld fract(half8 x, __global half8 *iptr);
+half16 __ovld fract(half16 x, __global half16 *iptr);
+half __ovld fract(half x, __local half *iptr);
+half2 __ovld fract(half2 x, __local half2 *iptr);
+half3 __ovld fract(half3 x, __local half3 *iptr);
+half4 __ovld fract(half4 x, __local half4 *iptr);
+half8 __ovld fract(half8 x, __local half8 *iptr);
+half16 __ovld fract(half16 x, __local half16 *iptr);
+half __ovld fract(half x, __private half *iptr);
+half2 __ovld fract(half2 x, __private half2 *iptr);
+half3 __ovld fract(half3 x, __private half3 *iptr);
+half4 __ovld fract(half4 x, __private half4 *iptr);
+half8 __ovld fract(half8 x, __private half8 *iptr);
+half16 __ovld fract(half16 x, __private half16 *iptr);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
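+
+// A minimal usage sketch (illustrative only, not part of this header):
+// fract splits x into its floor and a fractional remainder in [0, 1),
+// using the __private (or OpenCL 2.0 generic) overload above. Variable
+// names are hypothetical.
+//
+//   float ip;
+//   float f = fract(2.75f, &ip);  // f == 0.75f, ip == 2.0f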
+
+/**
+ * Extract mantissa and exponent from x. For each
+ * component, the mantissa returned is a float with
+ * magnitude in the interval [1/2, 1) or 0. Each
+ * component of x equals the returned mantissa * 2^exp.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld frexp(float x, int *exp);
+float2 __ovld frexp(float2 x, int2 *exp);
+float3 __ovld frexp(float3 x, int3 *exp);
+float4 __ovld frexp(float4 x, int4 *exp);
+float8 __ovld frexp(float8 x, int8 *exp);
+float16 __ovld frexp(float16 x, int16 *exp);
+#ifdef cl_khr_fp64
+double __ovld frexp(double x, int *exp);
+double2 __ovld frexp(double2 x, int2 *exp);
+double3 __ovld frexp(double3 x, int3 *exp);
+double4 __ovld frexp(double4 x, int4 *exp);
+double8 __ovld frexp(double8 x, int8 *exp);
+double16 __ovld frexp(double16 x, int16 *exp);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld frexp(half x, int *exp);
+half2 __ovld frexp(half2 x, int2 *exp);
+half3 __ovld frexp(half3 x, int3 *exp);
+half4 __ovld frexp(half4 x, int4 *exp);
+half8 __ovld frexp(half8 x, int8 *exp);
+half16 __ovld frexp(half16 x, int16 *exp);
+#endif //cl_khr_fp16
+#else
+float __ovld frexp(float x, __global int *exp);
+float2 __ovld frexp(float2 x, __global int2 *exp);
+float3 __ovld frexp(float3 x, __global int3 *exp);
+float4 __ovld frexp(float4 x, __global int4 *exp);
+float8 __ovld frexp(float8 x, __global int8 *exp);
+float16 __ovld frexp(float16 x, __global int16 *exp);
+float __ovld frexp(float x, __local int *exp);
+float2 __ovld frexp(float2 x, __local int2 *exp);
+float3 __ovld frexp(float3 x, __local int3 *exp);
+float4 __ovld frexp(float4 x, __local int4 *exp);
+float8 __ovld frexp(float8 x, __local int8 *exp);
+float16 __ovld frexp(float16 x, __local int16 *exp);
+float __ovld frexp(float x, __private int *exp);
+float2 __ovld frexp(float2 x, __private int2 *exp);
+float3 __ovld frexp(float3 x, __private int3 *exp);
+float4 __ovld frexp(float4 x, __private int4 *exp);
+float8 __ovld frexp(float8 x, __private int8 *exp);
+float16 __ovld frexp(float16 x, __private int16 *exp);
+#ifdef cl_khr_fp64
+double __ovld frexp(double x, __global int *exp);
+double2 __ovld frexp(double2 x, __global int2 *exp);
+double3 __ovld frexp(double3 x, __global int3 *exp);
+double4 __ovld frexp(double4 x, __global int4 *exp);
+double8 __ovld frexp(double8 x, __global int8 *exp);
+double16 __ovld frexp(double16 x, __global int16 *exp);
+double __ovld frexp(double x, __local int *exp);
+double2 __ovld frexp(double2 x, __local int2 *exp);
+double3 __ovld frexp(double3 x, __local int3 *exp);
+double4 __ovld frexp(double4 x, __local int4 *exp);
+double8 __ovld frexp(double8 x, __local int8 *exp);
+double16 __ovld frexp(double16 x, __local int16 *exp);
+double __ovld frexp(double x, __private int *exp);
+double2 __ovld frexp(double2 x, __private int2 *exp);
+double3 __ovld frexp(double3 x, __private int3 *exp);
+double4 __ovld frexp(double4 x, __private int4 *exp);
+double8 __ovld frexp(double8 x, __private int8 *exp);
+double16 __ovld frexp(double16 x, __private int16 *exp);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld frexp(half x, __global int *exp);
+half2 __ovld frexp(half2 x, __global int2 *exp);
+half3 __ovld frexp(half3 x, __global int3 *exp);
+half4 __ovld frexp(half4 x, __global int4 *exp);
+half8 __ovld frexp(half8 x, __global int8 *exp);
+half16 __ovld frexp(half16 x, __global int16 *exp);
+half __ovld frexp(half x, __local int *exp);
+half2 __ovld frexp(half2 x, __local int2 *exp);
+half3 __ovld frexp(half3 x, __local int3 *exp);
+half4 __ovld frexp(half4 x, __local int4 *exp);
+half8 __ovld frexp(half8 x, __local int8 *exp);
+half16 __ovld frexp(half16 x, __local int16 *exp);
+half __ovld frexp(half x, __private int *exp);
+half2 __ovld frexp(half2 x, __private int2 *exp);
+half3 __ovld frexp(half3 x, __private int3 *exp);
+half4 __ovld frexp(half4 x, __private int4 *exp);
+half8 __ovld frexp(half8 x, __private int8 *exp);
+half16 __ovld frexp(half16 x, __private int16 *exp);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
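+
+// A minimal usage sketch (illustrative only, not part of this header):
+// frexp decomposes x so that x == mantissa * 2^exp with the magnitude of
+// the mantissa in [1/2, 1). Variable names are hypothetical.
+//
+//   int e;
+//   float m = frexp(48.0f, &e);  // m == 0.75f, e == 6 (0.75 * 2^6 == 48)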
+
+/**
+ * Compute the value of the square root of x^2 + y^2
+ * without undue overflow or underflow.
+ */
+float __ovld __cnfn hypot(float x, float y);
+float2 __ovld __cnfn hypot(float2 x, float2 y);
+float3 __ovld __cnfn hypot(float3 x, float3 y);
+float4 __ovld __cnfn hypot(float4 x, float4 y);
+float8 __ovld __cnfn hypot(float8 x, float8 y);
+float16 __ovld __cnfn hypot(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn hypot(double x, double y);
+double2 __ovld __cnfn hypot(double2 x, double2 y);
+double3 __ovld __cnfn hypot(double3 x, double3 y);
+double4 __ovld __cnfn hypot(double4 x, double4 y);
+double8 __ovld __cnfn hypot(double8 x, double8 y);
+double16 __ovld __cnfn hypot(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn hypot(half x, half y);
+half2 __ovld __cnfn hypot(half2 x, half2 y);
+half3 __ovld __cnfn hypot(half3 x, half3 y);
+half4 __ovld __cnfn hypot(half4 x, half4 y);
+half8 __ovld __cnfn hypot(half8 x, half8 y);
+half16 __ovld __cnfn hypot(half16 x, half16 y);
+#endif //cl_khr_fp16
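+
+// Illustrative note (not part of this header): hypot avoids the spurious
+// overflow of the naive expression when the squares exceed FLT_MAX even
+// though the result is representable:
+//
+//   float big = 2.0e38f;
+//   float n = sqrt(big * big + big * big);  // +INF: big * big overflows
+//   float h = hypot(big, big);              // ~2.83e38f, still in range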
+
+/**
+ * Return the exponent as an integer value.
+ */
+int __ovld __cnfn ilogb(float x);
+int2 __ovld __cnfn ilogb(float2 x);
+int3 __ovld __cnfn ilogb(float3 x);
+int4 __ovld __cnfn ilogb(float4 x);
+int8 __ovld __cnfn ilogb(float8 x);
+int16 __ovld __cnfn ilogb(float16 x);
+#ifdef cl_khr_fp64
+int __ovld __cnfn ilogb(double x);
+int2 __ovld __cnfn ilogb(double2 x);
+int3 __ovld __cnfn ilogb(double3 x);
+int4 __ovld __cnfn ilogb(double4 x);
+int8 __ovld __cnfn ilogb(double8 x);
+int16 __ovld __cnfn ilogb(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn ilogb(half x);
+int2 __ovld __cnfn ilogb(half2 x);
+int3 __ovld __cnfn ilogb(half3 x);
+int4 __ovld __cnfn ilogb(half4 x);
+int8 __ovld __cnfn ilogb(half8 x);
+int16 __ovld __cnfn ilogb(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Multiply x by 2 to the power n.
+ */
+float __ovld __cnfn ldexp(float x, int n);
+float2 __ovld __cnfn ldexp(float2 x, int2 n);
+float3 __ovld __cnfn ldexp(float3 x, int3 n);
+float4 __ovld __cnfn ldexp(float4 x, int4 n);
+float8 __ovld __cnfn ldexp(float8 x, int8 n);
+float16 __ovld __cnfn ldexp(float16 x, int16 n);
+float2 __ovld __cnfn ldexp(float2 x, int n);
+float3 __ovld __cnfn ldexp(float3 x, int n);
+float4 __ovld __cnfn ldexp(float4 x, int n);
+float8 __ovld __cnfn ldexp(float8 x, int n);
+float16 __ovld __cnfn ldexp(float16 x, int n);
+#ifdef cl_khr_fp64
+double __ovld __cnfn ldexp(double x, int n);
+double2 __ovld __cnfn ldexp(double2 x, int2 n);
+double3 __ovld __cnfn ldexp(double3 x, int3 n);
+double4 __ovld __cnfn ldexp(double4 x, int4 n);
+double8 __ovld __cnfn ldexp(double8 x, int8 n);
+double16 __ovld __cnfn ldexp(double16 x, int16 n);
+double2 __ovld __cnfn ldexp(double2 x, int n);
+double3 __ovld __cnfn ldexp(double3 x, int n);
+double4 __ovld __cnfn ldexp(double4 x, int n);
+double8 __ovld __cnfn ldexp(double8 x, int n);
+double16 __ovld __cnfn ldexp(double16 x, int n);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn ldexp(half x, int n);
+half2 __ovld __cnfn ldexp(half2 x, int2 n);
+half3 __ovld __cnfn ldexp(half3 x, int3 n);
+half4 __ovld __cnfn ldexp(half4 x, int4 n);
+half8 __ovld __cnfn ldexp(half8 x, int8 n);
+half16 __ovld __cnfn ldexp(half16 x, int16 n);
+half2 __ovld __cnfn ldexp(half2 x, int n);
+half3 __ovld __cnfn ldexp(half3 x, int n);
+half4 __ovld __cnfn ldexp(half4 x, int n);
+half8 __ovld __cnfn ldexp(half8 x, int n);
+half16 __ovld __cnfn ldexp(half16 x, int n);
+#endif //cl_khr_fp16
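+
+// Illustrative example (not part of this header): ldexp scales by a power
+// of two exactly, without going through pow:
+//
+//   float a = ldexp(0.75f, 4);   // 0.75 * 2^4  == 12.0f
+//   float b = ldexp(12.0f, -4);  // 12.0 * 2^-4 == 0.75f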
+
+/**
+ * Log gamma function. Returns the natural
+ * logarithm of the absolute value of the gamma
+ * function. The sign of the gamma function is
+ * returned in the signp argument of lgamma_r.
+ */
+float __ovld __cnfn lgamma(float x);
+float2 __ovld __cnfn lgamma(float2 x);
+float3 __ovld __cnfn lgamma(float3 x);
+float4 __ovld __cnfn lgamma(float4 x);
+float8 __ovld __cnfn lgamma(float8 x);
+float16 __ovld __cnfn lgamma(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn lgamma(double x);
+double2 __ovld __cnfn lgamma(double2 x);
+double3 __ovld __cnfn lgamma(double3 x);
+double4 __ovld __cnfn lgamma(double4 x);
+double8 __ovld __cnfn lgamma(double8 x);
+double16 __ovld __cnfn lgamma(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn lgamma(half x);
+half2 __ovld __cnfn lgamma(half2 x);
+half3 __ovld __cnfn lgamma(half3 x);
+half4 __ovld __cnfn lgamma(half4 x);
+half8 __ovld __cnfn lgamma(half8 x);
+half16 __ovld __cnfn lgamma(half16 x);
+#endif //cl_khr_fp16
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld lgamma_r(float x, int *signp);
+float2 __ovld lgamma_r(float2 x, int2 *signp);
+float3 __ovld lgamma_r(float3 x, int3 *signp);
+float4 __ovld lgamma_r(float4 x, int4 *signp);
+float8 __ovld lgamma_r(float8 x, int8 *signp);
+float16 __ovld lgamma_r(float16 x, int16 *signp);
+#ifdef cl_khr_fp64
+double __ovld lgamma_r(double x, int *signp);
+double2 __ovld lgamma_r(double2 x, int2 *signp);
+double3 __ovld lgamma_r(double3 x, int3 *signp);
+double4 __ovld lgamma_r(double4 x, int4 *signp);
+double8 __ovld lgamma_r(double8 x, int8 *signp);
+double16 __ovld lgamma_r(double16 x, int16 *signp);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld lgamma_r(half x, int *signp);
+half2 __ovld lgamma_r(half2 x, int2 *signp);
+half3 __ovld lgamma_r(half3 x, int3 *signp);
+half4 __ovld lgamma_r(half4 x, int4 *signp);
+half8 __ovld lgamma_r(half8 x, int8 *signp);
+half16 __ovld lgamma_r(half16 x, int16 *signp);
+#endif //cl_khr_fp16
+#else
+float __ovld lgamma_r(float x, __global int *signp);
+float2 __ovld lgamma_r(float2 x, __global int2 *signp);
+float3 __ovld lgamma_r(float3 x, __global int3 *signp);
+float4 __ovld lgamma_r(float4 x, __global int4 *signp);
+float8 __ovld lgamma_r(float8 x, __global int8 *signp);
+float16 __ovld lgamma_r(float16 x, __global int16 *signp);
+float __ovld lgamma_r(float x, __local int *signp);
+float2 __ovld lgamma_r(float2 x, __local int2 *signp);
+float3 __ovld lgamma_r(float3 x, __local int3 *signp);
+float4 __ovld lgamma_r(float4 x, __local int4 *signp);
+float8 __ovld lgamma_r(float8 x, __local int8 *signp);
+float16 __ovld lgamma_r(float16 x, __local int16 *signp);
+float __ovld lgamma_r(float x, __private int *signp);
+float2 __ovld lgamma_r(float2 x, __private int2 *signp);
+float3 __ovld lgamma_r(float3 x, __private int3 *signp);
+float4 __ovld lgamma_r(float4 x, __private int4 *signp);
+float8 __ovld lgamma_r(float8 x, __private int8 *signp);
+float16 __ovld lgamma_r(float16 x, __private int16 *signp);
+#ifdef cl_khr_fp64
+double __ovld lgamma_r(double x, __global int *signp);
+double2 __ovld lgamma_r(double2 x, __global int2 *signp);
+double3 __ovld lgamma_r(double3 x, __global int3 *signp);
+double4 __ovld lgamma_r(double4 x, __global int4 *signp);
+double8 __ovld lgamma_r(double8 x, __global int8 *signp);
+double16 __ovld lgamma_r(double16 x, __global int16 *signp);
+double __ovld lgamma_r(double x, __local int *signp);
+double2 __ovld lgamma_r(double2 x, __local int2 *signp);
+double3 __ovld lgamma_r(double3 x, __local int3 *signp);
+double4 __ovld lgamma_r(double4 x, __local int4 *signp);
+double8 __ovld lgamma_r(double8 x, __local int8 *signp);
+double16 __ovld lgamma_r(double16 x, __local int16 *signp);
+double __ovld lgamma_r(double x, __private int *signp);
+double2 __ovld lgamma_r(double2 x, __private int2 *signp);
+double3 __ovld lgamma_r(double3 x, __private int3 *signp);
+double4 __ovld lgamma_r(double4 x, __private int4 *signp);
+double8 __ovld lgamma_r(double8 x, __private int8 *signp);
+double16 __ovld lgamma_r(double16 x, __private int16 *signp);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld lgamma_r(half x, __global int *signp);
+half2 __ovld lgamma_r(half2 x, __global int2 *signp);
+half3 __ovld lgamma_r(half3 x, __global int3 *signp);
+half4 __ovld lgamma_r(half4 x, __global int4 *signp);
+half8 __ovld lgamma_r(half8 x, __global int8 *signp);
+half16 __ovld lgamma_r(half16 x, __global int16 *signp);
+half __ovld lgamma_r(half x, __local int *signp);
+half2 __ovld lgamma_r(half2 x, __local int2 *signp);
+half3 __ovld lgamma_r(half3 x, __local int3 *signp);
+half4 __ovld lgamma_r(half4 x, __local int4 *signp);
+half8 __ovld lgamma_r(half8 x, __local int8 *signp);
+half16 __ovld lgamma_r(half16 x, __local int16 *signp);
+half __ovld lgamma_r(half x, __private int *signp);
+half2 __ovld lgamma_r(half2 x, __private int2 *signp);
+half3 __ovld lgamma_r(half3 x, __private int3 *signp);
+half4 __ovld lgamma_r(half4 x, __private int4 *signp);
+half8 __ovld lgamma_r(half8 x, __private int8 *signp);
+half16 __ovld lgamma_r(half16 x, __private int16 *signp);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
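+/*
+ * Illustrative sketch (not part of the header): lgamma_r returns
+ * log(|gamma(x)|) and writes the sign of gamma(x) through signp, so a
+ * caller can reconstruct the signed gamma value. The kernel and buffer
+ * names below are hypothetical; the plain-pointer form assumes the
+ * OpenCL 2.0 generic address space.
+ *
+ *   __kernel void signed_gamma(__global const float *in, __global float *out) {
+ *       size_t i = get_global_id(0);
+ *       int sgn;
+ *       float lg = lgamma_r(in[i], &sgn); // lg = log(|gamma(x)|), sgn = sign
+ *       out[i] = sgn * exp(lg);           // gamma(x); may overflow for large x
+ *   }
+ */
+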
+/**
+ * Compute natural logarithm.
+ */
+float __ovld __cnfn log(float);
+float2 __ovld __cnfn log(float2);
+float3 __ovld __cnfn log(float3);
+float4 __ovld __cnfn log(float4);
+float8 __ovld __cnfn log(float8);
+float16 __ovld __cnfn log(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn log(double);
+double2 __ovld __cnfn log(double2);
+double3 __ovld __cnfn log(double3);
+double4 __ovld __cnfn log(double4);
+double8 __ovld __cnfn log(double8);
+double16 __ovld __cnfn log(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn log(half);
+half2 __ovld __cnfn log(half2);
+half3 __ovld __cnfn log(half3);
+half4 __ovld __cnfn log(half4);
+half8 __ovld __cnfn log(half8);
+half16 __ovld __cnfn log(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute a base 2 logarithm.
+ */
+float __ovld __cnfn log2(float);
+float2 __ovld __cnfn log2(float2);
+float3 __ovld __cnfn log2(float3);
+float4 __ovld __cnfn log2(float4);
+float8 __ovld __cnfn log2(float8);
+float16 __ovld __cnfn log2(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn log2(double);
+double2 __ovld __cnfn log2(double2);
+double3 __ovld __cnfn log2(double3);
+double4 __ovld __cnfn log2(double4);
+double8 __ovld __cnfn log2(double8);
+double16 __ovld __cnfn log2(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn log2(half);
+half2 __ovld __cnfn log2(half2);
+half3 __ovld __cnfn log2(half3);
+half4 __ovld __cnfn log2(half4);
+half8 __ovld __cnfn log2(half8);
+half16 __ovld __cnfn log2(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute a base 10 logarithm.
+ */
+float __ovld __cnfn log10(float);
+float2 __ovld __cnfn log10(float2);
+float3 __ovld __cnfn log10(float3);
+float4 __ovld __cnfn log10(float4);
+float8 __ovld __cnfn log10(float8);
+float16 __ovld __cnfn log10(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn log10(double);
+double2 __ovld __cnfn log10(double2);
+double3 __ovld __cnfn log10(double3);
+double4 __ovld __cnfn log10(double4);
+double8 __ovld __cnfn log10(double8);
+double16 __ovld __cnfn log10(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn log10(half);
+half2 __ovld __cnfn log10(half2);
+half3 __ovld __cnfn log10(half3);
+half4 __ovld __cnfn log10(half4);
+half8 __ovld __cnfn log10(half8);
+half16 __ovld __cnfn log10(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute a base e logarithm of (1.0 + x).
+ */
+float __ovld __cnfn log1p(float x);
+float2 __ovld __cnfn log1p(float2 x);
+float3 __ovld __cnfn log1p(float3 x);
+float4 __ovld __cnfn log1p(float4 x);
+float8 __ovld __cnfn log1p(float8 x);
+float16 __ovld __cnfn log1p(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn log1p(double x);
+double2 __ovld __cnfn log1p(double2 x);
+double3 __ovld __cnfn log1p(double3 x);
+double4 __ovld __cnfn log1p(double4 x);
+double8 __ovld __cnfn log1p(double8 x);
+double16 __ovld __cnfn log1p(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn log1p(half x);
+half2 __ovld __cnfn log1p(half2 x);
+half3 __ovld __cnfn log1p(half3 x);
+half4 __ovld __cnfn log1p(half4 x);
+half8 __ovld __cnfn log1p(half8 x);
+half16 __ovld __cnfn log1p(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Compute the exponent of x, which is the integral
+ * part of log_r(|x|), where r is the floating-point radix.
+ */
+float __ovld __cnfn logb(float x);
+float2 __ovld __cnfn logb(float2 x);
+float3 __ovld __cnfn logb(float3 x);
+float4 __ovld __cnfn logb(float4 x);
+float8 __ovld __cnfn logb(float8 x);
+float16 __ovld __cnfn logb(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn logb(double x);
+double2 __ovld __cnfn logb(double2 x);
+double3 __ovld __cnfn logb(double3 x);
+double4 __ovld __cnfn logb(double4 x);
+double8 __ovld __cnfn logb(double8 x);
+double16 __ovld __cnfn logb(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn logb(half x);
+half2 __ovld __cnfn logb(half2 x);
+half3 __ovld __cnfn logb(half3 x);
+half4 __ovld __cnfn logb(half4 x);
+half8 __ovld __cnfn logb(half8 x);
+half16 __ovld __cnfn logb(half16 x);
+#endif //cl_khr_fp16
+
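+/*
+ * Illustrative values (assumed, not part of the header): for radix-2
+ * formats logb simply extracts the unbiased binary exponent:
+ *
+ *   logb(8.0f)  ==  3.0f   // 8.0 = 1.0 * 2^3
+ *   logb(0.75f) == -1.0f   // 0.75 = 1.5 * 2^-1
+ */
+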
+/**
+ * mad approximates a * b + c. Whether or how the
+ * product of a * b is rounded and how supernormal or
+ * subnormal intermediate products are handled is not
+ * defined. mad is intended to be used where speed is
+ * preferred over accuracy.
+ */
+float __ovld __cnfn mad(float a, float b, float c);
+float2 __ovld __cnfn mad(float2 a, float2 b, float2 c);
+float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);
+float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);
+float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);
+float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);
+#ifdef cl_khr_fp64
+double __ovld __cnfn mad(double a, double b, double c);
+double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);
+double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);
+double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);
+double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);
+double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn mad(half a, half b, half c);
+half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);
+half3 __ovld __cnfn mad(half3 a, half3 b, half3 c);
+half4 __ovld __cnfn mad(half4 a, half4 b, half4 c);
+half8 __ovld __cnfn mad(half8 a, half8 b, half8 c);
+half16 __ovld __cnfn mad(half16 a, half16 b, half16 c);
+#endif //cl_khr_fp16
+
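+/*
+ * Illustrative contrast (not part of the header): mad trades accuracy
+ * for speed, while the fma builtin guarantees a single rounding. A
+ * hypothetical kernel body:
+ *
+ *   float fast    = mad(a, b, c); // may round a*b first, or fuse; unspecified
+ *   float precise = fma(a, b, c); // always computed with one rounding
+ */
+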
+/**
+ * Returns x if | x | > | y |, y if | y | > | x |, otherwise
+ * fmax(x, y).
+ */
+float __ovld __cnfn maxmag(float x, float y);
+float2 __ovld __cnfn maxmag(float2 x, float2 y);
+float3 __ovld __cnfn maxmag(float3 x, float3 y);
+float4 __ovld __cnfn maxmag(float4 x, float4 y);
+float8 __ovld __cnfn maxmag(float8 x, float8 y);
+float16 __ovld __cnfn maxmag(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn maxmag(double x, double y);
+double2 __ovld __cnfn maxmag(double2 x, double2 y);
+double3 __ovld __cnfn maxmag(double3 x, double3 y);
+double4 __ovld __cnfn maxmag(double4 x, double4 y);
+double8 __ovld __cnfn maxmag(double8 x, double8 y);
+double16 __ovld __cnfn maxmag(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn maxmag(half x, half y);
+half2 __ovld __cnfn maxmag(half2 x, half2 y);
+half3 __ovld __cnfn maxmag(half3 x, half3 y);
+half4 __ovld __cnfn maxmag(half4 x, half4 y);
+half8 __ovld __cnfn maxmag(half8 x, half8 y);
+half16 __ovld __cnfn maxmag(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns x if | x | < | y |, y if | y | < | x |, otherwise
+ * fmin(x, y).
+ */
+float __ovld __cnfn minmag(float x, float y);
+float2 __ovld __cnfn minmag(float2 x, float2 y);
+float3 __ovld __cnfn minmag(float3 x, float3 y);
+float4 __ovld __cnfn minmag(float4 x, float4 y);
+float8 __ovld __cnfn minmag(float8 x, float8 y);
+float16 __ovld __cnfn minmag(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn minmag(double x, double y);
+double2 __ovld __cnfn minmag(double2 x, double2 y);
+double3 __ovld __cnfn minmag(double3 x, double3 y);
+double4 __ovld __cnfn minmag(double4 x, double4 y);
+double8 __ovld __cnfn minmag(double8 x, double8 y);
+double16 __ovld __cnfn minmag(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn minmag(half x, half y);
+half2 __ovld __cnfn minmag(half2 x, half2 y);
+half3 __ovld __cnfn minmag(half3 x, half3 y);
+half4 __ovld __cnfn minmag(half4 x, half4 y);
+half8 __ovld __cnfn minmag(half8 x, half8 y);
+half16 __ovld __cnfn minmag(half16 x, half16 y);
+#endif //cl_khr_fp16
+
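+/*
+ * Illustrative values (assumed, not part of the header): maxmag and
+ * minmag compare magnitudes, falling back to fmax/fmin on ties:
+ *
+ *   maxmag(-3.0f, 2.0f) == -3.0f   // |-3| > |2|
+ *   minmag(-3.0f, 2.0f) ==  2.0f   // |2| < |-3|
+ *   maxmag(-2.0f, 2.0f) ==  2.0f   // tie: fmax(-2, 2)
+ */
+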
+/**
+ * Decompose a floating-point number. The modf
+ * function breaks the argument x into integral and
+ * fractional parts, each of which has the same sign as
+ * the argument. It stores the integral part in the object
+ * pointed to by iptr.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld modf(float x, float *iptr);
+float2 __ovld modf(float2 x, float2 *iptr);
+float3 __ovld modf(float3 x, float3 *iptr);
+float4 __ovld modf(float4 x, float4 *iptr);
+float8 __ovld modf(float8 x, float8 *iptr);
+float16 __ovld modf(float16 x, float16 *iptr);
+#ifdef cl_khr_fp64
+double __ovld modf(double x, double *iptr);
+double2 __ovld modf(double2 x, double2 *iptr);
+double3 __ovld modf(double3 x, double3 *iptr);
+double4 __ovld modf(double4 x, double4 *iptr);
+double8 __ovld modf(double8 x, double8 *iptr);
+double16 __ovld modf(double16 x, double16 *iptr);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld modf(half x, half *iptr);
+half2 __ovld modf(half2 x, half2 *iptr);
+half3 __ovld modf(half3 x, half3 *iptr);
+half4 __ovld modf(half4 x, half4 *iptr);
+half8 __ovld modf(half8 x, half8 *iptr);
+half16 __ovld modf(half16 x, half16 *iptr);
+#endif //cl_khr_fp16
+#else
+float __ovld modf(float x, __global float *iptr);
+float2 __ovld modf(float2 x, __global float2 *iptr);
+float3 __ovld modf(float3 x, __global float3 *iptr);
+float4 __ovld modf(float4 x, __global float4 *iptr);
+float8 __ovld modf(float8 x, __global float8 *iptr);
+float16 __ovld modf(float16 x, __global float16 *iptr);
+float __ovld modf(float x, __local float *iptr);
+float2 __ovld modf(float2 x, __local float2 *iptr);
+float3 __ovld modf(float3 x, __local float3 *iptr);
+float4 __ovld modf(float4 x, __local float4 *iptr);
+float8 __ovld modf(float8 x, __local float8 *iptr);
+float16 __ovld modf(float16 x, __local float16 *iptr);
+float __ovld modf(float x, __private float *iptr);
+float2 __ovld modf(float2 x, __private float2 *iptr);
+float3 __ovld modf(float3 x, __private float3 *iptr);
+float4 __ovld modf(float4 x, __private float4 *iptr);
+float8 __ovld modf(float8 x, __private float8 *iptr);
+float16 __ovld modf(float16 x, __private float16 *iptr);
+#ifdef cl_khr_fp64
+double __ovld modf(double x, __global double *iptr);
+double2 __ovld modf(double2 x, __global double2 *iptr);
+double3 __ovld modf(double3 x, __global double3 *iptr);
+double4 __ovld modf(double4 x, __global double4 *iptr);
+double8 __ovld modf(double8 x, __global double8 *iptr);
+double16 __ovld modf(double16 x, __global double16 *iptr);
+double __ovld modf(double x, __local double *iptr);
+double2 __ovld modf(double2 x, __local double2 *iptr);
+double3 __ovld modf(double3 x, __local double3 *iptr);
+double4 __ovld modf(double4 x, __local double4 *iptr);
+double8 __ovld modf(double8 x, __local double8 *iptr);
+double16 __ovld modf(double16 x, __local double16 *iptr);
+double __ovld modf(double x, __private double *iptr);
+double2 __ovld modf(double2 x, __private double2 *iptr);
+double3 __ovld modf(double3 x, __private double3 *iptr);
+double4 __ovld modf(double4 x, __private double4 *iptr);
+double8 __ovld modf(double8 x, __private double8 *iptr);
+double16 __ovld modf(double16 x, __private double16 *iptr);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld modf(half x, __global half *iptr);
+half2 __ovld modf(half2 x, __global half2 *iptr);
+half3 __ovld modf(half3 x, __global half3 *iptr);
+half4 __ovld modf(half4 x, __global half4 *iptr);
+half8 __ovld modf(half8 x, __global half8 *iptr);
+half16 __ovld modf(half16 x, __global half16 *iptr);
+half __ovld modf(half x, __local half *iptr);
+half2 __ovld modf(half2 x, __local half2 *iptr);
+half3 __ovld modf(half3 x, __local half3 *iptr);
+half4 __ovld modf(half4 x, __local half4 *iptr);
+half8 __ovld modf(half8 x, __local half8 *iptr);
+half16 __ovld modf(half16 x, __local half16 *iptr);
+half __ovld modf(half x, __private half *iptr);
+half2 __ovld modf(half2 x, __private half2 *iptr);
+half3 __ovld modf(half3 x, __private half3 *iptr);
+half4 __ovld modf(half4 x, __private half4 *iptr);
+half8 __ovld modf(half8 x, __private half8 *iptr);
+half16 __ovld modf(half16 x, __private half16 *iptr);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
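+/*
+ * Illustrative sketch (not part of the header): both parts keep the
+ * sign of the argument. The variable names are hypothetical, and the
+ * plain-pointer form assumes the OpenCL 2.0 generic address space:
+ *
+ *   float ipart;
+ *   float frac = modf(-3.25f, &ipart); // frac == -0.25f, ipart == -3.0f
+ */
+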
+/**
+ * Returns a quiet NaN. The nancode may be placed
+ * in the significand of the resulting NaN.
+ */
+float __ovld __cnfn nan(uint nancode);
+float2 __ovld __cnfn nan(uint2 nancode);
+float3 __ovld __cnfn nan(uint3 nancode);
+float4 __ovld __cnfn nan(uint4 nancode);
+float8 __ovld __cnfn nan(uint8 nancode);
+float16 __ovld __cnfn nan(uint16 nancode);
+#ifdef cl_khr_fp64
+double __ovld __cnfn nan(ulong nancode);
+double2 __ovld __cnfn nan(ulong2 nancode);
+double3 __ovld __cnfn nan(ulong3 nancode);
+double4 __ovld __cnfn nan(ulong4 nancode);
+double8 __ovld __cnfn nan(ulong8 nancode);
+double16 __ovld __cnfn nan(ulong16 nancode);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn nan(ushort nancode);
+half2 __ovld __cnfn nan(ushort2 nancode);
+half3 __ovld __cnfn nan(ushort3 nancode);
+half4 __ovld __cnfn nan(ushort4 nancode);
+half8 __ovld __cnfn nan(ushort8 nancode);
+half16 __ovld __cnfn nan(ushort16 nancode);
+#endif //cl_khr_fp16
+
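+/*
+ * Illustrative use (not part of the header): nan builds a quiet NaN
+ * whose significand may carry the nancode as a payload, which can help
+ * tag the origin of invalid results:
+ *
+ *   float bad = nan(0xBADu); // quiet NaN; payload placement is
+ *                            // implementation-defined
+ */
+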
+/**
+ * Computes the next representable floating-point
+ * value following x in the direction of
+ * y. Thus, if y is less than x, nextafter() returns the
+ * largest representable floating-point number less
+ * than x.
+ */
+float __ovld __cnfn nextafter(float x, float y);
+float2 __ovld __cnfn nextafter(float2 x, float2 y);
+float3 __ovld __cnfn nextafter(float3 x, float3 y);
+float4 __ovld __cnfn nextafter(float4 x, float4 y);
+float8 __ovld __cnfn nextafter(float8 x, float8 y);
+float16 __ovld __cnfn nextafter(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn nextafter(double x, double y);
+double2 __ovld __cnfn nextafter(double2 x, double2 y);
+double3 __ovld __cnfn nextafter(double3 x, double3 y);
+double4 __ovld __cnfn nextafter(double4 x, double4 y);
+double8 __ovld __cnfn nextafter(double8 x, double8 y);
+double16 __ovld __cnfn nextafter(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn nextafter(half x, half y);
+half2 __ovld __cnfn nextafter(half2 x, half2 y);
+half3 __ovld __cnfn nextafter(half3 x, half3 y);
+half4 __ovld __cnfn nextafter(half4 x, half4 y);
+half8 __ovld __cnfn nextafter(half8 x, half8 y);
+half16 __ovld __cnfn nextafter(half16 x, half16 y);
+#endif //cl_khr_fp16
+
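+/*
+ * Illustrative values (assumed, not part of the header): nextafter
+ * steps exactly one ULP toward y:
+ *
+ *   nextafter(1.0f, 2.0f)  // smallest float greater than 1.0f (1 + 2^-23)
+ *   nextafter(1.0f, 0.0f)  // largest float less than 1.0f
+ */
+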
+/**
+ * Compute x to the power y.
+ */
+float __ovld __cnfn pow(float x, float y);
+float2 __ovld __cnfn pow(float2 x, float2 y);
+float3 __ovld __cnfn pow(float3 x, float3 y);
+float4 __ovld __cnfn pow(float4 x, float4 y);
+float8 __ovld __cnfn pow(float8 x, float8 y);
+float16 __ovld __cnfn pow(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn pow(double x, double y);
+double2 __ovld __cnfn pow(double2 x, double2 y);
+double3 __ovld __cnfn pow(double3 x, double3 y);
+double4 __ovld __cnfn pow(double4 x, double4 y);
+double8 __ovld __cnfn pow(double8 x, double8 y);
+double16 __ovld __cnfn pow(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn pow(half x, half y);
+half2 __ovld __cnfn pow(half2 x, half2 y);
+half3 __ovld __cnfn pow(half3 x, half3 y);
+half4 __ovld __cnfn pow(half4 x, half4 y);
+half8 __ovld __cnfn pow(half8 x, half8 y);
+half16 __ovld __cnfn pow(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Compute x to the power y, where y is an integer.
+ */
+float __ovld __cnfn pown(float x, int y);
+float2 __ovld __cnfn pown(float2 x, int2 y);
+float3 __ovld __cnfn pown(float3 x, int3 y);
+float4 __ovld __cnfn pown(float4 x, int4 y);
+float8 __ovld __cnfn pown(float8 x, int8 y);
+float16 __ovld __cnfn pown(float16 x, int16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn pown(double x, int y);
+double2 __ovld __cnfn pown(double2 x, int2 y);
+double3 __ovld __cnfn pown(double3 x, int3 y);
+double4 __ovld __cnfn pown(double4 x, int4 y);
+double8 __ovld __cnfn pown(double8 x, int8 y);
+double16 __ovld __cnfn pown(double16 x, int16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn pown(half x, int y);
+half2 __ovld __cnfn pown(half2 x, int2 y);
+half3 __ovld __cnfn pown(half3 x, int3 y);
+half4 __ovld __cnfn pown(half4 x, int4 y);
+half8 __ovld __cnfn pown(half8 x, int8 y);
+half16 __ovld __cnfn pown(half16 x, int16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Compute x to the power y, where x is >= 0.
+ */
+float __ovld __cnfn powr(float x, float y);
+float2 __ovld __cnfn powr(float2 x, float2 y);
+float3 __ovld __cnfn powr(float3 x, float3 y);
+float4 __ovld __cnfn powr(float4 x, float4 y);
+float8 __ovld __cnfn powr(float8 x, float8 y);
+float16 __ovld __cnfn powr(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn powr(double x, double y);
+double2 __ovld __cnfn powr(double2 x, double2 y);
+double3 __ovld __cnfn powr(double3 x, double3 y);
+double4 __ovld __cnfn powr(double4 x, double4 y);
+double8 __ovld __cnfn powr(double8 x, double8 y);
+double16 __ovld __cnfn powr(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn powr(half x, half y);
+half2 __ovld __cnfn powr(half2 x, half2 y);
+half3 __ovld __cnfn powr(half3 x, half3 y);
+half4 __ovld __cnfn powr(half4 x, half4 y);
+half8 __ovld __cnfn powr(half8 x, half8 y);
+half16 __ovld __cnfn powr(half16 x, half16 y);
+#endif //cl_khr_fp16
+
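+/*
+ * Illustrative contrast (not part of the header): pow, pown and powr
+ * differ in domain and edge-case handling:
+ *
+ *   pow(-8.0f, 3.0f)   // defined when y has an integral value: -512.0f
+ *   pown(-8.0f, 3)     // integer exponent: -512.0f
+ *   powr(-8.0f, 3.0f)  // NaN: powr requires x >= 0
+ */
+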
+/**
+ * Compute the value r such that r = x - n*y, where n
+ * is the integer nearest the exact value of x/y. If there
+ * are two integers closest to x/y, n shall be the even
+ * one. If r is zero, it is given the same sign as x.
+ */
+float __ovld __cnfn remainder(float x, float y);
+float2 __ovld __cnfn remainder(float2 x, float2 y);
+float3 __ovld __cnfn remainder(float3 x, float3 y);
+float4 __ovld __cnfn remainder(float4 x, float4 y);
+float8 __ovld __cnfn remainder(float8 x, float8 y);
+float16 __ovld __cnfn remainder(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn remainder(double x, double y);
+double2 __ovld __cnfn remainder(double2 x, double2 y);
+double3 __ovld __cnfn remainder(double3 x, double3 y);
+double4 __ovld __cnfn remainder(double4 x, double4 y);
+double8 __ovld __cnfn remainder(double8 x, double8 y);
+double16 __ovld __cnfn remainder(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn remainder(half x, half y);
+half2 __ovld __cnfn remainder(half2 x, half2 y);
+half3 __ovld __cnfn remainder(half3 x, half3 y);
+half4 __ovld __cnfn remainder(half4 x, half4 y);
+half8 __ovld __cnfn remainder(half8 x, half8 y);
+half16 __ovld __cnfn remainder(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * The remquo function computes the value r such
+ * that r = x - n*y, where n is the integer nearest the
+ * exact value of x/y. If there are two integers closest
+ * to x/y, n shall be the even one. If r is zero, it is
+ * given the same sign as x. This is the same value
+ * that is returned by the remainder function.
+ * remquo also calculates the lower seven bits of the
+ * integral quotient x/y, and gives that value the same
+ * sign as x/y. It stores this signed value in the object
+ * pointed to by quo.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld remquo(float x, float y, int *quo);
+float2 __ovld remquo(float2 x, float2 y, int2 *quo);
+float3 __ovld remquo(float3 x, float3 y, int3 *quo);
+float4 __ovld remquo(float4 x, float4 y, int4 *quo);
+float8 __ovld remquo(float8 x, float8 y, int8 *quo);
+float16 __ovld remquo(float16 x, float16 y, int16 *quo);
+#ifdef cl_khr_fp64
+double __ovld remquo(double x, double y, int *quo);
+double2 __ovld remquo(double2 x, double2 y, int2 *quo);
+double3 __ovld remquo(double3 x, double3 y, int3 *quo);
+double4 __ovld remquo(double4 x, double4 y, int4 *quo);
+double8 __ovld remquo(double8 x, double8 y, int8 *quo);
+double16 __ovld remquo(double16 x, double16 y, int16 *quo);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld remquo(half x, half y, int *quo);
+half2 __ovld remquo(half2 x, half2 y, int2 *quo);
+half3 __ovld remquo(half3 x, half3 y, int3 *quo);
+half4 __ovld remquo(half4 x, half4 y, int4 *quo);
+half8 __ovld remquo(half8 x, half8 y, int8 *quo);
+half16 __ovld remquo(half16 x, half16 y, int16 *quo);
+#endif //cl_khr_fp16
+#else
+float __ovld remquo(float x, float y, __global int *quo);
+float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);
+float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);
+float4 __ovld remquo(float4 x, float4 y, __global int4 *quo);
+float8 __ovld remquo(float8 x, float8 y, __global int8 *quo);
+float16 __ovld remquo(float16 x, float16 y, __global int16 *quo);
+float __ovld remquo(float x, float y, __local int *quo);
+float2 __ovld remquo(float2 x, float2 y, __local int2 *quo);
+float3 __ovld remquo(float3 x, float3 y, __local int3 *quo);
+float4 __ovld remquo(float4 x, float4 y, __local int4 *quo);
+float8 __ovld remquo(float8 x, float8 y, __local int8 *quo);
+float16 __ovld remquo(float16 x, float16 y, __local int16 *quo);
+float __ovld remquo(float x, float y, __private int *quo);
+float2 __ovld remquo(float2 x, float2 y, __private int2 *quo);
+float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);
+float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);
+float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);
+float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);
+#ifdef cl_khr_fp64
+double __ovld remquo(double x, double y, __global int *quo);
+double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);
+double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);
+double4 __ovld remquo(double4 x, double4 y, __global int4 *quo);
+double8 __ovld remquo(double8 x, double8 y, __global int8 *quo);
+double16 __ovld remquo(double16 x, double16 y, __global int16 *quo);
+double __ovld remquo(double x, double y, __local int *quo);
+double2 __ovld remquo(double2 x, double2 y, __local int2 *quo);
+double3 __ovld remquo(double3 x, double3 y, __local int3 *quo);
+double4 __ovld remquo(double4 x, double4 y, __local int4 *quo);
+double8 __ovld remquo(double8 x, double8 y, __local int8 *quo);
+double16 __ovld remquo(double16 x, double16 y, __local int16 *quo);
+double __ovld remquo(double x, double y, __private int *quo);
+double2 __ovld remquo(double2 x, double2 y, __private int2 *quo);
+double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);
+double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);
+double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);
+double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld remquo(half x, half y, __global int *quo);
+half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);
+half3 __ovld remquo(half3 x, half3 y, __global int3 *quo);
+half4 __ovld remquo(half4 x, half4 y, __global int4 *quo);
+half8 __ovld remquo(half8 x, half8 y, __global int8 *quo);
+half16 __ovld remquo(half16 x, half16 y, __global int16 *quo);
+half __ovld remquo(half x, half y, __local int *quo);
+half2 __ovld remquo(half2 x, half2 y, __local int2 *quo);
+half3 __ovld remquo(half3 x, half3 y, __local int3 *quo);
+half4 __ovld remquo(half4 x, half4 y, __local int4 *quo);
+half8 __ovld remquo(half8 x, half8 y, __local int8 *quo);
+half16 __ovld remquo(half16 x, half16 y, __local int16 *quo);
+half __ovld remquo(half x, half y, __private int *quo);
+half2 __ovld remquo(half2 x, half2 y, __private int2 *quo);
+half3 __ovld remquo(half3 x, half3 y, __private int3 *quo);
+half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);
+half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);
+half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
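+
+/*
+ * Illustrative sketch (not part of the header): remquo is useful for
+ * argument reduction, where the low quotient bits select, e.g., a trig
+ * quadrant. Names are hypothetical; the plain-pointer form assumes the
+ * OpenCL 2.0 generic address space:
+ *
+ *   int q;
+ *   float r = remquo(x, M_PI_2_F, &q); // r = x - n*(pi/2), q = low bits of n
+ *   int quadrant = q & 3;              // selects the quadrant for sin/cos
+ */
+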
+/**
+ * Round to integral value (using round to nearest
+ * even rounding mode) in floating-point format.
+ * Refer to section 7.1 for description of rounding
+ * modes.
+ */
+float __ovld __cnfn rint(float);
+float2 __ovld __cnfn rint(float2);
+float3 __ovld __cnfn rint(float3);
+float4 __ovld __cnfn rint(float4);
+float8 __ovld __cnfn rint(float8);
+float16 __ovld __cnfn rint(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn rint(double);
+double2 __ovld __cnfn rint(double2);
+double3 __ovld __cnfn rint(double3);
+double4 __ovld __cnfn rint(double4);
+double8 __ovld __cnfn rint(double8);
+double16 __ovld __cnfn rint(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn rint(half);
+half2 __ovld __cnfn rint(half2);
+half3 __ovld __cnfn rint(half3);
+half4 __ovld __cnfn rint(half4);
+half8 __ovld __cnfn rint(half8);
+half16 __ovld __cnfn rint(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute x to the power 1/y.
+ */
+float __ovld __cnfn rootn(float x, int y);
+float2 __ovld __cnfn rootn(float2 x, int2 y);
+float3 __ovld __cnfn rootn(float3 x, int3 y);
+float4 __ovld __cnfn rootn(float4 x, int4 y);
+float8 __ovld __cnfn rootn(float8 x, int8 y);
+float16 __ovld __cnfn rootn(float16 x, int16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn rootn(double x, int y);
+double2 __ovld __cnfn rootn(double2 x, int2 y);
+double3 __ovld __cnfn rootn(double3 x, int3 y);
+double4 __ovld __cnfn rootn(double4 x, int4 y);
+double8 __ovld __cnfn rootn(double8 x, int8 y);
+double16 __ovld __cnfn rootn(double16 x, int16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn rootn(half x, int y);
+half2 __ovld __cnfn rootn(half2 x, int2 y);
+half3 __ovld __cnfn rootn(half3 x, int3 y);
+half4 __ovld __cnfn rootn(half4 x, int4 y);
+half8 __ovld __cnfn rootn(half8 x, int8 y);
+half16 __ovld __cnfn rootn(half16 x, int16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Return the integral value nearest to x rounding
+ * halfway cases away from zero, regardless of the
+ * current rounding direction.
+ */
+float __ovld __cnfn round(float x);
+float2 __ovld __cnfn round(float2 x);
+float3 __ovld __cnfn round(float3 x);
+float4 __ovld __cnfn round(float4 x);
+float8 __ovld __cnfn round(float8 x);
+float16 __ovld __cnfn round(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn round(double x);
+double2 __ovld __cnfn round(double2 x);
+double3 __ovld __cnfn round(double3 x);
+double4 __ovld __cnfn round(double4 x);
+double8 __ovld __cnfn round(double8 x);
+double16 __ovld __cnfn round(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn round(half x);
+half2 __ovld __cnfn round(half2 x);
+half3 __ovld __cnfn round(half3 x);
+half4 __ovld __cnfn round(half4 x);
+half8 __ovld __cnfn round(half8 x);
+half16 __ovld __cnfn round(half16 x);
+#endif //cl_khr_fp16
+
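+/*
+ * Illustrative values (assumed, not part of the header): the rounding
+ * family differs only in how halfway cases and direction are handled
+ * (trunc is declared further below):
+ *
+ *   rint(2.5f)   ==  2.0f   // round to nearest even
+ *   round(2.5f)  ==  3.0f   // halfway cases away from zero
+ *   trunc(2.5f)  ==  2.0f   // toward zero
+ *   trunc(-2.5f) == -2.0f
+ */
+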
+/**
+ * Compute inverse square root.
+ */
+float __ovld __cnfn rsqrt(float);
+float2 __ovld __cnfn rsqrt(float2);
+float3 __ovld __cnfn rsqrt(float3);
+float4 __ovld __cnfn rsqrt(float4);
+float8 __ovld __cnfn rsqrt(float8);
+float16 __ovld __cnfn rsqrt(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn rsqrt(double);
+double2 __ovld __cnfn rsqrt(double2);
+double3 __ovld __cnfn rsqrt(double3);
+double4 __ovld __cnfn rsqrt(double4);
+double8 __ovld __cnfn rsqrt(double8);
+double16 __ovld __cnfn rsqrt(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn rsqrt(half);
+half2 __ovld __cnfn rsqrt(half2);
+half3 __ovld __cnfn rsqrt(half3);
+half4 __ovld __cnfn rsqrt(half4);
+half8 __ovld __cnfn rsqrt(half8);
+half16 __ovld __cnfn rsqrt(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute sine.
+ */
+float __ovld __cnfn sin(float);
+float2 __ovld __cnfn sin(float2);
+float3 __ovld __cnfn sin(float3);
+float4 __ovld __cnfn sin(float4);
+float8 __ovld __cnfn sin(float8);
+float16 __ovld __cnfn sin(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn sin(double);
+double2 __ovld __cnfn sin(double2);
+double3 __ovld __cnfn sin(double3);
+double4 __ovld __cnfn sin(double4);
+double8 __ovld __cnfn sin(double8);
+double16 __ovld __cnfn sin(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sin(half);
+half2 __ovld __cnfn sin(half2);
+half3 __ovld __cnfn sin(half3);
+half4 __ovld __cnfn sin(half4);
+half8 __ovld __cnfn sin(half8);
+half16 __ovld __cnfn sin(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute sine and cosine of x. The computed sine
+ * is the return value and computed cosine is returned
+ * in cosval.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld sincos(float x, float *cosval);
+float2 __ovld sincos(float2 x, float2 *cosval);
+float3 __ovld sincos(float3 x, float3 *cosval);
+float4 __ovld sincos(float4 x, float4 *cosval);
+float8 __ovld sincos(float8 x, float8 *cosval);
+float16 __ovld sincos(float16 x, float16 *cosval);
+#ifdef cl_khr_fp64
+double __ovld sincos(double x, double *cosval);
+double2 __ovld sincos(double2 x, double2 *cosval);
+double3 __ovld sincos(double3 x, double3 *cosval);
+double4 __ovld sincos(double4 x, double4 *cosval);
+double8 __ovld sincos(double8 x, double8 *cosval);
+double16 __ovld sincos(double16 x, double16 *cosval);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld sincos(half x, half *cosval);
+half2 __ovld sincos(half2 x, half2 *cosval);
+half3 __ovld sincos(half3 x, half3 *cosval);
+half4 __ovld sincos(half4 x, half4 *cosval);
+half8 __ovld sincos(half8 x, half8 *cosval);
+half16 __ovld sincos(half16 x, half16 *cosval);
+#endif //cl_khr_fp16
+#else
+float __ovld sincos(float x, __global float *cosval);
+float2 __ovld sincos(float2 x, __global float2 *cosval);
+float3 __ovld sincos(float3 x, __global float3 *cosval);
+float4 __ovld sincos(float4 x, __global float4 *cosval);
+float8 __ovld sincos(float8 x, __global float8 *cosval);
+float16 __ovld sincos(float16 x, __global float16 *cosval);
+float __ovld sincos(float x, __local float *cosval);
+float2 __ovld sincos(float2 x, __local float2 *cosval);
+float3 __ovld sincos(float3 x, __local float3 *cosval);
+float4 __ovld sincos(float4 x, __local float4 *cosval);
+float8 __ovld sincos(float8 x, __local float8 *cosval);
+float16 __ovld sincos(float16 x, __local float16 *cosval);
+float __ovld sincos(float x, __private float *cosval);
+float2 __ovld sincos(float2 x, __private float2 *cosval);
+float3 __ovld sincos(float3 x, __private float3 *cosval);
+float4 __ovld sincos(float4 x, __private float4 *cosval);
+float8 __ovld sincos(float8 x, __private float8 *cosval);
+float16 __ovld sincos(float16 x, __private float16 *cosval);
+#ifdef cl_khr_fp64
+double __ovld sincos(double x, __global double *cosval);
+double2 __ovld sincos(double2 x, __global double2 *cosval);
+double3 __ovld sincos(double3 x, __global double3 *cosval);
+double4 __ovld sincos(double4 x, __global double4 *cosval);
+double8 __ovld sincos(double8 x, __global double8 *cosval);
+double16 __ovld sincos(double16 x, __global double16 *cosval);
+double __ovld sincos(double x, __local double *cosval);
+double2 __ovld sincos(double2 x, __local double2 *cosval);
+double3 __ovld sincos(double3 x, __local double3 *cosval);
+double4 __ovld sincos(double4 x, __local double4 *cosval);
+double8 __ovld sincos(double8 x, __local double8 *cosval);
+double16 __ovld sincos(double16 x, __local double16 *cosval);
+double __ovld sincos(double x, __private double *cosval);
+double2 __ovld sincos(double2 x, __private double2 *cosval);
+double3 __ovld sincos(double3 x, __private double3 *cosval);
+double4 __ovld sincos(double4 x, __private double4 *cosval);
+double8 __ovld sincos(double8 x, __private double8 *cosval);
+double16 __ovld sincos(double16 x, __private double16 *cosval);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld sincos(half x, __global half *cosval);
+half2 __ovld sincos(half2 x, __global half2 *cosval);
+half3 __ovld sincos(half3 x, __global half3 *cosval);
+half4 __ovld sincos(half4 x, __global half4 *cosval);
+half8 __ovld sincos(half8 x, __global half8 *cosval);
+half16 __ovld sincos(half16 x, __global half16 *cosval);
+half __ovld sincos(half x, __local half *cosval);
+half2 __ovld sincos(half2 x, __local half2 *cosval);
+half3 __ovld sincos(half3 x, __local half3 *cosval);
+half4 __ovld sincos(half4 x, __local half4 *cosval);
+half8 __ovld sincos(half8 x, __local half8 *cosval);
+half16 __ovld sincos(half16 x, __local half16 *cosval);
+half __ovld sincos(half x, __private half *cosval);
+half2 __ovld sincos(half2 x, __private half2 *cosval);
+half3 __ovld sincos(half3 x, __private half3 *cosval);
+half4 __ovld sincos(half4 x, __private half4 *cosval);
+half8 __ovld sincos(half8 x, __private half8 *cosval);
+half16 __ovld sincos(half16 x, __private half16 *cosval);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
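+/*
+ * Illustrative sketch (not part of the header): sincos computes both
+ * values in one call, which is typically cheaper than separate sin and
+ * cos calls when both are needed, e.g. for a 2D rotation. Names are
+ * hypothetical; the plain-pointer form assumes OpenCL 2.0:
+ *
+ *   float c;
+ *   float s = sincos(theta, &c);
+ *   float2 rotated = (float2)(p.x * c - p.y * s, p.x * s + p.y * c);
+ */
+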
+/**
+ * Compute hyperbolic sine.
+ */
+float __ovld __cnfn sinh(float);
+float2 __ovld __cnfn sinh(float2);
+float3 __ovld __cnfn sinh(float3);
+float4 __ovld __cnfn sinh(float4);
+float8 __ovld __cnfn sinh(float8);
+float16 __ovld __cnfn sinh(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn sinh(double);
+double2 __ovld __cnfn sinh(double2);
+double3 __ovld __cnfn sinh(double3);
+double4 __ovld __cnfn sinh(double4);
+double8 __ovld __cnfn sinh(double8);
+double16 __ovld __cnfn sinh(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sinh(half);
+half2 __ovld __cnfn sinh(half2);
+half3 __ovld __cnfn sinh(half3);
+half4 __ovld __cnfn sinh(half4);
+half8 __ovld __cnfn sinh(half8);
+half16 __ovld __cnfn sinh(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute sin (PI * x).
+ */
+float __ovld __cnfn sinpi(float x);
+float2 __ovld __cnfn sinpi(float2 x);
+float3 __ovld __cnfn sinpi(float3 x);
+float4 __ovld __cnfn sinpi(float4 x);
+float8 __ovld __cnfn sinpi(float8 x);
+float16 __ovld __cnfn sinpi(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn sinpi(double x);
+double2 __ovld __cnfn sinpi(double2 x);
+double3 __ovld __cnfn sinpi(double3 x);
+double4 __ovld __cnfn sinpi(double4 x);
+double8 __ovld __cnfn sinpi(double8 x);
+double16 __ovld __cnfn sinpi(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sinpi(half x);
+half2 __ovld __cnfn sinpi(half2 x);
+half3 __ovld __cnfn sinpi(half3 x);
+half4 __ovld __cnfn sinpi(half4 x);
+half8 __ovld __cnfn sinpi(half8 x);
+half16 __ovld __cnfn sinpi(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Compute square root.
+ */
+float __ovld __cnfn sqrt(float);
+float2 __ovld __cnfn sqrt(float2);
+float3 __ovld __cnfn sqrt(float3);
+float4 __ovld __cnfn sqrt(float4);
+float8 __ovld __cnfn sqrt(float8);
+float16 __ovld __cnfn sqrt(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn sqrt(double);
+double2 __ovld __cnfn sqrt(double2);
+double3 __ovld __cnfn sqrt(double3);
+double4 __ovld __cnfn sqrt(double4);
+double8 __ovld __cnfn sqrt(double8);
+double16 __ovld __cnfn sqrt(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sqrt(half);
+half2 __ovld __cnfn sqrt(half2);
+half3 __ovld __cnfn sqrt(half3);
+half4 __ovld __cnfn sqrt(half4);
+half8 __ovld __cnfn sqrt(half8);
+half16 __ovld __cnfn sqrt(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute tangent.
+ */
+float __ovld __cnfn tan(float);
+float2 __ovld __cnfn tan(float2);
+float3 __ovld __cnfn tan(float3);
+float4 __ovld __cnfn tan(float4);
+float8 __ovld __cnfn tan(float8);
+float16 __ovld __cnfn tan(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn tan(double);
+double2 __ovld __cnfn tan(double2);
+double3 __ovld __cnfn tan(double3);
+double4 __ovld __cnfn tan(double4);
+double8 __ovld __cnfn tan(double8);
+double16 __ovld __cnfn tan(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn tan(half);
+half2 __ovld __cnfn tan(half2);
+half3 __ovld __cnfn tan(half3);
+half4 __ovld __cnfn tan(half4);
+half8 __ovld __cnfn tan(half8);
+half16 __ovld __cnfn tan(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute hyperbolic tangent.
+ */
+float __ovld __cnfn tanh(float);
+float2 __ovld __cnfn tanh(float2);
+float3 __ovld __cnfn tanh(float3);
+float4 __ovld __cnfn tanh(float4);
+float8 __ovld __cnfn tanh(float8);
+float16 __ovld __cnfn tanh(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn tanh(double);
+double2 __ovld __cnfn tanh(double2);
+double3 __ovld __cnfn tanh(double3);
+double4 __ovld __cnfn tanh(double4);
+double8 __ovld __cnfn tanh(double8);
+double16 __ovld __cnfn tanh(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn tanh(half);
+half2 __ovld __cnfn tanh(half2);
+half3 __ovld __cnfn tanh(half3);
+half4 __ovld __cnfn tanh(half4);
+half8 __ovld __cnfn tanh(half8);
+half16 __ovld __cnfn tanh(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute tan (PI * x).
+ */
+float __ovld __cnfn tanpi(float x);
+float2 __ovld __cnfn tanpi(float2 x);
+float3 __ovld __cnfn tanpi(float3 x);
+float4 __ovld __cnfn tanpi(float4 x);
+float8 __ovld __cnfn tanpi(float8 x);
+float16 __ovld __cnfn tanpi(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn tanpi(double x);
+double2 __ovld __cnfn tanpi(double2 x);
+double3 __ovld __cnfn tanpi(double3 x);
+double4 __ovld __cnfn tanpi(double4 x);
+double8 __ovld __cnfn tanpi(double8 x);
+double16 __ovld __cnfn tanpi(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn tanpi(half x);
+half2 __ovld __cnfn tanpi(half2 x);
+half3 __ovld __cnfn tanpi(half3 x);
+half4 __ovld __cnfn tanpi(half4 x);
+half8 __ovld __cnfn tanpi(half8 x);
+half16 __ovld __cnfn tanpi(half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Compute the gamma function.
+ */
+float __ovld __cnfn tgamma(float);
+float2 __ovld __cnfn tgamma(float2);
+float3 __ovld __cnfn tgamma(float3);
+float4 __ovld __cnfn tgamma(float4);
+float8 __ovld __cnfn tgamma(float8);
+float16 __ovld __cnfn tgamma(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn tgamma(double);
+double2 __ovld __cnfn tgamma(double2);
+double3 __ovld __cnfn tgamma(double3);
+double4 __ovld __cnfn tgamma(double4);
+double8 __ovld __cnfn tgamma(double8);
+double16 __ovld __cnfn tgamma(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn tgamma(half);
+half2 __ovld __cnfn tgamma(half2);
+half3 __ovld __cnfn tgamma(half3);
+half4 __ovld __cnfn tgamma(half4);
+half8 __ovld __cnfn tgamma(half8);
+half16 __ovld __cnfn tgamma(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Round to integral value using the round to zero
+ * rounding mode.
+ */
+float __ovld __cnfn trunc(float);
+float2 __ovld __cnfn trunc(float2);
+float3 __ovld __cnfn trunc(float3);
+float4 __ovld __cnfn trunc(float4);
+float8 __ovld __cnfn trunc(float8);
+float16 __ovld __cnfn trunc(float16);
+#ifdef cl_khr_fp64
+double __ovld __cnfn trunc(double);
+double2 __ovld __cnfn trunc(double2);
+double3 __ovld __cnfn trunc(double3);
+double4 __ovld __cnfn trunc(double4);
+double8 __ovld __cnfn trunc(double8);
+double16 __ovld __cnfn trunc(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn trunc(half);
+half2 __ovld __cnfn trunc(half2);
+half3 __ovld __cnfn trunc(half3);
+half4 __ovld __cnfn trunc(half4);
+half8 __ovld __cnfn trunc(half8);
+half16 __ovld __cnfn trunc(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Compute cosine. x must be in the range -2^16 ... +2^16.
+ */
+float __ovld __cnfn half_cos(float x);
+float2 __ovld __cnfn half_cos(float2 x);
+float3 __ovld __cnfn half_cos(float3 x);
+float4 __ovld __cnfn half_cos(float4 x);
+float8 __ovld __cnfn half_cos(float8 x);
+float16 __ovld __cnfn half_cos(float16 x);
+
+/**
+ * Compute x / y.
+ */
+float __ovld __cnfn half_divide(float x, float y);
+float2 __ovld __cnfn half_divide(float2 x, float2 y);
+float3 __ovld __cnfn half_divide(float3 x, float3 y);
+float4 __ovld __cnfn half_divide(float4 x, float4 y);
+float8 __ovld __cnfn half_divide(float8 x, float8 y);
+float16 __ovld __cnfn half_divide(float16 x, float16 y);
+
+/**
+ * Compute the base-e exponential of x.
+ */
+float __ovld __cnfn half_exp(float x);
+float2 __ovld __cnfn half_exp(float2 x);
+float3 __ovld __cnfn half_exp(float3 x);
+float4 __ovld __cnfn half_exp(float4 x);
+float8 __ovld __cnfn half_exp(float8 x);
+float16 __ovld __cnfn half_exp(float16 x);
+
+/**
+ * Compute the base-2 exponential of x.
+ */
+float __ovld __cnfn half_exp2(float x);
+float2 __ovld __cnfn half_exp2(float2 x);
+float3 __ovld __cnfn half_exp2(float3 x);
+float4 __ovld __cnfn half_exp2(float4 x);
+float8 __ovld __cnfn half_exp2(float8 x);
+float16 __ovld __cnfn half_exp2(float16 x);
+
+/**
+ * Compute the base-10 exponential of x.
+ */
+float __ovld __cnfn half_exp10(float x);
+float2 __ovld __cnfn half_exp10(float2 x);
+float3 __ovld __cnfn half_exp10(float3 x);
+float4 __ovld __cnfn half_exp10(float4 x);
+float8 __ovld __cnfn half_exp10(float8 x);
+float16 __ovld __cnfn half_exp10(float16 x);
+
+/**
+ * Compute natural logarithm.
+ */
+float __ovld __cnfn half_log(float x);
+float2 __ovld __cnfn half_log(float2 x);
+float3 __ovld __cnfn half_log(float3 x);
+float4 __ovld __cnfn half_log(float4 x);
+float8 __ovld __cnfn half_log(float8 x);
+float16 __ovld __cnfn half_log(float16 x);
+
+/**
+ * Compute a base 2 logarithm.
+ */
+float __ovld __cnfn half_log2(float x);
+float2 __ovld __cnfn half_log2(float2 x);
+float3 __ovld __cnfn half_log2(float3 x);
+float4 __ovld __cnfn half_log2(float4 x);
+float8 __ovld __cnfn half_log2(float8 x);
+float16 __ovld __cnfn half_log2(float16 x);
+
+/**
+ * Compute a base 10 logarithm.
+ */
+float __ovld __cnfn half_log10(float x);
+float2 __ovld __cnfn half_log10(float2 x);
+float3 __ovld __cnfn half_log10(float3 x);
+float4 __ovld __cnfn half_log10(float4 x);
+float8 __ovld __cnfn half_log10(float8 x);
+float16 __ovld __cnfn half_log10(float16 x);
+
+/**
+ * Compute x to the power y, where x is >= 0.
+ */
+float __ovld __cnfn half_powr(float x, float y);
+float2 __ovld __cnfn half_powr(float2 x, float2 y);
+float3 __ovld __cnfn half_powr(float3 x, float3 y);
+float4 __ovld __cnfn half_powr(float4 x, float4 y);
+float8 __ovld __cnfn half_powr(float8 x, float8 y);
+float16 __ovld __cnfn half_powr(float16 x, float16 y);
+
+/**
+ * Compute reciprocal.
+ */
+float __ovld __cnfn half_recip(float x);
+float2 __ovld __cnfn half_recip(float2 x);
+float3 __ovld __cnfn half_recip(float3 x);
+float4 __ovld __cnfn half_recip(float4 x);
+float8 __ovld __cnfn half_recip(float8 x);
+float16 __ovld __cnfn half_recip(float16 x);
+
+/**
+ * Compute inverse square root.
+ */
+float __ovld __cnfn half_rsqrt(float x);
+float2 __ovld __cnfn half_rsqrt(float2 x);
+float3 __ovld __cnfn half_rsqrt(float3 x);
+float4 __ovld __cnfn half_rsqrt(float4 x);
+float8 __ovld __cnfn half_rsqrt(float8 x);
+float16 __ovld __cnfn half_rsqrt(float16 x);
+
+/**
+ * Compute sine. x must be in the range -2^16 ... +2^16.
+ */
+float __ovld __cnfn half_sin(float x);
+float2 __ovld __cnfn half_sin(float2 x);
+float3 __ovld __cnfn half_sin(float3 x);
+float4 __ovld __cnfn half_sin(float4 x);
+float8 __ovld __cnfn half_sin(float8 x);
+float16 __ovld __cnfn half_sin(float16 x);
+
+/**
+ * Compute square root.
+ */
+float __ovld __cnfn half_sqrt(float x);
+float2 __ovld __cnfn half_sqrt(float2 x);
+float3 __ovld __cnfn half_sqrt(float3 x);
+float4 __ovld __cnfn half_sqrt(float4 x);
+float8 __ovld __cnfn half_sqrt(float8 x);
+float16 __ovld __cnfn half_sqrt(float16 x);
+
+/**
+ * Compute tangent. x must be in the range -2^16 ... +2^16.
+ */
+float __ovld __cnfn half_tan(float x);
+float2 __ovld __cnfn half_tan(float2 x);
+float3 __ovld __cnfn half_tan(float3 x);
+float4 __ovld __cnfn half_tan(float4 x);
+float8 __ovld __cnfn half_tan(float8 x);
+float16 __ovld __cnfn half_tan(float16 x);
+
+/**
+ * Compute cosine over an implementation-defined range.
+ * The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_cos(float x);
+float2 __ovld __cnfn native_cos(float2 x);
+float3 __ovld __cnfn native_cos(float3 x);
+float4 __ovld __cnfn native_cos(float4 x);
+float8 __ovld __cnfn native_cos(float8 x);
+float16 __ovld __cnfn native_cos(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_cos(double x);
+double2 __ovld __cnfn native_cos(double2 x);
+double3 __ovld __cnfn native_cos(double3 x);
+double4 __ovld __cnfn native_cos(double4 x);
+double8 __ovld __cnfn native_cos(double8 x);
+double16 __ovld __cnfn native_cos(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute x / y over an implementation-defined range.
+ * The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_divide(float x, float y);
+float2 __ovld __cnfn native_divide(float2 x, float2 y);
+float3 __ovld __cnfn native_divide(float3 x, float3 y);
+float4 __ovld __cnfn native_divide(float4 x, float4 y);
+float8 __ovld __cnfn native_divide(float8 x, float8 y);
+float16 __ovld __cnfn native_divide(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_divide(double x, double y);
+double2 __ovld __cnfn native_divide(double2 x, double2 y);
+double3 __ovld __cnfn native_divide(double3 x, double3 y);
+double4 __ovld __cnfn native_divide(double4 x, double4 y);
+double8 __ovld __cnfn native_divide(double8 x, double8 y);
+double16 __ovld __cnfn native_divide(double16 x, double16 y);
+#endif //cl_khr_fp64
+
+/**
+ * Compute the base-e exponential of x over an
+ * implementation-defined range. The maximum error is
+ * implementation-defined.
+ */
+float __ovld __cnfn native_exp(float x);
+float2 __ovld __cnfn native_exp(float2 x);
+float3 __ovld __cnfn native_exp(float3 x);
+float4 __ovld __cnfn native_exp(float4 x);
+float8 __ovld __cnfn native_exp(float8 x);
+float16 __ovld __cnfn native_exp(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_exp(double x);
+double2 __ovld __cnfn native_exp(double2 x);
+double3 __ovld __cnfn native_exp(double3 x);
+double4 __ovld __cnfn native_exp(double4 x);
+double8 __ovld __cnfn native_exp(double8 x);
+double16 __ovld __cnfn native_exp(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute the base-2 exponential of x over an
+ * implementation-defined range. The maximum error is
+ * implementation-defined.
+ */
+float __ovld __cnfn native_exp2(float x);
+float2 __ovld __cnfn native_exp2(float2 x);
+float3 __ovld __cnfn native_exp2(float3 x);
+float4 __ovld __cnfn native_exp2(float4 x);
+float8 __ovld __cnfn native_exp2(float8 x);
+float16 __ovld __cnfn native_exp2(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_exp2(double x);
+double2 __ovld __cnfn native_exp2(double2 x);
+double3 __ovld __cnfn native_exp2(double3 x);
+double4 __ovld __cnfn native_exp2(double4 x);
+double8 __ovld __cnfn native_exp2(double8 x);
+double16 __ovld __cnfn native_exp2(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute the base-10 exponential of x over an
+ * implementation-defined range. The maximum error is
+ * implementation-defined.
+ */
+float __ovld __cnfn native_exp10(float x);
+float2 __ovld __cnfn native_exp10(float2 x);
+float3 __ovld __cnfn native_exp10(float3 x);
+float4 __ovld __cnfn native_exp10(float4 x);
+float8 __ovld __cnfn native_exp10(float8 x);
+float16 __ovld __cnfn native_exp10(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_exp10(double x);
+double2 __ovld __cnfn native_exp10(double2 x);
+double3 __ovld __cnfn native_exp10(double3 x);
+double4 __ovld __cnfn native_exp10(double4 x);
+double8 __ovld __cnfn native_exp10(double8 x);
+double16 __ovld __cnfn native_exp10(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute natural logarithm over an implementation-defined
+ * range. The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_log(float x);
+float2 __ovld __cnfn native_log(float2 x);
+float3 __ovld __cnfn native_log(float3 x);
+float4 __ovld __cnfn native_log(float4 x);
+float8 __ovld __cnfn native_log(float8 x);
+float16 __ovld __cnfn native_log(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_log(double x);
+double2 __ovld __cnfn native_log(double2 x);
+double3 __ovld __cnfn native_log(double3 x);
+double4 __ovld __cnfn native_log(double4 x);
+double8 __ovld __cnfn native_log(double8 x);
+double16 __ovld __cnfn native_log(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute a base 2 logarithm over an implementation-defined
+ * range. The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_log2(float x);
+float2 __ovld __cnfn native_log2(float2 x);
+float3 __ovld __cnfn native_log2(float3 x);
+float4 __ovld __cnfn native_log2(float4 x);
+float8 __ovld __cnfn native_log2(float8 x);
+float16 __ovld __cnfn native_log2(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_log2(double x);
+double2 __ovld __cnfn native_log2(double2 x);
+double3 __ovld __cnfn native_log2(double3 x);
+double4 __ovld __cnfn native_log2(double4 x);
+double8 __ovld __cnfn native_log2(double8 x);
+double16 __ovld __cnfn native_log2(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute a base 10 logarithm over an implementation-defined
+ * range. The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_log10(float x);
+float2 __ovld __cnfn native_log10(float2 x);
+float3 __ovld __cnfn native_log10(float3 x);
+float4 __ovld __cnfn native_log10(float4 x);
+float8 __ovld __cnfn native_log10(float8 x);
+float16 __ovld __cnfn native_log10(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_log10(double x);
+double2 __ovld __cnfn native_log10(double2 x);
+double3 __ovld __cnfn native_log10(double3 x);
+double4 __ovld __cnfn native_log10(double4 x);
+double8 __ovld __cnfn native_log10(double8 x);
+double16 __ovld __cnfn native_log10(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute x to the power y, where x is >= 0. The ranges of
+ * x and y are implementation-defined. The maximum error
+ * is implementation-defined.
+ */
+float __ovld __cnfn native_powr(float x, float y);
+float2 __ovld __cnfn native_powr(float2 x, float2 y);
+float3 __ovld __cnfn native_powr(float3 x, float3 y);
+float4 __ovld __cnfn native_powr(float4 x, float4 y);
+float8 __ovld __cnfn native_powr(float8 x, float8 y);
+float16 __ovld __cnfn native_powr(float16 x, float16 y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_powr(double x, double y);
+double2 __ovld __cnfn native_powr(double2 x, double2 y);
+double3 __ovld __cnfn native_powr(double3 x, double3 y);
+double4 __ovld __cnfn native_powr(double4 x, double4 y);
+double8 __ovld __cnfn native_powr(double8 x, double8 y);
+double16 __ovld __cnfn native_powr(double16 x, double16 y);
+#endif //cl_khr_fp64
+
+/**
+ * Compute reciprocal over an implementation-defined
+ * range. The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_recip(float x);
+float2 __ovld __cnfn native_recip(float2 x);
+float3 __ovld __cnfn native_recip(float3 x);
+float4 __ovld __cnfn native_recip(float4 x);
+float8 __ovld __cnfn native_recip(float8 x);
+float16 __ovld __cnfn native_recip(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_recip(double x);
+double2 __ovld __cnfn native_recip(double2 x);
+double3 __ovld __cnfn native_recip(double3 x);
+double4 __ovld __cnfn native_recip(double4 x);
+double8 __ovld __cnfn native_recip(double8 x);
+double16 __ovld __cnfn native_recip(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute inverse square root over an implementation-defined
+ * range. The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_rsqrt(float x);
+float2 __ovld __cnfn native_rsqrt(float2 x);
+float3 __ovld __cnfn native_rsqrt(float3 x);
+float4 __ovld __cnfn native_rsqrt(float4 x);
+float8 __ovld __cnfn native_rsqrt(float8 x);
+float16 __ovld __cnfn native_rsqrt(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_rsqrt(double x);
+double2 __ovld __cnfn native_rsqrt(double2 x);
+double3 __ovld __cnfn native_rsqrt(double3 x);
+double4 __ovld __cnfn native_rsqrt(double4 x);
+double8 __ovld __cnfn native_rsqrt(double8 x);
+double16 __ovld __cnfn native_rsqrt(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute sine over an implementation-defined range.
+ * The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_sin(float x);
+float2 __ovld __cnfn native_sin(float2 x);
+float3 __ovld __cnfn native_sin(float3 x);
+float4 __ovld __cnfn native_sin(float4 x);
+float8 __ovld __cnfn native_sin(float8 x);
+float16 __ovld __cnfn native_sin(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_sin(double x);
+double2 __ovld __cnfn native_sin(double2 x);
+double3 __ovld __cnfn native_sin(double3 x);
+double4 __ovld __cnfn native_sin(double4 x);
+double8 __ovld __cnfn native_sin(double8 x);
+double16 __ovld __cnfn native_sin(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute square root over an implementation-defined
+ * range. The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_sqrt(float x);
+float2 __ovld __cnfn native_sqrt(float2 x);
+float3 __ovld __cnfn native_sqrt(float3 x);
+float4 __ovld __cnfn native_sqrt(float4 x);
+float8 __ovld __cnfn native_sqrt(float8 x);
+float16 __ovld __cnfn native_sqrt(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_sqrt(double x);
+double2 __ovld __cnfn native_sqrt(double2 x);
+double3 __ovld __cnfn native_sqrt(double3 x);
+double4 __ovld __cnfn native_sqrt(double4 x);
+double8 __ovld __cnfn native_sqrt(double8 x);
+double16 __ovld __cnfn native_sqrt(double16 x);
+#endif //cl_khr_fp64
+
+/**
+ * Compute tangent over an implementation-defined range.
+ * The maximum error is implementation-defined.
+ */
+float __ovld __cnfn native_tan(float x);
+float2 __ovld __cnfn native_tan(float2 x);
+float3 __ovld __cnfn native_tan(float3 x);
+float4 __ovld __cnfn native_tan(float4 x);
+float8 __ovld __cnfn native_tan(float8 x);
+float16 __ovld __cnfn native_tan(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn native_tan(double x);
+double2 __ovld __cnfn native_tan(double2 x);
+double3 __ovld __cnfn native_tan(double3 x);
+double4 __ovld __cnfn native_tan(double4 x);
+double8 __ovld __cnfn native_tan(double8 x);
+double16 __ovld __cnfn native_tan(double16 x);
+#endif //cl_khr_fp64
+
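+/*
+ * Illustrative note (not part of the header): the native_* forms map to
+ * hardware instructions where available and sacrifice well-defined
+ * accuracy for speed. A hypothetical normalization step might prefer
+ * them when inputs are known to be well-scaled:
+ *
+ *   float inv_len = native_rsqrt(dot(v, v)); // fast, accuracy unspecified
+ *   float3 unit   = v * inv_len;
+ */
+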
+// OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions
+
+/**
+ * Returns | x |.
+ */
+uchar __ovld __cnfn abs(char x);
+uchar __ovld __cnfn abs(uchar x);
+uchar2 __ovld __cnfn abs(char2 x);
+uchar2 __ovld __cnfn abs(uchar2 x);
+uchar3 __ovld __cnfn abs(char3 x);
+uchar3 __ovld __cnfn abs(uchar3 x);
+uchar4 __ovld __cnfn abs(char4 x);
+uchar4 __ovld __cnfn abs(uchar4 x);
+uchar8 __ovld __cnfn abs(char8 x);
+uchar8 __ovld __cnfn abs(uchar8 x);
+uchar16 __ovld __cnfn abs(char16 x);
+uchar16 __ovld __cnfn abs(uchar16 x);
+ushort __ovld __cnfn abs(short x);
+ushort __ovld __cnfn abs(ushort x);
+ushort2 __ovld __cnfn abs(short2 x);
+ushort2 __ovld __cnfn abs(ushort2 x);
+ushort3 __ovld __cnfn abs(short3 x);
+ushort3 __ovld __cnfn abs(ushort3 x);
+ushort4 __ovld __cnfn abs(short4 x);
+ushort4 __ovld __cnfn abs(ushort4 x);
+ushort8 __ovld __cnfn abs(short8 x);
+ushort8 __ovld __cnfn abs(ushort8 x);
+ushort16 __ovld __cnfn abs(short16 x);
+ushort16 __ovld __cnfn abs(ushort16 x);
+uint __ovld __cnfn abs(int x);
+uint __ovld __cnfn abs(uint x);
+uint2 __ovld __cnfn abs(int2 x);
+uint2 __ovld __cnfn abs(uint2 x);
+uint3 __ovld __cnfn abs(int3 x);
+uint3 __ovld __cnfn abs(uint3 x);
+uint4 __ovld __cnfn abs(int4 x);
+uint4 __ovld __cnfn abs(uint4 x);
+uint8 __ovld __cnfn abs(int8 x);
+uint8 __ovld __cnfn abs(uint8 x);
+uint16 __ovld __cnfn abs(int16 x);
+uint16 __ovld __cnfn abs(uint16 x);
+ulong __ovld __cnfn abs(long x);
+ulong __ovld __cnfn abs(ulong x);
+ulong2 __ovld __cnfn abs(long2 x);
+ulong2 __ovld __cnfn abs(ulong2 x);
+ulong3 __ovld __cnfn abs(long3 x);
+ulong3 __ovld __cnfn abs(ulong3 x);
+ulong4 __ovld __cnfn abs(long4 x);
+ulong4 __ovld __cnfn abs(ulong4 x);
+ulong8 __ovld __cnfn abs(long8 x);
+ulong8 __ovld __cnfn abs(ulong8 x);
+ulong16 __ovld __cnfn abs(long16 x);
+ulong16 __ovld __cnfn abs(ulong16 x);
+
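+/*
+ * Illustrative note (not part of the header): the unsigned return type
+ * is what makes abs well-defined even at the most negative value, where
+ * a signed result would overflow:
+ *
+ *   uchar m = abs((char)-128); // 128, representable only as uchar
+ */
+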
+/**
+ * Returns | x - y | without modulo overflow.
+ */
+uchar __ovld __cnfn abs_diff(char x, char y);
+uchar __ovld __cnfn abs_diff(uchar x, uchar y);
+uchar2 __ovld __cnfn abs_diff(char2 x, char2 y);
+uchar2 __ovld __cnfn abs_diff(uchar2 x, uchar2 y);
+uchar3 __ovld __cnfn abs_diff(char3 x, char3 y);
+uchar3 __ovld __cnfn abs_diff(uchar3 x, uchar3 y);
+uchar4 __ovld __cnfn abs_diff(char4 x, char4 y);
+uchar4 __ovld __cnfn abs_diff(uchar4 x, uchar4 y);
+uchar8 __ovld __cnfn abs_diff(char8 x, char8 y);
+uchar8 __ovld __cnfn abs_diff(uchar8 x, uchar8 y);
+uchar16 __ovld __cnfn abs_diff(char16 x, char16 y);
+uchar16 __ovld __cnfn abs_diff(uchar16 x, uchar16 y);
+ushort __ovld __cnfn abs_diff(short x, short y);
+ushort __ovld __cnfn abs_diff(ushort x, ushort y);
+ushort2 __ovld __cnfn abs_diff(short2 x, short2 y);
+ushort2 __ovld __cnfn abs_diff(ushort2 x, ushort2 y);
+ushort3 __ovld __cnfn abs_diff(short3 x, short3 y);
+ushort3 __ovld __cnfn abs_diff(ushort3 x, ushort3 y);
+ushort4 __ovld __cnfn abs_diff(short4 x, short4 y);
+ushort4 __ovld __cnfn abs_diff(ushort4 x, ushort4 y);
+ushort8 __ovld __cnfn abs_diff(short8 x, short8 y);
+ushort8 __ovld __cnfn abs_diff(ushort8 x, ushort8 y);
+ushort16 __ovld __cnfn abs_diff(short16 x, short16 y);
+ushort16 __ovld __cnfn abs_diff(ushort16 x, ushort16 y);
+uint __ovld __cnfn abs_diff(int x, int y);
+uint __ovld __cnfn abs_diff(uint x, uint y);
+uint2 __ovld __cnfn abs_diff(int2 x, int2 y);
+uint2 __ovld __cnfn abs_diff(uint2 x, uint2 y);
+uint3 __ovld __cnfn abs_diff(int3 x, int3 y);
+uint3 __ovld __cnfn abs_diff(uint3 x, uint3 y);
+uint4 __ovld __cnfn abs_diff(int4 x, int4 y);
+uint4 __ovld __cnfn abs_diff(uint4 x, uint4 y);
+uint8 __ovld __cnfn abs_diff(int8 x, int8 y);
+uint8 __ovld __cnfn abs_diff(uint8 x, uint8 y);
+uint16 __ovld __cnfn abs_diff(int16 x, int16 y);
+uint16 __ovld __cnfn abs_diff(uint16 x, uint16 y);
+ulong __ovld __cnfn abs_diff(long x, long y);
+ulong __ovld __cnfn abs_diff(ulong x, ulong y);
+ulong2 __ovld __cnfn abs_diff(long2 x, long2 y);
+ulong2 __ovld __cnfn abs_diff(ulong2 x, ulong2 y);
+ulong3 __ovld __cnfn abs_diff(long3 x, long3 y);
+ulong3 __ovld __cnfn abs_diff(ulong3 x, ulong3 y);
+ulong4 __ovld __cnfn abs_diff(long4 x, long4 y);
+ulong4 __ovld __cnfn abs_diff(ulong4 x, ulong4 y);
+ulong8 __ovld __cnfn abs_diff(long8 x, long8 y);
+ulong8 __ovld __cnfn abs_diff(ulong8 x, ulong8 y);
+ulong16 __ovld __cnfn abs_diff(long16 x, long16 y);
+ulong16 __ovld __cnfn abs_diff(ulong16 x, ulong16 y);
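+
+// A minimal usage sketch (illustrative only; the kernel name is hypothetical):
+// abs_diff yields the true distance in the unsigned result type, where a
+// plain subtraction in the source type would wrap.
+kernel void abs_diff_demo(global uchar *out) {
+  char x = -120, y = 100;
+  out[0] = abs_diff(x, y); // 220: |(-120) - 100| without wrap-around
+  out[1] = (uchar)(x - y); // 36: plain subtraction wraps modulo 256
+}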
+
+/**
+ * Returns x + y and saturates the result.
+ */
+char __ovld __cnfn add_sat(char x, char y);
+uchar __ovld __cnfn add_sat(uchar x, uchar y);
+char2 __ovld __cnfn add_sat(char2 x, char2 y);
+uchar2 __ovld __cnfn add_sat(uchar2 x, uchar2 y);
+char3 __ovld __cnfn add_sat(char3 x, char3 y);
+uchar3 __ovld __cnfn add_sat(uchar3 x, uchar3 y);
+char4 __ovld __cnfn add_sat(char4 x, char4 y);
+uchar4 __ovld __cnfn add_sat(uchar4 x, uchar4 y);
+char8 __ovld __cnfn add_sat(char8 x, char8 y);
+uchar8 __ovld __cnfn add_sat(uchar8 x, uchar8 y);
+char16 __ovld __cnfn add_sat(char16 x, char16 y);
+uchar16 __ovld __cnfn add_sat(uchar16 x, uchar16 y);
+short __ovld __cnfn add_sat(short x, short y);
+ushort __ovld __cnfn add_sat(ushort x, ushort y);
+short2 __ovld __cnfn add_sat(short2 x, short2 y);
+ushort2 __ovld __cnfn add_sat(ushort2 x, ushort2 y);
+short3 __ovld __cnfn add_sat(short3 x, short3 y);
+ushort3 __ovld __cnfn add_sat(ushort3 x, ushort3 y);
+short4 __ovld __cnfn add_sat(short4 x, short4 y);
+ushort4 __ovld __cnfn add_sat(ushort4 x, ushort4 y);
+short8 __ovld __cnfn add_sat(short8 x, short8 y);
+ushort8 __ovld __cnfn add_sat(ushort8 x, ushort8 y);
+short16 __ovld __cnfn add_sat(short16 x, short16 y);
+ushort16 __ovld __cnfn add_sat(ushort16 x, ushort16 y);
+int __ovld __cnfn add_sat(int x, int y);
+uint __ovld __cnfn add_sat(uint x, uint y);
+int2 __ovld __cnfn add_sat(int2 x, int2 y);
+uint2 __ovld __cnfn add_sat(uint2 x, uint2 y);
+int3 __ovld __cnfn add_sat(int3 x, int3 y);
+uint3 __ovld __cnfn add_sat(uint3 x, uint3 y);
+int4 __ovld __cnfn add_sat(int4 x, int4 y);
+uint4 __ovld __cnfn add_sat(uint4 x, uint4 y);
+int8 __ovld __cnfn add_sat(int8 x, int8 y);
+uint8 __ovld __cnfn add_sat(uint8 x, uint8 y);
+int16 __ovld __cnfn add_sat(int16 x, int16 y);
+uint16 __ovld __cnfn add_sat(uint16 x, uint16 y);
+long __ovld __cnfn add_sat(long x, long y);
+ulong __ovld __cnfn add_sat(ulong x, ulong y);
+long2 __ovld __cnfn add_sat(long2 x, long2 y);
+ulong2 __ovld __cnfn add_sat(ulong2 x, ulong2 y);
+long3 __ovld __cnfn add_sat(long3 x, long3 y);
+ulong3 __ovld __cnfn add_sat(ulong3 x, ulong3 y);
+long4 __ovld __cnfn add_sat(long4 x, long4 y);
+ulong4 __ovld __cnfn add_sat(ulong4 x, ulong4 y);
+long8 __ovld __cnfn add_sat(long8 x, long8 y);
+ulong8 __ovld __cnfn add_sat(ulong8 x, ulong8 y);
+long16 __ovld __cnfn add_sat(long16 x, long16 y);
+ulong16 __ovld __cnfn add_sat(ulong16 x, ulong16 y);
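+
+// A minimal sketch (hypothetical kernel name): add_sat clamps to the limits
+// of the result type instead of wrapping as ordinary modular addition would.
+// sub_sat, declared further below, saturates symmetrically on subtraction.
+kernel void add_sat_demo(global uchar *out) {
+  out[0] = add_sat((uchar)200, (uchar)100); // 255 (UCHAR_MAX), clamped
+  out[1] = (uchar)(200 + 100);              // 44: plain addition wraps mod 256
+}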
+
+/**
+ * Returns (x + y) >> 1. The intermediate sum does
+ * not modulo overflow.
+ */
+char __ovld __cnfn hadd(char x, char y);
+uchar __ovld __cnfn hadd(uchar x, uchar y);
+char2 __ovld __cnfn hadd(char2 x, char2 y);
+uchar2 __ovld __cnfn hadd(uchar2 x, uchar2 y);
+char3 __ovld __cnfn hadd(char3 x, char3 y);
+uchar3 __ovld __cnfn hadd(uchar3 x, uchar3 y);
+char4 __ovld __cnfn hadd(char4 x, char4 y);
+uchar4 __ovld __cnfn hadd(uchar4 x, uchar4 y);
+char8 __ovld __cnfn hadd(char8 x, char8 y);
+uchar8 __ovld __cnfn hadd(uchar8 x, uchar8 y);
+char16 __ovld __cnfn hadd(char16 x, char16 y);
+uchar16 __ovld __cnfn hadd(uchar16 x, uchar16 y);
+short __ovld __cnfn hadd(short x, short y);
+ushort __ovld __cnfn hadd(ushort x, ushort y);
+short2 __ovld __cnfn hadd(short2 x, short2 y);
+ushort2 __ovld __cnfn hadd(ushort2 x, ushort2 y);
+short3 __ovld __cnfn hadd(short3 x, short3 y);
+ushort3 __ovld __cnfn hadd(ushort3 x, ushort3 y);
+short4 __ovld __cnfn hadd(short4 x, short4 y);
+ushort4 __ovld __cnfn hadd(ushort4 x, ushort4 y);
+short8 __ovld __cnfn hadd(short8 x, short8 y);
+ushort8 __ovld __cnfn hadd(ushort8 x, ushort8 y);
+short16 __ovld __cnfn hadd(short16 x, short16 y);
+ushort16 __ovld __cnfn hadd(ushort16 x, ushort16 y);
+int __ovld __cnfn hadd(int x, int y);
+uint __ovld __cnfn hadd(uint x, uint y);
+int2 __ovld __cnfn hadd(int2 x, int2 y);
+uint2 __ovld __cnfn hadd(uint2 x, uint2 y);
+int3 __ovld __cnfn hadd(int3 x, int3 y);
+uint3 __ovld __cnfn hadd(uint3 x, uint3 y);
+int4 __ovld __cnfn hadd(int4 x, int4 y);
+uint4 __ovld __cnfn hadd(uint4 x, uint4 y);
+int8 __ovld __cnfn hadd(int8 x, int8 y);
+uint8 __ovld __cnfn hadd(uint8 x, uint8 y);
+int16 __ovld __cnfn hadd(int16 x, int16 y);
+uint16 __ovld __cnfn hadd(uint16 x, uint16 y);
+long __ovld __cnfn hadd(long x, long y);
+ulong __ovld __cnfn hadd(ulong x, ulong y);
+long2 __ovld __cnfn hadd(long2 x, long2 y);
+ulong2 __ovld __cnfn hadd(ulong2 x, ulong2 y);
+long3 __ovld __cnfn hadd(long3 x, long3 y);
+ulong3 __ovld __cnfn hadd(ulong3 x, ulong3 y);
+long4 __ovld __cnfn hadd(long4 x, long4 y);
+ulong4 __ovld __cnfn hadd(ulong4 x, ulong4 y);
+long8 __ovld __cnfn hadd(long8 x, long8 y);
+ulong8 __ovld __cnfn hadd(ulong8 x, ulong8 y);
+long16 __ovld __cnfn hadd(long16 x, long16 y);
+ulong16 __ovld __cnfn hadd(ulong16 x, ulong16 y);
+
+/**
+ * Returns (x + y + 1) >> 1. The intermediate sum
+ * does not modulo overflow.
+ */
+char __ovld __cnfn rhadd(char x, char y);
+uchar __ovld __cnfn rhadd(uchar x, uchar y);
+char2 __ovld __cnfn rhadd(char2 x, char2 y);
+uchar2 __ovld __cnfn rhadd(uchar2 x, uchar2 y);
+char3 __ovld __cnfn rhadd(char3 x, char3 y);
+uchar3 __ovld __cnfn rhadd(uchar3 x, uchar3 y);
+char4 __ovld __cnfn rhadd(char4 x, char4 y);
+uchar4 __ovld __cnfn rhadd(uchar4 x, uchar4 y);
+char8 __ovld __cnfn rhadd(char8 x, char8 y);
+uchar8 __ovld __cnfn rhadd(uchar8 x, uchar8 y);
+char16 __ovld __cnfn rhadd(char16 x, char16 y);
+uchar16 __ovld __cnfn rhadd(uchar16 x, uchar16 y);
+short __ovld __cnfn rhadd(short x, short y);
+ushort __ovld __cnfn rhadd(ushort x, ushort y);
+short2 __ovld __cnfn rhadd(short2 x, short2 y);
+ushort2 __ovld __cnfn rhadd(ushort2 x, ushort2 y);
+short3 __ovld __cnfn rhadd(short3 x, short3 y);
+ushort3 __ovld __cnfn rhadd(ushort3 x, ushort3 y);
+short4 __ovld __cnfn rhadd(short4 x, short4 y);
+ushort4 __ovld __cnfn rhadd(ushort4 x, ushort4 y);
+short8 __ovld __cnfn rhadd(short8 x, short8 y);
+ushort8 __ovld __cnfn rhadd(ushort8 x, ushort8 y);
+short16 __ovld __cnfn rhadd(short16 x, short16 y);
+ushort16 __ovld __cnfn rhadd(ushort16 x, ushort16 y);
+int __ovld __cnfn rhadd(int x, int y);
+uint __ovld __cnfn rhadd(uint x, uint y);
+int2 __ovld __cnfn rhadd(int2 x, int2 y);
+uint2 __ovld __cnfn rhadd(uint2 x, uint2 y);
+int3 __ovld __cnfn rhadd(int3 x, int3 y);
+uint3 __ovld __cnfn rhadd(uint3 x, uint3 y);
+int4 __ovld __cnfn rhadd(int4 x, int4 y);
+uint4 __ovld __cnfn rhadd(uint4 x, uint4 y);
+int8 __ovld __cnfn rhadd(int8 x, int8 y);
+uint8 __ovld __cnfn rhadd(uint8 x, uint8 y);
+int16 __ovld __cnfn rhadd(int16 x, int16 y);
+uint16 __ovld __cnfn rhadd(uint16 x, uint16 y);
+long __ovld __cnfn rhadd(long x, long y);
+ulong __ovld __cnfn rhadd(ulong x, ulong y);
+long2 __ovld __cnfn rhadd(long2 x, long2 y);
+ulong2 __ovld __cnfn rhadd(ulong2 x, ulong2 y);
+long3 __ovld __cnfn rhadd(long3 x, long3 y);
+ulong3 __ovld __cnfn rhadd(ulong3 x, ulong3 y);
+long4 __ovld __cnfn rhadd(long4 x, long4 y);
+ulong4 __ovld __cnfn rhadd(ulong4 x, ulong4 y);
+long8 __ovld __cnfn rhadd(long8 x, long8 y);
+ulong8 __ovld __cnfn rhadd(ulong8 x, ulong8 y);
+long16 __ovld __cnfn rhadd(long16 x, long16 y);
+ulong16 __ovld __cnfn rhadd(ulong16 x, ulong16 y);
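+
+// A minimal sketch (hypothetical kernel name): because the intermediate sum
+// behaves as if it had one extra bit, averaging two values near UINT_MAX
+// still gives the exact result; rhadd rounds the halved sum up.
+kernel void hadd_demo(global uint *out) {
+  out[0] = hadd(0xFFFFFFFEu, 0xFFFFFFFEu); // 0xFFFFFFFE, no wrap-around
+  out[1] = hadd(7u, 8u);                   // 7: (7 + 8) >> 1 rounds down
+  out[2] = rhadd(7u, 8u);                  // 8: (7 + 8 + 1) >> 1 rounds up
+}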
+
+/**
+ * Returns min(max(x, minval), maxval).
+ * Results are undefined if minval > maxval.
+ */
+char __ovld __cnfn clamp(char x, char minval, char maxval);
+uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);
+char2 __ovld __cnfn clamp(char2 x, char2 minval, char2 maxval);
+uchar2 __ovld __cnfn clamp(uchar2 x, uchar2 minval, uchar2 maxval);
+char3 __ovld __cnfn clamp(char3 x, char3 minval, char3 maxval);
+uchar3 __ovld __cnfn clamp(uchar3 x, uchar3 minval, uchar3 maxval);
+char4 __ovld __cnfn clamp(char4 x, char4 minval, char4 maxval);
+uchar4 __ovld __cnfn clamp(uchar4 x, uchar4 minval, uchar4 maxval);
+char8 __ovld __cnfn clamp(char8 x, char8 minval, char8 maxval);
+uchar8 __ovld __cnfn clamp(uchar8 x, uchar8 minval, uchar8 maxval);
+char16 __ovld __cnfn clamp(char16 x, char16 minval, char16 maxval);
+uchar16 __ovld __cnfn clamp(uchar16 x, uchar16 minval, uchar16 maxval);
+short __ovld __cnfn clamp(short x, short minval, short maxval);
+ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);
+short2 __ovld __cnfn clamp(short2 x, short2 minval, short2 maxval);
+ushort2 __ovld __cnfn clamp(ushort2 x, ushort2 minval, ushort2 maxval);
+short3 __ovld __cnfn clamp(short3 x, short3 minval, short3 maxval);
+ushort3 __ovld __cnfn clamp(ushort3 x, ushort3 minval, ushort3 maxval);
+short4 __ovld __cnfn clamp(short4 x, short4 minval, short4 maxval);
+ushort4 __ovld __cnfn clamp(ushort4 x, ushort4 minval, ushort4 maxval);
+short8 __ovld __cnfn clamp(short8 x, short8 minval, short8 maxval);
+ushort8 __ovld __cnfn clamp(ushort8 x, ushort8 minval, ushort8 maxval);
+short16 __ovld __cnfn clamp(short16 x, short16 minval, short16 maxval);
+ushort16 __ovld __cnfn clamp(ushort16 x, ushort16 minval, ushort16 maxval);
+int __ovld __cnfn clamp(int x, int minval, int maxval);
+uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);
+int2 __ovld __cnfn clamp(int2 x, int2 minval, int2 maxval);
+uint2 __ovld __cnfn clamp(uint2 x, uint2 minval, uint2 maxval);
+int3 __ovld __cnfn clamp(int3 x, int3 minval, int3 maxval);
+uint3 __ovld __cnfn clamp(uint3 x, uint3 minval, uint3 maxval);
+int4 __ovld __cnfn clamp(int4 x, int4 minval, int4 maxval);
+uint4 __ovld __cnfn clamp(uint4 x, uint4 minval, uint4 maxval);
+int8 __ovld __cnfn clamp(int8 x, int8 minval, int8 maxval);
+uint8 __ovld __cnfn clamp(uint8 x, uint8 minval, uint8 maxval);
+int16 __ovld __cnfn clamp(int16 x, int16 minval, int16 maxval);
+uint16 __ovld __cnfn clamp(uint16 x, uint16 minval, uint16 maxval);
+long __ovld __cnfn clamp(long x, long minval, long maxval);
+ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);
+long2 __ovld __cnfn clamp(long2 x, long2 minval, long2 maxval);
+ulong2 __ovld __cnfn clamp(ulong2 x, ulong2 minval, ulong2 maxval);
+long3 __ovld __cnfn clamp(long3 x, long3 minval, long3 maxval);
+ulong3 __ovld __cnfn clamp(ulong3 x, ulong3 minval, ulong3 maxval);
+long4 __ovld __cnfn clamp(long4 x, long4 minval, long4 maxval);
+ulong4 __ovld __cnfn clamp(ulong4 x, ulong4 minval, ulong4 maxval);
+long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);
+ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);
+long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);
+ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);
+char __ovld __cnfn clamp(char x, char minval, char maxval);
+uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);
+char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);
+uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);
+char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);
+uchar3 __ovld __cnfn clamp(uchar3 x, uchar minval, uchar maxval);
+char4 __ovld __cnfn clamp(char4 x, char minval, char maxval);
+uchar4 __ovld __cnfn clamp(uchar4 x, uchar minval, uchar maxval);
+char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);
+uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);
+char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);
+uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);
+short __ovld __cnfn clamp(short x, short minval, short maxval);
+ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);
+short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);
+ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);
+short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);
+ushort3 __ovld __cnfn clamp(ushort3 x, ushort minval, ushort maxval);
+short4 __ovld __cnfn clamp(short4 x, short minval, short maxval);
+ushort4 __ovld __cnfn clamp(ushort4 x, ushort minval, ushort maxval);
+short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);
+ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);
+short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);
+ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);
+int __ovld __cnfn clamp(int x, int minval, int maxval);
+uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);
+int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);
+uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);
+int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);
+uint3 __ovld __cnfn clamp(uint3 x, uint minval, uint maxval);
+int4 __ovld __cnfn clamp(int4 x, int minval, int maxval);
+uint4 __ovld __cnfn clamp(uint4 x, uint minval, uint maxval);
+int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);
+uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);
+int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);
+uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);
+long __ovld __cnfn clamp(long x, long minval, long maxval);
+ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);
+long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);
+ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);
+long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);
+ulong3 __ovld __cnfn clamp(ulong3 x, ulong minval, ulong maxval);
+long4 __ovld __cnfn clamp(long4 x, long minval, long maxval);
+ulong4 __ovld __cnfn clamp(ulong4 x, ulong minval, ulong maxval);
+long8 __ovld __cnfn clamp(long8 x, long minval, long maxval);
+ulong8 __ovld __cnfn clamp(ulong8 x, ulong minval, ulong maxval);
+long16 __ovld __cnfn clamp(long16 x, long minval, long maxval);
+ulong16 __ovld __cnfn clamp(ulong16 x, ulong minval, ulong maxval);
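+
+// A minimal sketch (hypothetical kernel name) of the two overload shapes
+// declared above: per-component bounds, or one scalar bound applied to
+// every component.
+kernel void clamp_demo(global int4 *out) {
+  int4 v = (int4)(-5, 3, 42, 100);
+  out[0] = clamp(v, (int4)(0), (int4)(10)); // (0, 3, 10, 10)
+  out[1] = clamp(v, 0, 10);                 // same result, scalar bounds
+}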
+
+/**
+ * Returns the number of leading 0-bits in x, starting
+ * at the most significant bit position.
+ */
+char __ovld __cnfn clz(char x);
+uchar __ovld __cnfn clz(uchar x);
+char2 __ovld __cnfn clz(char2 x);
+uchar2 __ovld __cnfn clz(uchar2 x);
+char3 __ovld __cnfn clz(char3 x);
+uchar3 __ovld __cnfn clz(uchar3 x);
+char4 __ovld __cnfn clz(char4 x);
+uchar4 __ovld __cnfn clz(uchar4 x);
+char8 __ovld __cnfn clz(char8 x);
+uchar8 __ovld __cnfn clz(uchar8 x);
+char16 __ovld __cnfn clz(char16 x);
+uchar16 __ovld __cnfn clz(uchar16 x);
+short __ovld __cnfn clz(short x);
+ushort __ovld __cnfn clz(ushort x);
+short2 __ovld __cnfn clz(short2 x);
+ushort2 __ovld __cnfn clz(ushort2 x);
+short3 __ovld __cnfn clz(short3 x);
+ushort3 __ovld __cnfn clz(ushort3 x);
+short4 __ovld __cnfn clz(short4 x);
+ushort4 __ovld __cnfn clz(ushort4 x);
+short8 __ovld __cnfn clz(short8 x);
+ushort8 __ovld __cnfn clz(ushort8 x);
+short16 __ovld __cnfn clz(short16 x);
+ushort16 __ovld __cnfn clz(ushort16 x);
+int __ovld __cnfn clz(int x);
+uint __ovld __cnfn clz(uint x);
+int2 __ovld __cnfn clz(int2 x);
+uint2 __ovld __cnfn clz(uint2 x);
+int3 __ovld __cnfn clz(int3 x);
+uint3 __ovld __cnfn clz(uint3 x);
+int4 __ovld __cnfn clz(int4 x);
+uint4 __ovld __cnfn clz(uint4 x);
+int8 __ovld __cnfn clz(int8 x);
+uint8 __ovld __cnfn clz(uint8 x);
+int16 __ovld __cnfn clz(int16 x);
+uint16 __ovld __cnfn clz(uint16 x);
+long __ovld __cnfn clz(long x);
+ulong __ovld __cnfn clz(ulong x);
+long2 __ovld __cnfn clz(long2 x);
+ulong2 __ovld __cnfn clz(ulong2 x);
+long3 __ovld __cnfn clz(long3 x);
+ulong3 __ovld __cnfn clz(ulong3 x);
+long4 __ovld __cnfn clz(long4 x);
+ulong4 __ovld __cnfn clz(ulong4 x);
+long8 __ovld __cnfn clz(long8 x);
+ulong8 __ovld __cnfn clz(ulong8 x);
+long16 __ovld __cnfn clz(long16 x);
+ulong16 __ovld __cnfn clz(ulong16 x);
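+
+// A minimal sketch (hypothetical kernel name): clz counts zero bits from the
+// most significant end of the value.
+kernel void clz_demo(global uint *out) {
+  out[0] = clz(0x00010000u); // 15: bits 31..17 are zero
+  out[1] = clz(1u);          // 31
+}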
+
+/**
+ * Returns the count of trailing 0-bits in x. If x is 0,
+ * returns the size in bits of the type of x, or of the
+ * component type of x if x is a vector.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+char __ovld ctz(char x);
+uchar __ovld ctz(uchar x);
+char2 __ovld ctz(char2 x);
+uchar2 __ovld ctz(uchar2 x);
+char3 __ovld ctz(char3 x);
+uchar3 __ovld ctz(uchar3 x);
+char4 __ovld ctz(char4 x);
+uchar4 __ovld ctz(uchar4 x);
+char8 __ovld ctz(char8 x);
+uchar8 __ovld ctz(uchar8 x);
+char16 __ovld ctz(char16 x);
+uchar16 __ovld ctz(uchar16 x);
+short __ovld ctz(short x);
+ushort __ovld ctz(ushort x);
+short2 __ovld ctz(short2 x);
+ushort2 __ovld ctz(ushort2 x);
+short3 __ovld ctz(short3 x);
+ushort3 __ovld ctz(ushort3 x);
+short4 __ovld ctz(short4 x);
+ushort4 __ovld ctz(ushort4 x);
+short8 __ovld ctz(short8 x);
+ushort8 __ovld ctz(ushort8 x);
+short16 __ovld ctz(short16 x);
+ushort16 __ovld ctz(ushort16 x);
+int __ovld ctz(int x);
+uint __ovld ctz(uint x);
+int2 __ovld ctz(int2 x);
+uint2 __ovld ctz(uint2 x);
+int3 __ovld ctz(int3 x);
+uint3 __ovld ctz(uint3 x);
+int4 __ovld ctz(int4 x);
+uint4 __ovld ctz(uint4 x);
+int8 __ovld ctz(int8 x);
+uint8 __ovld ctz(uint8 x);
+int16 __ovld ctz(int16 x);
+uint16 __ovld ctz(uint16 x);
+long __ovld ctz(long x);
+ulong __ovld ctz(ulong x);
+long2 __ovld ctz(long2 x);
+ulong2 __ovld ctz(ulong2 x);
+long3 __ovld ctz(long3 x);
+ulong3 __ovld ctz(ulong3 x);
+long4 __ovld ctz(long4 x);
+ulong4 __ovld ctz(ulong4 x);
+long8 __ovld ctz(long8 x);
+ulong8 __ovld ctz(ulong8 x);
+long16 __ovld ctz(long16 x);
+ulong16 __ovld ctz(ulong16 x);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
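+
+// A minimal sketch (hypothetical kernel name), guarded like the declarations
+// above since ctz requires OpenCL C 2.0.
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+kernel void ctz_demo(global uint *out) {
+  out[0] = ctz(0x00010000u); // 16 trailing zero bits
+  out[1] = ctz(0u);          // 32: the bit width of uint, per the rule above
+}
+#endif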
+
+/**
+ * Returns mul_hi(a, b) + c.
+ */
+char __ovld __cnfn mad_hi(char a, char b, char c);
+uchar __ovld __cnfn mad_hi(uchar a, uchar b, uchar c);
+char2 __ovld __cnfn mad_hi(char2 a, char2 b, char2 c);
+uchar2 __ovld __cnfn mad_hi(uchar2 a, uchar2 b, uchar2 c);
+char3 __ovld __cnfn mad_hi(char3 a, char3 b, char3 c);
+uchar3 __ovld __cnfn mad_hi(uchar3 a, uchar3 b, uchar3 c);
+char4 __ovld __cnfn mad_hi(char4 a, char4 b, char4 c);
+uchar4 __ovld __cnfn mad_hi(uchar4 a, uchar4 b, uchar4 c);
+char8 __ovld __cnfn mad_hi(char8 a, char8 b, char8 c);
+uchar8 __ovld __cnfn mad_hi(uchar8 a, uchar8 b, uchar8 c);
+char16 __ovld __cnfn mad_hi(char16 a, char16 b, char16 c);
+uchar16 __ovld __cnfn mad_hi(uchar16 a, uchar16 b, uchar16 c);
+short __ovld __cnfn mad_hi(short a, short b, short c);
+ushort __ovld __cnfn mad_hi(ushort a, ushort b, ushort c);
+short2 __ovld __cnfn mad_hi(short2 a, short2 b, short2 c);
+ushort2 __ovld __cnfn mad_hi(ushort2 a, ushort2 b, ushort2 c);
+short3 __ovld __cnfn mad_hi(short3 a, short3 b, short3 c);
+ushort3 __ovld __cnfn mad_hi(ushort3 a, ushort3 b, ushort3 c);
+short4 __ovld __cnfn mad_hi(short4 a, short4 b, short4 c);
+ushort4 __ovld __cnfn mad_hi(ushort4 a, ushort4 b, ushort4 c);
+short8 __ovld __cnfn mad_hi(short8 a, short8 b, short8 c);
+ushort8 __ovld __cnfn mad_hi(ushort8 a, ushort8 b, ushort8 c);
+short16 __ovld __cnfn mad_hi(short16 a, short16 b, short16 c);
+ushort16 __ovld __cnfn mad_hi(ushort16 a, ushort16 b, ushort16 c);
+int __ovld __cnfn mad_hi(int a, int b, int c);
+uint __ovld __cnfn mad_hi(uint a, uint b, uint c);
+int2 __ovld __cnfn mad_hi(int2 a, int2 b, int2 c);
+uint2 __ovld __cnfn mad_hi(uint2 a, uint2 b, uint2 c);
+int3 __ovld __cnfn mad_hi(int3 a, int3 b, int3 c);
+uint3 __ovld __cnfn mad_hi(uint3 a, uint3 b, uint3 c);
+int4 __ovld __cnfn mad_hi(int4 a, int4 b, int4 c);
+uint4 __ovld __cnfn mad_hi(uint4 a, uint4 b, uint4 c);
+int8 __ovld __cnfn mad_hi(int8 a, int8 b, int8 c);
+uint8 __ovld __cnfn mad_hi(uint8 a, uint8 b, uint8 c);
+int16 __ovld __cnfn mad_hi(int16 a, int16 b, int16 c);
+uint16 __ovld __cnfn mad_hi(uint16 a, uint16 b, uint16 c);
+long __ovld __cnfn mad_hi(long a, long b, long c);
+ulong __ovld __cnfn mad_hi(ulong a, ulong b, ulong c);
+long2 __ovld __cnfn mad_hi(long2 a, long2 b, long2 c);
+ulong2 __ovld __cnfn mad_hi(ulong2 a, ulong2 b, ulong2 c);
+long3 __ovld __cnfn mad_hi(long3 a, long3 b, long3 c);
+ulong3 __ovld __cnfn mad_hi(ulong3 a, ulong3 b, ulong3 c);
+long4 __ovld __cnfn mad_hi(long4 a, long4 b, long4 c);
+ulong4 __ovld __cnfn mad_hi(ulong4 a, ulong4 b, ulong4 c);
+long8 __ovld __cnfn mad_hi(long8 a, long8 b, long8 c);
+ulong8 __ovld __cnfn mad_hi(ulong8 a, ulong8 b, ulong8 c);
+long16 __ovld __cnfn mad_hi(long16 a, long16 b, long16 c);
+ulong16 __ovld __cnfn mad_hi(ulong16 a, ulong16 b, ulong16 c);
+
+/**
+ * Returns a * b + c and saturates the result.
+ */
+char __ovld __cnfn mad_sat(char a, char b, char c);
+uchar __ovld __cnfn mad_sat(uchar a, uchar b, uchar c);
+char2 __ovld __cnfn mad_sat(char2 a, char2 b, char2 c);
+uchar2 __ovld __cnfn mad_sat(uchar2 a, uchar2 b, uchar2 c);
+char3 __ovld __cnfn mad_sat(char3 a, char3 b, char3 c);
+uchar3 __ovld __cnfn mad_sat(uchar3 a, uchar3 b, uchar3 c);
+char4 __ovld __cnfn mad_sat(char4 a, char4 b, char4 c);
+uchar4 __ovld __cnfn mad_sat(uchar4 a, uchar4 b, uchar4 c);
+char8 __ovld __cnfn mad_sat(char8 a, char8 b, char8 c);
+uchar8 __ovld __cnfn mad_sat(uchar8 a, uchar8 b, uchar8 c);
+char16 __ovld __cnfn mad_sat(char16 a, char16 b, char16 c);
+uchar16 __ovld __cnfn mad_sat(uchar16 a, uchar16 b, uchar16 c);
+short __ovld __cnfn mad_sat(short a, short b, short c);
+ushort __ovld __cnfn mad_sat(ushort a, ushort b, ushort c);
+short2 __ovld __cnfn mad_sat(short2 a, short2 b, short2 c);
+ushort2 __ovld __cnfn mad_sat(ushort2 a, ushort2 b, ushort2 c);
+short3 __ovld __cnfn mad_sat(short3 a, short3 b, short3 c);
+ushort3 __ovld __cnfn mad_sat(ushort3 a, ushort3 b, ushort3 c);
+short4 __ovld __cnfn mad_sat(short4 a, short4 b, short4 c);
+ushort4 __ovld __cnfn mad_sat(ushort4 a, ushort4 b, ushort4 c);
+short8 __ovld __cnfn mad_sat(short8 a, short8 b, short8 c);
+ushort8 __ovld __cnfn mad_sat(ushort8 a, ushort8 b, ushort8 c);
+short16 __ovld __cnfn mad_sat(short16 a, short16 b, short16 c);
+ushort16 __ovld __cnfn mad_sat(ushort16 a, ushort16 b, ushort16 c);
+int __ovld __cnfn mad_sat(int a, int b, int c);
+uint __ovld __cnfn mad_sat(uint a, uint b, uint c);
+int2 __ovld __cnfn mad_sat(int2 a, int2 b, int2 c);
+uint2 __ovld __cnfn mad_sat(uint2 a, uint2 b, uint2 c);
+int3 __ovld __cnfn mad_sat(int3 a, int3 b, int3 c);
+uint3 __ovld __cnfn mad_sat(uint3 a, uint3 b, uint3 c);
+int4 __ovld __cnfn mad_sat(int4 a, int4 b, int4 c);
+uint4 __ovld __cnfn mad_sat(uint4 a, uint4 b, uint4 c);
+int8 __ovld __cnfn mad_sat(int8 a, int8 b, int8 c);
+uint8 __ovld __cnfn mad_sat(uint8 a, uint8 b, uint8 c);
+int16 __ovld __cnfn mad_sat(int16 a, int16 b, int16 c);
+uint16 __ovld __cnfn mad_sat(uint16 a, uint16 b, uint16 c);
+long __ovld __cnfn mad_sat(long a, long b, long c);
+ulong __ovld __cnfn mad_sat(ulong a, ulong b, ulong c);
+long2 __ovld __cnfn mad_sat(long2 a, long2 b, long2 c);
+ulong2 __ovld __cnfn mad_sat(ulong2 a, ulong2 b, ulong2 c);
+long3 __ovld __cnfn mad_sat(long3 a, long3 b, long3 c);
+ulong3 __ovld __cnfn mad_sat(ulong3 a, ulong3 b, ulong3 c);
+long4 __ovld __cnfn mad_sat(long4 a, long4 b, long4 c);
+ulong4 __ovld __cnfn mad_sat(ulong4 a, ulong4 b, ulong4 c);
+long8 __ovld __cnfn mad_sat(long8 a, long8 b, long8 c);
+ulong8 __ovld __cnfn mad_sat(ulong8 a, ulong8 b, ulong8 c);
+long16 __ovld __cnfn mad_sat(long16 a, long16 b, long16 c);
+ulong16 __ovld __cnfn mad_sat(ulong16 a, ulong16 b, ulong16 c);
+
+/**
+ * Returns y if x < y, otherwise it returns x.
+ */
+char __ovld __cnfn max(char x, char y);
+uchar __ovld __cnfn max(uchar x, uchar y);
+char2 __ovld __cnfn max(char2 x, char2 y);
+uchar2 __ovld __cnfn max(uchar2 x, uchar2 y);
+char3 __ovld __cnfn max(char3 x, char3 y);
+uchar3 __ovld __cnfn max(uchar3 x, uchar3 y);
+char4 __ovld __cnfn max(char4 x, char4 y);
+uchar4 __ovld __cnfn max(uchar4 x, uchar4 y);
+char8 __ovld __cnfn max(char8 x, char8 y);
+uchar8 __ovld __cnfn max(uchar8 x, uchar8 y);
+char16 __ovld __cnfn max(char16 x, char16 y);
+uchar16 __ovld __cnfn max(uchar16 x, uchar16 y);
+short __ovld __cnfn max(short x, short y);
+ushort __ovld __cnfn max(ushort x, ushort y);
+short2 __ovld __cnfn max(short2 x, short2 y);
+ushort2 __ovld __cnfn max(ushort2 x, ushort2 y);
+short3 __ovld __cnfn max(short3 x, short3 y);
+ushort3 __ovld __cnfn max(ushort3 x, ushort3 y);
+short4 __ovld __cnfn max(short4 x, short4 y);
+ushort4 __ovld __cnfn max(ushort4 x, ushort4 y);
+short8 __ovld __cnfn max(short8 x, short8 y);
+ushort8 __ovld __cnfn max(ushort8 x, ushort8 y);
+short16 __ovld __cnfn max(short16 x, short16 y);
+ushort16 __ovld __cnfn max(ushort16 x, ushort16 y);
+int __ovld __cnfn max(int x, int y);
+uint __ovld __cnfn max(uint x, uint y);
+int2 __ovld __cnfn max(int2 x, int2 y);
+uint2 __ovld __cnfn max(uint2 x, uint2 y);
+int3 __ovld __cnfn max(int3 x, int3 y);
+uint3 __ovld __cnfn max(uint3 x, uint3 y);
+int4 __ovld __cnfn max(int4 x, int4 y);
+uint4 __ovld __cnfn max(uint4 x, uint4 y);
+int8 __ovld __cnfn max(int8 x, int8 y);
+uint8 __ovld __cnfn max(uint8 x, uint8 y);
+int16 __ovld __cnfn max(int16 x, int16 y);
+uint16 __ovld __cnfn max(uint16 x, uint16 y);
+long __ovld __cnfn max(long x, long y);
+ulong __ovld __cnfn max(ulong x, ulong y);
+long2 __ovld __cnfn max(long2 x, long2 y);
+ulong2 __ovld __cnfn max(ulong2 x, ulong2 y);
+long3 __ovld __cnfn max(long3 x, long3 y);
+ulong3 __ovld __cnfn max(ulong3 x, ulong3 y);
+long4 __ovld __cnfn max(long4 x, long4 y);
+ulong4 __ovld __cnfn max(ulong4 x, ulong4 y);
+long8 __ovld __cnfn max(long8 x, long8 y);
+ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);
+long16 __ovld __cnfn max(long16 x, long16 y);
+ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);
+char __ovld __cnfn max(char x, char y);
+uchar __ovld __cnfn max(uchar x, uchar y);
+char2 __ovld __cnfn max(char2 x, char y);
+uchar2 __ovld __cnfn max(uchar2 x, uchar y);
+char3 __ovld __cnfn max(char3 x, char y);
+uchar3 __ovld __cnfn max(uchar3 x, uchar y);
+char4 __ovld __cnfn max(char4 x, char y);
+uchar4 __ovld __cnfn max(uchar4 x, uchar y);
+char8 __ovld __cnfn max(char8 x, char y);
+uchar8 __ovld __cnfn max(uchar8 x, uchar y);
+char16 __ovld __cnfn max(char16 x, char y);
+uchar16 __ovld __cnfn max(uchar16 x, uchar y);
+short __ovld __cnfn max(short x, short y);
+ushort __ovld __cnfn max(ushort x, ushort y);
+short2 __ovld __cnfn max(short2 x, short y);
+ushort2 __ovld __cnfn max(ushort2 x, ushort y);
+short3 __ovld __cnfn max(short3 x, short y);
+ushort3 __ovld __cnfn max(ushort3 x, ushort y);
+short4 __ovld __cnfn max(short4 x, short y);
+ushort4 __ovld __cnfn max(ushort4 x, ushort y);
+short8 __ovld __cnfn max(short8 x, short y);
+ushort8 __ovld __cnfn max(ushort8 x, ushort y);
+short16 __ovld __cnfn max(short16 x, short y);
+ushort16 __ovld __cnfn max(ushort16 x, ushort y);
+int __ovld __cnfn max(int x, int y);
+uint __ovld __cnfn max(uint x, uint y);
+int2 __ovld __cnfn max(int2 x, int y);
+uint2 __ovld __cnfn max(uint2 x, uint y);
+int3 __ovld __cnfn max(int3 x, int y);
+uint3 __ovld __cnfn max(uint3 x, uint y);
+int4 __ovld __cnfn max(int4 x, int y);
+uint4 __ovld __cnfn max(uint4 x, uint y);
+int8 __ovld __cnfn max(int8 x, int y);
+uint8 __ovld __cnfn max(uint8 x, uint y);
+int16 __ovld __cnfn max(int16 x, int y);
+uint16 __ovld __cnfn max(uint16 x, uint y);
+long __ovld __cnfn max(long x, long y);
+ulong __ovld __cnfn max(ulong x, ulong y);
+long2 __ovld __cnfn max(long2 x, long y);
+ulong2 __ovld __cnfn max(ulong2 x, ulong y);
+long3 __ovld __cnfn max(long3 x, long y);
+ulong3 __ovld __cnfn max(ulong3 x, ulong y);
+long4 __ovld __cnfn max(long4 x, long y);
+ulong4 __ovld __cnfn max(ulong4 x, ulong y);
+long8 __ovld __cnfn max(long8 x, long y);
+ulong8 __ovld __cnfn max(ulong8 x, ulong y);
+long16 __ovld __cnfn max(long16 x, long y);
+ulong16 __ovld __cnfn max(ulong16 x, ulong y);
+
+/**
+ * Returns y if y < x, otherwise it returns x.
+ */
+char __ovld __cnfn min(char x, char y);
+uchar __ovld __cnfn min(uchar x, uchar y);
+char2 __ovld __cnfn min(char2 x, char2 y);
+uchar2 __ovld __cnfn min(uchar2 x, uchar2 y);
+char3 __ovld __cnfn min(char3 x, char3 y);
+uchar3 __ovld __cnfn min(uchar3 x, uchar3 y);
+char4 __ovld __cnfn min(char4 x, char4 y);
+uchar4 __ovld __cnfn min(uchar4 x, uchar4 y);
+char8 __ovld __cnfn min(char8 x, char8 y);
+uchar8 __ovld __cnfn min(uchar8 x, uchar8 y);
+char16 __ovld __cnfn min(char16 x, char16 y);
+uchar16 __ovld __cnfn min(uchar16 x, uchar16 y);
+short __ovld __cnfn min(short x, short y);
+ushort __ovld __cnfn min(ushort x, ushort y);
+short2 __ovld __cnfn min(short2 x, short2 y);
+ushort2 __ovld __cnfn min(ushort2 x, ushort2 y);
+short3 __ovld __cnfn min(short3 x, short3 y);
+ushort3 __ovld __cnfn min(ushort3 x, ushort3 y);
+short4 __ovld __cnfn min(short4 x, short4 y);
+ushort4 __ovld __cnfn min(ushort4 x, ushort4 y);
+short8 __ovld __cnfn min(short8 x, short8 y);
+ushort8 __ovld __cnfn min(ushort8 x, ushort8 y);
+short16 __ovld __cnfn min(short16 x, short16 y);
+ushort16 __ovld __cnfn min(ushort16 x, ushort16 y);
+int __ovld __cnfn min(int x, int y);
+uint __ovld __cnfn min(uint x, uint y);
+int2 __ovld __cnfn min(int2 x, int2 y);
+uint2 __ovld __cnfn min(uint2 x, uint2 y);
+int3 __ovld __cnfn min(int3 x, int3 y);
+uint3 __ovld __cnfn min(uint3 x, uint3 y);
+int4 __ovld __cnfn min(int4 x, int4 y);
+uint4 __ovld __cnfn min(uint4 x, uint4 y);
+int8 __ovld __cnfn min(int8 x, int8 y);
+uint8 __ovld __cnfn min(uint8 x, uint8 y);
+int16 __ovld __cnfn min(int16 x, int16 y);
+uint16 __ovld __cnfn min(uint16 x, uint16 y);
+long __ovld __cnfn min(long x, long y);
+ulong __ovld __cnfn min(ulong x, ulong y);
+long2 __ovld __cnfn min(long2 x, long2 y);
+ulong2 __ovld __cnfn min(ulong2 x, ulong2 y);
+long3 __ovld __cnfn min(long3 x, long3 y);
+ulong3 __ovld __cnfn min(ulong3 x, ulong3 y);
+long4 __ovld __cnfn min(long4 x, long4 y);
+ulong4 __ovld __cnfn min(ulong4 x, ulong4 y);
+long8 __ovld __cnfn min(long8 x, long8 y);
+ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);
+long16 __ovld __cnfn min(long16 x, long16 y);
+ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);
+char __ovld __cnfn min(char x, char y);
+uchar __ovld __cnfn min(uchar x, uchar y);
+char2 __ovld __cnfn min(char2 x, char y);
+uchar2 __ovld __cnfn min(uchar2 x, uchar y);
+char3 __ovld __cnfn min(char3 x, char y);
+uchar3 __ovld __cnfn min(uchar3 x, uchar y);
+char4 __ovld __cnfn min(char4 x, char y);
+uchar4 __ovld __cnfn min(uchar4 x, uchar y);
+char8 __ovld __cnfn min(char8 x, char y);
+uchar8 __ovld __cnfn min(uchar8 x, uchar y);
+char16 __ovld __cnfn min(char16 x, char y);
+uchar16 __ovld __cnfn min(uchar16 x, uchar y);
+short __ovld __cnfn min(short x, short y);
+ushort __ovld __cnfn min(ushort x, ushort y);
+short2 __ovld __cnfn min(short2 x, short y);
+ushort2 __ovld __cnfn min(ushort2 x, ushort y);
+short3 __ovld __cnfn min(short3 x, short y);
+ushort3 __ovld __cnfn min(ushort3 x, ushort y);
+short4 __ovld __cnfn min(short4 x, short y);
+ushort4 __ovld __cnfn min(ushort4 x, ushort y);
+short8 __ovld __cnfn min(short8 x, short y);
+ushort8 __ovld __cnfn min(ushort8 x, ushort y);
+short16 __ovld __cnfn min(short16 x, short y);
+ushort16 __ovld __cnfn min(ushort16 x, ushort y);
+int __ovld __cnfn min(int x, int y);
+uint __ovld __cnfn min(uint x, uint y);
+int2 __ovld __cnfn min(int2 x, int y);
+uint2 __ovld __cnfn min(uint2 x, uint y);
+int3 __ovld __cnfn min(int3 x, int y);
+uint3 __ovld __cnfn min(uint3 x, uint y);
+int4 __ovld __cnfn min(int4 x, int y);
+uint4 __ovld __cnfn min(uint4 x, uint y);
+int8 __ovld __cnfn min(int8 x, int y);
+uint8 __ovld __cnfn min(uint8 x, uint y);
+int16 __ovld __cnfn min(int16 x, int y);
+uint16 __ovld __cnfn min(uint16 x, uint y);
+long __ovld __cnfn min(long x, long y);
+ulong __ovld __cnfn min(ulong x, ulong y);
+long2 __ovld __cnfn min(long2 x, long y);
+ulong2 __ovld __cnfn min(ulong2 x, ulong y);
+long3 __ovld __cnfn min(long3 x, long y);
+ulong3 __ovld __cnfn min(ulong3 x, ulong y);
+long4 __ovld __cnfn min(long4 x, long y);
+ulong4 __ovld __cnfn min(ulong4 x, ulong y);
+long8 __ovld __cnfn min(long8 x, long y);
+ulong8 __ovld __cnfn min(ulong8 x, ulong y);
+long16 __ovld __cnfn min(long16 x, long y);
+ulong16 __ovld __cnfn min(ulong16 x, ulong y);
+
+/**
+ * Computes x * y and returns the high half of the
+ * product.
+ */
+char __ovld __cnfn mul_hi(char x, char y);
+uchar __ovld __cnfn mul_hi(uchar x, uchar y);
+char2 __ovld __cnfn mul_hi(char2 x, char2 y);
+uchar2 __ovld __cnfn mul_hi(uchar2 x, uchar2 y);
+char3 __ovld __cnfn mul_hi(char3 x, char3 y);
+uchar3 __ovld __cnfn mul_hi(uchar3 x, uchar3 y);
+char4 __ovld __cnfn mul_hi(char4 x, char4 y);
+uchar4 __ovld __cnfn mul_hi(uchar4 x, uchar4 y);
+char8 __ovld __cnfn mul_hi(char8 x, char8 y);
+uchar8 __ovld __cnfn mul_hi(uchar8 x, uchar8 y);
+char16 __ovld __cnfn mul_hi(char16 x, char16 y);
+uchar16 __ovld __cnfn mul_hi(uchar16 x, uchar16 y);
+short __ovld __cnfn mul_hi(short x, short y);
+ushort __ovld __cnfn mul_hi(ushort x, ushort y);
+short2 __ovld __cnfn mul_hi(short2 x, short2 y);
+ushort2 __ovld __cnfn mul_hi(ushort2 x, ushort2 y);
+short3 __ovld __cnfn mul_hi(short3 x, short3 y);
+ushort3 __ovld __cnfn mul_hi(ushort3 x, ushort3 y);
+short4 __ovld __cnfn mul_hi(short4 x, short4 y);
+ushort4 __ovld __cnfn mul_hi(ushort4 x, ushort4 y);
+short8 __ovld __cnfn mul_hi(short8 x, short8 y);
+ushort8 __ovld __cnfn mul_hi(ushort8 x, ushort8 y);
+short16 __ovld __cnfn mul_hi(short16 x, short16 y);
+ushort16 __ovld __cnfn mul_hi(ushort16 x, ushort16 y);
+int __ovld __cnfn mul_hi(int x, int y);
+uint __ovld __cnfn mul_hi(uint x, uint y);
+int2 __ovld __cnfn mul_hi(int2 x, int2 y);
+uint2 __ovld __cnfn mul_hi(uint2 x, uint2 y);
+int3 __ovld __cnfn mul_hi(int3 x, int3 y);
+uint3 __ovld __cnfn mul_hi(uint3 x, uint3 y);
+int4 __ovld __cnfn mul_hi(int4 x, int4 y);
+uint4 __ovld __cnfn mul_hi(uint4 x, uint4 y);
+int8 __ovld __cnfn mul_hi(int8 x, int8 y);
+uint8 __ovld __cnfn mul_hi(uint8 x, uint8 y);
+int16 __ovld __cnfn mul_hi(int16 x, int16 y);
+uint16 __ovld __cnfn mul_hi(uint16 x, uint16 y);
+long __ovld __cnfn mul_hi(long x, long y);
+ulong __ovld __cnfn mul_hi(ulong x, ulong y);
+long2 __ovld __cnfn mul_hi(long2 x, long2 y);
+ulong2 __ovld __cnfn mul_hi(ulong2 x, ulong2 y);
+long3 __ovld __cnfn mul_hi(long3 x, long3 y);
+ulong3 __ovld __cnfn mul_hi(ulong3 x, ulong3 y);
+long4 __ovld __cnfn mul_hi(long4 x, long4 y);
+ulong4 __ovld __cnfn mul_hi(ulong4 x, ulong4 y);
+long8 __ovld __cnfn mul_hi(long8 x, long8 y);
+ulong8 __ovld __cnfn mul_hi(ulong8 x, ulong8 y);
+long16 __ovld __cnfn mul_hi(long16 x, long16 y);
+ulong16 __ovld __cnfn mul_hi(ulong16 x, ulong16 y);
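+
+// A minimal sketch (hypothetical kernel name): the classic use of mul_hi is
+// reassembling a full-width product from its high and low halves, since the
+// ordinary * operator keeps only the low half.
+kernel void mul_hi_demo(global ulong *out) {
+  uint x = 0xDEADBEEFu, y = 0xCAFEBABEu;
+  out[0] = ((ulong)mul_hi(x, y) << 32) | (ulong)(x * y); // exact 64-bit product
+}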
+
+/**
+ * For each element in v, the bits are shifted left by
+ * the number of bits given by the corresponding
+ * element in i (subject to usual shift modulo rules
+ * described in section 6.3). Bits shifted off the left
+ * side of the element are shifted back in from the
+ * right.
+ */
+char __ovld __cnfn rotate(char v, char i);
+uchar __ovld __cnfn rotate(uchar v, uchar i);
+char2 __ovld __cnfn rotate(char2 v, char2 i);
+uchar2 __ovld __cnfn rotate(uchar2 v, uchar2 i);
+char3 __ovld __cnfn rotate(char3 v, char3 i);
+uchar3 __ovld __cnfn rotate(uchar3 v, uchar3 i);
+char4 __ovld __cnfn rotate(char4 v, char4 i);
+uchar4 __ovld __cnfn rotate(uchar4 v, uchar4 i);
+char8 __ovld __cnfn rotate(char8 v, char8 i);
+uchar8 __ovld __cnfn rotate(uchar8 v, uchar8 i);
+char16 __ovld __cnfn rotate(char16 v, char16 i);
+uchar16 __ovld __cnfn rotate(uchar16 v, uchar16 i);
+short __ovld __cnfn rotate(short v, short i);
+ushort __ovld __cnfn rotate(ushort v, ushort i);
+short2 __ovld __cnfn rotate(short2 v, short2 i);
+ushort2 __ovld __cnfn rotate(ushort2 v, ushort2 i);
+short3 __ovld __cnfn rotate(short3 v, short3 i);
+ushort3 __ovld __cnfn rotate(ushort3 v, ushort3 i);
+short4 __ovld __cnfn rotate(short4 v, short4 i);
+ushort4 __ovld __cnfn rotate(ushort4 v, ushort4 i);
+short8 __ovld __cnfn rotate(short8 v, short8 i);
+ushort8 __ovld __cnfn rotate(ushort8 v, ushort8 i);
+short16 __ovld __cnfn rotate(short16 v, short16 i);
+ushort16 __ovld __cnfn rotate(ushort16 v, ushort16 i);
+int __ovld __cnfn rotate(int v, int i);
+uint __ovld __cnfn rotate(uint v, uint i);
+int2 __ovld __cnfn rotate(int2 v, int2 i);
+uint2 __ovld __cnfn rotate(uint2 v, uint2 i);
+int3 __ovld __cnfn rotate(int3 v, int3 i);
+uint3 __ovld __cnfn rotate(uint3 v, uint3 i);
+int4 __ovld __cnfn rotate(int4 v, int4 i);
+uint4 __ovld __cnfn rotate(uint4 v, uint4 i);
+int8 __ovld __cnfn rotate(int8 v, int8 i);
+uint8 __ovld __cnfn rotate(uint8 v, uint8 i);
+int16 __ovld __cnfn rotate(int16 v, int16 i);
+uint16 __ovld __cnfn rotate(uint16 v, uint16 i);
+long __ovld __cnfn rotate(long v, long i);
+ulong __ovld __cnfn rotate(ulong v, ulong i);
+long2 __ovld __cnfn rotate(long2 v, long2 i);
+ulong2 __ovld __cnfn rotate(ulong2 v, ulong2 i);
+long3 __ovld __cnfn rotate(long3 v, long3 i);
+ulong3 __ovld __cnfn rotate(ulong3 v, ulong3 i);
+long4 __ovld __cnfn rotate(long4 v, long4 i);
+ulong4 __ovld __cnfn rotate(ulong4 v, ulong4 i);
+long8 __ovld __cnfn rotate(long8 v, long8 i);
+ulong8 __ovld __cnfn rotate(ulong8 v, ulong8 i);
+long16 __ovld __cnfn rotate(long16 v, long16 i);
+ulong16 __ovld __cnfn rotate(ulong16 v, ulong16 i);
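+
+// A minimal sketch (hypothetical kernel name): bits shifted off the left
+// re-enter on the right.
+kernel void rotate_demo(global uchar *out) {
+  out[0] = rotate((uchar)0x81, (uchar)1); // 0x03: 10000001 -> 00000011
+}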
+
+/**
+ * Returns x - y and saturates the result.
+ */
+char __ovld __cnfn sub_sat(char x, char y);
+uchar __ovld __cnfn sub_sat(uchar x, uchar y);
+char2 __ovld __cnfn sub_sat(char2 x, char2 y);
+uchar2 __ovld __cnfn sub_sat(uchar2 x, uchar2 y);
+char3 __ovld __cnfn sub_sat(char3 x, char3 y);
+uchar3 __ovld __cnfn sub_sat(uchar3 x, uchar3 y);
+char4 __ovld __cnfn sub_sat(char4 x, char4 y);
+uchar4 __ovld __cnfn sub_sat(uchar4 x, uchar4 y);
+char8 __ovld __cnfn sub_sat(char8 x, char8 y);
+uchar8 __ovld __cnfn sub_sat(uchar8 x, uchar8 y);
+char16 __ovld __cnfn sub_sat(char16 x, char16 y);
+uchar16 __ovld __cnfn sub_sat(uchar16 x, uchar16 y);
+short __ovld __cnfn sub_sat(short x, short y);
+ushort __ovld __cnfn sub_sat(ushort x, ushort y);
+short2 __ovld __cnfn sub_sat(short2 x, short2 y);
+ushort2 __ovld __cnfn sub_sat(ushort2 x, ushort2 y);
+short3 __ovld __cnfn sub_sat(short3 x, short3 y);
+ushort3 __ovld __cnfn sub_sat(ushort3 x, ushort3 y);
+short4 __ovld __cnfn sub_sat(short4 x, short4 y);
+ushort4 __ovld __cnfn sub_sat(ushort4 x, ushort4 y);
+short8 __ovld __cnfn sub_sat(short8 x, short8 y);
+ushort8 __ovld __cnfn sub_sat(ushort8 x, ushort8 y);
+short16 __ovld __cnfn sub_sat(short16 x, short16 y);
+ushort16 __ovld __cnfn sub_sat(ushort16 x, ushort16 y);
+int __ovld __cnfn sub_sat(int x, int y);
+uint __ovld __cnfn sub_sat(uint x, uint y);
+int2 __ovld __cnfn sub_sat(int2 x, int2 y);
+uint2 __ovld __cnfn sub_sat(uint2 x, uint2 y);
+int3 __ovld __cnfn sub_sat(int3 x, int3 y);
+uint3 __ovld __cnfn sub_sat(uint3 x, uint3 y);
+int4 __ovld __cnfn sub_sat(int4 x, int4 y);
+uint4 __ovld __cnfn sub_sat(uint4 x, uint4 y);
+int8 __ovld __cnfn sub_sat(int8 x, int8 y);
+uint8 __ovld __cnfn sub_sat(uint8 x, uint8 y);
+int16 __ovld __cnfn sub_sat(int16 x, int16 y);
+uint16 __ovld __cnfn sub_sat(uint16 x, uint16 y);
+long __ovld __cnfn sub_sat(long x, long y);
+ulong __ovld __cnfn sub_sat(ulong x, ulong y);
+long2 __ovld __cnfn sub_sat(long2 x, long2 y);
+ulong2 __ovld __cnfn sub_sat(ulong2 x, ulong2 y);
+long3 __ovld __cnfn sub_sat(long3 x, long3 y);
+ulong3 __ovld __cnfn sub_sat(ulong3 x, ulong3 y);
+long4 __ovld __cnfn sub_sat(long4 x, long4 y);
+ulong4 __ovld __cnfn sub_sat(ulong4 x, ulong4 y);
+long8 __ovld __cnfn sub_sat(long8 x, long8 y);
+ulong8 __ovld __cnfn sub_sat(ulong8 x, ulong8 y);
+long16 __ovld __cnfn sub_sat(long16 x, long16 y);
+ulong16 __ovld __cnfn sub_sat(ulong16 x, ulong16 y);
+
+/**
+ * result[i] = ((short)hi[i] << 8) | lo[i]
+ * result[i] = ((ushort)hi[i] << 8) | lo[i]
+ */
+short __ovld __cnfn upsample(char hi, uchar lo);
+ushort __ovld __cnfn upsample(uchar hi, uchar lo);
+short2 __ovld __cnfn upsample(char2 hi, uchar2 lo);
+short3 __ovld __cnfn upsample(char3 hi, uchar3 lo);
+short4 __ovld __cnfn upsample(char4 hi, uchar4 lo);
+short8 __ovld __cnfn upsample(char8 hi, uchar8 lo);
+short16 __ovld __cnfn upsample(char16 hi, uchar16 lo);
+ushort2 __ovld __cnfn upsample(uchar2 hi, uchar2 lo);
+ushort3 __ovld __cnfn upsample(uchar3 hi, uchar3 lo);
+ushort4 __ovld __cnfn upsample(uchar4 hi, uchar4 lo);
+ushort8 __ovld __cnfn upsample(uchar8 hi, uchar8 lo);
+ushort16 __ovld __cnfn upsample(uchar16 hi, uchar16 lo);
+
+/**
+ * result[i] = ((int)hi[i] << 16) | lo[i]
+ * result[i] = ((uint)hi[i] << 16) | lo[i]
+ */
+int __ovld __cnfn upsample(short hi, ushort lo);
+uint __ovld __cnfn upsample(ushort hi, ushort lo);
+int2 __ovld __cnfn upsample(short2 hi, ushort2 lo);
+int3 __ovld __cnfn upsample(short3 hi, ushort3 lo);
+int4 __ovld __cnfn upsample(short4 hi, ushort4 lo);
+int8 __ovld __cnfn upsample(short8 hi, ushort8 lo);
+int16 __ovld __cnfn upsample(short16 hi, ushort16 lo);
+uint2 __ovld __cnfn upsample(ushort2 hi, ushort2 lo);
+uint3 __ovld __cnfn upsample(ushort3 hi, ushort3 lo);
+uint4 __ovld __cnfn upsample(ushort4 hi, ushort4 lo);
+uint8 __ovld __cnfn upsample(ushort8 hi, ushort8 lo);
+uint16 __ovld __cnfn upsample(ushort16 hi, ushort16 lo);
+
+/**
+ * result[i] = ((long)hi[i] << 32) | lo[i]
+ * result[i] = ((ulong)hi[i] << 32) | lo[i]
+ */
+long __ovld __cnfn upsample(int hi, uint lo);
+ulong __ovld __cnfn upsample(uint hi, uint lo);
+long2 __ovld __cnfn upsample(int2 hi, uint2 lo);
+long3 __ovld __cnfn upsample(int3 hi, uint3 lo);
+long4 __ovld __cnfn upsample(int4 hi, uint4 lo);
+long8 __ovld __cnfn upsample(int8 hi, uint8 lo);
+long16 __ovld __cnfn upsample(int16 hi, uint16 lo);
+ulong2 __ovld __cnfn upsample(uint2 hi, uint2 lo);
+ulong3 __ovld __cnfn upsample(uint3 hi, uint3 lo);
+ulong4 __ovld __cnfn upsample(uint4 hi, uint4 lo);
+ulong8 __ovld __cnfn upsample(uint8 hi, uint8 lo);
+ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo);
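+
+// A minimal sketch (hypothetical kernel name): upsample concatenates a high
+// and a low half exactly as the formulas above describe.
+kernel void upsample_demo(global ushort *out) {
+  out[0] = upsample((uchar)0xAB, (uchar)0xCD); // 0xABCD
+}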
+
+/**
+ * popcount(x): returns the number of set bits in x.
+ */
+char __ovld __cnfn popcount(char x);
+uchar __ovld __cnfn popcount(uchar x);
+char2 __ovld __cnfn popcount(char2 x);
+uchar2 __ovld __cnfn popcount(uchar2 x);
+char3 __ovld __cnfn popcount(char3 x);
+uchar3 __ovld __cnfn popcount(uchar3 x);
+char4 __ovld __cnfn popcount(char4 x);
+uchar4 __ovld __cnfn popcount(uchar4 x);
+char8 __ovld __cnfn popcount(char8 x);
+uchar8 __ovld __cnfn popcount(uchar8 x);
+char16 __ovld __cnfn popcount(char16 x);
+uchar16 __ovld __cnfn popcount(uchar16 x);
+short __ovld __cnfn popcount(short x);
+ushort __ovld __cnfn popcount(ushort x);
+short2 __ovld __cnfn popcount(short2 x);
+ushort2 __ovld __cnfn popcount(ushort2 x);
+short3 __ovld __cnfn popcount(short3 x);
+ushort3 __ovld __cnfn popcount(ushort3 x);
+short4 __ovld __cnfn popcount(short4 x);
+ushort4 __ovld __cnfn popcount(ushort4 x);
+short8 __ovld __cnfn popcount(short8 x);
+ushort8 __ovld __cnfn popcount(ushort8 x);
+short16 __ovld __cnfn popcount(short16 x);
+ushort16 __ovld __cnfn popcount(ushort16 x);
+int __ovld __cnfn popcount(int x);
+uint __ovld __cnfn popcount(uint x);
+int2 __ovld __cnfn popcount(int2 x);
+uint2 __ovld __cnfn popcount(uint2 x);
+int3 __ovld __cnfn popcount(int3 x);
+uint3 __ovld __cnfn popcount(uint3 x);
+int4 __ovld __cnfn popcount(int4 x);
+uint4 __ovld __cnfn popcount(uint4 x);
+int8 __ovld __cnfn popcount(int8 x);
+uint8 __ovld __cnfn popcount(uint8 x);
+int16 __ovld __cnfn popcount(int16 x);
+uint16 __ovld __cnfn popcount(uint16 x);
+long __ovld __cnfn popcount(long x);
+ulong __ovld __cnfn popcount(ulong x);
+long2 __ovld __cnfn popcount(long2 x);
+ulong2 __ovld __cnfn popcount(ulong2 x);
+long3 __ovld __cnfn popcount(long3 x);
+ulong3 __ovld __cnfn popcount(ulong3 x);
+long4 __ovld __cnfn popcount(long4 x);
+ulong4 __ovld __cnfn popcount(ulong4 x);
+long8 __ovld __cnfn popcount(long8 x);
+ulong8 __ovld __cnfn popcount(ulong8 x);
+long16 __ovld __cnfn popcount(long16 x);
+ulong16 __ovld __cnfn popcount(ulong16 x);
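+
+// A minimal sketch (hypothetical kernel name):
+kernel void popcount_demo(global uint *out) {
+  out[0] = popcount(0xF0F0F0F0u); // 16 set bits
+}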
+
+/**
+ * Multiply two 24-bit integer values x and y and add
+ * the 32-bit integer result to the 32-bit integer z.
+ * Refer to the definition of mul24 to see how the 24-bit
+ * integer multiplication is performed.
+ */
+int __ovld __cnfn mad24(int x, int y, int z);
+uint __ovld __cnfn mad24(uint x, uint y, uint z);
+int2 __ovld __cnfn mad24(int2 x, int2 y, int2 z);
+uint2 __ovld __cnfn mad24(uint2 x, uint2 y, uint2 z);
+int3 __ovld __cnfn mad24(int3 x, int3 y, int3 z);
+uint3 __ovld __cnfn mad24(uint3 x, uint3 y, uint3 z);
+int4 __ovld __cnfn mad24(int4 x, int4 y, int4 z);
+uint4 __ovld __cnfn mad24(uint4 x, uint4 y, uint4 z);
+int8 __ovld __cnfn mad24(int8 x, int8 y, int8 z);
+uint8 __ovld __cnfn mad24(uint8 x, uint8 y, uint8 z);
+int16 __ovld __cnfn mad24(int16 x, int16 y, int16 z);
+uint16 __ovld __cnfn mad24(uint16 x, uint16 y, uint16 z);
+
+/**
+ * Multiply two 24-bit integer values x and y. x and y
+ * are 32-bit integers but only the low 24-bits are used
+ * to perform the multiplication. mul24 should only
+ * be used when the values in x and y are in the range
+ * [-2^23, 2^23-1] if x and y are signed integers, and in
+ * the range [0, 2^24-1] if x and y are unsigned integers. If
+ * x and y are not in this range, the multiplication
+ * result is implementation-defined.
+ */
+int __ovld __cnfn mul24(int x, int y);
+uint __ovld __cnfn mul24(uint x, uint y);
+int2 __ovld __cnfn mul24(int2 x, int2 y);
+uint2 __ovld __cnfn mul24(uint2 x, uint2 y);
+int3 __ovld __cnfn mul24(int3 x, int3 y);
+uint3 __ovld __cnfn mul24(uint3 x, uint3 y);
+int4 __ovld __cnfn mul24(int4 x, int4 y);
+uint4 __ovld __cnfn mul24(uint4 x, uint4 y);
+int8 __ovld __cnfn mul24(int8 x, int8 y);
+uint8 __ovld __cnfn mul24(uint8 x, uint8 y);
+int16 __ovld __cnfn mul24(int16 x, int16 y);
+uint16 __ovld __cnfn mul24(uint16 x, uint16 y);
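+
+// A minimal sketch (hypothetical kernel name): mul24/mad24 suit indexing
+// arithmetic, where operands comfortably stay inside the ranges required
+// above and 24-bit multiplies have historically been cheaper on some GPUs.
+kernel void mul24_demo(global int *out, int width) {
+  int gx = (int)get_global_id(0);
+  int gy = (int)get_global_id(1);
+  out[mad24(gy, width, gx)] = mul24(gx, gy);
+}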
+
+// OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions
+
+/**
+ * Returns fmin(fmax(x, minval), maxval).
+ * Results are undefined if minval > maxval.
+ */
+float __ovld __cnfn clamp(float x, float minval, float maxval);
+float2 __ovld __cnfn clamp(float2 x, float2 minval, float2 maxval);
+float3 __ovld __cnfn clamp(float3 x, float3 minval, float3 maxval);
+float4 __ovld __cnfn clamp(float4 x, float4 minval, float4 maxval);
+float8 __ovld __cnfn clamp(float8 x, float8 minval, float8 maxval);
+float16 __ovld __cnfn clamp(float16 x, float16 minval, float16 maxval);
+float2 __ovld __cnfn clamp(float2 x, float minval, float maxval);
+float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);
+float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);
+float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);
+float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);
+#ifdef cl_khr_fp64
+double __ovld __cnfn clamp(double x, double minval, double maxval);
+double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);
+double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);
+double4 __ovld __cnfn clamp(double4 x, double4 minval, double4 maxval);
+double8 __ovld __cnfn clamp(double8 x, double8 minval, double8 maxval);
+double16 __ovld __cnfn clamp(double16 x, double16 minval, double16 maxval);
+double2 __ovld __cnfn clamp(double2 x, double minval, double maxval);
+double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);
+double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);
+double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);
+double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn clamp(half x, half minval, half maxval);
+half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);
+half3 __ovld __cnfn clamp(half3 x, half3 minval, half3 maxval);
+half4 __ovld __cnfn clamp(half4 x, half4 minval, half4 maxval);
+half8 __ovld __cnfn clamp(half8 x, half8 minval, half8 maxval);
+half16 __ovld __cnfn clamp(half16 x, half16 minval, half16 maxval);
+half2 __ovld __cnfn clamp(half2 x, half minval, half maxval);
+half3 __ovld __cnfn clamp(half3 x, half minval, half maxval);
+half4 __ovld __cnfn clamp(half4 x, half minval, half maxval);
+half8 __ovld __cnfn clamp(half8 x, half minval, half maxval);
+half16 __ovld __cnfn clamp(half16 x, half minval, half maxval);
+#endif //cl_khr_fp16
+
+/**
+ * Converts radians to degrees, i.e. (180 / PI) *
+ * radians.
+ */
+float __ovld __cnfn degrees(float radians);
+float2 __ovld __cnfn degrees(float2 radians);
+float3 __ovld __cnfn degrees(float3 radians);
+float4 __ovld __cnfn degrees(float4 radians);
+float8 __ovld __cnfn degrees(float8 radians);
+float16 __ovld __cnfn degrees(float16 radians);
+#ifdef cl_khr_fp64
+double __ovld __cnfn degrees(double radians);
+double2 __ovld __cnfn degrees(double2 radians);
+double3 __ovld __cnfn degrees(double3 radians);
+double4 __ovld __cnfn degrees(double4 radians);
+double8 __ovld __cnfn degrees(double8 radians);
+double16 __ovld __cnfn degrees(double16 radians);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn degrees(half radians);
+half2 __ovld __cnfn degrees(half2 radians);
+half3 __ovld __cnfn degrees(half3 radians);
+half4 __ovld __cnfn degrees(half4 radians);
+half8 __ovld __cnfn degrees(half8 radians);
+half16 __ovld __cnfn degrees(half16 radians);
+#endif //cl_khr_fp16
+
+/**
+ * Returns y if x < y, otherwise it returns x. If x and y
+ * are infinite or NaN, the return values are undefined.
+ */
+float __ovld __cnfn max(float x, float y);
+float2 __ovld __cnfn max(float2 x, float2 y);
+float3 __ovld __cnfn max(float3 x, float3 y);
+float4 __ovld __cnfn max(float4 x, float4 y);
+float8 __ovld __cnfn max(float8 x, float8 y);
+float16 __ovld __cnfn max(float16 x, float16 y);
+float2 __ovld __cnfn max(float2 x, float y);
+float3 __ovld __cnfn max(float3 x, float y);
+float4 __ovld __cnfn max(float4 x, float y);
+float8 __ovld __cnfn max(float8 x, float y);
+float16 __ovld __cnfn max(float16 x, float y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn max(double x, double y);
+double2 __ovld __cnfn max(double2 x, double2 y);
+double3 __ovld __cnfn max(double3 x, double3 y);
+double4 __ovld __cnfn max(double4 x, double4 y);
+double8 __ovld __cnfn max(double8 x, double8 y);
+double16 __ovld __cnfn max(double16 x, double16 y);
+double2 __ovld __cnfn max(double2 x, double y);
+double3 __ovld __cnfn max(double3 x, double y);
+double4 __ovld __cnfn max(double4 x, double y);
+double8 __ovld __cnfn max(double8 x, double y);
+double16 __ovld __cnfn max(double16 x, double y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn max(half x, half y);
+half2 __ovld __cnfn max(half2 x, half2 y);
+half3 __ovld __cnfn max(half3 x, half3 y);
+half4 __ovld __cnfn max(half4 x, half4 y);
+half8 __ovld __cnfn max(half8 x, half8 y);
+half16 __ovld __cnfn max(half16 x, half16 y);
+half2 __ovld __cnfn max(half2 x, half y);
+half3 __ovld __cnfn max(half3 x, half y);
+half4 __ovld __cnfn max(half4 x, half y);
+half8 __ovld __cnfn max(half8 x, half y);
+half16 __ovld __cnfn max(half16 x, half y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns y if y < x, otherwise it returns x. If x and y
+ * are infinite or NaN, the return values are undefined.
+ */
+float __ovld __cnfn min(float x, float y);
+float2 __ovld __cnfn min(float2 x, float2 y);
+float3 __ovld __cnfn min(float3 x, float3 y);
+float4 __ovld __cnfn min(float4 x, float4 y);
+float8 __ovld __cnfn min(float8 x, float8 y);
+float16 __ovld __cnfn min(float16 x, float16 y);
+float2 __ovld __cnfn min(float2 x, float y);
+float3 __ovld __cnfn min(float3 x, float y);
+float4 __ovld __cnfn min(float4 x, float y);
+float8 __ovld __cnfn min(float8 x, float y);
+float16 __ovld __cnfn min(float16 x, float y);
+#ifdef cl_khr_fp64
+double __ovld __cnfn min(double x, double y);
+double2 __ovld __cnfn min(double2 x, double2 y);
+double3 __ovld __cnfn min(double3 x, double3 y);
+double4 __ovld __cnfn min(double4 x, double4 y);
+double8 __ovld __cnfn min(double8 x, double8 y);
+double16 __ovld __cnfn min(double16 x, double16 y);
+double2 __ovld __cnfn min(double2 x, double y);
+double3 __ovld __cnfn min(double3 x, double y);
+double4 __ovld __cnfn min(double4 x, double y);
+double8 __ovld __cnfn min(double8 x, double y);
+double16 __ovld __cnfn min(double16 x, double y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn min(half x, half y);
+half2 __ovld __cnfn min(half2 x, half2 y);
+half3 __ovld __cnfn min(half3 x, half3 y);
+half4 __ovld __cnfn min(half4 x, half4 y);
+half8 __ovld __cnfn min(half8 x, half8 y);
+half16 __ovld __cnfn min(half16 x, half16 y);
+half2 __ovld __cnfn min(half2 x, half y);
+half3 __ovld __cnfn min(half3 x, half y);
+half4 __ovld __cnfn min(half4 x, half y);
+half8 __ovld __cnfn min(half8 x, half y);
+half16 __ovld __cnfn min(half16 x, half y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the linear blend of x and y, implemented as:
+ * x + (y - x) * a
+ * a must be a value in the range 0.0 ... 1.0. If a is not
+ * in the range 0.0 ... 1.0, the return values are
+ * undefined.
+ */
+float __ovld __cnfn mix(float x, float y, float a);
+float2 __ovld __cnfn mix(float2 x, float2 y, float2 a);
+float3 __ovld __cnfn mix(float3 x, float3 y, float3 a);
+float4 __ovld __cnfn mix(float4 x, float4 y, float4 a);
+float8 __ovld __cnfn mix(float8 x, float8 y, float8 a);
+float16 __ovld __cnfn mix(float16 x, float16 y, float16 a);
+float2 __ovld __cnfn mix(float2 x, float2 y, float a);
+float3 __ovld __cnfn mix(float3 x, float3 y, float a);
+float4 __ovld __cnfn mix(float4 x, float4 y, float a);
+float8 __ovld __cnfn mix(float8 x, float8 y, float a);
+float16 __ovld __cnfn mix(float16 x, float16 y, float a);
+#ifdef cl_khr_fp64
+double __ovld __cnfn mix(double x, double y, double a);
+double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);
+double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);
+double4 __ovld __cnfn mix(double4 x, double4 y, double4 a);
+double8 __ovld __cnfn mix(double8 x, double8 y, double8 a);
+double16 __ovld __cnfn mix(double16 x, double16 y, double16 a);
+double2 __ovld __cnfn mix(double2 x, double2 y, double a);
+double3 __ovld __cnfn mix(double3 x, double3 y, double a);
+double4 __ovld __cnfn mix(double4 x, double4 y, double a);
+double8 __ovld __cnfn mix(double8 x, double8 y, double a);
+double16 __ovld __cnfn mix(double16 x, double16 y, double a);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn mix(half x, half y, half a);
+half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);
+half3 __ovld __cnfn mix(half3 x, half3 y, half3 a);
+half4 __ovld __cnfn mix(half4 x, half4 y, half4 a);
+half8 __ovld __cnfn mix(half8 x, half8 y, half8 a);
+half16 __ovld __cnfn mix(half16 x, half16 y, half16 a);
+half2 __ovld __cnfn mix(half2 x, half2 y, half a);
+half3 __ovld __cnfn mix(half3 x, half3 y, half a);
+half4 __ovld __cnfn mix(half4 x, half4 y, half a);
+half8 __ovld __cnfn mix(half8 x, half8 y, half a);
+half16 __ovld __cnfn mix(half16 x, half16 y, half a);
+#endif //cl_khr_fp16
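+
+// A minimal sketch (hypothetical kernel and variable names): a 25% linear
+// blend between two colors, x + (y - x) * a per the formula above.
+kernel void mix_demo(global float4 *out) {
+  float4 red  = (float4)(1.0f, 0.0f, 0.0f, 1.0f);
+  float4 blue = (float4)(0.0f, 0.0f, 1.0f, 1.0f);
+  out[0] = mix(red, blue, 0.25f); // (0.75, 0.0, 0.25, 1.0)
+}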
+
+/**
+ * Converts degrees to radians, i.e. (PI / 180) *
+ * degrees.
+ */
+float __ovld __cnfn radians(float degrees);
+float2 __ovld __cnfn radians(float2 degrees);
+float3 __ovld __cnfn radians(float3 degrees);
+float4 __ovld __cnfn radians(float4 degrees);
+float8 __ovld __cnfn radians(float8 degrees);
+float16 __ovld __cnfn radians(float16 degrees);
+#ifdef cl_khr_fp64
+double __ovld __cnfn radians(double degrees);
+double2 __ovld __cnfn radians(double2 degrees);
+double3 __ovld __cnfn radians(double3 degrees);
+double4 __ovld __cnfn radians(double4 degrees);
+double8 __ovld __cnfn radians(double8 degrees);
+double16 __ovld __cnfn radians(double16 degrees);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn radians(half degrees);
+half2 __ovld __cnfn radians(half2 degrees);
+half3 __ovld __cnfn radians(half3 degrees);
+half4 __ovld __cnfn radians(half4 degrees);
+half8 __ovld __cnfn radians(half8 degrees);
+half16 __ovld __cnfn radians(half16 degrees);
+#endif //cl_khr_fp16
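+/*
+ * For example, radians(180.0f) is (PI / 180) * 180.0f = PI, i.e.
+ * approximately 3.14159274f in single precision.
+ */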
+
+/**
+ * Returns 0.0 if x < edge, otherwise it returns 1.0.
+ */
+float __ovld __cnfn step(float edge, float x);
+float2 __ovld __cnfn step(float2 edge, float2 x);
+float3 __ovld __cnfn step(float3 edge, float3 x);
+float4 __ovld __cnfn step(float4 edge, float4 x);
+float8 __ovld __cnfn step(float8 edge, float8 x);
+float16 __ovld __cnfn step(float16 edge, float16 x);
+float2 __ovld __cnfn step(float edge, float2 x);
+float3 __ovld __cnfn step(float edge, float3 x);
+float4 __ovld __cnfn step(float edge, float4 x);
+float8 __ovld __cnfn step(float edge, float8 x);
+float16 __ovld __cnfn step(float edge, float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn step(double edge, double x);
+double2 __ovld __cnfn step(double2 edge, double2 x);
+double3 __ovld __cnfn step(double3 edge, double3 x);
+double4 __ovld __cnfn step(double4 edge, double4 x);
+double8 __ovld __cnfn step(double8 edge, double8 x);
+double16 __ovld __cnfn step(double16 edge, double16 x);
+double2 __ovld __cnfn step(double edge, double2 x);
+double3 __ovld __cnfn step(double edge, double3 x);
+double4 __ovld __cnfn step(double edge, double4 x);
+double8 __ovld __cnfn step(double edge, double8 x);
+double16 __ovld __cnfn step(double edge, double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn step(half edge, half x);
+half2 __ovld __cnfn step(half2 edge, half2 x);
+half3 __ovld __cnfn step(half3 edge, half3 x);
+half4 __ovld __cnfn step(half4 edge, half4 x);
+half8 __ovld __cnfn step(half8 edge, half8 x);
+half16 __ovld __cnfn step(half16 edge, half16 x);
+half2 __ovld __cnfn step(half edge, half2 x);
+half3 __ovld __cnfn step(half edge, half3 x);
+half4 __ovld __cnfn step(half edge, half4 x);
+half8 __ovld __cnfn step(half edge, half8 x);
+half16 __ovld __cnfn step(half edge, half16 x);
+#endif //cl_khr_fp16
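+/*
+ * A minimal sketch: step as a branch-free threshold (values illustrative).
+ *
+ *   float4 v = (float4)(0.2f, 0.5f, 0.7f, 1.0f);
+ *   float4 m = step(0.5f, v);   // (0.0f, 1.0f, 1.0f, 1.0f)
+ */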
+
+/**
+ * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and
+ * performs smooth Hermite interpolation between 0
+ * and 1 when edge0 < x < edge1. This is useful in
+ * cases where you would want a threshold function
+ * with a smooth transition.
+ * This is equivalent to:
+ * gentype t;
+ * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);
+ * return t * t * (3 - 2 * t);
+ * Results are undefined if edge0 >= edge1 or if x,
+ * edge0 or edge1 is a NaN.
+ */
+float __ovld __cnfn smoothstep(float edge0, float edge1, float x);
+float2 __ovld __cnfn smoothstep(float2 edge0, float2 edge1, float2 x);
+float3 __ovld __cnfn smoothstep(float3 edge0, float3 edge1, float3 x);
+float4 __ovld __cnfn smoothstep(float4 edge0, float4 edge1, float4 x);
+float8 __ovld __cnfn smoothstep(float8 edge0, float8 edge1, float8 x);
+float16 __ovld __cnfn smoothstep(float16 edge0, float16 edge1, float16 x);
+float2 __ovld __cnfn smoothstep(float edge0, float edge1, float2 x);
+float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);
+float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);
+float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);
+float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn smoothstep(double edge0, double edge1, double x);
+double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);
+double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);
+double4 __ovld __cnfn smoothstep(double4 edge0, double4 edge1, double4 x);
+double8 __ovld __cnfn smoothstep(double8 edge0, double8 edge1, double8 x);
+double16 __ovld __cnfn smoothstep(double16 edge0, double16 edge1, double16 x);
+double2 __ovld __cnfn smoothstep(double edge0, double edge1, double2 x);
+double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);
+double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);
+double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);
+double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn smoothstep(half edge0, half edge1, half x);
+half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);
+half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);
+half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);
+half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);
+half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);
+half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);
+half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);
+half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);
+half8 __ovld __cnfn smoothstep(half edge0, half edge1, half8 x);
+half16 __ovld __cnfn smoothstep(half edge0, half edge1, half16 x);
+#endif //cl_khr_fp16
+
+/**
+ * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x =
+ * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN.
+ */
+float __ovld __cnfn sign(float x);
+float2 __ovld __cnfn sign(float2 x);
+float3 __ovld __cnfn sign(float3 x);
+float4 __ovld __cnfn sign(float4 x);
+float8 __ovld __cnfn sign(float8 x);
+float16 __ovld __cnfn sign(float16 x);
+#ifdef cl_khr_fp64
+double __ovld __cnfn sign(double x);
+double2 __ovld __cnfn sign(double2 x);
+double3 __ovld __cnfn sign(double3 x);
+double4 __ovld __cnfn sign(double4 x);
+double8 __ovld __cnfn sign(double8 x);
+double16 __ovld __cnfn sign(double16 x);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn sign(half x);
+half2 __ovld __cnfn sign(half2 x);
+half3 __ovld __cnfn sign(half3 x);
+half4 __ovld __cnfn sign(half4 x);
+half8 __ovld __cnfn sign(half8 x);
+half16 __ovld __cnfn sign(half16 x);
+#endif //cl_khr_fp16
+
+// OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions
+
+/**
+ * Returns the cross product of p0.xyz and p1.xyz. The
+ * w component of the float4 result will be 0.0.
+ */
+float4 __ovld __cnfn cross(float4 p0, float4 p1);
+float3 __ovld __cnfn cross(float3 p0, float3 p1);
+#ifdef cl_khr_fp64
+double4 __ovld __cnfn cross(double4 p0, double4 p1);
+double3 __ovld __cnfn cross(double3 p0, double3 p1);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half4 __ovld __cnfn cross(half4 p0, half4 p1);
+half3 __ovld __cnfn cross(half3 p0, half3 p1);
+#endif //cl_khr_fp16
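+/*
+ * For example, the cross product of the x and y unit vectors yields the
+ * z unit vector:
+ *
+ *   float4 z = cross((float4)(1.0f, 0.0f, 0.0f, 0.0f),
+ *                    (float4)(0.0f, 1.0f, 0.0f, 0.0f)); // (0, 0, 1, 0)
+ */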
+
+/**
+ * Compute dot product.
+ */
+float __ovld __cnfn dot(float p0, float p1);
+float __ovld __cnfn dot(float2 p0, float2 p1);
+float __ovld __cnfn dot(float3 p0, float3 p1);
+float __ovld __cnfn dot(float4 p0, float4 p1);
+#ifdef cl_khr_fp64
+double __ovld __cnfn dot(double p0, double p1);
+double __ovld __cnfn dot(double2 p0, double2 p1);
+double __ovld __cnfn dot(double3 p0, double3 p1);
+double __ovld __cnfn dot(double4 p0, double4 p1);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn dot(half p0, half p1);
+half __ovld __cnfn dot(half2 p0, half2 p1);
+half __ovld __cnfn dot(half3 p0, half3 p1);
+half __ovld __cnfn dot(half4 p0, half4 p1);
+#endif //cl_khr_fp16
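+/*
+ * For example, dot((float2)(3.0f, 4.0f), (float2)(3.0f, 4.0f)) returns
+ * 25.0f, i.e. 3*3 + 4*4.
+ */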
+
+/**
+ * Returns the distance between p0 and p1. This is
+ * calculated as length(p0 - p1).
+ */
+float __ovld __cnfn distance(float p0, float p1);
+float __ovld __cnfn distance(float2 p0, float2 p1);
+float __ovld __cnfn distance(float3 p0, float3 p1);
+float __ovld __cnfn distance(float4 p0, float4 p1);
+#ifdef cl_khr_fp64
+double __ovld __cnfn distance(double p0, double p1);
+double __ovld __cnfn distance(double2 p0, double2 p1);
+double __ovld __cnfn distance(double3 p0, double3 p1);
+double __ovld __cnfn distance(double4 p0, double4 p1);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn distance(half p0, half p1);
+half __ovld __cnfn distance(half2 p0, half2 p1);
+half __ovld __cnfn distance(half3 p0, half3 p1);
+half __ovld __cnfn distance(half4 p0, half4 p1);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the length of vector p, i.e.,
+ * sqrt(p.x^2 + p.y^2 + ...)
+ */
+float __ovld __cnfn length(float p);
+float __ovld __cnfn length(float2 p);
+float __ovld __cnfn length(float3 p);
+float __ovld __cnfn length(float4 p);
+#ifdef cl_khr_fp64
+double __ovld __cnfn length(double p);
+double __ovld __cnfn length(double2 p);
+double __ovld __cnfn length(double3 p);
+double __ovld __cnfn length(double4 p);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn length(half p);
+half __ovld __cnfn length(half2 p);
+half __ovld __cnfn length(half3 p);
+half __ovld __cnfn length(half4 p);
+#endif //cl_khr_fp16
+
+/**
+ * Returns a vector in the same direction as p but with a
+ * length of 1.
+ */
+float __ovld __cnfn normalize(float p);
+float2 __ovld __cnfn normalize(float2 p);
+float3 __ovld __cnfn normalize(float3 p);
+float4 __ovld __cnfn normalize(float4 p);
+#ifdef cl_khr_fp64
+double __ovld __cnfn normalize(double p);
+double2 __ovld __cnfn normalize(double2 p);
+double3 __ovld __cnfn normalize(double3 p);
+double4 __ovld __cnfn normalize(double4 p);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn normalize(half p);
+half2 __ovld __cnfn normalize(half2 p);
+half3 __ovld __cnfn normalize(half3 p);
+half4 __ovld __cnfn normalize(half4 p);
+#endif //cl_khr_fp16
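+/*
+ * For example, length((float2)(3.0f, 4.0f)) returns 5.0f, and
+ * normalize((float2)(3.0f, 4.0f)) returns (float2)(0.6f, 0.8f), a vector
+ * of length 1 in the same direction.
+ */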
+
+/**
+ * Returns fast_length(p0 - p1).
+ */
+float __ovld __cnfn fast_distance(float p0, float p1);
+float __ovld __cnfn fast_distance(float2 p0, float2 p1);
+float __ovld __cnfn fast_distance(float3 p0, float3 p1);
+float __ovld __cnfn fast_distance(float4 p0, float4 p1);
+#ifdef cl_khr_fp16
+half __ovld __cnfn fast_distance(half p0, half p1);
+half __ovld __cnfn fast_distance(half2 p0, half2 p1);
+half __ovld __cnfn fast_distance(half3 p0, half3 p1);
+half __ovld __cnfn fast_distance(half4 p0, half4 p1);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the length of vector p computed as:
+ * half_sqrt(p.x^2 + p.y^2 + ...)
+ */
+float __ovld __cnfn fast_length(float p);
+float __ovld __cnfn fast_length(float2 p);
+float __ovld __cnfn fast_length(float3 p);
+float __ovld __cnfn fast_length(float4 p);
+#ifdef cl_khr_fp16
+half __ovld __cnfn fast_length(half p);
+half __ovld __cnfn fast_length(half2 p);
+half __ovld __cnfn fast_length(half3 p);
+half __ovld __cnfn fast_length(half4 p);
+#endif //cl_khr_fp16
+
+/**
+ * Returns a vector in the same direction as p but with a
+ * length of 1. fast_normalize is computed as:
+ * p * half_rsqrt (p.x^2 + p.y^2 + ... )
+ * The result shall be within 8192 ulps error from the
+ * infinitely precise result of
+ * if (all(p == 0.0f))
+ * result = p;
+ * else
+ * result = p / sqrt (p.x^2 + p.y^2 + ...);
+ * with the following exceptions:
+ * 1) If the sum of squares is greater than FLT_MAX
+ * then the values of the floating-point elements in
+ * the result vector are undefined.
+ * 2) If the sum of squares is less than FLT_MIN then
+ * the implementation may return p.
+ * 3) If the device is in "denorms are flushed to zero"
+ * mode, individual operand elements with magnitude
+ * less than sqrt(FLT_MIN) may be flushed to zero
+ * before proceeding with the calculation.
+ */
+float __ovld __cnfn fast_normalize(float p);
+float2 __ovld __cnfn fast_normalize(float2 p);
+float3 __ovld __cnfn fast_normalize(float3 p);
+float4 __ovld __cnfn fast_normalize(float4 p);
+#ifdef cl_khr_fp16
+half __ovld __cnfn fast_normalize(half p);
+half2 __ovld __cnfn fast_normalize(half2 p);
+half3 __ovld __cnfn fast_normalize(half3 p);
+half4 __ovld __cnfn fast_normalize(half4 p);
+#endif //cl_khr_fp16
+
+// OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions
+
+/**
+ * intn isequal (floatn x, floatn y)
+ * Returns the component-wise compare of x == y.
+ */
+int __ovld __cnfn isequal(float x, float y);
+int2 __ovld __cnfn isequal(float2 x, float2 y);
+int3 __ovld __cnfn isequal(float3 x, float3 y);
+int4 __ovld __cnfn isequal(float4 x, float4 y);
+int8 __ovld __cnfn isequal(float8 x, float8 y);
+int16 __ovld __cnfn isequal(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isequal(double x, double y);
+long2 __ovld __cnfn isequal(double2 x, double2 y);
+long3 __ovld __cnfn isequal(double3 x, double3 y);
+long4 __ovld __cnfn isequal(double4 x, double4 y);
+long8 __ovld __cnfn isequal(double8 x, double8 y);
+long16 __ovld __cnfn isequal(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isequal(half x, half y);
+short2 __ovld __cnfn isequal(half2 x, half2 y);
+short3 __ovld __cnfn isequal(half3 x, half3 y);
+short4 __ovld __cnfn isequal(half4 x, half4 y);
+short8 __ovld __cnfn isequal(half8 x, half8 y);
+short16 __ovld __cnfn isequal(half16 x, half16 y);
+#endif //cl_khr_fp16
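+/*
+ * Note the relational result convention visible in the prototypes above:
+ * the scalar forms return 1 for true and 0 for false, while the vector
+ * forms return -1 (all bits set) for true and 0 for false per component,
+ * e.g.
+ *
+ *   int2 r = isequal((float2)(1.0f, 2.0f), (float2)(1.0f, 0.0f)); // (-1, 0)
+ */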
+
+/**
+ * Returns the component-wise compare of x != y.
+ */
+int __ovld __cnfn isnotequal(float x, float y);
+int2 __ovld __cnfn isnotequal(float2 x, float2 y);
+int3 __ovld __cnfn isnotequal(float3 x, float3 y);
+int4 __ovld __cnfn isnotequal(float4 x, float4 y);
+int8 __ovld __cnfn isnotequal(float8 x, float8 y);
+int16 __ovld __cnfn isnotequal(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isnotequal(double x, double y);
+long2 __ovld __cnfn isnotequal(double2 x, double2 y);
+long3 __ovld __cnfn isnotequal(double3 x, double3 y);
+long4 __ovld __cnfn isnotequal(double4 x, double4 y);
+long8 __ovld __cnfn isnotequal(double8 x, double8 y);
+long16 __ovld __cnfn isnotequal(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isnotequal(half x, half y);
+short2 __ovld __cnfn isnotequal(half2 x, half2 y);
+short3 __ovld __cnfn isnotequal(half3 x, half3 y);
+short4 __ovld __cnfn isnotequal(half4 x, half4 y);
+short8 __ovld __cnfn isnotequal(half8 x, half8 y);
+short16 __ovld __cnfn isnotequal(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the component-wise compare of x > y.
+ */
+int __ovld __cnfn isgreater(float x, float y);
+int2 __ovld __cnfn isgreater(float2 x, float2 y);
+int3 __ovld __cnfn isgreater(float3 x, float3 y);
+int4 __ovld __cnfn isgreater(float4 x, float4 y);
+int8 __ovld __cnfn isgreater(float8 x, float8 y);
+int16 __ovld __cnfn isgreater(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isgreater(double x, double y);
+long2 __ovld __cnfn isgreater(double2 x, double2 y);
+long3 __ovld __cnfn isgreater(double3 x, double3 y);
+long4 __ovld __cnfn isgreater(double4 x, double4 y);
+long8 __ovld __cnfn isgreater(double8 x, double8 y);
+long16 __ovld __cnfn isgreater(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isgreater(half x, half y);
+short2 __ovld __cnfn isgreater(half2 x, half2 y);
+short3 __ovld __cnfn isgreater(half3 x, half3 y);
+short4 __ovld __cnfn isgreater(half4 x, half4 y);
+short8 __ovld __cnfn isgreater(half8 x, half8 y);
+short16 __ovld __cnfn isgreater(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the component-wise compare of x >= y.
+ */
+int __ovld __cnfn isgreaterequal(float x, float y);
+int2 __ovld __cnfn isgreaterequal(float2 x, float2 y);
+int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);
+int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);
+int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);
+int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isgreaterequal(double x, double y);
+long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);
+long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);
+long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);
+long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);
+long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isgreaterequal(half x, half y);
+short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);
+short3 __ovld __cnfn isgreaterequal(half3 x, half3 y);
+short4 __ovld __cnfn isgreaterequal(half4 x, half4 y);
+short8 __ovld __cnfn isgreaterequal(half8 x, half8 y);
+short16 __ovld __cnfn isgreaterequal(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the component-wise compare of x < y.
+ */
+int __ovld __cnfn isless(float x, float y);
+int2 __ovld __cnfn isless(float2 x, float2 y);
+int3 __ovld __cnfn isless(float3 x, float3 y);
+int4 __ovld __cnfn isless(float4 x, float4 y);
+int8 __ovld __cnfn isless(float8 x, float8 y);
+int16 __ovld __cnfn isless(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isless(double x, double y);
+long2 __ovld __cnfn isless(double2 x, double2 y);
+long3 __ovld __cnfn isless(double3 x, double3 y);
+long4 __ovld __cnfn isless(double4 x, double4 y);
+long8 __ovld __cnfn isless(double8 x, double8 y);
+long16 __ovld __cnfn isless(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isless(half x, half y);
+short2 __ovld __cnfn isless(half2 x, half2 y);
+short3 __ovld __cnfn isless(half3 x, half3 y);
+short4 __ovld __cnfn isless(half4 x, half4 y);
+short8 __ovld __cnfn isless(half8 x, half8 y);
+short16 __ovld __cnfn isless(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the component-wise compare of x <= y.
+ */
+int __ovld __cnfn islessequal(float x, float y);
+int2 __ovld __cnfn islessequal(float2 x, float2 y);
+int3 __ovld __cnfn islessequal(float3 x, float3 y);
+int4 __ovld __cnfn islessequal(float4 x, float4 y);
+int8 __ovld __cnfn islessequal(float8 x, float8 y);
+int16 __ovld __cnfn islessequal(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn islessequal(double x, double y);
+long2 __ovld __cnfn islessequal(double2 x, double2 y);
+long3 __ovld __cnfn islessequal(double3 x, double3 y);
+long4 __ovld __cnfn islessequal(double4 x, double4 y);
+long8 __ovld __cnfn islessequal(double8 x, double8 y);
+long16 __ovld __cnfn islessequal(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn islessequal(half x, half y);
+short2 __ovld __cnfn islessequal(half2 x, half2 y);
+short3 __ovld __cnfn islessequal(half3 x, half3 y);
+short4 __ovld __cnfn islessequal(half4 x, half4 y);
+short8 __ovld __cnfn islessequal(half8 x, half8 y);
+short16 __ovld __cnfn islessequal(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Returns the component-wise compare of
+ * (x < y) || (x > y).
+ */
+int __ovld __cnfn islessgreater(float x, float y);
+int2 __ovld __cnfn islessgreater(float2 x, float2 y);
+int3 __ovld __cnfn islessgreater(float3 x, float3 y);
+int4 __ovld __cnfn islessgreater(float4 x, float4 y);
+int8 __ovld __cnfn islessgreater(float8 x, float8 y);
+int16 __ovld __cnfn islessgreater(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn islessgreater(double x, double y);
+long2 __ovld __cnfn islessgreater(double2 x, double2 y);
+long3 __ovld __cnfn islessgreater(double3 x, double3 y);
+long4 __ovld __cnfn islessgreater(double4 x, double4 y);
+long8 __ovld __cnfn islessgreater(double8 x, double8 y);
+long16 __ovld __cnfn islessgreater(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn islessgreater(half x, half y);
+short2 __ovld __cnfn islessgreater(half2 x, half2 y);
+short3 __ovld __cnfn islessgreater(half3 x, half3 y);
+short4 __ovld __cnfn islessgreater(half4 x, half4 y);
+short8 __ovld __cnfn islessgreater(half8 x, half8 y);
+short16 __ovld __cnfn islessgreater(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Test for finite value.
+ */
+int __ovld __cnfn isfinite(float);
+int2 __ovld __cnfn isfinite(float2);
+int3 __ovld __cnfn isfinite(float3);
+int4 __ovld __cnfn isfinite(float4);
+int8 __ovld __cnfn isfinite(float8);
+int16 __ovld __cnfn isfinite(float16);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isfinite(double);
+long2 __ovld __cnfn isfinite(double2);
+long3 __ovld __cnfn isfinite(double3);
+long4 __ovld __cnfn isfinite(double4);
+long8 __ovld __cnfn isfinite(double8);
+long16 __ovld __cnfn isfinite(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isfinite(half);
+short2 __ovld __cnfn isfinite(half2);
+short3 __ovld __cnfn isfinite(half3);
+short4 __ovld __cnfn isfinite(half4);
+short8 __ovld __cnfn isfinite(half8);
+short16 __ovld __cnfn isfinite(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Test for infinity value (positive or negative).
+ */
+int __ovld __cnfn isinf(float);
+int2 __ovld __cnfn isinf(float2);
+int3 __ovld __cnfn isinf(float3);
+int4 __ovld __cnfn isinf(float4);
+int8 __ovld __cnfn isinf(float8);
+int16 __ovld __cnfn isinf(float16);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isinf(double);
+long2 __ovld __cnfn isinf(double2);
+long3 __ovld __cnfn isinf(double3);
+long4 __ovld __cnfn isinf(double4);
+long8 __ovld __cnfn isinf(double8);
+long16 __ovld __cnfn isinf(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isinf(half);
+short2 __ovld __cnfn isinf(half2);
+short3 __ovld __cnfn isinf(half3);
+short4 __ovld __cnfn isinf(half4);
+short8 __ovld __cnfn isinf(half8);
+short16 __ovld __cnfn isinf(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Test for a NaN.
+ */
+int __ovld __cnfn isnan(float);
+int2 __ovld __cnfn isnan(float2);
+int3 __ovld __cnfn isnan(float3);
+int4 __ovld __cnfn isnan(float4);
+int8 __ovld __cnfn isnan(float8);
+int16 __ovld __cnfn isnan(float16);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isnan(double);
+long2 __ovld __cnfn isnan(double2);
+long3 __ovld __cnfn isnan(double3);
+long4 __ovld __cnfn isnan(double4);
+long8 __ovld __cnfn isnan(double8);
+long16 __ovld __cnfn isnan(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isnan(half);
+short2 __ovld __cnfn isnan(half2);
+short3 __ovld __cnfn isnan(half3);
+short4 __ovld __cnfn isnan(half4);
+short8 __ovld __cnfn isnan(half8);
+short16 __ovld __cnfn isnan(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Test for a normal value.
+ */
+int __ovld __cnfn isnormal(float);
+int2 __ovld __cnfn isnormal(float2);
+int3 __ovld __cnfn isnormal(float3);
+int4 __ovld __cnfn isnormal(float4);
+int8 __ovld __cnfn isnormal(float8);
+int16 __ovld __cnfn isnormal(float16);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isnormal(double);
+long2 __ovld __cnfn isnormal(double2);
+long3 __ovld __cnfn isnormal(double3);
+long4 __ovld __cnfn isnormal(double4);
+long8 __ovld __cnfn isnormal(double8);
+long16 __ovld __cnfn isnormal(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isnormal(half);
+short2 __ovld __cnfn isnormal(half2);
+short3 __ovld __cnfn isnormal(half3);
+short4 __ovld __cnfn isnormal(half4);
+short8 __ovld __cnfn isnormal(half8);
+short16 __ovld __cnfn isnormal(half16);
+#endif //cl_khr_fp16
+
+/**
+ * Test if arguments are ordered. isordered() takes
+ * arguments x and y, and returns the result
+ * isequal(x, x) && isequal(y, y).
+ */
+int __ovld __cnfn isordered(float x, float y);
+int2 __ovld __cnfn isordered(float2 x, float2 y);
+int3 __ovld __cnfn isordered(float3 x, float3 y);
+int4 __ovld __cnfn isordered(float4 x, float4 y);
+int8 __ovld __cnfn isordered(float8 x, float8 y);
+int16 __ovld __cnfn isordered(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isordered(double x, double y);
+long2 __ovld __cnfn isordered(double2 x, double2 y);
+long3 __ovld __cnfn isordered(double3 x, double3 y);
+long4 __ovld __cnfn isordered(double4 x, double4 y);
+long8 __ovld __cnfn isordered(double8 x, double8 y);
+long16 __ovld __cnfn isordered(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isordered(half x, half y);
+short2 __ovld __cnfn isordered(half2 x, half2 y);
+short3 __ovld __cnfn isordered(half3 x, half3 y);
+short4 __ovld __cnfn isordered(half4 x, half4 y);
+short8 __ovld __cnfn isordered(half8 x, half8 y);
+short16 __ovld __cnfn isordered(half16 x, half16 y);
+#endif //cl_khr_fp16
+
+/**
+ * Test if arguments are unordered. isunordered()
+ * takes arguments x and y, returning non-zero if x or y
+ * is NaN, and zero otherwise.
+ */
+int __ovld __cnfn isunordered(float x, float y);
+int2 __ovld __cnfn isunordered(float2 x, float2 y);
+int3 __ovld __cnfn isunordered(float3 x, float3 y);
+int4 __ovld __cnfn isunordered(float4 x, float4 y);
+int8 __ovld __cnfn isunordered(float8 x, float8 y);
+int16 __ovld __cnfn isunordered(float16 x, float16 y);
+#ifdef cl_khr_fp64
+int __ovld __cnfn isunordered(double x, double y);
+long2 __ovld __cnfn isunordered(double2 x, double2 y);
+long3 __ovld __cnfn isunordered(double3 x, double3 y);
+long4 __ovld __cnfn isunordered(double4 x, double4 y);
+long8 __ovld __cnfn isunordered(double8 x, double8 y);
+long16 __ovld __cnfn isunordered(double16 x, double16 y);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn isunordered(half x, half y);
+short2 __ovld __cnfn isunordered(half2 x, half2 y);
+short3 __ovld __cnfn isunordered(half3 x, half3 y);
+short4 __ovld __cnfn isunordered(half4 x, half4 y);
+short8 __ovld __cnfn isunordered(half8 x, half8 y);
+short16 __ovld __cnfn isunordered(half16 x, half16 y);
+#endif //cl_khr_fp16
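+/*
+ * For example, isunordered(NAN, 1.0f) returns 1 and isordered(NAN, 1.0f)
+ * returns 0: a NaN compares unordered with every value, including itself.
+ */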
+
+/**
+ * Test for sign bit. The scalar version returns 1 if the
+ * sign bit in the float is set, else 0. The vector version
+ * returns, for each component in floatn, -1 if the sign
+ * bit in the float is set, else 0.
+ */
+int __ovld __cnfn signbit(float);
+int2 __ovld __cnfn signbit(float2);
+int3 __ovld __cnfn signbit(float3);
+int4 __ovld __cnfn signbit(float4);
+int8 __ovld __cnfn signbit(float8);
+int16 __ovld __cnfn signbit(float16);
+#ifdef cl_khr_fp64
+int __ovld __cnfn signbit(double);
+long2 __ovld __cnfn signbit(double2);
+long3 __ovld __cnfn signbit(double3);
+long4 __ovld __cnfn signbit(double4);
+long8 __ovld __cnfn signbit(double8);
+long16 __ovld __cnfn signbit(double16);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+int __ovld __cnfn signbit(half);
+short2 __ovld __cnfn signbit(half2);
+short3 __ovld __cnfn signbit(half3);
+short4 __ovld __cnfn signbit(half4);
+short8 __ovld __cnfn signbit(half8);
+short16 __ovld __cnfn signbit(half16);
+#endif //cl_khr_fp16
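+/*
+ * For example, signbit(-0.0f) returns 1 (the sign bit of -0.0f is set),
+ * while the vector form signbit((float2)(-1.0f, 1.0f)) returns
+ * (int2)(-1, 0).
+ */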
+
+/**
+ * Returns 1 if the most significant bit in any component
+ * of x is set; otherwise returns 0.
+ */
+int __ovld __cnfn any(char x);
+int __ovld __cnfn any(char2 x);
+int __ovld __cnfn any(char3 x);
+int __ovld __cnfn any(char4 x);
+int __ovld __cnfn any(char8 x);
+int __ovld __cnfn any(char16 x);
+int __ovld __cnfn any(short x);
+int __ovld __cnfn any(short2 x);
+int __ovld __cnfn any(short3 x);
+int __ovld __cnfn any(short4 x);
+int __ovld __cnfn any(short8 x);
+int __ovld __cnfn any(short16 x);
+int __ovld __cnfn any(int x);
+int __ovld __cnfn any(int2 x);
+int __ovld __cnfn any(int3 x);
+int __ovld __cnfn any(int4 x);
+int __ovld __cnfn any(int8 x);
+int __ovld __cnfn any(int16 x);
+int __ovld __cnfn any(long x);
+int __ovld __cnfn any(long2 x);
+int __ovld __cnfn any(long3 x);
+int __ovld __cnfn any(long4 x);
+int __ovld __cnfn any(long8 x);
+int __ovld __cnfn any(long16 x);
+
+/**
+ * Returns 1 if the most significant bit in all components
+ * of x is set; otherwise returns 0.
+ */
+int __ovld __cnfn all(char x);
+int __ovld __cnfn all(char2 x);
+int __ovld __cnfn all(char3 x);
+int __ovld __cnfn all(char4 x);
+int __ovld __cnfn all(char8 x);
+int __ovld __cnfn all(char16 x);
+int __ovld __cnfn all(short x);
+int __ovld __cnfn all(short2 x);
+int __ovld __cnfn all(short3 x);
+int __ovld __cnfn all(short4 x);
+int __ovld __cnfn all(short8 x);
+int __ovld __cnfn all(short16 x);
+int __ovld __cnfn all(int x);
+int __ovld __cnfn all(int2 x);
+int __ovld __cnfn all(int3 x);
+int __ovld __cnfn all(int4 x);
+int __ovld __cnfn all(int8 x);
+int __ovld __cnfn all(int16 x);
+int __ovld __cnfn all(long x);
+int __ovld __cnfn all(long2 x);
+int __ovld __cnfn all(long3 x);
+int __ovld __cnfn all(long4 x);
+int __ovld __cnfn all(long8 x);
+int __ovld __cnfn all(long16 x);
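+/*
+ * For example, with int2 v = (int2)(-1, 0): any(v) returns 1 because the
+ * most significant bit of -1 is set, while all(v) returns 0 because the
+ * most significant bit of 0 is not.
+ */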
+
+/**
+ * Each bit of the result is the corresponding bit of a if
+ * the corresponding bit of c is 0. Otherwise it is the
+ * corresponding bit of b.
+ */
+char __ovld __cnfn bitselect(char a, char b, char c);
+uchar __ovld __cnfn bitselect(uchar a, uchar b, uchar c);
+char2 __ovld __cnfn bitselect(char2 a, char2 b, char2 c);
+uchar2 __ovld __cnfn bitselect(uchar2 a, uchar2 b, uchar2 c);
+char3 __ovld __cnfn bitselect(char3 a, char3 b, char3 c);
+uchar3 __ovld __cnfn bitselect(uchar3 a, uchar3 b, uchar3 c);
+char4 __ovld __cnfn bitselect(char4 a, char4 b, char4 c);
+uchar4 __ovld __cnfn bitselect(uchar4 a, uchar4 b, uchar4 c);
+char8 __ovld __cnfn bitselect(char8 a, char8 b, char8 c);
+uchar8 __ovld __cnfn bitselect(uchar8 a, uchar8 b, uchar8 c);
+char16 __ovld __cnfn bitselect(char16 a, char16 b, char16 c);
+uchar16 __ovld __cnfn bitselect(uchar16 a, uchar16 b, uchar16 c);
+short __ovld __cnfn bitselect(short a, short b, short c);
+ushort __ovld __cnfn bitselect(ushort a, ushort b, ushort c);
+short2 __ovld __cnfn bitselect(short2 a, short2 b, short2 c);
+ushort2 __ovld __cnfn bitselect(ushort2 a, ushort2 b, ushort2 c);
+short3 __ovld __cnfn bitselect(short3 a, short3 b, short3 c);
+ushort3 __ovld __cnfn bitselect(ushort3 a, ushort3 b, ushort3 c);
+short4 __ovld __cnfn bitselect(short4 a, short4 b, short4 c);
+ushort4 __ovld __cnfn bitselect(ushort4 a, ushort4 b, ushort4 c);
+short8 __ovld __cnfn bitselect(short8 a, short8 b, short8 c);
+ushort8 __ovld __cnfn bitselect(ushort8 a, ushort8 b, ushort8 c);
+short16 __ovld __cnfn bitselect(short16 a, short16 b, short16 c);
+ushort16 __ovld __cnfn bitselect(ushort16 a, ushort16 b, ushort16 c);
+int __ovld __cnfn bitselect(int a, int b, int c);
+uint __ovld __cnfn bitselect(uint a, uint b, uint c);
+int2 __ovld __cnfn bitselect(int2 a, int2 b, int2 c);
+uint2 __ovld __cnfn bitselect(uint2 a, uint2 b, uint2 c);
+int3 __ovld __cnfn bitselect(int3 a, int3 b, int3 c);
+uint3 __ovld __cnfn bitselect(uint3 a, uint3 b, uint3 c);
+int4 __ovld __cnfn bitselect(int4 a, int4 b, int4 c);
+uint4 __ovld __cnfn bitselect(uint4 a, uint4 b, uint4 c);
+int8 __ovld __cnfn bitselect(int8 a, int8 b, int8 c);
+uint8 __ovld __cnfn bitselect(uint8 a, uint8 b, uint8 c);
+int16 __ovld __cnfn bitselect(int16 a, int16 b, int16 c);
+uint16 __ovld __cnfn bitselect(uint16 a, uint16 b, uint16 c);
+long __ovld __cnfn bitselect(long a, long b, long c);
+ulong __ovld __cnfn bitselect(ulong a, ulong b, ulong c);
+long2 __ovld __cnfn bitselect(long2 a, long2 b, long2 c);
+ulong2 __ovld __cnfn bitselect(ulong2 a, ulong2 b, ulong2 c);
+long3 __ovld __cnfn bitselect(long3 a, long3 b, long3 c);
+ulong3 __ovld __cnfn bitselect(ulong3 a, ulong3 b, ulong3 c);
+long4 __ovld __cnfn bitselect(long4 a, long4 b, long4 c);
+ulong4 __ovld __cnfn bitselect(ulong4 a, ulong4 b, ulong4 c);
+long8 __ovld __cnfn bitselect(long8 a, long8 b, long8 c);
+ulong8 __ovld __cnfn bitselect(ulong8 a, ulong8 b, ulong8 c);
+long16 __ovld __cnfn bitselect(long16 a, long16 b, long16 c);
+ulong16 __ovld __cnfn bitselect(ulong16 a, ulong16 b, ulong16 c);
+float __ovld __cnfn bitselect(float a, float b, float c);
+float2 __ovld __cnfn bitselect(float2 a, float2 b, float2 c);
+float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);
+float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);
+float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);
+float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);
+#ifdef cl_khr_fp64
+double __ovld __cnfn bitselect(double a, double b, double c);
+double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);
+double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);
+double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);
+double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);
+double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn bitselect(half a, half b, half c);
+half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);
+half3 __ovld __cnfn bitselect(half3 a, half3 b, half3 c);
+half4 __ovld __cnfn bitselect(half4 a, half4 b, half4 c);
+half8 __ovld __cnfn bitselect(half8 a, half8 b, half8 c);
+half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);
+#endif //cl_khr_fp16
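+/*
+ * For integer types, bitselect is equivalent to the pure bitwise merge
+ * (a & ~c) | (b & c). For example:
+ *
+ *   uint r = bitselect(0xAAAAAAAAu, 0x55555555u, 0x0000FFFFu); // 0xAAAA5555u
+ */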
+
+/**
+ * For each component of a vector type,
+ * result[i] = (MSB of c[i] is set) ? b[i] : a[i].
+ * For a scalar type, result = c ? b : a.
+ */
+char __ovld __cnfn select(char a, char b, char c);
+uchar __ovld __cnfn select(uchar a, uchar b, char c);
+char2 __ovld __cnfn select(char2 a, char2 b, char2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, char2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, char3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, char3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, char4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, char4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, char8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, char16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);
+short __ovld __cnfn select(short a, short b, char c);
+ushort __ovld __cnfn select(ushort a, ushort b, char c);
+short2 __ovld __cnfn select(short2 a, short2 b, char2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, char2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, char3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, char3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, char4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, char4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, char8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, char8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, char16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, char16 c);
+int __ovld __cnfn select(int a, int b, char c);
+uint __ovld __cnfn select(uint a, uint b, char c);
+int2 __ovld __cnfn select(int2 a, int2 b, char2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, char2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, char3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, char3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, char4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, char4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, char8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, char8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, char16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, char16 c);
+long __ovld __cnfn select(long a, long b, char c);
+ulong __ovld __cnfn select(ulong a, ulong b, char c);
+long2 __ovld __cnfn select(long2 a, long2 b, char2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, char2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, char3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, char3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, char4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, char4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, char8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, char8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, char16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, char16 c);
+float __ovld __cnfn select(float a, float b, char c);
+float2 __ovld __cnfn select(float2 a, float2 b, char2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, char3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, char4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, char8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, char16 c);
+char __ovld __cnfn select(char a, char b, short c);
+uchar __ovld __cnfn select(uchar a, uchar b, short c);
+char2 __ovld __cnfn select(char2 a, char2 b, short2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, short2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, short3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, short3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, short4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, short4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, short8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, short8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, short16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, short16 c);
+short __ovld __cnfn select(short a, short b, short c);
+ushort __ovld __cnfn select(ushort a, ushort b, short c);
+short2 __ovld __cnfn select(short2 a, short2 b, short2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, short2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, short3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, short3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, short4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, short4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, short8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, short16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);
+int __ovld __cnfn select(int a, int b, short c);
+uint __ovld __cnfn select(uint a, uint b, short c);
+int2 __ovld __cnfn select(int2 a, int2 b, short2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, short2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, short3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, short3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, short4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, short4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, short8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, short8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, short16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, short16 c);
+long __ovld __cnfn select(long a, long b, short c);
+ulong __ovld __cnfn select(ulong a, ulong b, short c);
+long2 __ovld __cnfn select(long2 a, long2 b, short2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, short2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, short3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, short3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, short4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, short4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, short8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, short8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, short16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, short16 c);
+float __ovld __cnfn select(float a, float b, short c);
+float2 __ovld __cnfn select(float2 a, float2 b, short2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, short3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, short4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, short8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, short16 c);
+char __ovld __cnfn select(char a, char b, int c);
+uchar __ovld __cnfn select(uchar a, uchar b, int c);
+char2 __ovld __cnfn select(char2 a, char2 b, int2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, int2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, int3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, int3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, int4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, int4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, int8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, int8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, int16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, int16 c);
+short __ovld __cnfn select(short a, short b, int c);
+ushort __ovld __cnfn select(ushort a, ushort b, int c);
+short2 __ovld __cnfn select(short2 a, short2 b, int2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, int2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, int3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, int3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, int4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, int4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, int8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, int8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, int16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, int16 c);
+int __ovld __cnfn select(int a, int b, int c);
+uint __ovld __cnfn select(uint a, uint b, int c);
+int2 __ovld __cnfn select(int2 a, int2 b, int2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, int2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, int3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, int3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, int4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, int4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, int8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, int16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);
+long __ovld __cnfn select(long a, long b, int c);
+ulong __ovld __cnfn select(ulong a, ulong b, int c);
+long2 __ovld __cnfn select(long2 a, long2 b, int2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, int2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, int3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, int3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, int4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, int4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, int8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, int8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, int16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, int16 c);
+float __ovld __cnfn select(float a, float b, int c);
+float2 __ovld __cnfn select(float2 a, float2 b, int2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, int3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, int4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, int8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, int16 c);
+char __ovld __cnfn select(char a, char b, long c);
+uchar __ovld __cnfn select(uchar a, uchar b, long c);
+char2 __ovld __cnfn select(char2 a, char2 b, long2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, long2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, long3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, long3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, long4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, long4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, long8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, long8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, long16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, long16 c);
+short __ovld __cnfn select(short a, short b, long c);
+ushort __ovld __cnfn select(ushort a, ushort b, long c);
+short2 __ovld __cnfn select(short2 a, short2 b, long2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, long2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, long3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, long3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, long4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, long4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, long8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, long8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, long16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, long16 c);
+int __ovld __cnfn select(int a, int b, long c);
+uint __ovld __cnfn select(uint a, uint b, long c);
+int2 __ovld __cnfn select(int2 a, int2 b, long2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, long2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, long3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, long3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, long4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, long4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, long8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, long8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, long16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, long16 c);
+long __ovld __cnfn select(long a, long b, long c);
+ulong __ovld __cnfn select(ulong a, ulong b, long c);
+long2 __ovld __cnfn select(long2 a, long2 b, long2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, long2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, long3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, long3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, long4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, long4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, long8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, long16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);
+float __ovld __cnfn select(float a, float b, long c);
+float2 __ovld __cnfn select(float2 a, float2 b, long2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, long3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, long4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, long8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, long16 c);
+char __ovld __cnfn select(char a, char b, uchar c);
+uchar __ovld __cnfn select(uchar a, uchar b, uchar c);
+char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uchar2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, uchar3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uchar3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, uchar4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uchar4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);
+short __ovld __cnfn select(short a, short b, uchar c);
+ushort __ovld __cnfn select(ushort a, ushort b, uchar c);
+short2 __ovld __cnfn select(short2 a, short2 b, uchar2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uchar2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, uchar3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uchar3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, uchar4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uchar4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, uchar8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uchar8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, uchar16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uchar16 c);
+int __ovld __cnfn select(int a, int b, uchar c);
+uint __ovld __cnfn select(uint a, uint b, uchar c);
+int2 __ovld __cnfn select(int2 a, int2 b, uchar2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, uchar2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, uchar3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, uchar3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, uchar4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, uchar4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, uchar8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, uchar8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, uchar16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, uchar16 c);
+long __ovld __cnfn select(long a, long b, uchar c);
+ulong __ovld __cnfn select(ulong a, ulong b, uchar c);
+long2 __ovld __cnfn select(long2 a, long2 b, uchar2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uchar2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, uchar3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uchar3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, uchar4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uchar4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, uchar8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uchar8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, uchar16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uchar16 c);
+float __ovld __cnfn select(float a, float b, uchar c);
+float2 __ovld __cnfn select(float2 a, float2 b, uchar2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, uchar3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, uchar4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, uchar8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, uchar16 c);
+char __ovld __cnfn select(char a, char b, ushort c);
+uchar __ovld __cnfn select(uchar a, uchar b, ushort c);
+char2 __ovld __cnfn select(char2 a, char2 b, ushort2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ushort2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, ushort3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ushort3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, ushort4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ushort4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, ushort8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ushort8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, ushort16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ushort16 c);
+short __ovld __cnfn select(short a, short b, ushort c);
+ushort __ovld __cnfn select(ushort a, ushort b, ushort c);
+short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ushort2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, ushort3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ushort3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, ushort4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ushort4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);
+int __ovld __cnfn select(int a, int b, ushort c);
+uint __ovld __cnfn select(uint a, uint b, ushort c);
+int2 __ovld __cnfn select(int2 a, int2 b, ushort2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, ushort2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, ushort3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, ushort3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, ushort4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, ushort4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, ushort8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, ushort8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, ushort16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, ushort16 c);
+long __ovld __cnfn select(long a, long b, ushort c);
+ulong __ovld __cnfn select(ulong a, ulong b, ushort c);
+long2 __ovld __cnfn select(long2 a, long2 b, ushort2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ushort2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, ushort3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ushort3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, ushort4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ushort4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, ushort8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ushort8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, ushort16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ushort16 c);
+float __ovld __cnfn select(float a, float b, ushort c);
+float2 __ovld __cnfn select(float2 a, float2 b, ushort2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, ushort3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, ushort4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, ushort8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, ushort16 c);
+char __ovld __cnfn select(char a, char b, uint c);
+uchar __ovld __cnfn select(uchar a, uchar b, uint c);
+char2 __ovld __cnfn select(char2 a, char2 b, uint2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uint2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, uint3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uint3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, uint4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uint4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, uint8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uint8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, uint16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uint16 c);
+short __ovld __cnfn select(short a, short b, uint c);
+ushort __ovld __cnfn select(ushort a, ushort b, uint c);
+short2 __ovld __cnfn select(short2 a, short2 b, uint2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, uint2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, uint3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, uint3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, uint4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, uint4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, uint8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, uint8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, uint16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, uint16 c);
+int __ovld __cnfn select(int a, int b, uint c);
+uint __ovld __cnfn select(uint a, uint b, uint c);
+int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, uint2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, uint3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, uint3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, uint4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, uint4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);
+long __ovld __cnfn select(long a, long b, uint c);
+ulong __ovld __cnfn select(ulong a, ulong b, uint c);
+long2 __ovld __cnfn select(long2 a, long2 b, uint2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, uint2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, uint3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, uint3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, uint4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, uint4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, uint8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, uint8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, uint16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, uint16 c);
+float __ovld __cnfn select(float a, float b, uint c);
+float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);
+char __ovld __cnfn select(char a, char b, ulong c);
+uchar __ovld __cnfn select(uchar a, uchar b, ulong c);
+char2 __ovld __cnfn select(char2 a, char2 b, ulong2 c);
+uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, ulong2 c);
+char3 __ovld __cnfn select(char3 a, char3 b, ulong3 c);
+uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, ulong3 c);
+char4 __ovld __cnfn select(char4 a, char4 b, ulong4 c);
+uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, ulong4 c);
+char8 __ovld __cnfn select(char8 a, char8 b, ulong8 c);
+uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, ulong8 c);
+char16 __ovld __cnfn select(char16 a, char16 b, ulong16 c);
+uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, ulong16 c);
+short __ovld __cnfn select(short a, short b, ulong c);
+ushort __ovld __cnfn select(ushort a, ushort b, ulong c);
+short2 __ovld __cnfn select(short2 a, short2 b, ulong2 c);
+ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ulong2 c);
+short3 __ovld __cnfn select(short3 a, short3 b, ulong3 c);
+ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ulong3 c);
+short4 __ovld __cnfn select(short4 a, short4 b, ulong4 c);
+ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ulong4 c);
+short8 __ovld __cnfn select(short8 a, short8 b, ulong8 c);
+ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ulong8 c);
+short16 __ovld __cnfn select(short16 a, short16 b, ulong16 c);
+ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ulong16 c);
+int __ovld __cnfn select(int a, int b, ulong c);
+uint __ovld __cnfn select(uint a, uint b, ulong c);
+int2 __ovld __cnfn select(int2 a, int2 b, ulong2 c);
+uint2 __ovld __cnfn select(uint2 a, uint2 b, ulong2 c);
+int3 __ovld __cnfn select(int3 a, int3 b, ulong3 c);
+uint3 __ovld __cnfn select(uint3 a, uint3 b, ulong3 c);
+int4 __ovld __cnfn select(int4 a, int4 b, ulong4 c);
+uint4 __ovld __cnfn select(uint4 a, uint4 b, ulong4 c);
+int8 __ovld __cnfn select(int8 a, int8 b, ulong8 c);
+uint8 __ovld __cnfn select(uint8 a, uint8 b, ulong8 c);
+int16 __ovld __cnfn select(int16 a, int16 b, ulong16 c);
+uint16 __ovld __cnfn select(uint16 a, uint16 b, ulong16 c);
+long __ovld __cnfn select(long a, long b, ulong c);
+ulong __ovld __cnfn select(ulong a, ulong b, ulong c);
+long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);
+ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ulong2 c);
+long3 __ovld __cnfn select(long3 a, long3 b, ulong3 c);
+ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ulong3 c);
+long4 __ovld __cnfn select(long4 a, long4 b, ulong4 c);
+ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ulong4 c);
+long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);
+ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);
+long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);
+ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);
+float __ovld __cnfn select(float a, float b, ulong c);
+float2 __ovld __cnfn select(float2 a, float2 b, ulong2 c);
+float3 __ovld __cnfn select(float3 a, float3 b, ulong3 c);
+float4 __ovld __cnfn select(float4 a, float4 b, ulong4 c);
+float8 __ovld __cnfn select(float8 a, float8 b, ulong8 c);
+float16 __ovld __cnfn select(float16 a, float16 b, ulong16 c);
+#ifdef cl_khr_fp64
+double __ovld __cnfn select(double a, double b, long c);
+double2 __ovld __cnfn select(double2 a, double2 b, long2 c);
+double3 __ovld __cnfn select(double3 a, double3 b, long3 c);
+double4 __ovld __cnfn select(double4 a, double4 b, long4 c);
+double8 __ovld __cnfn select(double8 a, double8 b, long8 c);
+double16 __ovld __cnfn select(double16 a, double16 b, long16 c);
+double __ovld __cnfn select(double a, double b, ulong c);
+double2 __ovld __cnfn select(double2 a, double2 b, ulong2 c);
+double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);
+double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);
+double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);
+double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+half __ovld __cnfn select(half a, half b, short c);
+half2 __ovld __cnfn select(half2 a, half2 b, short2 c);
+half3 __ovld __cnfn select(half3 a, half3 b, short3 c);
+half4 __ovld __cnfn select(half4 a, half4 b, short4 c);
+half8 __ovld __cnfn select(half8 a, half8 b, short8 c);
+half16 __ovld __cnfn select(half16 a, half16 b, short16 c);
+half __ovld __cnfn select(half a, half b, ushort c);
+half2 __ovld __cnfn select(half2 a, half2 b, ushort2 c);
+half3 __ovld __cnfn select(half3 a, half3 b, ushort3 c);
+half4 __ovld __cnfn select(half4 a, half4 b, ushort4 c);
+half8 __ovld __cnfn select(half8 a, half8 b, ushort8 c);
+half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);
+#endif //cl_khr_fp16
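+
+/* Illustrative sketch (not part of the header itself): for vector operands,
+ * select picks per component on the most significant bit of c, so a
+ * comparison result (all-ones or all-zeros per lane) can drive it directly.
+ *
+ *   int4   cmp = (int4)(-1, 0, -1, 0);              // e.g. a lane-wise a < b
+ *   float4 r   = select((float4)1.0f, (float4)2.0f, cmp);
+ *   // r == (float4)(2.0f, 1.0f, 2.0f, 1.0f)
+ */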
+
+// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions
+// OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type
+/**
+ * Use generic type gentype to indicate the built-in data types
+ * char, uchar, short, ushort, int, uint, long, ulong, float,
+ * double or half.
+ *
+ * vloadn returns sizeof (gentypen) bytes of data read from address (p + (offset * n)).
+ *
+ * vstoren writes sizeof (gentypen) bytes given by data to address (p + (offset * n)).
+ *
+ * The address computed as (p + (offset * n)) must be
+ * 8-bit aligned if gentype is char, uchar;
+ * 16-bit aligned if gentype is short, ushort, half;
+ * 32-bit aligned if gentype is int, uint, float;
+ * 64-bit aligned if gentype is long, ulong, double.
+ */
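+
+/* A minimal usage sketch (illustrative; the kernel and buffer names are
+ * hypothetical): vload4/vstore4 move a float4 at element offset i, i.e.
+ * in[4*i]..in[4*i+3] and out[4*i]..out[4*i+3].
+ *
+ *   __kernel void scale(__global const float *in, __global float *out) {
+ *       size_t i = get_global_id(0);
+ *       float4 v = vload4(i, in);
+ *       vstore4(v * 2.0f, i, out);
+ *   }
+ */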
+
+char2 __ovld vload2(size_t offset, const __constant char *p);
+uchar2 __ovld vload2(size_t offset, const __constant uchar *p);
+short2 __ovld vload2(size_t offset, const __constant short *p);
+ushort2 __ovld vload2(size_t offset, const __constant ushort *p);
+int2 __ovld vload2(size_t offset, const __constant int *p);
+uint2 __ovld vload2(size_t offset, const __constant uint *p);
+long2 __ovld vload2(size_t offset, const __constant long *p);
+ulong2 __ovld vload2(size_t offset, const __constant ulong *p);
+float2 __ovld vload2(size_t offset, const __constant float *p);
+char3 __ovld vload3(size_t offset, const __constant char *p);
+uchar3 __ovld vload3(size_t offset, const __constant uchar *p);
+short3 __ovld vload3(size_t offset, const __constant short *p);
+ushort3 __ovld vload3(size_t offset, const __constant ushort *p);
+int3 __ovld vload3(size_t offset, const __constant int *p);
+uint3 __ovld vload3(size_t offset, const __constant uint *p);
+long3 __ovld vload3(size_t offset, const __constant long *p);
+ulong3 __ovld vload3(size_t offset, const __constant ulong *p);
+float3 __ovld vload3(size_t offset, const __constant float *p);
+char4 __ovld vload4(size_t offset, const __constant char *p);
+uchar4 __ovld vload4(size_t offset, const __constant uchar *p);
+short4 __ovld vload4(size_t offset, const __constant short *p);
+ushort4 __ovld vload4(size_t offset, const __constant ushort *p);
+int4 __ovld vload4(size_t offset, const __constant int *p);
+uint4 __ovld vload4(size_t offset, const __constant uint *p);
+long4 __ovld vload4(size_t offset, const __constant long *p);
+ulong4 __ovld vload4(size_t offset, const __constant ulong *p);
+float4 __ovld vload4(size_t offset, const __constant float *p);
+char8 __ovld vload8(size_t offset, const __constant char *p);
+uchar8 __ovld vload8(size_t offset, const __constant uchar *p);
+short8 __ovld vload8(size_t offset, const __constant short *p);
+ushort8 __ovld vload8(size_t offset, const __constant ushort *p);
+int8 __ovld vload8(size_t offset, const __constant int *p);
+uint8 __ovld vload8(size_t offset, const __constant uint *p);
+long8 __ovld vload8(size_t offset, const __constant long *p);
+ulong8 __ovld vload8(size_t offset, const __constant ulong *p);
+float8 __ovld vload8(size_t offset, const __constant float *p);
+char16 __ovld vload16(size_t offset, const __constant char *p);
+uchar16 __ovld vload16(size_t offset, const __constant uchar *p);
+short16 __ovld vload16(size_t offset, const __constant short *p);
+ushort16 __ovld vload16(size_t offset, const __constant ushort *p);
+int16 __ovld vload16(size_t offset, const __constant int *p);
+uint16 __ovld vload16(size_t offset, const __constant uint *p);
+long16 __ovld vload16(size_t offset, const __constant long *p);
+ulong16 __ovld vload16(size_t offset, const __constant ulong *p);
+float16 __ovld vload16(size_t offset, const __constant float *p);
+#ifdef cl_khr_fp64
+double2 __ovld vload2(size_t offset, const __constant double *p);
+double3 __ovld vload3(size_t offset, const __constant double *p);
+double4 __ovld vload4(size_t offset, const __constant double *p);
+double8 __ovld vload8(size_t offset, const __constant double *p);
+double16 __ovld vload16(size_t offset, const __constant double *p);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+half __ovld vload(size_t offset, const __constant half *p);
+half2 __ovld vload2(size_t offset, const __constant half *p);
+half3 __ovld vload3(size_t offset, const __constant half *p);
+half4 __ovld vload4(size_t offset, const __constant half *p);
+half8 __ovld vload8(size_t offset, const __constant half *p);
+half16 __ovld vload16(size_t offset, const __constant half *p);
+#endif //cl_khr_fp16
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+char2 __ovld vload2(size_t offset, const char *p);
+uchar2 __ovld vload2(size_t offset, const uchar *p);
+short2 __ovld vload2(size_t offset, const short *p);
+ushort2 __ovld vload2(size_t offset, const ushort *p);
+int2 __ovld vload2(size_t offset, const int *p);
+uint2 __ovld vload2(size_t offset, const uint *p);
+long2 __ovld vload2(size_t offset, const long *p);
+ulong2 __ovld vload2(size_t offset, const ulong *p);
+float2 __ovld vload2(size_t offset, const float *p);
+char3 __ovld vload3(size_t offset, const char *p);
+uchar3 __ovld vload3(size_t offset, const uchar *p);
+short3 __ovld vload3(size_t offset, const short *p);
+ushort3 __ovld vload3(size_t offset, const ushort *p);
+int3 __ovld vload3(size_t offset, const int *p);
+uint3 __ovld vload3(size_t offset, const uint *p);
+long3 __ovld vload3(size_t offset, const long *p);
+ulong3 __ovld vload3(size_t offset, const ulong *p);
+float3 __ovld vload3(size_t offset, const float *p);
+char4 __ovld vload4(size_t offset, const char *p);
+uchar4 __ovld vload4(size_t offset, const uchar *p);
+short4 __ovld vload4(size_t offset, const short *p);
+ushort4 __ovld vload4(size_t offset, const ushort *p);
+int4 __ovld vload4(size_t offset, const int *p);
+uint4 __ovld vload4(size_t offset, const uint *p);
+long4 __ovld vload4(size_t offset, const long *p);
+ulong4 __ovld vload4(size_t offset, const ulong *p);
+float4 __ovld vload4(size_t offset, const float *p);
+char8 __ovld vload8(size_t offset, const char *p);
+uchar8 __ovld vload8(size_t offset, const uchar *p);
+short8 __ovld vload8(size_t offset, const short *p);
+ushort8 __ovld vload8(size_t offset, const ushort *p);
+int8 __ovld vload8(size_t offset, const int *p);
+uint8 __ovld vload8(size_t offset, const uint *p);
+long8 __ovld vload8(size_t offset, const long *p);
+ulong8 __ovld vload8(size_t offset, const ulong *p);
+float8 __ovld vload8(size_t offset, const float *p);
+char16 __ovld vload16(size_t offset, const char *p);
+uchar16 __ovld vload16(size_t offset, const uchar *p);
+short16 __ovld vload16(size_t offset, const short *p);
+ushort16 __ovld vload16(size_t offset, const ushort *p);
+int16 __ovld vload16(size_t offset, const int *p);
+uint16 __ovld vload16(size_t offset, const uint *p);
+long16 __ovld vload16(size_t offset, const long *p);
+ulong16 __ovld vload16(size_t offset, const ulong *p);
+float16 __ovld vload16(size_t offset, const float *p);
+
+#ifdef cl_khr_fp64
+double2 __ovld vload2(size_t offset, const double *p);
+double3 __ovld vload3(size_t offset, const double *p);
+double4 __ovld vload4(size_t offset, const double *p);
+double8 __ovld vload8(size_t offset, const double *p);
+double16 __ovld vload16(size_t offset, const double *p);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+half __ovld vload(size_t offset, const half *p);
+half2 __ovld vload2(size_t offset, const half *p);
+half3 __ovld vload3(size_t offset, const half *p);
+half4 __ovld vload4(size_t offset, const half *p);
+half8 __ovld vload8(size_t offset, const half *p);
+half16 __ovld vload16(size_t offset, const half *p);
+#endif //cl_khr_fp16
+#else
+char2 __ovld vload2(size_t offset, const __global char *p);
+uchar2 __ovld vload2(size_t offset, const __global uchar *p);
+short2 __ovld vload2(size_t offset, const __global short *p);
+ushort2 __ovld vload2(size_t offset, const __global ushort *p);
+int2 __ovld vload2(size_t offset, const __global int *p);
+uint2 __ovld vload2(size_t offset, const __global uint *p);
+long2 __ovld vload2(size_t offset, const __global long *p);
+ulong2 __ovld vload2(size_t offset, const __global ulong *p);
+float2 __ovld vload2(size_t offset, const __global float *p);
+char3 __ovld vload3(size_t offset, const __global char *p);
+uchar3 __ovld vload3(size_t offset, const __global uchar *p);
+short3 __ovld vload3(size_t offset, const __global short *p);
+ushort3 __ovld vload3(size_t offset, const __global ushort *p);
+int3 __ovld vload3(size_t offset, const __global int *p);
+uint3 __ovld vload3(size_t offset, const __global uint *p);
+long3 __ovld vload3(size_t offset, const __global long *p);
+ulong3 __ovld vload3(size_t offset, const __global ulong *p);
+float3 __ovld vload3(size_t offset, const __global float *p);
+char4 __ovld vload4(size_t offset, const __global char *p);
+uchar4 __ovld vload4(size_t offset, const __global uchar *p);
+short4 __ovld vload4(size_t offset, const __global short *p);
+ushort4 __ovld vload4(size_t offset, const __global ushort *p);
+int4 __ovld vload4(size_t offset, const __global int *p);
+uint4 __ovld vload4(size_t offset, const __global uint *p);
+long4 __ovld vload4(size_t offset, const __global long *p);
+ulong4 __ovld vload4(size_t offset, const __global ulong *p);
+float4 __ovld vload4(size_t offset, const __global float *p);
+char8 __ovld vload8(size_t offset, const __global char *p);
+uchar8 __ovld vload8(size_t offset, const __global uchar *p);
+short8 __ovld vload8(size_t offset, const __global short *p);
+ushort8 __ovld vload8(size_t offset, const __global ushort *p);
+int8 __ovld vload8(size_t offset, const __global int *p);
+uint8 __ovld vload8(size_t offset, const __global uint *p);
+long8 __ovld vload8(size_t offset, const __global long *p);
+ulong8 __ovld vload8(size_t offset, const __global ulong *p);
+float8 __ovld vload8(size_t offset, const __global float *p);
+char16 __ovld vload16(size_t offset, const __global char *p);
+uchar16 __ovld vload16(size_t offset, const __global uchar *p);
+short16 __ovld vload16(size_t offset, const __global short *p);
+ushort16 __ovld vload16(size_t offset, const __global ushort *p);
+int16 __ovld vload16(size_t offset, const __global int *p);
+uint16 __ovld vload16(size_t offset, const __global uint *p);
+long16 __ovld vload16(size_t offset, const __global long *p);
+ulong16 __ovld vload16(size_t offset, const __global ulong *p);
+float16 __ovld vload16(size_t offset, const __global float *p);
+char2 __ovld vload2(size_t offset, const __local char *p);
+uchar2 __ovld vload2(size_t offset, const __local uchar *p);
+short2 __ovld vload2(size_t offset, const __local short *p);
+ushort2 __ovld vload2(size_t offset, const __local ushort *p);
+int2 __ovld vload2(size_t offset, const __local int *p);
+uint2 __ovld vload2(size_t offset, const __local uint *p);
+long2 __ovld vload2(size_t offset, const __local long *p);
+ulong2 __ovld vload2(size_t offset, const __local ulong *p);
+float2 __ovld vload2(size_t offset, const __local float *p);
+char3 __ovld vload3(size_t offset, const __local char *p);
+uchar3 __ovld vload3(size_t offset, const __local uchar *p);
+short3 __ovld vload3(size_t offset, const __local short *p);
+ushort3 __ovld vload3(size_t offset, const __local ushort *p);
+int3 __ovld vload3(size_t offset, const __local int *p);
+uint3 __ovld vload3(size_t offset, const __local uint *p);
+long3 __ovld vload3(size_t offset, const __local long *p);
+ulong3 __ovld vload3(size_t offset, const __local ulong *p);
+float3 __ovld vload3(size_t offset, const __local float *p);
+char4 __ovld vload4(size_t offset, const __local char *p);
+uchar4 __ovld vload4(size_t offset, const __local uchar *p);
+short4 __ovld vload4(size_t offset, const __local short *p);
+ushort4 __ovld vload4(size_t offset, const __local ushort *p);
+int4 __ovld vload4(size_t offset, const __local int *p);
+uint4 __ovld vload4(size_t offset, const __local uint *p);
+long4 __ovld vload4(size_t offset, const __local long *p);
+ulong4 __ovld vload4(size_t offset, const __local ulong *p);
+float4 __ovld vload4(size_t offset, const __local float *p);
+char8 __ovld vload8(size_t offset, const __local char *p);
+uchar8 __ovld vload8(size_t offset, const __local uchar *p);
+short8 __ovld vload8(size_t offset, const __local short *p);
+ushort8 __ovld vload8(size_t offset, const __local ushort *p);
+int8 __ovld vload8(size_t offset, const __local int *p);
+uint8 __ovld vload8(size_t offset, const __local uint *p);
+long8 __ovld vload8(size_t offset, const __local long *p);
+ulong8 __ovld vload8(size_t offset, const __local ulong *p);
+float8 __ovld vload8(size_t offset, const __local float *p);
+char16 __ovld vload16(size_t offset, const __local char *p);
+uchar16 __ovld vload16(size_t offset, const __local uchar *p);
+short16 __ovld vload16(size_t offset, const __local short *p);
+ushort16 __ovld vload16(size_t offset, const __local ushort *p);
+int16 __ovld vload16(size_t offset, const __local int *p);
+uint16 __ovld vload16(size_t offset, const __local uint *p);
+long16 __ovld vload16(size_t offset, const __local long *p);
+ulong16 __ovld vload16(size_t offset, const __local ulong *p);
+float16 __ovld vload16(size_t offset, const __local float *p);
+char2 __ovld vload2(size_t offset, const __private char *p);
+uchar2 __ovld vload2(size_t offset, const __private uchar *p);
+short2 __ovld vload2(size_t offset, const __private short *p);
+ushort2 __ovld vload2(size_t offset, const __private ushort *p);
+int2 __ovld vload2(size_t offset, const __private int *p);
+uint2 __ovld vload2(size_t offset, const __private uint *p);
+long2 __ovld vload2(size_t offset, const __private long *p);
+ulong2 __ovld vload2(size_t offset, const __private ulong *p);
+float2 __ovld vload2(size_t offset, const __private float *p);
+char3 __ovld vload3(size_t offset, const __private char *p);
+uchar3 __ovld vload3(size_t offset, const __private uchar *p);
+short3 __ovld vload3(size_t offset, const __private short *p);
+ushort3 __ovld vload3(size_t offset, const __private ushort *p);
+int3 __ovld vload3(size_t offset, const __private int *p);
+uint3 __ovld vload3(size_t offset, const __private uint *p);
+long3 __ovld vload3(size_t offset, const __private long *p);
+ulong3 __ovld vload3(size_t offset, const __private ulong *p);
+float3 __ovld vload3(size_t offset, const __private float *p);
+char4 __ovld vload4(size_t offset, const __private char *p);
+uchar4 __ovld vload4(size_t offset, const __private uchar *p);
+short4 __ovld vload4(size_t offset, const __private short *p);
+ushort4 __ovld vload4(size_t offset, const __private ushort *p);
+int4 __ovld vload4(size_t offset, const __private int *p);
+uint4 __ovld vload4(size_t offset, const __private uint *p);
+long4 __ovld vload4(size_t offset, const __private long *p);
+ulong4 __ovld vload4(size_t offset, const __private ulong *p);
+float4 __ovld vload4(size_t offset, const __private float *p);
+char8 __ovld vload8(size_t offset, const __private char *p);
+uchar8 __ovld vload8(size_t offset, const __private uchar *p);
+short8 __ovld vload8(size_t offset, const __private short *p);
+ushort8 __ovld vload8(size_t offset, const __private ushort *p);
+int8 __ovld vload8(size_t offset, const __private int *p);
+uint8 __ovld vload8(size_t offset, const __private uint *p);
+long8 __ovld vload8(size_t offset, const __private long *p);
+ulong8 __ovld vload8(size_t offset, const __private ulong *p);
+float8 __ovld vload8(size_t offset, const __private float *p);
+char16 __ovld vload16(size_t offset, const __private char *p);
+uchar16 __ovld vload16(size_t offset, const __private uchar *p);
+short16 __ovld vload16(size_t offset, const __private short *p);
+ushort16 __ovld vload16(size_t offset, const __private ushort *p);
+int16 __ovld vload16(size_t offset, const __private int *p);
+uint16 __ovld vload16(size_t offset, const __private uint *p);
+long16 __ovld vload16(size_t offset, const __private long *p);
+ulong16 __ovld vload16(size_t offset, const __private ulong *p);
+float16 __ovld vload16(size_t offset, const __private float *p);
+
+#ifdef cl_khr_fp64
+double2 __ovld vload2(size_t offset, const __global double *p);
+double3 __ovld vload3(size_t offset, const __global double *p);
+double4 __ovld vload4(size_t offset, const __global double *p);
+double8 __ovld vload8(size_t offset, const __global double *p);
+double16 __ovld vload16(size_t offset, const __global double *p);
+double2 __ovld vload2(size_t offset, const __local double *p);
+double3 __ovld vload3(size_t offset, const __local double *p);
+double4 __ovld vload4(size_t offset, const __local double *p);
+double8 __ovld vload8(size_t offset, const __local double *p);
+double16 __ovld vload16(size_t offset, const __local double *p);
+double2 __ovld vload2(size_t offset, const __private double *p);
+double3 __ovld vload3(size_t offset, const __private double *p);
+double4 __ovld vload4(size_t offset, const __private double *p);
+double8 __ovld vload8(size_t offset, const __private double *p);
+double16 __ovld vload16(size_t offset, const __private double *p);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+half __ovld vload(size_t offset, const __global half *p);
+half2 __ovld vload2(size_t offset, const __global half *p);
+half3 __ovld vload3(size_t offset, const __global half *p);
+half4 __ovld vload4(size_t offset, const __global half *p);
+half8 __ovld vload8(size_t offset, const __global half *p);
+half16 __ovld vload16(size_t offset, const __global half *p);
+half __ovld vload(size_t offset, const __local half *p);
+half2 __ovld vload2(size_t offset, const __local half *p);
+half3 __ovld vload3(size_t offset, const __local half *p);
+half4 __ovld vload4(size_t offset, const __local half *p);
+half8 __ovld vload8(size_t offset, const __local half *p);
+half16 __ovld vload16(size_t offset, const __local half *p);
+half __ovld vload(size_t offset, const __private half *p);
+half2 __ovld vload2(size_t offset, const __private half *p);
+half3 __ovld vload3(size_t offset, const __private half *p);
+half4 __ovld vload4(size_t offset, const __private half *p);
+half8 __ovld vload8(size_t offset, const __private half *p);
+half16 __ovld vload16(size_t offset, const __private half *p);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+void __ovld vstore2(char2 data, size_t offset, char *p);
+void __ovld vstore2(uchar2 data, size_t offset, uchar *p);
+void __ovld vstore2(short2 data, size_t offset, short *p);
+void __ovld vstore2(ushort2 data, size_t offset, ushort *p);
+void __ovld vstore2(int2 data, size_t offset, int *p);
+void __ovld vstore2(uint2 data, size_t offset, uint *p);
+void __ovld vstore2(long2 data, size_t offset, long *p);
+void __ovld vstore2(ulong2 data, size_t offset, ulong *p);
+void __ovld vstore2(float2 data, size_t offset, float *p);
+void __ovld vstore3(char3 data, size_t offset, char *p);
+void __ovld vstore3(uchar3 data, size_t offset, uchar *p);
+void __ovld vstore3(short3 data, size_t offset, short *p);
+void __ovld vstore3(ushort3 data, size_t offset, ushort *p);
+void __ovld vstore3(int3 data, size_t offset, int *p);
+void __ovld vstore3(uint3 data, size_t offset, uint *p);
+void __ovld vstore3(long3 data, size_t offset, long *p);
+void __ovld vstore3(ulong3 data, size_t offset, ulong *p);
+void __ovld vstore3(float3 data, size_t offset, float *p);
+void __ovld vstore4(char4 data, size_t offset, char *p);
+void __ovld vstore4(uchar4 data, size_t offset, uchar *p);
+void __ovld vstore4(short4 data, size_t offset, short *p);
+void __ovld vstore4(ushort4 data, size_t offset, ushort *p);
+void __ovld vstore4(int4 data, size_t offset, int *p);
+void __ovld vstore4(uint4 data, size_t offset, uint *p);
+void __ovld vstore4(long4 data, size_t offset, long *p);
+void __ovld vstore4(ulong4 data, size_t offset, ulong *p);
+void __ovld vstore4(float4 data, size_t offset, float *p);
+void __ovld vstore8(char8 data, size_t offset, char *p);
+void __ovld vstore8(uchar8 data, size_t offset, uchar *p);
+void __ovld vstore8(short8 data, size_t offset, short *p);
+void __ovld vstore8(ushort8 data, size_t offset, ushort *p);
+void __ovld vstore8(int8 data, size_t offset, int *p);
+void __ovld vstore8(uint8 data, size_t offset, uint *p);
+void __ovld vstore8(long8 data, size_t offset, long *p);
+void __ovld vstore8(ulong8 data, size_t offset, ulong *p);
+void __ovld vstore8(float8 data, size_t offset, float *p);
+void __ovld vstore16(char16 data, size_t offset, char *p);
+void __ovld vstore16(uchar16 data, size_t offset, uchar *p);
+void __ovld vstore16(short16 data, size_t offset, short *p);
+void __ovld vstore16(ushort16 data, size_t offset, ushort *p);
+void __ovld vstore16(int16 data, size_t offset, int *p);
+void __ovld vstore16(uint16 data, size_t offset, uint *p);
+void __ovld vstore16(long16 data, size_t offset, long *p);
+void __ovld vstore16(ulong16 data, size_t offset, ulong *p);
+void __ovld vstore16(float16 data, size_t offset, float *p);
+#ifdef cl_khr_fp64
+void __ovld vstore2(double2 data, size_t offset, double *p);
+void __ovld vstore3(double3 data, size_t offset, double *p);
+void __ovld vstore4(double4 data, size_t offset, double *p);
+void __ovld vstore8(double8 data, size_t offset, double *p);
+void __ovld vstore16(double16 data, size_t offset, double *p);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+void __ovld vstore(half data, size_t offset, half *p);
+void __ovld vstore2(half2 data, size_t offset, half *p);
+void __ovld vstore3(half3 data, size_t offset, half *p);
+void __ovld vstore4(half4 data, size_t offset, half *p);
+void __ovld vstore8(half8 data, size_t offset, half *p);
+void __ovld vstore16(half16 data, size_t offset, half *p);
+#endif //cl_khr_fp16
+#else
+void __ovld vstore2(char2 data, size_t offset, __global char *p);
+void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);
+void __ovld vstore2(short2 data, size_t offset, __global short *p);
+void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);
+void __ovld vstore2(int2 data, size_t offset, __global int *p);
+void __ovld vstore2(uint2 data, size_t offset, __global uint *p);
+void __ovld vstore2(long2 data, size_t offset, __global long *p);
+void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);
+void __ovld vstore2(float2 data, size_t offset, __global float *p);
+void __ovld vstore3(char3 data, size_t offset, __global char *p);
+void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);
+void __ovld vstore3(short3 data, size_t offset, __global short *p);
+void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);
+void __ovld vstore3(int3 data, size_t offset, __global int *p);
+void __ovld vstore3(uint3 data, size_t offset, __global uint *p);
+void __ovld vstore3(long3 data, size_t offset, __global long *p);
+void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);
+void __ovld vstore3(float3 data, size_t offset, __global float *p);
+void __ovld vstore4(char4 data, size_t offset, __global char *p);
+void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);
+void __ovld vstore4(short4 data, size_t offset, __global short *p);
+void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);
+void __ovld vstore4(int4 data, size_t offset, __global int *p);
+void __ovld vstore4(uint4 data, size_t offset, __global uint *p);
+void __ovld vstore4(long4 data, size_t offset, __global long *p);
+void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);
+void __ovld vstore4(float4 data, size_t offset, __global float *p);
+void __ovld vstore8(char8 data, size_t offset, __global char *p);
+void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);
+void __ovld vstore8(short8 data, size_t offset, __global short *p);
+void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);
+void __ovld vstore8(int8 data, size_t offset, __global int *p);
+void __ovld vstore8(uint8 data, size_t offset, __global uint *p);
+void __ovld vstore8(long8 data, size_t offset, __global long *p);
+void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);
+void __ovld vstore8(float8 data, size_t offset, __global float *p);
+void __ovld vstore16(char16 data, size_t offset, __global char *p);
+void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);
+void __ovld vstore16(short16 data, size_t offset, __global short *p);
+void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);
+void __ovld vstore16(int16 data, size_t offset, __global int *p);
+void __ovld vstore16(uint16 data, size_t offset, __global uint *p);
+void __ovld vstore16(long16 data, size_t offset, __global long *p);
+void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);
+void __ovld vstore16(float16 data, size_t offset, __global float *p);
+void __ovld vstore2(char2 data, size_t offset, __local char *p);
+void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);
+void __ovld vstore2(short2 data, size_t offset, __local short *p);
+void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);
+void __ovld vstore2(int2 data, size_t offset, __local int *p);
+void __ovld vstore2(uint2 data, size_t offset, __local uint *p);
+void __ovld vstore2(long2 data, size_t offset, __local long *p);
+void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);
+void __ovld vstore2(float2 data, size_t offset, __local float *p);
+void __ovld vstore3(char3 data, size_t offset, __local char *p);
+void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);
+void __ovld vstore3(short3 data, size_t offset, __local short *p);
+void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);
+void __ovld vstore3(int3 data, size_t offset, __local int *p);
+void __ovld vstore3(uint3 data, size_t offset, __local uint *p);
+void __ovld vstore3(long3 data, size_t offset, __local long *p);
+void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);
+void __ovld vstore3(float3 data, size_t offset, __local float *p);
+void __ovld vstore4(char4 data, size_t offset, __local char *p);
+void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);
+void __ovld vstore4(short4 data, size_t offset, __local short *p);
+void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);
+void __ovld vstore4(int4 data, size_t offset, __local int *p);
+void __ovld vstore4(uint4 data, size_t offset, __local uint *p);
+void __ovld vstore4(long4 data, size_t offset, __local long *p);
+void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);
+void __ovld vstore4(float4 data, size_t offset, __local float *p);
+void __ovld vstore8(char8 data, size_t offset, __local char *p);
+void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);
+void __ovld vstore8(short8 data, size_t offset, __local short *p);
+void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);
+void __ovld vstore8(int8 data, size_t offset, __local int *p);
+void __ovld vstore8(uint8 data, size_t offset, __local uint *p);
+void __ovld vstore8(long8 data, size_t offset, __local long *p);
+void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);
+void __ovld vstore8(float8 data, size_t offset, __local float *p);
+void __ovld vstore16(char16 data, size_t offset, __local char *p);
+void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);
+void __ovld vstore16(short16 data, size_t offset, __local short *p);
+void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);
+void __ovld vstore16(int16 data, size_t offset, __local int *p);
+void __ovld vstore16(uint16 data, size_t offset, __local uint *p);
+void __ovld vstore16(long16 data, size_t offset, __local long *p);
+void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);
+void __ovld vstore16(float16 data, size_t offset, __local float *p);
+void __ovld vstore2(char2 data, size_t offset, __private char *p);
+void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);
+void __ovld vstore2(short2 data, size_t offset, __private short *p);
+void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);
+void __ovld vstore2(int2 data, size_t offset, __private int *p);
+void __ovld vstore2(uint2 data, size_t offset, __private uint *p);
+void __ovld vstore2(long2 data, size_t offset, __private long *p);
+void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);
+void __ovld vstore2(float2 data, size_t offset, __private float *p);
+void __ovld vstore3(char3 data, size_t offset, __private char *p);
+void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);
+void __ovld vstore3(short3 data, size_t offset, __private short *p);
+void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);
+void __ovld vstore3(int3 data, size_t offset, __private int *p);
+void __ovld vstore3(uint3 data, size_t offset, __private uint *p);
+void __ovld vstore3(long3 data, size_t offset, __private long *p);
+void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);
+void __ovld vstore3(float3 data, size_t offset, __private float *p);
+void __ovld vstore4(char4 data, size_t offset, __private char *p);
+void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);
+void __ovld vstore4(short4 data, size_t offset, __private short *p);
+void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);
+void __ovld vstore4(int4 data, size_t offset, __private int *p);
+void __ovld vstore4(uint4 data, size_t offset, __private uint *p);
+void __ovld vstore4(long4 data, size_t offset, __private long *p);
+void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);
+void __ovld vstore4(float4 data, size_t offset, __private float *p);
+void __ovld vstore8(char8 data, size_t offset, __private char *p);
+void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);
+void __ovld vstore8(short8 data, size_t offset, __private short *p);
+void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);
+void __ovld vstore8(int8 data, size_t offset, __private int *p);
+void __ovld vstore8(uint8 data, size_t offset, __private uint *p);
+void __ovld vstore8(long8 data, size_t offset, __private long *p);
+void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);
+void __ovld vstore8(float8 data, size_t offset, __private float *p);
+void __ovld vstore16(char16 data, size_t offset, __private char *p);
+void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);
+void __ovld vstore16(short16 data, size_t offset, __private short *p);
+void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);
+void __ovld vstore16(int16 data, size_t offset, __private int *p);
+void __ovld vstore16(uint16 data, size_t offset, __private uint *p);
+void __ovld vstore16(long16 data, size_t offset, __private long *p);
+void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);
+void __ovld vstore16(float16 data, size_t offset, __private float *p);
+#ifdef cl_khr_fp64
+void __ovld vstore2(double2 data, size_t offset, __global double *p);
+void __ovld vstore3(double3 data, size_t offset, __global double *p);
+void __ovld vstore4(double4 data, size_t offset, __global double *p);
+void __ovld vstore8(double8 data, size_t offset, __global double *p);
+void __ovld vstore16(double16 data, size_t offset, __global double *p);
+void __ovld vstore2(double2 data, size_t offset, __local double *p);
+void __ovld vstore3(double3 data, size_t offset, __local double *p);
+void __ovld vstore4(double4 data, size_t offset, __local double *p);
+void __ovld vstore8(double8 data, size_t offset, __local double *p);
+void __ovld vstore16(double16 data, size_t offset, __local double *p);
+void __ovld vstore2(double2 data, size_t offset, __private double *p);
+void __ovld vstore3(double3 data, size_t offset, __private double *p);
+void __ovld vstore4(double4 data, size_t offset, __private double *p);
+void __ovld vstore8(double8 data, size_t offset, __private double *p);
+void __ovld vstore16(double16 data, size_t offset, __private double *p);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+void __ovld vstore(half data, size_t offset, __global half *p);
+void __ovld vstore2(half2 data, size_t offset, __global half *p);
+void __ovld vstore3(half3 data, size_t offset, __global half *p);
+void __ovld vstore4(half4 data, size_t offset, __global half *p);
+void __ovld vstore8(half8 data, size_t offset, __global half *p);
+void __ovld vstore16(half16 data, size_t offset, __global half *p);
+void __ovld vstore(half data, size_t offset, __local half *p);
+void __ovld vstore2(half2 data, size_t offset, __local half *p);
+void __ovld vstore3(half3 data, size_t offset, __local half *p);
+void __ovld vstore4(half4 data, size_t offset, __local half *p);
+void __ovld vstore8(half8 data, size_t offset, __local half *p);
+void __ovld vstore16(half16 data, size_t offset, __local half *p);
+void __ovld vstore(half data, size_t offset, __private half *p);
+void __ovld vstore2(half2 data, size_t offset, __private half *p);
+void __ovld vstore3(half3 data, size_t offset, __private half *p);
+void __ovld vstore4(half4 data, size_t offset, __private half *p);
+void __ovld vstore8(half8 data, size_t offset, __private half *p);
+void __ovld vstore16(half16 data, size_t offset, __private half *p);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+/**
+ * Read sizeof (half) bytes of data from address
+ * (p + offset). The data read is interpreted as a
+ * half value. The half value is converted to a
+ * float value and the float value is returned.
+ * The read address computed as (p + offset)
+ * must be 16-bit aligned.
+ */
+float __ovld vload_half(size_t offset, const __constant half *p);
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld vload_half(size_t offset, const half *p);
+#else
+float __ovld vload_half(size_t offset, const __global half *p);
+float __ovld vload_half(size_t offset, const __local half *p);
+float __ovld vload_half(size_t offset, const __private half *p);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
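+
+/* Sketch (illustrative; `hbuf` is a hypothetical half pointer): a single
+ * half element is read and widened to float.
+ *
+ *   float x = vload_half(i, hbuf);   // reads hbuf[i], returns it as float
+ */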
+
+/**
+ * Read sizeof (halfn) bytes of data from address
+ * (p + (offset * n)). The data read is interpreted
+ * as a halfn value. The halfn value read is
+ * converted to a floatn value and the floatn
+ * value is returned. The read address computed
+ * as (p + (offset * n)) must be 16-bit aligned.
+ */
+float2 __ovld vload_half2(size_t offset, const __constant half *p);
+float3 __ovld vload_half3(size_t offset, const __constant half *p);
+float4 __ovld vload_half4(size_t offset, const __constant half *p);
+float8 __ovld vload_half8(size_t offset, const __constant half *p);
+float16 __ovld vload_half16(size_t offset, const __constant half *p);
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float2 __ovld vload_half2(size_t offset, const half *p);
+float3 __ovld vload_half3(size_t offset, const half *p);
+float4 __ovld vload_half4(size_t offset, const half *p);
+float8 __ovld vload_half8(size_t offset, const half *p);
+float16 __ovld vload_half16(size_t offset, const half *p);
+#else
+float2 __ovld vload_half2(size_t offset, const __global half *p);
+float3 __ovld vload_half3(size_t offset, const __global half *p);
+float4 __ovld vload_half4(size_t offset, const __global half *p);
+float8 __ovld vload_half8(size_t offset, const __global half *p);
+float16 __ovld vload_half16(size_t offset, const __global half *p);
+float2 __ovld vload_half2(size_t offset, const __local half *p);
+float3 __ovld vload_half3(size_t offset, const __local half *p);
+float4 __ovld vload_half4(size_t offset, const __local half *p);
+float8 __ovld vload_half8(size_t offset, const __local half *p);
+float16 __ovld vload_half16(size_t offset, const __local half *p);
+float2 __ovld vload_half2(size_t offset, const __private half *p);
+float3 __ovld vload_half3(size_t offset, const __private half *p);
+float4 __ovld vload_half4(size_t offset, const __private half *p);
+float8 __ovld vload_half8(size_t offset, const __private half *p);
+float16 __ovld vload_half16(size_t offset, const __private half *p);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
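+
+/* Sketch (illustrative; `hbuf` is a hypothetical half pointer): four
+ * consecutive half elements are read and widened to a float4.
+ *
+ *   float4 v = vload_half4(i, hbuf); // reads hbuf[4*i]..hbuf[4*i+3]
+ */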
+
+/**
+ * The float value given by data is first
+ * converted to a half value using the appropriate
+ * rounding mode. The half value is then written
+ * to the address computed as (p + offset). The
+ * address computed as (p + offset) must be
+ * 16-bit aligned.
+ * vstore_half uses the current rounding mode.
+ * The default current rounding mode is round to
+ * nearest even.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+void __ovld vstore_half(float data, size_t offset, half *p);
+void __ovld vstore_half_rte(float data, size_t offset, half *p);
+void __ovld vstore_half_rtz(float data, size_t offset, half *p);
+void __ovld vstore_half_rtp(float data, size_t offset, half *p);
+void __ovld vstore_half_rtn(float data, size_t offset, half *p);
+#ifdef cl_khr_fp64
+void __ovld vstore_half(double data, size_t offset, half *p);
+void __ovld vstore_half_rte(double data, size_t offset, half *p);
+void __ovld vstore_half_rtz(double data, size_t offset, half *p);
+void __ovld vstore_half_rtp(double data, size_t offset, half *p);
+void __ovld vstore_half_rtn(double data, size_t offset, half *p);
+#endif //cl_khr_fp64
+#else
+void __ovld vstore_half(float data, size_t offset, __global half *p);
+void __ovld vstore_half_rte(float data, size_t offset, __global half *p);
+void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);
+void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);
+void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);
+void __ovld vstore_half(float data, size_t offset, __local half *p);
+void __ovld vstore_half_rte(float data, size_t offset, __local half *p);
+void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);
+void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);
+void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);
+void __ovld vstore_half(float data, size_t offset, __private half *p);
+void __ovld vstore_half_rte(float data, size_t offset, __private half *p);
+void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);
+void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);
+void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);
+#ifdef cl_khr_fp64
+void __ovld vstore_half(double data, size_t offset, __global half *p);
+void __ovld vstore_half_rte(double data, size_t offset, __global half *p);
+void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);
+void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);
+void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);
+void __ovld vstore_half(double data, size_t offset, __local half *p);
+void __ovld vstore_half_rte(double data, size_t offset, __local half *p);
+void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);
+void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);
+void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);
+void __ovld vstore_half(double data, size_t offset, __private half *p);
+void __ovld vstore_half_rte(double data, size_t offset, __private half *p);
+void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);
+void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);
+void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
+#endif //cl_khr_fp64
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
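+
+/* Sketch (illustrative; `x` is a float, `hbuf` a hypothetical half pointer):
+ * the unsuffixed form uses the current (default round-to-nearest-even)
+ * rounding mode; the suffixed forms pin it explicitly.
+ *
+ *   vstore_half(x, i, hbuf);         // rounds to nearest even by default
+ *   vstore_half_rtz(x, i, hbuf);     // rounds toward zero instead
+ */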
+
+/**
+ * The floatn value given by data is converted to
+ * a halfn value using the appropriate rounding
+ * mode. The halfn value is then written to the
+ * address computed as (p + (offset * n)). The
+ * address computed as (p + (offset * n)) must be
+ * 16-bit aligned.
+ * vstore_halfn uses the current rounding mode.
+ * The default current rounding mode is round to
+ * nearest even.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+void __ovld vstore_half2(float2 data, size_t offset, half *p);
+void __ovld vstore_half3(float3 data, size_t offset, half *p);
+void __ovld vstore_half4(float4 data, size_t offset, half *p);
+void __ovld vstore_half8(float8 data, size_t offset, half *p);
+void __ovld vstore_half16(float16 data, size_t offset, half *p);
+void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);
+void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);
+void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);
+void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);
+void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);
+void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);
+void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);
+void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);
+void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);
+void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);
+void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);
+void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);
+void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);
+void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);
+void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);
+void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);
+void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);
+void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);
+void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);
+void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);
+#ifdef cl_khr_fp64
+void __ovld vstore_half2(double2 data, size_t offset, half *p);
+void __ovld vstore_half3(double3 data, size_t offset, half *p);
+void __ovld vstore_half4(double4 data, size_t offset, half *p);
+void __ovld vstore_half8(double8 data, size_t offset, half *p);
+void __ovld vstore_half16(double16 data, size_t offset, half *p);
+void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);
+void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);
+void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);
+void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);
+void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);
+void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);
+void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);
+void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);
+void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);
+void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);
+void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);
+void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);
+void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);
+void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);
+void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);
+void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);
+void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);
+void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);
+void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);
+void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);
+#endif //cl_khr_fp64
+#else
+void __ovld vstore_half2(float2 data, size_t offset, __global half *p);
+void __ovld vstore_half3(float3 data, size_t offset, __global half *p);
+void __ovld vstore_half4(float4 data, size_t offset, __global half *p);
+void __ovld vstore_half8(float8 data, size_t offset, __global half *p);
+void __ovld vstore_half16(float16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);
+void __ovld vstore_half2(float2 data, size_t offset, __local half *p);
+void __ovld vstore_half3(float3 data, size_t offset, __local half *p);
+void __ovld vstore_half4(float4 data, size_t offset, __local half *p);
+void __ovld vstore_half8(float8 data, size_t offset, __local half *p);
+void __ovld vstore_half16(float16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);
+void __ovld vstore_half2(float2 data, size_t offset, __private half *p);
+void __ovld vstore_half3(float3 data, size_t offset, __private half *p);
+void __ovld vstore_half4(float4 data, size_t offset, __private half *p);
+void __ovld vstore_half8(float8 data, size_t offset, __private half *p);
+void __ovld vstore_half16(float16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);
+#ifdef cl_khr_fp64
+void __ovld vstore_half2(double2 data, size_t offset, __global half *p);
+void __ovld vstore_half3(double3 data, size_t offset, __global half *p);
+void __ovld vstore_half4(double4 data, size_t offset, __global half *p);
+void __ovld vstore_half8(double8 data, size_t offset, __global half *p);
+void __ovld vstore_half16(double16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);
+void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);
+void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);
+void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);
+void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);
+void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);
+void __ovld vstore_half2(double2 data, size_t offset, __local half *p);
+void __ovld vstore_half3(double3 data, size_t offset, __local half *p);
+void __ovld vstore_half4(double4 data, size_t offset, __local half *p);
+void __ovld vstore_half8(double8 data, size_t offset, __local half *p);
+void __ovld vstore_half16(double16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);
+void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);
+void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);
+void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);
+void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);
+void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);
+void __ovld vstore_half2(double2 data, size_t offset, __private half *p);
+void __ovld vstore_half3(double3 data, size_t offset, __private half *p);
+void __ovld vstore_half4(double4 data, size_t offset, __private half *p);
+void __ovld vstore_half8(double8 data, size_t offset, __private half *p);
+void __ovld vstore_half16(double16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);
+void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);
+void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);
+void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);
+void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);
+void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
+#endif //cl_khr_fp64
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+/**
+ * For n = 1, 2, 4, 8 and 16, read sizeof (halfn)
+ * bytes of data from address (p + (offset * n)).
+ * The data read is interpreted as a halfn value.
+ * The halfn value read is converted to a floatn
+ * value and the floatn value is returned.
+ * The address computed as (p + (offset * n))
+ * must be aligned to sizeof (halfn) bytes.
+ * For n = 3, vloada_half3 reads a half3 from
+ * address (p + (offset * 4)) and returns a float3.
+ * The address computed as (p + (offset * 4))
+ * must be aligned to sizeof (half) * 4 bytes.
+ */
+float __ovld vloada_half(size_t offset, const __constant half *p);
+float2 __ovld vloada_half2(size_t offset, const __constant half *p);
+float3 __ovld vloada_half3(size_t offset, const __constant half *p);
+float4 __ovld vloada_half4(size_t offset, const __constant half *p);
+float8 __ovld vloada_half8(size_t offset, const __constant half *p);
+float16 __ovld vloada_half16(size_t offset, const __constant half *p);
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float __ovld vloada_half(size_t offset, const half *p);
+float2 __ovld vloada_half2(size_t offset, const half *p);
+float3 __ovld vloada_half3(size_t offset, const half *p);
+float4 __ovld vloada_half4(size_t offset, const half *p);
+float8 __ovld vloada_half8(size_t offset, const half *p);
+float16 __ovld vloada_half16(size_t offset, const half *p);
+#else
+float __ovld vloada_half(size_t offset, const __global half *p);
+float2 __ovld vloada_half2(size_t offset, const __global half *p);
+float3 __ovld vloada_half3(size_t offset, const __global half *p);
+float4 __ovld vloada_half4(size_t offset, const __global half *p);
+float8 __ovld vloada_half8(size_t offset, const __global half *p);
+float16 __ovld vloada_half16(size_t offset, const __global half *p);
+float __ovld vloada_half(size_t offset, const __local half *p);
+float2 __ovld vloada_half2(size_t offset, const __local half *p);
+float3 __ovld vloada_half3(size_t offset, const __local half *p);
+float4 __ovld vloada_half4(size_t offset, const __local half *p);
+float8 __ovld vloada_half8(size_t offset, const __local half *p);
+float16 __ovld vloada_half16(size_t offset, const __local half *p);
+float __ovld vloada_half(size_t offset, const __private half *p);
+float2 __ovld vloada_half2(size_t offset, const __private half *p);
+float3 __ovld vloada_half3(size_t offset, const __private half *p);
+float4 __ovld vloada_half4(size_t offset, const __private half *p);
+float8 __ovld vloada_half8(size_t offset, const __private half *p);
+float16 __ovld vloada_half16(size_t offset, const __private half *p);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
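+
+/*
+ * Illustrative sketch (not part of this header): a kernel that widens an
+ * array of packed halfs into float4 values using vloada_half4. The kernel
+ * name and the src/dst parameters are hypothetical.
+ *
+ *   __kernel void widen(const __global half *src, __global float4 *dst) {
+ *     size_t i = get_global_id(0);
+ *     // Reads 4 halfs from (src + i * 4); that address must be aligned
+ *     // to sizeof(half) * 4 bytes.
+ *     dst[i] = vloada_half4(i, src);
+ *   }
+ */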
+
+/**
+ * The floatn value given by data is converted to
+ * a halfn value using the appropriate rounding
+ * mode.
+ * For n = 1, 2, 4, 8 and 16, the halfn value is
+ * written to the address computed as (p + (offset
+ * * n)). The address computed as (p + (offset *
+ * n)) must be aligned to sizeof (halfn) bytes.
+ * For n = 3, the half3 value is written to the
+ * address computed as (p + (offset * 4)). The
+ * address computed as (p + (offset * 4)) must be
+ * aligned to sizeof (half) * 4 bytes.
+ * vstorea_halfn uses the current rounding
+ * mode. The default current rounding mode is
+ * round to nearest even.
+ */
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+void __ovld vstorea_half(float data, size_t offset, half *p);
+void __ovld vstorea_half2(float2 data, size_t offset, half *p);
+void __ovld vstorea_half3(float3 data, size_t offset, half *p);
+void __ovld vstorea_half4(float4 data, size_t offset, half *p);
+void __ovld vstorea_half8(float8 data, size_t offset, half *p);
+void __ovld vstorea_half16(float16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rte(float data, size_t offset, half *p);
+void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rtz(float data, size_t offset, half *p);
+void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rtp(float data, size_t offset, half *p);
+void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rtn(float data, size_t offset, half *p);
+void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);
+
+#ifdef cl_khr_fp64
+void __ovld vstorea_half(double data, size_t offset, half *p);
+void __ovld vstorea_half2(double2 data, size_t offset, half *p);
+void __ovld vstorea_half3(double3 data, size_t offset, half *p);
+void __ovld vstorea_half4(double4 data, size_t offset, half *p);
+void __ovld vstorea_half8(double8 data, size_t offset, half *p);
+void __ovld vstorea_half16(double16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rte(double data, size_t offset, half *p);
+void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rtz(double data, size_t offset, half *p);
+void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rtp(double data, size_t offset, half *p);
+void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);
+
+void __ovld vstorea_half_rtn(double data, size_t offset, half *p);
+void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);
+void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);
+void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);
+void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);
+void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);
+#endif //cl_khr_fp64
+
+#else
+void __ovld vstorea_half(float data, size_t offset, __global half *p);
+void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half(float data, size_t offset, __local half *p);
+void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half(float data, size_t offset, __private half *p);
+void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);
+
+#ifdef cl_khr_fp64
+void __ovld vstorea_half(double data, size_t offset, __global half *p);
+void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);
+void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);
+void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);
+void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);
+void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);
+void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);
+
+void __ovld vstorea_half(double data, size_t offset, __local half *p);
+void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);
+void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);
+void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);
+void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);
+void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);
+void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);
+
+void __ovld vstorea_half(double data, size_t offset, __private half *p);
+void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);
+
+void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);
+void __ovld vstorea_half2_rtn(double2 data, size_t offset, __private half *p);
+void __ovld vstorea_half3_rtn(double3 data, size_t offset, __private half *p);
+void __ovld vstorea_half4_rtn(double4 data, size_t offset, __private half *p);
+void __ovld vstorea_half8_rtn(double8 data, size_t offset, __private half *p);
+void __ovld vstorea_half16_rtn(double16 data, size_t offset, __private half *p);
+#endif //cl_khr_fp64
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
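+
+/*
+ * Illustrative sketch (not part of this header): narrowing float4 values
+ * to packed halfs with an explicit rounding mode. The kernel name and the
+ * src/dst parameters are hypothetical.
+ *
+ *   __kernel void narrow(const __global float4 *src, __global half *dst) {
+ *     size_t i = get_global_id(0);
+ *     // Converts each component to half with round-to-zero and writes
+ *     // 4 halfs starting at (dst + i * 4).
+ *     vstorea_half4_rtz(src[i], i, dst);
+ *   }
+ */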
+
+// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
+
+// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence
+typedef uint cl_mem_fence_flags;
+
+/**
+ * Queue a memory fence to ensure correct
+ * ordering of memory operations to local memory
+ */
+#define CLK_LOCAL_MEM_FENCE 0x01
+
+/**
+ * Queue a memory fence to ensure correct
+ * ordering of memory operations to global memory
+ */
+#define CLK_GLOBAL_MEM_FENCE 0x02
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+/**
+ * Queue a memory fence to ensure correct ordering of memory
+ * operations between work-items of a work-group to
+ * image memory.
+ */
+#define CLK_IMAGE_MEM_FENCE 0x04
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+/**
+ * All work-items in a work-group executing the kernel
+ * on a processor must execute this function before any
+ * are allowed to continue execution beyond the barrier.
+ * This function must be encountered by all work-items in
+ * a work-group executing the kernel.
+ * If barrier is inside a conditional statement, then all
+ * work-items must enter the conditional if any work-item
+ * enters the conditional statement and executes the
+ * barrier.
+ * If barrier is inside a loop, all work-items must execute
+ * the barrier for each iteration of the loop before any are
+ * allowed to continue execution beyond the barrier.
+ * The barrier function also queues a memory fence
+ * (reads and writes) to ensure correct ordering of
+ * memory operations to local or global memory.
+ * The flags argument specifies the memory address space
+ * and can be set to a combination of the following literal
+ * values.
+ * CLK_LOCAL_MEM_FENCE - The barrier function
+ * will either flush any variables stored in local memory
+ * or queue a memory fence to ensure correct ordering of
+ * memory operations to local memory.
+ * CLK_GLOBAL_MEM_FENCE - The barrier function
+ * will queue a memory fence to ensure correct ordering
+ * of memory operations to global memory. This can be
+ * useful when work-items, for example, write to buffer or
+ * image objects and then want to read the updated data.
+ */
+
+void __ovld barrier(cl_mem_fence_flags flags);
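+
+/*
+ * Illustrative sketch (not part of this header): the usual local-memory
+ * staging pattern. Each work-item fills its slot, then the whole
+ * work-group meets at the barrier before anyone reads a neighbour's slot.
+ * The kernel name and parameters are hypothetical.
+ *
+ *   __kernel void rotate_left(__global const int *in, __global int *out,
+ *                             __local int *tmp) {
+ *     size_t lid = get_local_id(0);
+ *     tmp[lid] = in[get_global_id(0)];
+ *     // No work-item continues past this point until all arrive; the
+ *     // flag also orders the local-memory stores above.
+ *     barrier(CLK_LOCAL_MEM_FENCE);
+ *     out[get_global_id(0)] = tmp[(lid + 1) % get_local_size(0)];
+ *   }
+ */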
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+typedef enum memory_scope
+{
+ memory_scope_work_item,
+ memory_scope_work_group,
+ memory_scope_device,
+ memory_scope_all_svm_devices,
+ memory_scope_sub_group
+} memory_scope;
+
+void __ovld work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
+void __ovld work_group_barrier(cl_mem_fence_flags flags);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
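+
+/*
+ * Illustrative sketch (not part of this header, OpenCL 2.0 only): the
+ * two-argument form additionally names the scope at which the fence is
+ * visible, e.g. making this work-group's global stores visible device-wide:
+ *
+ *   work_group_barrier(CLK_GLOBAL_MEM_FENCE, memory_scope_device);
+ */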
+
+// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions
+
+/**
+ * Orders loads and stores of a work-item
+ * executing a kernel. This means that loads
+ * and stores preceding the mem_fence will
+ * be committed to memory before any loads
+ * and stores following the mem_fence.
+ * The flags argument specifies the memory
+ * address space and can be set to a
+ * combination of the following literal
+ * values:
+ * CLK_LOCAL_MEM_FENCE
+ * CLK_GLOBAL_MEM_FENCE.
+ */
+void __ovld mem_fence(cl_mem_fence_flags flags);
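+
+/*
+ * Illustrative sketch (not part of this header): unlike barrier, mem_fence
+ * does not synchronize work-items; it only orders this work-item's own
+ * loads and stores. The buffer names are hypothetical.
+ *
+ *   data_buf[i] = payload;            // store the payload first
+ *   mem_fence(CLK_GLOBAL_MEM_FENCE);  // commit it before the flag below
+ *   flag_buf[i] = 1;
+ */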
+
+/**
+ * Read memory barrier that orders only
+ * loads.
+ * The flags argument specifies the memory
+ * address space and can be set to a
+ * combination of the following literal
+ * values:
+ * CLK_LOCAL_MEM_FENCE
+ * CLK_GLOBAL_MEM_FENCE.
+ */
+void __ovld read_mem_fence(cl_mem_fence_flags flags);
+
+/**
+ * Write memory barrier that orders only
+ * stores.
+ * The flags argument specifies the memory
+ * address space and can be set to a
+ * combination of the following literal
+ * values:
+ * CLK_LOCAL_MEM_FENCE
+ * CLK_GLOBAL_MEM_FENCE.
+ */
+void __ovld write_mem_fence(cl_mem_fence_flags flags);
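+
+/*
+ * Illustrative sketch (not part of this header): read_mem_fence and
+ * write_mem_fence are one-sided forms of mem_fence. For example, ordering
+ * only this work-item's stores to local memory (names hypothetical):
+ *
+ *   scratch[lid] = partial;               // local store
+ *   write_mem_fence(CLK_LOCAL_MEM_FENCE); // stores above commit first
+ *   scratch_flag[lid] = 1;
+ */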
+
+// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+cl_mem_fence_flags __ovld get_fence(const void *ptr);
+cl_mem_fence_flags __ovld get_fence(void *ptr);
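+
+/*
+ * Illustrative sketch (not part of this header): get_fence maps a generic
+ * address-space pointer to the flag needed to fence accesses made through
+ * it. The helper and its parameters are hypothetical.
+ *
+ *   void publish(int *p, int v) {  // p is in the generic address space
+ *     *p = v;
+ *     mem_fence(get_fence(p));     // local or global fence, as appropriate
+ *   }
+ */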
+
+/**
+ * The builtin functions to_global, to_local, and to_private need to be
+ * declared as Clang builtin functions and checked in Sema, since they
+ * should be declared as
+ *   addr gentype* to_addr (gentype*);
+ * where gentype is a builtin type or a user-defined type.
+ */
+
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch
+
+/**
+ * event_t async_work_group_copy (
+ * __global gentype *dst,
+ * const __local gentype *src,
+ * size_t num_elements,
+ * event_t event)
+ * Perform an async copy of num_elements
+ * gentype elements from src to dst. The async
+ * copy is performed by all work-items in a work-group
+ * and this built-in function must therefore
+ * be encountered by all work-items in a work-group
+ * executing the kernel with the same
+ * argument values; otherwise the results are
+ * undefined.
+ * Returns an event object that can be used by
+ * wait_group_events to wait for the async copy
+ * to finish. The event argument can also be used
+ * to associate the async_work_group_copy with
+ * a previous async copy allowing an event to be
+ * shared by multiple async copies; otherwise event
+ * should be zero.
+ * If the event argument is non-zero, the event object
+ * supplied in the event argument will be returned.
+ * This function does not perform any implicit
+ * synchronization of source data such as using a
+ * barrier before performing the copy.
+ */
+event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);
+#ifdef cl_khr_fp64
+event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global half *dst, const __local half *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event);
+event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event);
+#endif //cl_khr_fp16
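+
+/*
+ * Illustrative sketch (not part of this header): staging a tile of floats
+ * from global into local memory. Every work-item in the work-group issues
+ * the same call with the same arguments; names are hypothetical.
+ *
+ *   event_t e = async_work_group_copy(tile, src + tile_base, tile_len, 0);
+ *   wait_group_events(1, &e);  // block until the copy has finished
+ *   // tile[] may now be read by every work-item in the group.
+ */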
+
+/**
+ * Perform an async gather of num_elements
+ * gentype elements from src to dst. The
+ * src_stride is the stride in elements for each
+ * gentype element read from src. The dst_stride
+ * is the stride in elements for each gentype
+ * element written to dst. The async gather is
+ * performed by all work-items in a work-group.
+ * This built-in function must therefore be
+ * encountered by all work-items in a work-group
+ * executing the kernel with the same argument
+ * values; otherwise the results are undefined.
+ * Returns an event object that can be used by
+ * wait_group_events to wait for the async copy
+ * to finish. The event argument can also be used
+ * to associate the
+ * async_work_group_strided_copy with a
+ * previous async copy allowing an event to be
+ * shared by multiple async copies; otherwise event
+ * should be zero.
+ * If the event argument is non-zero, the event object
+ * supplied in the event argument will be returned.
+ * This function does not perform any implicit
+ * synchronization of source data such as using a
+ * barrier before performing the copy.
+ */
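+
+/*
+ * Illustrative sketch (not part of this header): gathering every 4th float
+ * from a global buffer into contiguous local memory; names hypothetical.
+ *
+ *   // tile_len floats, read with a source stride of 4 elements
+ *   event_t e = async_work_group_strided_copy(tile, src, tile_len, 4, 0);
+ *   wait_group_events(1, &e);
+ */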
+event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);
+#ifdef cl_khr_fp64
+event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event);
+event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event);
+#endif //cl_khr_fp16
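+
+/*
+ * Usage sketch (illustrative only; the kernel and its parameter names are
+ * hypothetical, not part of this header). A strided copy can gather one
+ * column of a row-major matrix into contiguous __local storage, with
+ * src_stride set to the row width:
+ *
+ *   __kernel void gather_column(const __global float *mat, __local float *col,
+ *                               uint width, uint height, uint which_col) {
+ *     event_t e = async_work_group_strided_copy(col, mat + which_col,
+ *                                               (size_t)height, (size_t)width, 0);
+ *     wait_group_events(1, &e);
+ *     // col[r] now holds mat[r * width + which_col] for r in [0, height).
+ *   }
+ */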
+
+/**
+ * Wait for events that identify the
+ * async_work_group_copy operations to
+ * complete. The event objects specified in
+ * event_list will be released after the wait is
+ * performed.
+ * This function must be encountered by all work-items
+ * in a work-group executing the kernel, with the
+ * same num_events and event objects specified in
+ * event_list; otherwise the results are undefined.
+ */
+void __ovld wait_group_events(int num_events, event_t *event_list);
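+
+/*
+ * Usage sketch (illustrative only; dst_a/src_a/dst_b/src_b/n are hypothetical
+ * names): two group copies can be issued back to back and waited on with a
+ * single call, which also releases both event objects:
+ *
+ *   event_t ev[2];
+ *   ev[0] = async_work_group_copy(dst_a, src_a, n, 0);
+ *   ev[1] = async_work_group_copy(dst_b, src_b, n, 0);
+ *   wait_group_events(2, ev);
+ */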
+
+/**
+ * Prefetch num_elements * sizeof(gentype)
+ * bytes into the global cache. The prefetch
+ * instruction is applied to a work-item in a work-group
+ * and does not affect the functional
+ * behavior of the kernel.
+ */
+void __ovld prefetch(const __global char *p, size_t num_elements);
+void __ovld prefetch(const __global uchar *p, size_t num_elements);
+void __ovld prefetch(const __global short *p, size_t num_elements);
+void __ovld prefetch(const __global ushort *p, size_t num_elements);
+void __ovld prefetch(const __global int *p, size_t num_elements);
+void __ovld prefetch(const __global uint *p, size_t num_elements);
+void __ovld prefetch(const __global long *p, size_t num_elements);
+void __ovld prefetch(const __global ulong *p, size_t num_elements);
+void __ovld prefetch(const __global float *p, size_t num_elements);
+void __ovld prefetch(const __global char2 *p, size_t num_elements);
+void __ovld prefetch(const __global uchar2 *p, size_t num_elements);
+void __ovld prefetch(const __global short2 *p, size_t num_elements);
+void __ovld prefetch(const __global ushort2 *p, size_t num_elements);
+void __ovld prefetch(const __global int2 *p, size_t num_elements);
+void __ovld prefetch(const __global uint2 *p, size_t num_elements);
+void __ovld prefetch(const __global long2 *p, size_t num_elements);
+void __ovld prefetch(const __global ulong2 *p, size_t num_elements);
+void __ovld prefetch(const __global float2 *p, size_t num_elements);
+void __ovld prefetch(const __global char3 *p, size_t num_elements);
+void __ovld prefetch(const __global uchar3 *p, size_t num_elements);
+void __ovld prefetch(const __global short3 *p, size_t num_elements);
+void __ovld prefetch(const __global ushort3 *p, size_t num_elements);
+void __ovld prefetch(const __global int3 *p, size_t num_elements);
+void __ovld prefetch(const __global uint3 *p, size_t num_elements);
+void __ovld prefetch(const __global long3 *p, size_t num_elements);
+void __ovld prefetch(const __global ulong3 *p, size_t num_elements);
+void __ovld prefetch(const __global float3 *p, size_t num_elements);
+void __ovld prefetch(const __global char4 *p, size_t num_elements);
+void __ovld prefetch(const __global uchar4 *p, size_t num_elements);
+void __ovld prefetch(const __global short4 *p, size_t num_elements);
+void __ovld prefetch(const __global ushort4 *p, size_t num_elements);
+void __ovld prefetch(const __global int4 *p, size_t num_elements);
+void __ovld prefetch(const __global uint4 *p, size_t num_elements);
+void __ovld prefetch(const __global long4 *p, size_t num_elements);
+void __ovld prefetch(const __global ulong4 *p, size_t num_elements);
+void __ovld prefetch(const __global float4 *p, size_t num_elements);
+void __ovld prefetch(const __global char8 *p, size_t num_elements);
+void __ovld prefetch(const __global uchar8 *p, size_t num_elements);
+void __ovld prefetch(const __global short8 *p, size_t num_elements);
+void __ovld prefetch(const __global ushort8 *p, size_t num_elements);
+void __ovld prefetch(const __global int8 *p, size_t num_elements);
+void __ovld prefetch(const __global uint8 *p, size_t num_elements);
+void __ovld prefetch(const __global long8 *p, size_t num_elements);
+void __ovld prefetch(const __global ulong8 *p, size_t num_elements);
+void __ovld prefetch(const __global float8 *p, size_t num_elements);
+void __ovld prefetch(const __global char16 *p, size_t num_elements);
+void __ovld prefetch(const __global uchar16 *p, size_t num_elements);
+void __ovld prefetch(const __global short16 *p, size_t num_elements);
+void __ovld prefetch(const __global ushort16 *p, size_t num_elements);
+void __ovld prefetch(const __global int16 *p, size_t num_elements);
+void __ovld prefetch(const __global uint16 *p, size_t num_elements);
+void __ovld prefetch(const __global long16 *p, size_t num_elements);
+void __ovld prefetch(const __global ulong16 *p, size_t num_elements);
+void __ovld prefetch(const __global float16 *p, size_t num_elements);
+#ifdef cl_khr_fp64
+void __ovld prefetch(const __global double *p, size_t num_elements);
+void __ovld prefetch(const __global double2 *p, size_t num_elements);
+void __ovld prefetch(const __global double3 *p, size_t num_elements);
+void __ovld prefetch(const __global double4 *p, size_t num_elements);
+void __ovld prefetch(const __global double8 *p, size_t num_elements);
+void __ovld prefetch(const __global double16 *p, size_t num_elements);
+#endif //cl_khr_fp64
+#ifdef cl_khr_fp16
+void __ovld prefetch(const __global half *p, size_t num_elements);
+void __ovld prefetch(const __global half2 *p, size_t num_elements);
+void __ovld prefetch(const __global half3 *p, size_t num_elements);
+void __ovld prefetch(const __global half4 *p, size_t num_elements);
+void __ovld prefetch(const __global half8 *p, size_t num_elements);
+void __ovld prefetch(const __global half16 *p, size_t num_elements);
+#endif // cl_khr_fp16
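+
+/*
+ * Usage sketch (illustrative only; in, next_tile_base, tile_base, TILE_SIZE
+ * and process() are hypothetical): a prefetch is a pure per-work-item hint,
+ * so it needs no synchronization and may simply be dropped by the device:
+ *
+ *   prefetch(in + next_tile_base, TILE_SIZE);   // warm the cache for later
+ *   process(in + tile_base);                    // work on the current tile
+ */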
+
+// OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions
+
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+#endif
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old + val) and store the result at the
+ * location pointed to by p. The function returns old.
+ */
+int __ovld atomic_add(volatile __global int *p, int val);
+unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_add(volatile __local int *p, int val);
+unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_base_atomics)
+int __ovld atom_add(volatile __global int *p, int val);
+unsigned int __ovld atom_add(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_base_atomics)
+int __ovld atom_add(volatile __local int *p, int val);
+unsigned int __ovld atom_add(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_base_atomics)
+long __ovld atom_add(volatile __global long *p, long val);
+unsigned long __ovld atom_add(volatile __global unsigned long *p, unsigned long val);
+long __ovld atom_add(volatile __local long *p, long val);
+unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long val);
+#endif
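+
+/*
+ * Usage sketch (illustrative only; bins, NUM_BINS, v and value_to_bin() are
+ * hypothetical): a __local histogram where several work-items may increment
+ * the same bin concurrently:
+ *
+ *   __local int bins[NUM_BINS];      // zeroed earlier by the work-group
+ *   atomic_add(&bins[value_to_bin(v)], 1);
+ *   barrier(CLK_LOCAL_MEM_FENCE);    // before the bins are read back
+ */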
+
+/**
+ * Read the 32-bit value (referred to as old) stored at the location pointed
+ * to by p. Compute (old - val) and store the result at the location pointed
+ * to by p. The function returns old.
+ */
+int __ovld atomic_sub(volatile __global int *p, int val);
+unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_sub(volatile __local int *p, int val);
+unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_base_atomics)
+int __ovld atom_sub(volatile __global int *p, int val);
+unsigned int __ovld atom_sub(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_base_atomics)
+int __ovld atom_sub(volatile __local int *p, int val);
+unsigned int __ovld atom_sub(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_base_atomics)
+long __ovld atom_sub(volatile __global long *p, long val);
+unsigned long __ovld atom_sub(volatile __global unsigned long *p, unsigned long val);
+long __ovld atom_sub(volatile __local long *p, long val);
+unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long val);
+#endif
+
+/**
+ * Swaps the old value stored at the location
+ * pointed to by p with the new value given by val.
+ * Returns the old value.
+ */
+int __ovld atomic_xchg(volatile __global int *p, int val);
+unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_xchg(volatile __local int *p, int val);
+unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);
+float __ovld atomic_xchg(volatile __global float *p, float val);
+float __ovld atomic_xchg(volatile __local float *p, float val);
+
+#if defined(cl_khr_global_int32_base_atomics)
+int __ovld atom_xchg(volatile __global int *p, int val);
+unsigned int __ovld atom_xchg(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_base_atomics)
+int __ovld atom_xchg(volatile __local int *p, int val);
+unsigned int __ovld atom_xchg(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_base_atomics)
+long __ovld atom_xchg(volatile __global long *p, long val);
+long __ovld atom_xchg(volatile __local long *p, long val);
+unsigned long __ovld atom_xchg(volatile __global unsigned long *p, unsigned long val);
+unsigned long __ovld atom_xchg(volatile __local unsigned long *p, unsigned long val);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old + 1) and store the result at the
+ * location pointed to by p. The function returns old.
+ */
+int __ovld atomic_inc(volatile __global int *p);
+unsigned int __ovld atomic_inc(volatile __global unsigned int *p);
+int __ovld atomic_inc(volatile __local int *p);
+unsigned int __ovld atomic_inc(volatile __local unsigned int *p);
+
+#if defined(cl_khr_global_int32_base_atomics)
+int __ovld atom_inc(volatile __global int *p);
+unsigned int __ovld atom_inc(volatile __global unsigned int *p);
+#endif
+#if defined(cl_khr_local_int32_base_atomics)
+int __ovld atom_inc(volatile __local int *p);
+unsigned int __ovld atom_inc(volatile __local unsigned int *p);
+#endif
+
+#if defined(cl_khr_int64_base_atomics)
+long __ovld atom_inc(volatile __global long *p);
+unsigned long __ovld atom_inc(volatile __global unsigned long *p);
+long __ovld atom_inc(volatile __local long *p);
+unsigned long __ovld atom_inc(volatile __local unsigned long *p);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old - 1) and store the result at the
+ * location pointed to by p. The function returns old.
+ */
+int __ovld atomic_dec(volatile __global int *p);
+unsigned int __ovld atomic_dec(volatile __global unsigned int *p);
+int __ovld atomic_dec(volatile __local int *p);
+unsigned int __ovld atomic_dec(volatile __local unsigned int *p);
+
+#if defined(cl_khr_global_int32_base_atomics)
+int __ovld atom_dec(volatile __global int *p);
+unsigned int __ovld atom_dec(volatile __global unsigned int *p);
+#endif
+#if defined(cl_khr_local_int32_base_atomics)
+int __ovld atom_dec(volatile __local int *p);
+unsigned int __ovld atom_dec(volatile __local unsigned int *p);
+#endif
+
+#if defined(cl_khr_int64_base_atomics)
+long __ovld atom_dec(volatile __global long *p);
+unsigned long __ovld atom_dec(volatile __global unsigned long *p);
+long __ovld atom_dec(volatile __local long *p);
+unsigned long __ovld atom_dec(volatile __local unsigned long *p);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old == cmp) ? val : old and store the result
+ * at the location pointed to by p. The function
+ * returns old.
+ */
+int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);
+unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);
+int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);
+unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);
+
+#if defined(cl_khr_global_int32_base_atomics)
+int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);
+unsigned int __ovld atom_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_base_atomics)
+int __ovld atom_cmpxchg(volatile __local int *p, int cmp, int val);
+unsigned int __ovld atom_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_base_atomics)
+long __ovld atom_cmpxchg(volatile __global long *p, long cmp, long val);
+unsigned long __ovld atom_cmpxchg(volatile __global unsigned long *p, unsigned long cmp, unsigned long val);
+long __ovld atom_cmpxchg(volatile __local long *p, long cmp, long val);
+unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned long cmp, unsigned long val);
+#endif
+
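+/*
+ * Usage sketch (illustrative only; atomic_add_float is a hypothetical
+ * helper): atomic_cmpxchg is the building block for read-modify-write
+ * operations that have no dedicated builtin, e.g. a floating-point add
+ * emulated by compare-and-swap over the bit pattern:
+ *
+ *   void atomic_add_float(volatile __global int *p, float val) {
+ *     int old_bits, new_bits;
+ *     do {
+ *       old_bits = *p;
+ *       new_bits = as_int(as_float(old_bits) + val);
+ *     } while (atomic_cmpxchg(p, old_bits, new_bits) != old_bits);
+ *   }
+ */
+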
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * min(old, val) and store the minimum value at
+ * the location pointed to by p. The function
+ * returns old.
+ */
+int __ovld atomic_min(volatile __global int *p, int val);
+unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_min(volatile __local int *p, int val);
+unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_extended_atomics)
+int __ovld atom_min(volatile __global int *p, int val);
+unsigned int __ovld atom_min(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_extended_atomics)
+int __ovld atom_min(volatile __local int *p, int val);
+unsigned int __ovld atom_min(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_extended_atomics)
+long __ovld atom_min(volatile __global long *p, long val);
+unsigned long __ovld atom_min(volatile __global unsigned long *p, unsigned long val);
+long __ovld atom_min(volatile __local long *p, long val);
+unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long val);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * max(old, val) and store the maximum value at
+ * the location pointed to by p. The function
+ * returns old.
+ */
+int __ovld atomic_max(volatile __global int *p, int val);
+unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_max(volatile __local int *p, int val);
+unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_extended_atomics)
+int __ovld atom_max(volatile __global int *p, int val);
+unsigned int __ovld atom_max(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_extended_atomics)
+int __ovld atom_max(volatile __local int *p, int val);
+unsigned int __ovld atom_max(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_extended_atomics)
+long __ovld atom_max(volatile __global long *p, long val);
+unsigned long __ovld atom_max(volatile __global unsigned long *p, unsigned long val);
+long __ovld atom_max(volatile __local long *p, long val);
+unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long val);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old & val) and store the result at the
+ * location pointed to by p. The function returns old.
+ */
+int __ovld atomic_and(volatile __global int *p, int val);
+unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_and(volatile __local int *p, int val);
+unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_extended_atomics)
+int __ovld atom_and(volatile __global int *p, int val);
+unsigned int __ovld atom_and(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_extended_atomics)
+int __ovld atom_and(volatile __local int *p, int val);
+unsigned int __ovld atom_and(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_extended_atomics)
+long __ovld atom_and(volatile __global long *p, long val);
+unsigned long __ovld atom_and(volatile __global unsigned long *p, unsigned long val);
+long __ovld atom_and(volatile __local long *p, long val);
+unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long val);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old | val) and store the result at the
+ * location pointed to by p. The function returns old.
+ */
+int __ovld atomic_or(volatile __global int *p, int val);
+unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_or(volatile __local int *p, int val);
+unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_extended_atomics)
+int __ovld atom_or(volatile __global int *p, int val);
+unsigned int __ovld atom_or(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_extended_atomics)
+int __ovld atom_or(volatile __local int *p, int val);
+unsigned int __ovld atom_or(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_extended_atomics)
+long __ovld atom_or(volatile __global long *p, long val);
+unsigned long __ovld atom_or(volatile __global unsigned long *p, unsigned long val);
+long __ovld atom_or(volatile __local long *p, long val);
+unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long val);
+#endif
+
+/**
+ * Read the 32-bit value (referred to as old)
+ * stored at the location pointed to by p. Compute
+ * (old ^ val) and store the result at the
+ * location pointed to by p. The function returns old.
+ */
+int __ovld atomic_xor(volatile __global int *p, int val);
+unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);
+int __ovld atomic_xor(volatile __local int *p, int val);
+unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);
+
+#if defined(cl_khr_global_int32_extended_atomics)
+int __ovld atom_xor(volatile __global int *p, int val);
+unsigned int __ovld atom_xor(volatile __global unsigned int *p, unsigned int val);
+#endif
+#if defined(cl_khr_local_int32_extended_atomics)
+int __ovld atom_xor(volatile __local int *p, int val);
+unsigned int __ovld atom_xor(volatile __local unsigned int *p, unsigned int val);
+#endif
+
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable
+#endif
+
+// OpenCL v2.0 s6.13.11 - Atomics Functions
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#ifndef ATOMIC_VAR_INIT
+#define ATOMIC_VAR_INIT(x) (x)
+#endif //ATOMIC_VAR_INIT
+#define ATOMIC_FLAG_INIT 0
+
+// enum values aligned with what clang uses in EmitAtomicExpr()
+typedef enum memory_order
+{
+ memory_order_relaxed,
+ memory_order_acquire,
+ memory_order_release,
+ memory_order_acq_rel,
+ memory_order_seq_cst
+} memory_order;
+
+// 64-bit and double atomics support requires the cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+#endif
+
+// atomic_init()
+void __ovld atomic_init(volatile atomic_int *object, int value);
+void __ovld atomic_init(volatile atomic_uint *object, uint value);
+void __ovld atomic_init(volatile atomic_float *object, float value);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+void __ovld atomic_init(volatile atomic_long *object, long value);
+void __ovld atomic_init(volatile atomic_ulong *object, ulong value);
+#ifdef cl_khr_fp64
+void __ovld atomic_init(volatile atomic_double *object, double value);
+#endif //cl_khr_fp64
+#endif
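+
+/*
+ * Usage sketch (illustrative only; acc is a hypothetical name): atomic_init
+ * gives an atomic object its initial value non-atomically, e.g. for a
+ * __local accumulator before the work-group starts updating it:
+ *
+ *   __local atomic_int acc;
+ *   if (get_local_id(0) == 0)
+ *     atomic_init(&acc, 0);
+ *   barrier(CLK_LOCAL_MEM_FENCE);    // make the init visible to the group
+ */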
+
+// atomic_work_item_fence()
+void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);
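+
+/*
+ * Usage sketch (illustrative only; result, gid, v and ready are
+ * hypothetical): a release fence lets a plain store be published through a
+ * subsequent relaxed atomic store, to be paired with an acquire on the
+ * reader side:
+ *
+ *   result[gid] = v;                             // plain store
+ *   atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE,
+ *                          memory_order_release, memory_scope_device);
+ *   atomic_store_explicit(&ready, 1,
+ *                         memory_order_relaxed, memory_scope_device);
+ */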
+
+// atomic_fetch()
+
+int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);
+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);
+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);
+int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);
+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);
+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);
+
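+/*
+ * Usage sketch (illustrative only; counter is a hypothetical
+ * __global atomic_uint * initialized on the host): when only atomicity
+ * matters, relaxed ordering is sufficient and usually cheapest:
+ *
+ *   uint my_slot = atomic_fetch_add_explicit(counter, 1u,
+ *                                            memory_order_relaxed,
+ *                                            memory_scope_device);
+ */
+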
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);
+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);
+long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);
+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+
+// OpenCL v2.0 s6.13.11.7.5:
+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.
+// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
+
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+
+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+
+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+#endif
+
+// atomic_store()
+
+void __ovld atomic_store(volatile atomic_int *object, int desired);
+void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);
+void __ovld atomic_store(volatile atomic_uint *object, uint desired);
+void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);
+void __ovld atomic_store(volatile atomic_float *object, float desired);
+void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+void __ovld atomic_store(volatile atomic_double *object, double desired);
+void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);
+#endif //cl_khr_fp64
+void __ovld atomic_store(volatile atomic_long *object, long desired);
+void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);
+void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);
+void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
+#endif
+
+// atomic_load()
+
+int __ovld atomic_load(volatile atomic_int *object);
+int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);
+int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);
+uint __ovld atomic_load(volatile atomic_uint *object);
+uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);
+uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);
+float __ovld atomic_load(volatile atomic_float *object);
+float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);
+float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_load(volatile atomic_double *object);
+double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);
+double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);
+#endif //cl_khr_fp64
+long __ovld atomic_load(volatile atomic_long *object);
+long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);
+long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);
+ulong __ovld atomic_load(volatile atomic_ulong *object);
+ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);
+ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);
+#endif
+
+// atomic_exchange()
+
+int __ovld atomic_exchange(volatile atomic_int *object, int desired);
+int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);
+int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);
+uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);
+uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);
+uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);
+float __ovld atomic_exchange(volatile atomic_float *object, float desired);
+float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);
+float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_exchange(volatile atomic_double *object, double desired);
+double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);
+double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);
+#endif //cl_khr_fp64
+long __ovld atomic_exchange(volatile atomic_long *object, long desired);
+long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);
+long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);
+ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);
+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);
+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
+#endif
+
+// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()
+
+bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+#endif //cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+#endif
+
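+/*
+ * Usage sketch (illustrative only; fmax_obj and v are hypothetical): unlike
+ * atomic_cmpxchg, the C11-style compare-exchange writes the observed value
+ * back into *expected on failure, which keeps update loops compact. An
+ * atomic floating-point maximum:
+ *
+ *   float old = atomic_load(fmax_obj);   // volatile __global atomic_float *
+ *   while (v > old &&
+ *          !atomic_compare_exchange_weak(fmax_obj, &old, v))
+ *     ;   // on failure, old was refreshed; retry while v is still larger
+ */
+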
+// atomic_flag_test_and_set() and atomic_flag_clear()
+
+bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);
+bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);
+bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
+void __ovld atomic_flag_clear(volatile atomic_flag *object);
+void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);
+void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
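+
+/*
+ * Usage sketch (illustrative only; lock is a hypothetical
+ * __global atomic_flag * whose pointee was initialized with
+ * ATOMIC_FLAG_INIT): a simple spinlock built from test-and-set:
+ *
+ *   while (atomic_flag_test_and_set_explicit(lock,
+ *              memory_order_acquire, memory_scope_device))
+ *     ;                                   // spin until we observe clear
+ *   // ... critical section ...
+ *   atomic_flag_clear_explicit(lock, memory_order_release,
+ *                              memory_scope_device);
+ *
+ * Beware that spinning between work-groups can deadlock on devices without
+ * independent forward progress guarantees; this is a sketch, not a pattern
+ * to adopt wholesale.
+ */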
+
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions
+
+/**
+ * The shuffle and shuffle2 built-in functions construct
+ * a permutation of elements from one or two input
+ * vectors respectively that are of the same type,
+ * returning a vector with the same element type as the
+ * input and length that is the same as the shuffle mask.
+ * The size of each element in the mask must match the
+ * size of each element in the result. For shuffle, only
+ * the ilogb(2*m-1) least significant bits of each mask
+ * element are considered, where m is the number of
+ * elements in the input vector. For shuffle2, only the
+ * ilogb(2*m-1)+1 least significant bits of each mask
+ * element are considered. Other bits in the mask shall
+ * be ignored.
+ * The elements of the input vectors are numbered from
+ * left to right across one or both of the vectors. For this
+ * purpose, the number of elements in a vector is given
+ * by vec_step(gentypem). The shuffle mask operand
+ * specifies, for each element of the result vector, which
+ * element of the one or two input vectors the result
+ * element is taken from.
+ * Examples:
+ *   uint4 mask = (uint4)(3, 2, 1, 0);
+ *   float4 a;
+ *   float4 r = shuffle(a, mask);
+ *   // r.s0123 = a.wzyx
+ *
+ *   uint8 mask = (uint8)(0, 1, 2, 3, 4, 5, 6, 7);
+ *   float4 a, b;
+ *   float8 r = shuffle2(a, b, mask);
+ *   // r.s0123 = a.xyzw
+ *   // r.s4567 = b.xyzw
+ *
+ *   uint4 mask;
+ *   float8 a;
+ *   float4 b;
+ *   b = shuffle(a, mask);
+ *
+ * An example that is not valid:
+ *   uint8 mask;
+ *   short16 a;
+ *   short8 b;
+ *   b = shuffle(a, mask);   // not valid: the mask element size (uint)
+ *                           // does not match the result element size (short)
+ */
+char2 __ovld __cnfn shuffle(char2 x, uchar2 mask);
+char2 __ovld __cnfn shuffle(char4 x, uchar2 mask);
+char2 __ovld __cnfn shuffle(char8 x, uchar2 mask);
+char2 __ovld __cnfn shuffle(char16 x, uchar2 mask);
+
+uchar2 __ovld __cnfn shuffle(uchar2 x, uchar2 mask);
+uchar2 __ovld __cnfn shuffle(uchar4 x, uchar2 mask);
+uchar2 __ovld __cnfn shuffle(uchar8 x, uchar2 mask);
+uchar2 __ovld __cnfn shuffle(uchar16 x, uchar2 mask);
+
+short2 __ovld __cnfn shuffle(short2 x, ushort2 mask);
+short2 __ovld __cnfn shuffle(short4 x, ushort2 mask);
+short2 __ovld __cnfn shuffle(short8 x, ushort2 mask);
+short2 __ovld __cnfn shuffle(short16 x, ushort2 mask);
+
+ushort2 __ovld __cnfn shuffle(ushort2 x, ushort2 mask);
+ushort2 __ovld __cnfn shuffle(ushort4 x, ushort2 mask);
+ushort2 __ovld __cnfn shuffle(ushort8 x, ushort2 mask);
+ushort2 __ovld __cnfn shuffle(ushort16 x, ushort2 mask);
+
+int2 __ovld __cnfn shuffle(int2 x, uint2 mask);
+int2 __ovld __cnfn shuffle(int4 x, uint2 mask);
+int2 __ovld __cnfn shuffle(int8 x, uint2 mask);
+int2 __ovld __cnfn shuffle(int16 x, uint2 mask);
+
+uint2 __ovld __cnfn shuffle(uint2 x, uint2 mask);
+uint2 __ovld __cnfn shuffle(uint4 x, uint2 mask);
+uint2 __ovld __cnfn shuffle(uint8 x, uint2 mask);
+uint2 __ovld __cnfn shuffle(uint16 x, uint2 mask);
+
+long2 __ovld __cnfn shuffle(long2 x, ulong2 mask);
+long2 __ovld __cnfn shuffle(long4 x, ulong2 mask);
+long2 __ovld __cnfn shuffle(long8 x, ulong2 mask);
+long2 __ovld __cnfn shuffle(long16 x, ulong2 mask);
+
+ulong2 __ovld __cnfn shuffle(ulong2 x, ulong2 mask);
+ulong2 __ovld __cnfn shuffle(ulong4 x, ulong2 mask);
+ulong2 __ovld __cnfn shuffle(ulong8 x, ulong2 mask);
+ulong2 __ovld __cnfn shuffle(ulong16 x, ulong2 mask);
+
+float2 __ovld __cnfn shuffle(float2 x, uint2 mask);
+float2 __ovld __cnfn shuffle(float4 x, uint2 mask);
+float2 __ovld __cnfn shuffle(float8 x, uint2 mask);
+float2 __ovld __cnfn shuffle(float16 x, uint2 mask);
+
+char4 __ovld __cnfn shuffle(char2 x, uchar4 mask);
+char4 __ovld __cnfn shuffle(char4 x, uchar4 mask);
+char4 __ovld __cnfn shuffle(char8 x, uchar4 mask);
+char4 __ovld __cnfn shuffle(char16 x, uchar4 mask);
+
+uchar4 __ovld __cnfn shuffle(uchar2 x, uchar4 mask);
+uchar4 __ovld __cnfn shuffle(uchar4 x, uchar4 mask);
+uchar4 __ovld __cnfn shuffle(uchar8 x, uchar4 mask);
+uchar4 __ovld __cnfn shuffle(uchar16 x, uchar4 mask);
+
+short4 __ovld __cnfn shuffle(short2 x, ushort4 mask);
+short4 __ovld __cnfn shuffle(short4 x, ushort4 mask);
+short4 __ovld __cnfn shuffle(short8 x, ushort4 mask);
+short4 __ovld __cnfn shuffle(short16 x, ushort4 mask);
+
+ushort4 __ovld __cnfn shuffle(ushort2 x, ushort4 mask);
+ushort4 __ovld __cnfn shuffle(ushort4 x, ushort4 mask);
+ushort4 __ovld __cnfn shuffle(ushort8 x, ushort4 mask);
+ushort4 __ovld __cnfn shuffle(ushort16 x, ushort4 mask);
+
+int4 __ovld __cnfn shuffle(int2 x, uint4 mask);
+int4 __ovld __cnfn shuffle(int4 x, uint4 mask);
+int4 __ovld __cnfn shuffle(int8 x, uint4 mask);
+int4 __ovld __cnfn shuffle(int16 x, uint4 mask);
+
+uint4 __ovld __cnfn shuffle(uint2 x, uint4 mask);
+uint4 __ovld __cnfn shuffle(uint4 x, uint4 mask);
+uint4 __ovld __cnfn shuffle(uint8 x, uint4 mask);
+uint4 __ovld __cnfn shuffle(uint16 x, uint4 mask);
+
+long4 __ovld __cnfn shuffle(long2 x, ulong4 mask);
+long4 __ovld __cnfn shuffle(long4 x, ulong4 mask);
+long4 __ovld __cnfn shuffle(long8 x, ulong4 mask);
+long4 __ovld __cnfn shuffle(long16 x, ulong4 mask);
+
+ulong4 __ovld __cnfn shuffle(ulong2 x, ulong4 mask);
+ulong4 __ovld __cnfn shuffle(ulong4 x, ulong4 mask);
+ulong4 __ovld __cnfn shuffle(ulong8 x, ulong4 mask);
+ulong4 __ovld __cnfn shuffle(ulong16 x, ulong4 mask);
+
+float4 __ovld __cnfn shuffle(float2 x, uint4 mask);
+float4 __ovld __cnfn shuffle(float4 x, uint4 mask);
+float4 __ovld __cnfn shuffle(float8 x, uint4 mask);
+float4 __ovld __cnfn shuffle(float16 x, uint4 mask);
+
+char8 __ovld __cnfn shuffle(char2 x, uchar8 mask);
+char8 __ovld __cnfn shuffle(char4 x, uchar8 mask);
+char8 __ovld __cnfn shuffle(char8 x, uchar8 mask);
+char8 __ovld __cnfn shuffle(char16 x, uchar8 mask);
+
+uchar8 __ovld __cnfn shuffle(uchar2 x, uchar8 mask);
+uchar8 __ovld __cnfn shuffle(uchar4 x, uchar8 mask);
+uchar8 __ovld __cnfn shuffle(uchar8 x, uchar8 mask);
+uchar8 __ovld __cnfn shuffle(uchar16 x, uchar8 mask);
+
+short8 __ovld __cnfn shuffle(short2 x, ushort8 mask);
+short8 __ovld __cnfn shuffle(short4 x, ushort8 mask);
+short8 __ovld __cnfn shuffle(short8 x, ushort8 mask);
+short8 __ovld __cnfn shuffle(short16 x, ushort8 mask);
+
+ushort8 __ovld __cnfn shuffle(ushort2 x, ushort8 mask);
+ushort8 __ovld __cnfn shuffle(ushort4 x, ushort8 mask);
+ushort8 __ovld __cnfn shuffle(ushort8 x, ushort8 mask);
+ushort8 __ovld __cnfn shuffle(ushort16 x, ushort8 mask);
+
+int8 __ovld __cnfn shuffle(int2 x, uint8 mask);
+int8 __ovld __cnfn shuffle(int4 x, uint8 mask);
+int8 __ovld __cnfn shuffle(int8 x, uint8 mask);
+int8 __ovld __cnfn shuffle(int16 x, uint8 mask);
+
+uint8 __ovld __cnfn shuffle(uint2 x, uint8 mask);
+uint8 __ovld __cnfn shuffle(uint4 x, uint8 mask);
+uint8 __ovld __cnfn shuffle(uint8 x, uint8 mask);
+uint8 __ovld __cnfn shuffle(uint16 x, uint8 mask);
+
+long8 __ovld __cnfn shuffle(long2 x, ulong8 mask);
+long8 __ovld __cnfn shuffle(long4 x, ulong8 mask);
+long8 __ovld __cnfn shuffle(long8 x, ulong8 mask);
+long8 __ovld __cnfn shuffle(long16 x, ulong8 mask);
+
+ulong8 __ovld __cnfn shuffle(ulong2 x, ulong8 mask);
+ulong8 __ovld __cnfn shuffle(ulong4 x, ulong8 mask);
+ulong8 __ovld __cnfn shuffle(ulong8 x, ulong8 mask);
+ulong8 __ovld __cnfn shuffle(ulong16 x, ulong8 mask);
+
+float8 __ovld __cnfn shuffle(float2 x, uint8 mask);
+float8 __ovld __cnfn shuffle(float4 x, uint8 mask);
+float8 __ovld __cnfn shuffle(float8 x, uint8 mask);
+float8 __ovld __cnfn shuffle(float16 x, uint8 mask);
+
+char16 __ovld __cnfn shuffle(char2 x, uchar16 mask);
+char16 __ovld __cnfn shuffle(char4 x, uchar16 mask);
+char16 __ovld __cnfn shuffle(char8 x, uchar16 mask);
+char16 __ovld __cnfn shuffle(char16 x, uchar16 mask);
+
+uchar16 __ovld __cnfn shuffle(uchar2 x, uchar16 mask);
+uchar16 __ovld __cnfn shuffle(uchar4 x, uchar16 mask);
+uchar16 __ovld __cnfn shuffle(uchar8 x, uchar16 mask);
+uchar16 __ovld __cnfn shuffle(uchar16 x, uchar16 mask);
+
+short16 __ovld __cnfn shuffle(short2 x, ushort16 mask);
+short16 __ovld __cnfn shuffle(short4 x, ushort16 mask);
+short16 __ovld __cnfn shuffle(short8 x, ushort16 mask);
+short16 __ovld __cnfn shuffle(short16 x, ushort16 mask);
+
+ushort16 __ovld __cnfn shuffle(ushort2 x, ushort16 mask);
+ushort16 __ovld __cnfn shuffle(ushort4 x, ushort16 mask);
+ushort16 __ovld __cnfn shuffle(ushort8 x, ushort16 mask);
+ushort16 __ovld __cnfn shuffle(ushort16 x, ushort16 mask);
+
+int16 __ovld __cnfn shuffle(int2 x, uint16 mask);
+int16 __ovld __cnfn shuffle(int4 x, uint16 mask);
+int16 __ovld __cnfn shuffle(int8 x, uint16 mask);
+int16 __ovld __cnfn shuffle(int16 x, uint16 mask);
+
+uint16 __ovld __cnfn shuffle(uint2 x, uint16 mask);
+uint16 __ovld __cnfn shuffle(uint4 x, uint16 mask);
+uint16 __ovld __cnfn shuffle(uint8 x, uint16 mask);
+uint16 __ovld __cnfn shuffle(uint16 x, uint16 mask);
+
+long16 __ovld __cnfn shuffle(long2 x, ulong16 mask);
+long16 __ovld __cnfn shuffle(long4 x, ulong16 mask);
+long16 __ovld __cnfn shuffle(long8 x, ulong16 mask);
+long16 __ovld __cnfn shuffle(long16 x, ulong16 mask);
+
+ulong16 __ovld __cnfn shuffle(ulong2 x, ulong16 mask);
+ulong16 __ovld __cnfn shuffle(ulong4 x, ulong16 mask);
+ulong16 __ovld __cnfn shuffle(ulong8 x, ulong16 mask);
+ulong16 __ovld __cnfn shuffle(ulong16 x, ulong16 mask);
+
+float16 __ovld __cnfn shuffle(float2 x, uint16 mask);
+float16 __ovld __cnfn shuffle(float4 x, uint16 mask);
+float16 __ovld __cnfn shuffle(float8 x, uint16 mask);
+float16 __ovld __cnfn shuffle(float16 x, uint16 mask);
+
+#ifdef cl_khr_fp64
+double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);
+double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);
+double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);
+double2 __ovld __cnfn shuffle(double16 x, ulong2 mask);
+
+double4 __ovld __cnfn shuffle(double2 x, ulong4 mask);
+double4 __ovld __cnfn shuffle(double4 x, ulong4 mask);
+double4 __ovld __cnfn shuffle(double8 x, ulong4 mask);
+double4 __ovld __cnfn shuffle(double16 x, ulong4 mask);
+
+double8 __ovld __cnfn shuffle(double2 x, ulong8 mask);
+double8 __ovld __cnfn shuffle(double4 x, ulong8 mask);
+double8 __ovld __cnfn shuffle(double8 x, ulong8 mask);
+double8 __ovld __cnfn shuffle(double16 x, ulong8 mask);
+
+double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);
+double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);
+double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);
+double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);
+half2 __ovld __cnfn shuffle(half4 x, ushort2 mask);
+half2 __ovld __cnfn shuffle(half8 x, ushort2 mask);
+half2 __ovld __cnfn shuffle(half16 x, ushort2 mask);
+
+half4 __ovld __cnfn shuffle(half2 x, ushort4 mask);
+half4 __ovld __cnfn shuffle(half4 x, ushort4 mask);
+half4 __ovld __cnfn shuffle(half8 x, ushort4 mask);
+half4 __ovld __cnfn shuffle(half16 x, ushort4 mask);
+
+half8 __ovld __cnfn shuffle(half2 x, ushort8 mask);
+half8 __ovld __cnfn shuffle(half4 x, ushort8 mask);
+half8 __ovld __cnfn shuffle(half8 x, ushort8 mask);
+half8 __ovld __cnfn shuffle(half16 x, ushort8 mask);
+
+half16 __ovld __cnfn shuffle(half2 x, ushort16 mask);
+half16 __ovld __cnfn shuffle(half4 x, ushort16 mask);
+half16 __ovld __cnfn shuffle(half8 x, ushort16 mask);
+half16 __ovld __cnfn shuffle(half16 x, ushort16 mask);
+#endif //cl_khr_fp16
+
+char2 __ovld __cnfn shuffle2(char2 x, char2 y, uchar2 mask);
+char2 __ovld __cnfn shuffle2(char4 x, char4 y, uchar2 mask);
+char2 __ovld __cnfn shuffle2(char8 x, char8 y, uchar2 mask);
+char2 __ovld __cnfn shuffle2(char16 x, char16 y, uchar2 mask);
+
+uchar2 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar2 mask);
+uchar2 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar2 mask);
+uchar2 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar2 mask);
+uchar2 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar2 mask);
+
+short2 __ovld __cnfn shuffle2(short2 x, short2 y, ushort2 mask);
+short2 __ovld __cnfn shuffle2(short4 x, short4 y, ushort2 mask);
+short2 __ovld __cnfn shuffle2(short8 x, short8 y, ushort2 mask);
+short2 __ovld __cnfn shuffle2(short16 x, short16 y, ushort2 mask);
+
+ushort2 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort2 mask);
+ushort2 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort2 mask);
+ushort2 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort2 mask);
+ushort2 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort2 mask);
+
+int2 __ovld __cnfn shuffle2(int2 x, int2 y, uint2 mask);
+int2 __ovld __cnfn shuffle2(int4 x, int4 y, uint2 mask);
+int2 __ovld __cnfn shuffle2(int8 x, int8 y, uint2 mask);
+int2 __ovld __cnfn shuffle2(int16 x, int16 y, uint2 mask);
+
+uint2 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint2 mask);
+uint2 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint2 mask);
+uint2 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint2 mask);
+uint2 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint2 mask);
+
+long2 __ovld __cnfn shuffle2(long2 x, long2 y, ulong2 mask);
+long2 __ovld __cnfn shuffle2(long4 x, long4 y, ulong2 mask);
+long2 __ovld __cnfn shuffle2(long8 x, long8 y, ulong2 mask);
+long2 __ovld __cnfn shuffle2(long16 x, long16 y, ulong2 mask);
+
+ulong2 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong2 mask);
+ulong2 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong2 mask);
+ulong2 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong2 mask);
+ulong2 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong2 mask);
+
+float2 __ovld __cnfn shuffle2(float2 x, float2 y, uint2 mask);
+float2 __ovld __cnfn shuffle2(float4 x, float4 y, uint2 mask);
+float2 __ovld __cnfn shuffle2(float8 x, float8 y, uint2 mask);
+float2 __ovld __cnfn shuffle2(float16 x, float16 y, uint2 mask);
+
+char4 __ovld __cnfn shuffle2(char2 x, char2 y, uchar4 mask);
+char4 __ovld __cnfn shuffle2(char4 x, char4 y, uchar4 mask);
+char4 __ovld __cnfn shuffle2(char8 x, char8 y, uchar4 mask);
+char4 __ovld __cnfn shuffle2(char16 x, char16 y, uchar4 mask);
+
+uchar4 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar4 mask);
+uchar4 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar4 mask);
+uchar4 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar4 mask);
+uchar4 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar4 mask);
+
+short4 __ovld __cnfn shuffle2(short2 x, short2 y, ushort4 mask);
+short4 __ovld __cnfn shuffle2(short4 x, short4 y, ushort4 mask);
+short4 __ovld __cnfn shuffle2(short8 x, short8 y, ushort4 mask);
+short4 __ovld __cnfn shuffle2(short16 x, short16 y, ushort4 mask);
+
+ushort4 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort4 mask);
+ushort4 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort4 mask);
+ushort4 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort4 mask);
+ushort4 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort4 mask);
+
+int4 __ovld __cnfn shuffle2(int2 x, int2 y, uint4 mask);
+int4 __ovld __cnfn shuffle2(int4 x, int4 y, uint4 mask);
+int4 __ovld __cnfn shuffle2(int8 x, int8 y, uint4 mask);
+int4 __ovld __cnfn shuffle2(int16 x, int16 y, uint4 mask);
+
+uint4 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint4 mask);
+uint4 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint4 mask);
+uint4 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint4 mask);
+uint4 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint4 mask);
+
+long4 __ovld __cnfn shuffle2(long2 x, long2 y, ulong4 mask);
+long4 __ovld __cnfn shuffle2(long4 x, long4 y, ulong4 mask);
+long4 __ovld __cnfn shuffle2(long8 x, long8 y, ulong4 mask);
+long4 __ovld __cnfn shuffle2(long16 x, long16 y, ulong4 mask);
+
+ulong4 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong4 mask);
+ulong4 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong4 mask);
+ulong4 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong4 mask);
+ulong4 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong4 mask);
+
+float4 __ovld __cnfn shuffle2(float2 x, float2 y, uint4 mask);
+float4 __ovld __cnfn shuffle2(float4 x, float4 y, uint4 mask);
+float4 __ovld __cnfn shuffle2(float8 x, float8 y, uint4 mask);
+float4 __ovld __cnfn shuffle2(float16 x, float16 y, uint4 mask);
+
+char8 __ovld __cnfn shuffle2(char2 x, char2 y, uchar8 mask);
+char8 __ovld __cnfn shuffle2(char4 x, char4 y, uchar8 mask);
+char8 __ovld __cnfn shuffle2(char8 x, char8 y, uchar8 mask);
+char8 __ovld __cnfn shuffle2(char16 x, char16 y, uchar8 mask);
+
+uchar8 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar8 mask);
+uchar8 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar8 mask);
+uchar8 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar8 mask);
+uchar8 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar8 mask);
+
+short8 __ovld __cnfn shuffle2(short2 x, short2 y, ushort8 mask);
+short8 __ovld __cnfn shuffle2(short4 x, short4 y, ushort8 mask);
+short8 __ovld __cnfn shuffle2(short8 x, short8 y, ushort8 mask);
+short8 __ovld __cnfn shuffle2(short16 x, short16 y, ushort8 mask);
+
+ushort8 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort8 mask);
+ushort8 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort8 mask);
+ushort8 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort8 mask);
+ushort8 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort8 mask);
+
+int8 __ovld __cnfn shuffle2(int2 x, int2 y, uint8 mask);
+int8 __ovld __cnfn shuffle2(int4 x, int4 y, uint8 mask);
+int8 __ovld __cnfn shuffle2(int8 x, int8 y, uint8 mask);
+int8 __ovld __cnfn shuffle2(int16 x, int16 y, uint8 mask);
+
+uint8 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint8 mask);
+uint8 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint8 mask);
+uint8 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint8 mask);
+uint8 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint8 mask);
+
+long8 __ovld __cnfn shuffle2(long2 x, long2 y, ulong8 mask);
+long8 __ovld __cnfn shuffle2(long4 x, long4 y, ulong8 mask);
+long8 __ovld __cnfn shuffle2(long8 x, long8 y, ulong8 mask);
+long8 __ovld __cnfn shuffle2(long16 x, long16 y, ulong8 mask);
+
+ulong8 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong8 mask);
+ulong8 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong8 mask);
+ulong8 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong8 mask);
+ulong8 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong8 mask);
+
+float8 __ovld __cnfn shuffle2(float2 x, float2 y, uint8 mask);
+float8 __ovld __cnfn shuffle2(float4 x, float4 y, uint8 mask);
+float8 __ovld __cnfn shuffle2(float8 x, float8 y, uint8 mask);
+float8 __ovld __cnfn shuffle2(float16 x, float16 y, uint8 mask);
+
+char16 __ovld __cnfn shuffle2(char2 x, char2 y, uchar16 mask);
+char16 __ovld __cnfn shuffle2(char4 x, char4 y, uchar16 mask);
+char16 __ovld __cnfn shuffle2(char8 x, char8 y, uchar16 mask);
+char16 __ovld __cnfn shuffle2(char16 x, char16 y, uchar16 mask);
+
+uchar16 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar16 mask);
+uchar16 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar16 mask);
+uchar16 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar16 mask);
+uchar16 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar16 mask);
+
+short16 __ovld __cnfn shuffle2(short2 x, short2 y, ushort16 mask);
+short16 __ovld __cnfn shuffle2(short4 x, short4 y, ushort16 mask);
+short16 __ovld __cnfn shuffle2(short8 x, short8 y, ushort16 mask);
+short16 __ovld __cnfn shuffle2(short16 x, short16 y, ushort16 mask);
+
+ushort16 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort16 mask);
+ushort16 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort16 mask);
+ushort16 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort16 mask);
+ushort16 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort16 mask);
+
+int16 __ovld __cnfn shuffle2(int2 x, int2 y, uint16 mask);
+int16 __ovld __cnfn shuffle2(int4 x, int4 y, uint16 mask);
+int16 __ovld __cnfn shuffle2(int8 x, int8 y, uint16 mask);
+int16 __ovld __cnfn shuffle2(int16 x, int16 y, uint16 mask);
+
+uint16 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint16 mask);
+uint16 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint16 mask);
+uint16 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint16 mask);
+uint16 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint16 mask);
+
+long16 __ovld __cnfn shuffle2(long2 x, long2 y, ulong16 mask);
+long16 __ovld __cnfn shuffle2(long4 x, long4 y, ulong16 mask);
+long16 __ovld __cnfn shuffle2(long8 x, long8 y, ulong16 mask);
+long16 __ovld __cnfn shuffle2(long16 x, long16 y, ulong16 mask);
+
+ulong16 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong16 mask);
+ulong16 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong16 mask);
+ulong16 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong16 mask);
+ulong16 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong16 mask);
+
+float16 __ovld __cnfn shuffle2(float2 x, float2 y, uint16 mask);
+float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);
+float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);
+float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);
+
+#ifdef cl_khr_fp64
+double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);
+double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);
+double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);
+double2 __ovld __cnfn shuffle2(double16 x, double16 y, ulong2 mask);
+
+double4 __ovld __cnfn shuffle2(double2 x, double2 y, ulong4 mask);
+double4 __ovld __cnfn shuffle2(double4 x, double4 y, ulong4 mask);
+double4 __ovld __cnfn shuffle2(double8 x, double8 y, ulong4 mask);
+double4 __ovld __cnfn shuffle2(double16 x, double16 y, ulong4 mask);
+
+double8 __ovld __cnfn shuffle2(double2 x, double2 y, ulong8 mask);
+double8 __ovld __cnfn shuffle2(double4 x, double4 y, ulong8 mask);
+double8 __ovld __cnfn shuffle2(double8 x, double8 y, ulong8 mask);
+double8 __ovld __cnfn shuffle2(double16 x, double16 y, ulong8 mask);
+
+double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);
+double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);
+double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);
+double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);
+half2 __ovld __cnfn shuffle2(half4 x, half4 y, ushort2 mask);
+half2 __ovld __cnfn shuffle2(half8 x, half8 y, ushort2 mask);
+half2 __ovld __cnfn shuffle2(half16 x, half16 y, ushort2 mask);
+
+half4 __ovld __cnfn shuffle2(half2 x, half2 y, ushort4 mask);
+half4 __ovld __cnfn shuffle2(half4 x, half4 y, ushort4 mask);
+half4 __ovld __cnfn shuffle2(half8 x, half8 y, ushort4 mask);
+half4 __ovld __cnfn shuffle2(half16 x, half16 y, ushort4 mask);
+
+half8 __ovld __cnfn shuffle2(half2 x, half2 y, ushort8 mask);
+half8 __ovld __cnfn shuffle2(half4 x, half4 y, ushort8 mask);
+half8 __ovld __cnfn shuffle2(half8 x, half8 y, ushort8 mask);
+half8 __ovld __cnfn shuffle2(half16 x, half16 y, ushort8 mask);
+
+half16 __ovld __cnfn shuffle2(half2 x, half2 y, ushort16 mask);
+half16 __ovld __cnfn shuffle2(half4 x, half4 y, ushort16 mask);
+half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);
+half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);
+#endif //cl_khr_fp16
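+
+// Illustrative sketch: reversing a float4 with shuffle; only the two low
+// bits of each mask element are read here (ilogb(2*4-1) == 2), so a mask
+// value of 7 selects the same element as 3. reverse4 is a hypothetical name.
+//
+//   __kernel void reverse4(__global float4 *buf) {
+//     size_t i = get_global_id(0);
+//     buf[i] = shuffle(buf[i], (uint4)(3, 2, 1, 0));   // .wzyx
+//   }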
+
+// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf
+
+int printf(__constant const char* st, ...);
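+
+// Illustrative sketch: printf returns 0 on success and -1 on failure, and
+// supports a vector length modifier (OpenCL v1.2 s6.12.13.1), e.g.:
+//
+//   float4 f = (float4)(1.0f, 2.0f, 3.0f, 4.0f);
+//   printf("f4 = %2.2v4hlf\n", f);   // prints "f4 = 1.00,2.00,3.00,4.00"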
+
+// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions
+
+// These values need to match their runtime equivalents
+//
+// Addressing Mode.
+//
+#define CLK_ADDRESS_NONE 0
+#define CLK_ADDRESS_CLAMP_TO_EDGE 2
+#define CLK_ADDRESS_CLAMP 4
+#define CLK_ADDRESS_REPEAT 6
+#define CLK_ADDRESS_MIRRORED_REPEAT 8
+
+//
+// Coordinate Normalization
+//
+#define CLK_NORMALIZED_COORDS_FALSE 0
+#define CLK_NORMALIZED_COORDS_TRUE 1
+
+//
+// Filtering Mode.
+//
+#define CLK_FILTER_NEAREST 0x10
+#define CLK_FILTER_LINEAR 0x20
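+
+// Illustrative: a program-scope sampler is a bitwise OR of one value from
+// each of the three groups above, e.g.:
+//   __constant sampler_t linear_smp = CLK_NORMALIZED_COORDS_TRUE |
+//                                     CLK_ADDRESS_REPEAT | CLK_FILTER_LINEAR;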
+
+/**
+ * Use the coordinate (coord.xy) to do an element lookup in
+ * the 2D image object specified by image.
+ *
+ * Use the coordinate (coord.x, coord.y, coord.z) to do
+ * an element lookup in the 3D image object specified
+ * by image. coord.w is ignored.
+ *
+ * Use the coordinate (coord.z) to index into the
+ * 2D image array object specified by image_array
+ * and (coord.x, coord.y) to do an element lookup in
+ * that 2D image layer.
+ *
+ * Use the coordinate (x) to do an element lookup in
+ * the 1D image object specified by image.
+ *
+ * Use the coordinate (coord.y) to index into the
+ * 1D image array object specified by image_array
+ * and (coord.x) to do an element lookup in
+ * that 1D image layer.
+ *
+ * Use the coordinate (coord.xy) and sample to do an
+ * element lookup in the 2D multi-sample image specified
+ * by image.
+ *
+ * Use coord.xy and sample to do an element
+ * lookup in the 2D multi-sample image layer
+ * identified by index coord.z in the 2D multi-sample
+ * image array specified by image.
+ *
+ * For mipmap images, use the mip-level specified by
+ * the Level-of-Detail (lod) or use gradients for LOD
+ * computation.
+ *
+ * read_imagef returns floating-point values in the
+ * range [0.0 ... 1.0] for image objects created with
+ * image_channel_data_type set to one of the predefined
+ * packed formats or CL_UNORM_INT8, or
+ * CL_UNORM_INT16.
+ *
+ * read_imagef returns floating-point values in the
+ * range [-1.0 ... 1.0] for image objects created with
+ * image_channel_data_type set to CL_SNORM_INT8
+ * or CL_SNORM_INT16.
+ *
+ * read_imagef returns floating-point values for image
+ * objects created with image_channel_data_type set to
+ * CL_HALF_FLOAT or CL_FLOAT.
+ *
+ * read_imagei and read_imageui return
+ * unnormalized signed integer and unsigned integer
+ * values respectively. Each channel will be stored in a
+ * 32-bit integer.
+ *
+ * read_imagei can only be used with image objects
+ * created with image_channel_data_type set to one of
+ * the following values:
+ * CL_SIGNED_INT8,
+ * CL_SIGNED_INT16 and
+ * CL_SIGNED_INT32.
+ * If the image_channel_data_type is not one of the
+ * above values, the values returned by read_imagei
+ * are undefined.
+ *
+ * read_imageui can only be used with image objects
+ * created with image_channel_data_type set to one of
+ * the following values:
+ * CL_UNSIGNED_INT8,
+ * CL_UNSIGNED_INT16 and
+ * CL_UNSIGNED_INT32.
+ * If the image_channel_data_type is not one of the
+ * above values, the values returned by read_imageui
+ * are undefined.
+ *
+ * The read_image{i|ui} calls support a nearest filter
+ * only. The filter_mode specified in sampler
+ * must be set to CLK_FILTER_NEAREST; otherwise
+ * the values returned are undefined.
+ *
+ * The read_image{f|i|ui} calls that take
+ * integer coordinates must use a sampler with
+ * normalized coordinates set to
+ * CLK_NORMALIZED_COORDS_FALSE and
+ * addressing mode set to
+ * CLK_ADDRESS_CLAMP_TO_EDGE,
+ * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE;
+ * otherwise the values returned are undefined.
+ *
+ * Values returned by read_imagef for image objects
+ * with image_channel_data_type values not specified
+ * in the description above are undefined.
+ */
+
+float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);
+float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord);
+
+int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, int2 coord);
+int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord);
+uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, int2 coord);
+uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord);
+
+float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 coord);
+float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord);
+
+int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, int4 coord);
+int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord);
+uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);
+uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);
+
+float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
+float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
+
+int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
+int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
+uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
+uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
+
+float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);
+float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);
+
+int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, int coord);
+int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);
+
+float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
+float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
+
+int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
+int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
+
+#ifdef cl_khr_depth_images
+float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);
+float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, int2 coord);
+
+float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord);
+float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, int4 coord);
+#endif //cl_khr_depth_images
+
+#if defined(cl_khr_gl_msaa_sharing)
+float4 __purefn __ovld read_imagef(read_only image2d_msaa_t image, int2 coord, int sample);
+int4 __purefn __ovld read_imagei(read_only image2d_msaa_t image, int2 coord, int sample);
+uint4 __purefn __ovld read_imageui(read_only image2d_msaa_t image, int2 coord, int sample);
+
+float __purefn __ovld read_imagef(read_only image2d_msaa_depth_t image, int2 coord, int sample);
+
+float4 __purefn __ovld read_imagef(read_only image2d_array_msaa_t image, int4 coord, int sample);
+int4 __purefn __ovld read_imagei(read_only image2d_array_msaa_t image, int4 coord, int sample);
+uint4 __purefn __ovld read_imageui(read_only image2d_array_msaa_t image, int4 coord, int sample);
+
+float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, int4 coord, int sample);
+#endif //cl_khr_gl_msaa_sharing
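+
+// Illustrative sketch: the integer-coordinate overloads above require a
+// sampler with non-normalized coordinates and CLK_FILTER_NEAREST; a minimal
+// copy kernel under those constraints (copy_img is a hypothetical name):
+//
+//   __constant sampler_t nearest_smp = CLK_NORMALIZED_COORDS_FALSE |
+//                                      CLK_ADDRESS_CLAMP_TO_EDGE |
+//                                      CLK_FILTER_NEAREST;
+//   __kernel void copy_img(read_only image2d_t src,
+//                          write_only image2d_t dst) {
+//     int2 p = (int2)(get_global_id(0), get_global_id(1));
+//     write_imagef(dst, p, read_imagef(src, nearest_smp, p));
+//   }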
+
+// OpenCL Extension v2.0 s9.18 - Mipmaps
+#ifdef cl_khr_mipmap_image
+
+float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);
+int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);
+uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
+int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);
+int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);
+
+float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
+int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
+
+float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
+int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);
+int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);
+uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);
+
+float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);
+int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);
+uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);
+
+float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+
+float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+
+float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+
+float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+
+float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
+int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
+uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
+
+#endif //cl_khr_mipmap_image
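+
+// Illustrative (cl_khr_mipmap_image): sampling an explicit level of detail
+// from a mipmapped 2D image; img, smp, u and v are hypothetical names.
+//
+//   float4 texel = read_imagef(img, smp, (float2)(u, v), 2.0f); // level 2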
+
+/**
+ * Sampler-less Image Access
+ */
+
+float4 __purefn __ovld read_imagef(read_only image1d_t image, int coord);
+int4 __purefn __ovld read_imagei(read_only image1d_t image, int coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_t image, int coord);
+
+float4 __purefn __ovld read_imagef(read_only image1d_buffer_t image, int coord);
+int4 __purefn __ovld read_imagei(read_only image1d_buffer_t image, int coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_buffer_t image, int coord);
+
+float4 __purefn __ovld read_imagef(read_only image1d_array_t image, int2 coord);
+int4 __purefn __ovld read_imagei(read_only image1d_array_t image, int2 coord);
+uint4 __purefn __ovld read_imageui(read_only image1d_array_t image, int2 coord);
+
+float4 __purefn __ovld read_imagef(read_only image2d_t image, int2 coord);
+int4 __purefn __ovld read_imagei(read_only image2d_t image, int2 coord);
+uint4 __purefn __ovld read_imageui(read_only image2d_t image, int2 coord);
+
+float4 __purefn __ovld read_imagef(read_only image2d_array_t image, int4 coord);
+int4 __purefn __ovld read_imagei(read_only image2d_array_t image, int4 coord);
+uint4 __purefn __ovld read_imageui(read_only image2d_array_t image, int4 coord);
+
+#ifdef cl_khr_depth_images
+float __purefn __ovld read_imagef(read_only image2d_depth_t image, int2 coord);
+float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, int4 coord);
+#endif //cl_khr_depth_images
+
+float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);
+int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);
+uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);
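+
+// Illustrative: sampler-less reads take integer coordinates and behave as
+// if sampled with CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE |
+// CLK_FILTER_NEAREST, e.g.:
+//
+//   float4 t = read_imagef(img, (int2)(x, y));   // img is hypothetical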
+
+// Image read functions returning half4 type
+#ifdef cl_khr_fp16
+half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);
+half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);
+half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
+half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
+half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);
+half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);
+half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);
+half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);
+half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);
+half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);
+half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);
+half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);
+half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);
+half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);
+half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);
+half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
+#endif //cl_khr_fp16
+
+// Image read functions for read_write images
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);
+int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);
+uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);
+
+float4 __purefn __ovld read_imagef(read_write image1d_buffer_t image, int coord);
+int4 __purefn __ovld read_imagei(read_write image1d_buffer_t image, int coord);
+uint4 __purefn __ovld read_imageui(read_write image1d_buffer_t image, int coord);
+
+float4 __purefn __ovld read_imagef(read_write image1d_array_t image, int2 coord);
+int4 __purefn __ovld read_imagei(read_write image1d_array_t image, int2 coord);
+uint4 __purefn __ovld read_imageui(read_write image1d_array_t image, int2 coord);
+
+float4 __purefn __ovld read_imagef(read_write image2d_t image, int2 coord);
+int4 __purefn __ovld read_imagei(read_write image2d_t image, int2 coord);
+uint4 __purefn __ovld read_imageui(read_write image2d_t image, int2 coord);
+
+float4 __purefn __ovld read_imagef(read_write image2d_array_t image, int4 coord);
+int4 __purefn __ovld read_imagei(read_write image2d_array_t image, int4 coord);
+uint4 __purefn __ovld read_imageui(read_write image2d_array_t image, int4 coord);
+
+float4 __purefn __ovld read_imagef(read_write image3d_t image, int4 coord);
+int4 __purefn __ovld read_imagei(read_write image3d_t image, int4 coord);
+uint4 __purefn __ovld read_imageui(read_write image3d_t image, int4 coord);
+
+#ifdef cl_khr_depth_images
+float __purefn __ovld read_imagef(read_write image2d_depth_t image, int2 coord);
+float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, int4 coord);
+#endif //cl_khr_depth_images
+
+#if defined(cl_khr_gl_msaa_sharing)
+float4 __purefn __ovld read_imagef(read_write image2d_msaa_t image, int2 coord, int sample);
+int4 __purefn __ovld read_imagei(read_write image2d_msaa_t image, int2 coord, int sample);
+uint4 __purefn __ovld read_imageui(read_write image2d_msaa_t image, int2 coord, int sample);
+
+float4 __purefn __ovld read_imagef(read_write image2d_array_msaa_t image, int4 coord, int sample);
+int4 __purefn __ovld read_imagei(read_write image2d_array_msaa_t image, int4 coord, int sample);
+uint4 __purefn __ovld read_imageui(read_write image2d_array_msaa_t image, int4 coord, int sample);
+
+float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 coord, int sample);
+float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);
+#endif //cl_khr_gl_msaa_sharing
+
+#ifdef cl_khr_mipmap_image
+float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
+int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
+uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
+int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);
+int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);
+
+float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
+int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);
+
+float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
+int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
+uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);
+
+float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);
+int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);
+uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);
+
+float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);
+int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);
+uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);
+
+float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+
+float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);
+
+float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+
+float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);
+
+float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
+int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
+uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
+
+#endif //cl_khr_mipmap_image
+
+// Image read functions returning half4 type
+#ifdef cl_khr_fp16
+half4 __purefn __ovld read_imageh(read_write image1d_t image, int coord);
+half4 __purefn __ovld read_imageh(read_write image2d_t image, int2 coord);
+half4 __purefn __ovld read_imageh(read_write image3d_t image, int4 coord);
+half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);
+half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);
+half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
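+
+// Illustrative (OpenCL 2.0): in-place update through a read_write image.
+// A write only becomes visible to a subsequent read by the same work-item
+// after an image fence; img is a hypothetical read_write image2d_t.
+//
+//   int2 p = (int2)(get_global_id(0), get_global_id(1));
+//   float4 t = read_imagef(img, p);
+//   write_imagef(img, p, t * 0.5f);
+//   atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_acq_rel,
+//                          memory_scope_work_item);
+//   t = read_imagef(img, p);                     // observes the new value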
+
+/**
+ * Write color value to location specified by coordinate
+ * (coord.x, coord.y) in the 2D image object specified by image.
+ * (coord.x, coord.y) are considered to be unnormalized coordinates
+ * and must be in the range 0 ... image width - 1, and 0
+ * ... image height - 1.
+ *
+ * Write color value to location specified by coordinate
+ * (coord.x, coord.y) in the 2D image object specified by index
+ * (coord.z) of the 2D image array object image_array.
+ * (coord.x, coord.y) are considered to be unnormalized
+ * coordinates and must be in the range 0 ... image width - 1,
+ * and 0 ... image height - 1.
+ *
+ * Write color value to location specified by coordinate
+ * (coord) in the 1D image (buffer) object specified by image.
+ * coord is considered to be unnormalized coordinates
+ * and must be in the range 0 ... image width - 1.
+ *
+ * Write color value to location specified by coordinate
+ * (coord.x) in the 1D image object specified by index
+ * (coord.y) of the 1D image array object image_array.
+ * coord.x is considered to be an unnormalized coordinate
+ * and must be in the range 0 ... image width - 1.
+ *
+ * Write color value to location specified by coordinate
+ * (coord.x, coord.y, coord.z) in the 3D image object specified by image.
+ * coord.x & coord.y are considered to be unnormalized coordinates
+ * and must be in the range 0 ... image width - 1, and 0
+ * ... image height - 1.
+ *
+ * For mipmap images, use mip-level specified by lod.
+ *
+ * Appropriate data format conversion to the specified
+ * image format is done before writing the color value.
+ *
+ * write_imagef can only be used with image objects
+ * created with image_channel_data_type set to one of
+ * the pre-defined packed formats or set to
+ * CL_SNORM_INT8, CL_UNORM_INT8,
+ * CL_SNORM_INT16, CL_UNORM_INT16,
+ * CL_HALF_FLOAT or CL_FLOAT. Appropriate data
+ * format conversion will be done to convert channel
+ * data from a floating-point value to actual data format
+ * in which the channels are stored.
+ *
+ * write_imagei can only be used with image objects
+ * created with image_channel_data_type set to one of
+ * the following values:
+ * CL_SIGNED_INT8,
+ * CL_SIGNED_INT16 and
+ * CL_SIGNED_INT32.
+ *
+ * write_imageui can only be used with image objects
+ * created with image_channel_data_type set to one of
+ * the following values:
+ * CL_UNSIGNED_INT8,
+ * CL_UNSIGNED_INT16 and
+ * CL_UNSIGNED_INT32.
+ *
+ * The behavior of write_imagef, write_imagei and
+ * write_imageui for image objects created with
+ * image_channel_data_type values not specified in
+ * the description above, or with coordinate values
+ * that are not in the range (0 ... image width - 1,
+ * 0 ... image height - 1), is undefined.
+ */
+void __ovld write_imagef(write_only image2d_t image, int2 coord, float4 color);
+void __ovld write_imagei(write_only image2d_t image, int2 coord, int4 color);
+void __ovld write_imageui(write_only image2d_t image, int2 coord, uint4 color);
+
+void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, float4 color);
+void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int4 color);
+void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, uint4 color);
+
+void __ovld write_imagef(write_only image1d_t image, int coord, float4 color);
+void __ovld write_imagei(write_only image1d_t image, int coord, int4 color);
+void __ovld write_imageui(write_only image1d_t image, int coord, uint4 color);
+
+void __ovld write_imagef(write_only image1d_buffer_t image, int coord, float4 color);
+void __ovld write_imagei(write_only image1d_buffer_t image, int coord, int4 color);
+void __ovld write_imageui(write_only image1d_buffer_t image, int coord, uint4 color);
+
+void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, float4 color);
+void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);
+void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);
+
+void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);
+void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);
+void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);
+
+#ifdef cl_khr_depth_images
+void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, float color);
+void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, float color);
+#endif //cl_khr_depth_images
+
+// OpenCL Extension v2.0 s9.18 - Mipmaps
+#ifdef cl_khr_mipmap_image
+void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);
+void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);
+void __ovld write_imageui(write_only image1d_t image, int coord, int lod, uint4 color);
+
+void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, int lod, float4 color);
+void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int lod, int4 color);
+void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, int lod, uint4 color);
+
+void __ovld write_imagef(write_only image2d_t image, int2 coord, int lod, float4 color);
+void __ovld write_imagei(write_only image2d_t image, int2 coord, int lod, int4 color);
+void __ovld write_imageui(write_only image2d_t image, int2 coord, int lod, uint4 color);
+
+void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, int lod, float4 color);
+void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int lod, int4 color);
+void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, int lod, uint4 color);
+
+void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float color);
+void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float color);
+
+void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);
+void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);
+void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);
+#endif //cl_khr_mipmap_image
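+
+// Illustrative (cl_khr_mipmap_image): writing texel (x, y) of mip level 1;
+// the coordinate must lie within the dimensions of that level.
+//   write_imagef(img, (int2)(x, y), 1, (float4)(0.0f, 0.0f, 0.0f, 1.0f));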
+
+// Image write functions for half4 type
+#ifdef cl_khr_fp16
+void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);
+void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);
+void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);
+void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);
+void __ovld write_imageh(write_only image2d_array_t image, int4 coord, half4 color);
+void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 color);
+#endif //cl_khr_fp16
+
+// Image write functions for read_write images
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);
+void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);
+void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);
+
+void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, float4 color);
+void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int4 color);
+void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, uint4 color);
+
+void __ovld write_imagef(read_write image1d_t image, int coord, float4 color);
+void __ovld write_imagei(read_write image1d_t image, int coord, int4 color);
+void __ovld write_imageui(read_write image1d_t image, int coord, uint4 color);
+
+void __ovld write_imagef(read_write image1d_buffer_t image, int coord, float4 color);
+void __ovld write_imagei(read_write image1d_buffer_t image, int coord, int4 color);
+void __ovld write_imageui(read_write image1d_buffer_t image, int coord, uint4 color);
+
+void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, float4 color);
+void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);
+void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);
+
+void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);
+void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);
+void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);
+
+#ifdef cl_khr_depth_images
+void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float color);
+void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);
+#endif //cl_khr_depth_images
+
+#ifdef cl_khr_mipmap_image
+void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);
+void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);
+void __ovld write_imageui(read_write image1d_t image, int coord, int lod, uint4 color);
+
+void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, int lod, float4 color);
+void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int lod, int4 color);
+void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, int lod, uint4 color);
+
+void __ovld write_imagef(read_write image2d_t image, int2 coord, int lod, float4 color);
+void __ovld write_imagei(read_write image2d_t image, int2 coord, int lod, int4 color);
+void __ovld write_imageui(read_write image2d_t image, int2 coord, int lod, uint4 color);
+
+void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, int lod, float4 color);
+void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int lod, int4 color);
+void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, int lod, uint4 color);
+
+void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);
+void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);
+
+void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);
+void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);
+void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);
+#endif //cl_khr_mipmap_image
+
+// Image write functions for half4 type
+#ifdef cl_khr_fp16
+void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);
+void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);
+void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);
+void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);
+void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);
+void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);
+#endif //cl_khr_fp16
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+// Note: In OpenCL v1.0/1.1/1.2, the image argument of the image query builtin
+// functions has no access qualifier and therefore defaults to read_only. Image
+// query builtin functions with a write_only image argument should also be declared.
+
+/**
+ * Return the image width in pixels.
+ */
+int __ovld __cnfn get_image_width(read_only image1d_t image);
+int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);
+int __ovld __cnfn get_image_width(read_only image2d_t image);
+int __ovld __cnfn get_image_width(read_only image3d_t image);
+int __ovld __cnfn get_image_width(read_only image1d_array_t image);
+int __ovld __cnfn get_image_width(read_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_width(read_only image2d_depth_t image);
+int __ovld __cnfn get_image_width(read_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_width(read_only image2d_msaa_t image);
+int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+int __ovld __cnfn get_image_width(write_only image1d_t image);
+int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);
+int __ovld __cnfn get_image_width(write_only image2d_t image);
+int __ovld __cnfn get_image_width(write_only image3d_t image);
+int __ovld __cnfn get_image_width(write_only image1d_array_t image);
+int __ovld __cnfn get_image_width(write_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_width(write_only image2d_depth_t image);
+int __ovld __cnfn get_image_width(write_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_width(write_only image2d_msaa_t image);
+int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld __cnfn get_image_width(read_write image1d_t image);
+int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);
+int __ovld __cnfn get_image_width(read_write image2d_t image);
+int __ovld __cnfn get_image_width(read_write image3d_t image);
+int __ovld __cnfn get_image_width(read_write image1d_array_t image);
+int __ovld __cnfn get_image_width(read_write image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_width(read_write image2d_depth_t image);
+int __ovld __cnfn get_image_width(read_write image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_width(read_write image2d_msaa_t image);
+int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);
+int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
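+
+// Illustrative usage sketch (not part of the header; the kernel below is
+// hypothetical):
+//   kernel void copy_row(read_only image2d_t src, global float4 *dst) {
+//     int w = get_image_width(src);
+//     for (int x = 0; x < w; ++x)
+//       dst[x] = read_imagef(src, (int2)(x, 0));  // samplerless read
+//   }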
+
+/**
+ * Return the image height in pixels.
+ */
+int __ovld __cnfn get_image_height(read_only image2d_t image);
+int __ovld __cnfn get_image_height(read_only image3d_t image);
+int __ovld __cnfn get_image_height(read_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_height(read_only image2d_depth_t image);
+int __ovld __cnfn get_image_height(read_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_height(read_only image2d_msaa_t image);
+int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+int __ovld __cnfn get_image_height(write_only image2d_t image);
+int __ovld __cnfn get_image_height(write_only image3d_t image);
+int __ovld __cnfn get_image_height(write_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_height(write_only image2d_depth_t image);
+int __ovld __cnfn get_image_height(write_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_height(write_only image2d_msaa_t image);
+int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld __cnfn get_image_height(read_write image2d_t image);
+int __ovld __cnfn get_image_height(read_write image3d_t image);
+int __ovld __cnfn get_image_height(read_write image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_height(read_write image2d_depth_t image);
+int __ovld __cnfn get_image_height(read_write image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_height(read_write image2d_msaa_t image);
+int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);
+int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+/**
+ * Return the image depth in pixels.
+ */
+int __ovld __cnfn get_image_depth(read_only image3d_t image);
+
+int __ovld __cnfn get_image_depth(write_only image3d_t image);
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld __cnfn get_image_depth(read_write image3d_t image);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+// OpenCL Extension v2.0 s9.18 - Mipmaps
+#ifdef cl_khr_mipmap_image
+/**
+ * Return the number of mip levels of the image.
+ */
+
+int __ovld get_image_num_mip_levels(read_only image1d_t image);
+int __ovld get_image_num_mip_levels(read_only image2d_t image);
+int __ovld get_image_num_mip_levels(read_only image3d_t image);
+
+int __ovld get_image_num_mip_levels(write_only image1d_t image);
+int __ovld get_image_num_mip_levels(write_only image2d_t image);
+int __ovld get_image_num_mip_levels(write_only image3d_t image);
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld get_image_num_mip_levels(read_write image1d_t image);
+int __ovld get_image_num_mip_levels(read_write image2d_t image);
+int __ovld get_image_num_mip_levels(read_write image3d_t image);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+int __ovld get_image_num_mip_levels(read_only image1d_array_t image);
+int __ovld get_image_num_mip_levels(read_only image2d_array_t image);
+int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t image);
+int __ovld get_image_num_mip_levels(read_only image2d_depth_t image);
+
+int __ovld get_image_num_mip_levels(write_only image1d_array_t image);
+int __ovld get_image_num_mip_levels(write_only image2d_array_t image);
+int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);
+int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld get_image_num_mip_levels(read_write image1d_array_t image);
+int __ovld get_image_num_mip_levels(read_write image2d_array_t image);
+int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);
+int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+#endif //cl_khr_mipmap_image
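+
+// Illustrative sketch (requires cl_khr_mipmap_image; the kernel below is
+// hypothetical):
+//   kernel void probe_lods(read_only image2d_t img, global int *out) {
+//     *out = get_image_num_mip_levels(img);
+//   }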
+
+/**
+ * Return the channel data type. Valid values are:
+ * CLK_SNORM_INT8
+ * CLK_SNORM_INT16
+ * CLK_UNORM_INT8
+ * CLK_UNORM_INT16
+ * CLK_UNORM_SHORT_565
+ * CLK_UNORM_SHORT_555
+ * CLK_UNORM_INT_101010
+ * CLK_SIGNED_INT8
+ * CLK_SIGNED_INT16
+ * CLK_SIGNED_INT32
+ * CLK_UNSIGNED_INT8
+ * CLK_UNSIGNED_INT16
+ * CLK_UNSIGNED_INT32
+ * CLK_HALF_FLOAT
+ * CLK_FLOAT
+ * CLK_UNORM_INT24
+ */
+
+//
+// Channel data type.
+//
+#define CLK_SNORM_INT8 0x10D0
+#define CLK_SNORM_INT16 0x10D1
+#define CLK_UNORM_INT8 0x10D2
+#define CLK_UNORM_INT16 0x10D3
+#define CLK_UNORM_SHORT_565 0x10D4
+#define CLK_UNORM_SHORT_555 0x10D5
+#define CLK_UNORM_INT_101010 0x10D6
+#define CLK_SIGNED_INT8 0x10D7
+#define CLK_SIGNED_INT16 0x10D8
+#define CLK_SIGNED_INT32 0x10D9
+#define CLK_UNSIGNED_INT8 0x10DA
+#define CLK_UNSIGNED_INT16 0x10DB
+#define CLK_UNSIGNED_INT32 0x10DC
+#define CLK_HALF_FLOAT 0x10DD
+#define CLK_FLOAT 0x10DE
+#define CLK_UNORM_INT24 0x10DF
+
+int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image3d_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image3d_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);
+int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
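+
+// Illustrative sketch (hypothetical kernel): branching on the queried channel
+// data type, using the CLK_* macros defined above.
+//   kernel void is_float_image(read_only image2d_t img, global int *out) {
+//     *out = (get_image_channel_data_type(img) == CLK_FLOAT);
+//   }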
+
+/**
+ * Return the image channel order. Valid values are:
+ * CLK_A
+ * CLK_R
+ * CLK_Rx
+ * CLK_RG
+ * CLK_RGx
+ * CLK_RA
+ * CLK_RGB
+ * CLK_RGBx
+ * CLK_RGBA
+ * CLK_ARGB
+ * CLK_BGRA
+ * CLK_INTENSITY
+ * CLK_LUMINANCE
+ * CLK_DEPTH
+ * CLK_DEPTH_STENCIL
+ * CLK_sRGB (OpenCL 2.0 or above)
+ * CLK_sRGBx (OpenCL 2.0 or above)
+ * CLK_sRGBA (OpenCL 2.0 or above)
+ * CLK_sBGRA (OpenCL 2.0 or above)
+ */
+// Channel order. Numbering must be aligned with cl_channel_order in cl.h.
+//
+#define CLK_R 0x10B0
+#define CLK_A 0x10B1
+#define CLK_RG 0x10B2
+#define CLK_RA 0x10B3
+#define CLK_RGB 0x10B4
+#define CLK_RGBA 0x10B5
+#define CLK_BGRA 0x10B6
+#define CLK_ARGB 0x10B7
+#define CLK_INTENSITY 0x10B8
+#define CLK_LUMINANCE 0x10B9
+#define CLK_Rx 0x10BA
+#define CLK_RGx 0x10BB
+#define CLK_RGBx 0x10BC
+#define CLK_DEPTH 0x10BD
+#define CLK_DEPTH_STENCIL 0x10BE
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#define CLK_sRGB 0x10BF
+#define CLK_sRGBx 0x10C0
+#define CLK_sRGBA 0x10C1
+#define CLK_sBGRA 0x10C2
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+int __ovld __cnfn get_image_channel_order(read_only image1d_t image);
+int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);
+int __ovld __cnfn get_image_channel_order(read_only image2d_t image);
+int __ovld __cnfn get_image_channel_order(read_only image3d_t image);
+int __ovld __cnfn get_image_channel_order(read_only image1d_array_t image);
+int __ovld __cnfn get_image_channel_order(read_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t image);
+int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t image);
+int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+int __ovld __cnfn get_image_channel_order(write_only image1d_t image);
+int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);
+int __ovld __cnfn get_image_channel_order(write_only image2d_t image);
+int __ovld __cnfn get_image_channel_order(write_only image3d_t image);
+int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);
+int __ovld __cnfn get_image_channel_order(write_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t image);
+int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t image);
+int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image);
+int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld __cnfn get_image_channel_order(read_write image1d_t image);
+int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);
+int __ovld __cnfn get_image_channel_order(read_write image2d_t image);
+int __ovld __cnfn get_image_channel_order(read_write image3d_t image);
+int __ovld __cnfn get_image_channel_order(read_write image1d_array_t image);
+int __ovld __cnfn get_image_channel_order(read_write image2d_array_t image);
+#ifdef cl_khr_depth_images
+int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t image);
+int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t image);
+int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image);
+int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);
+int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
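+
+// Illustrative sketch (hypothetical kernel): the channel order query can be
+// used the same way, e.g. to detect a BGRA layout.
+//   kernel void is_bgra_image(read_only image2d_t img, global int *out) {
+//     *out = (get_image_channel_order(img) == CLK_BGRA);
+//   }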
+
+/**
+ * Return the 2D image width and height as an int2
+ * type. The width is returned in the x component, and
+ * the height in the y component.
+ */
+int2 __ovld __cnfn get_image_dim(read_only image2d_t image);
+int2 __ovld __cnfn get_image_dim(read_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t image);
+int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t image);
+int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t image);
+int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t image);
+int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+int2 __ovld __cnfn get_image_dim(write_only image2d_t image);
+int2 __ovld __cnfn get_image_dim(write_only image2d_array_t image);
+#ifdef cl_khr_depth_images
+int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t image);
+int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t image);
+int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t image);
+int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);
+int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int2 __ovld __cnfn get_image_dim(read_write image2d_t image);
+int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);
+#ifdef cl_khr_depth_images
+int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t image);
+int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t image);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t image);
+int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);
+int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);
+int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);
+#endif //cl_khr_gl_msaa_sharing
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+/**
+ * Return the 3D image width, height, and depth as an
+ * int4 type. The width is returned in the x component,
+ * the height in the y component, the depth in the z
+ * component, and 0 in the w component.
+ */
+int4 __ovld __cnfn get_image_dim(read_only image3d_t image);
+int4 __ovld __cnfn get_image_dim(write_only image3d_t image);
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int4 __ovld __cnfn get_image_dim(read_write image3d_t image);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
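+
+// Illustrative sketch (hypothetical kernel): get_image_dim packs the
+// individual width/height/depth queries into one vector.
+//   kernel void dims3d(read_only image3d_t img, global int4 *out) {
+//     *out = get_image_dim(img);  // (width, height, depth, 0)
+//   }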
+
+/**
+ * Return the image array size.
+ */
+
+size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t image_array);
+size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t image_array);
+#ifdef cl_khr_depth_images
+size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t image_array);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t image_array);
+size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t image_array);
+#endif //cl_khr_gl_msaa_sharing
+
+size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t image_array);
+size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t image_array);
+#ifdef cl_khr_depth_images
+size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t image_array);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_array);
+size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);
+#endif //cl_khr_gl_msaa_sharing
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);
+size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);
+#ifdef cl_khr_depth_images
+size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image_array);
+#endif //cl_khr_depth_images
+#if defined(cl_khr_gl_msaa_sharing)
+size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);
+size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);
+#endif //cl_khr_gl_msaa_sharing
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
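+
+// Illustrative sketch (hypothetical kernel): iterating over every slice of a
+// 2D image array.
+//   kernel void first_texels(read_only image2d_array_t arr, global float4 *dst) {
+//     for (size_t s = 0; s < get_image_array_size(arr); ++s)
+//       dst[s] = read_imagef(arr, (int4)(0, 0, (int)s, 0));
+//   }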
+
+/**
+ * Return the number of samples associated with the image.
+ */
+#if defined(cl_khr_gl_msaa_sharing)
+int __ovld get_image_num_samples(read_only image2d_msaa_t image);
+int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);
+int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);
+int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);
+
+int __ovld get_image_num_samples(write_only image2d_msaa_t image);
+int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);
+int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);
+int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld get_image_num_samples(read_write image2d_msaa_t image);
+int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);
+int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);
+int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_gl_msaa_sharing
+
+// OpenCL v2.0 s6.13.15 - Work-group Functions
+
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+int __ovld work_group_all(int predicate);
+int __ovld work_group_any(int predicate);
+
+#ifdef cl_khr_fp16
+half __ovld work_group_broadcast(half a, size_t local_id);
+half __ovld work_group_broadcast(half a, size_t x, size_t y);
+half __ovld work_group_broadcast(half a, size_t x, size_t y, size_t z);
+#endif //cl_khr_fp16
+int __ovld work_group_broadcast(int a, size_t local_id);
+int __ovld work_group_broadcast(int a, size_t x, size_t y);
+int __ovld work_group_broadcast(int a, size_t x, size_t y, size_t z);
+uint __ovld work_group_broadcast(uint a, size_t local_id);
+uint __ovld work_group_broadcast(uint a, size_t x, size_t y);
+uint __ovld work_group_broadcast(uint a, size_t x, size_t y, size_t z);
+long __ovld work_group_broadcast(long a, size_t local_id);
+long __ovld work_group_broadcast(long a, size_t x, size_t y);
+long __ovld work_group_broadcast(long a, size_t x, size_t y, size_t z);
+ulong __ovld work_group_broadcast(ulong a, size_t local_id);
+ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y);
+ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
+float __ovld work_group_broadcast(float a, size_t local_id);
+float __ovld work_group_broadcast(float a, size_t x, size_t y);
+float __ovld work_group_broadcast(float a, size_t x, size_t y, size_t z);
+#ifdef cl_khr_fp64
+double __ovld work_group_broadcast(double a, size_t local_id);
+double __ovld work_group_broadcast(double a, size_t x, size_t y);
+double __ovld work_group_broadcast(double a, size_t x, size_t y, size_t z);
+#endif //cl_khr_fp64
+
+#ifdef cl_khr_fp16
+half __ovld work_group_reduce_add(half x);
+half __ovld work_group_reduce_min(half x);
+half __ovld work_group_reduce_max(half x);
+half __ovld work_group_scan_exclusive_add(half x);
+half __ovld work_group_scan_exclusive_min(half x);
+half __ovld work_group_scan_exclusive_max(half x);
+half __ovld work_group_scan_inclusive_add(half x);
+half __ovld work_group_scan_inclusive_min(half x);
+half __ovld work_group_scan_inclusive_max(half x);
+#endif //cl_khr_fp16
+int __ovld work_group_reduce_add(int x);
+int __ovld work_group_reduce_min(int x);
+int __ovld work_group_reduce_max(int x);
+int __ovld work_group_scan_exclusive_add(int x);
+int __ovld work_group_scan_exclusive_min(int x);
+int __ovld work_group_scan_exclusive_max(int x);
+int __ovld work_group_scan_inclusive_add(int x);
+int __ovld work_group_scan_inclusive_min(int x);
+int __ovld work_group_scan_inclusive_max(int x);
+uint __ovld work_group_reduce_add(uint x);
+uint __ovld work_group_reduce_min(uint x);
+uint __ovld work_group_reduce_max(uint x);
+uint __ovld work_group_scan_exclusive_add(uint x);
+uint __ovld work_group_scan_exclusive_min(uint x);
+uint __ovld work_group_scan_exclusive_max(uint x);
+uint __ovld work_group_scan_inclusive_add(uint x);
+uint __ovld work_group_scan_inclusive_min(uint x);
+uint __ovld work_group_scan_inclusive_max(uint x);
+long __ovld work_group_reduce_add(long x);
+long __ovld work_group_reduce_min(long x);
+long __ovld work_group_reduce_max(long x);
+long __ovld work_group_scan_exclusive_add(long x);
+long __ovld work_group_scan_exclusive_min(long x);
+long __ovld work_group_scan_exclusive_max(long x);
+long __ovld work_group_scan_inclusive_add(long x);
+long __ovld work_group_scan_inclusive_min(long x);
+long __ovld work_group_scan_inclusive_max(long x);
+ulong __ovld work_group_reduce_add(ulong x);
+ulong __ovld work_group_reduce_min(ulong x);
+ulong __ovld work_group_reduce_max(ulong x);
+ulong __ovld work_group_scan_exclusive_add(ulong x);
+ulong __ovld work_group_scan_exclusive_min(ulong x);
+ulong __ovld work_group_scan_exclusive_max(ulong x);
+ulong __ovld work_group_scan_inclusive_add(ulong x);
+ulong __ovld work_group_scan_inclusive_min(ulong x);
+ulong __ovld work_group_scan_inclusive_max(ulong x);
+float __ovld work_group_reduce_add(float x);
+float __ovld work_group_reduce_min(float x);
+float __ovld work_group_reduce_max(float x);
+float __ovld work_group_scan_exclusive_add(float x);
+float __ovld work_group_scan_exclusive_min(float x);
+float __ovld work_group_scan_exclusive_max(float x);
+float __ovld work_group_scan_inclusive_add(float x);
+float __ovld work_group_scan_inclusive_min(float x);
+float __ovld work_group_scan_inclusive_max(float x);
+#ifdef cl_khr_fp64
+double __ovld work_group_reduce_add(double x);
+double __ovld work_group_reduce_min(double x);
+double __ovld work_group_reduce_max(double x);
+double __ovld work_group_scan_exclusive_add(double x);
+double __ovld work_group_scan_exclusive_min(double x);
+double __ovld work_group_scan_exclusive_max(double x);
+double __ovld work_group_scan_inclusive_add(double x);
+double __ovld work_group_scan_inclusive_min(double x);
+double __ovld work_group_scan_inclusive_max(double x);
+#endif //cl_khr_fp64
+
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
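+
+// Illustrative sketch (hypothetical kernel): a per-work-group sum. All
+// work-items in the group must reach the call, and each of them receives the
+// reduced value.
+//   kernel void group_sum(global const float *in, global float *out) {
+//     float s = work_group_reduce_add(in[get_global_id(0)]);
+//     if (get_local_id(0) == 0)
+//       out[get_group_id(0)] = s;
+//   }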
+
+// OpenCL v2.0 s6.13.16 - Pipe Functions
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#define PIPE_RESERVE_ID_VALID_BIT (1U << 30)
+#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
+bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
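+
+// Illustrative sketch (hypothetical fragment; reserve_read_pipe, read_pipe and
+// commit_read_pipe are handled as compiler builtins rather than declared here):
+//   reserve_id_t rid = reserve_read_pipe(p, 1);
+//   if (is_valid_reserve_id(rid)) {
+//     read_pipe(p, rid, 0, &value);
+//     commit_read_pipe(p, rid);
+//   }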
+
+
+// OpenCL v2.0 s6.13.17 - Enqueue Kernels
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+#define CL_COMPLETE 0x0
+#define CL_RUNNING 0x1
+#define CL_SUBMITTED 0x2
+#define CL_QUEUED 0x3
+
+#define CLK_SUCCESS 0
+#define CLK_ENQUEUE_FAILURE -101
+#define CLK_INVALID_QUEUE -102
+#define CLK_INVALID_NDRANGE -160
+#define CLK_INVALID_EVENT_WAIT_LIST -57
+#define CLK_DEVICE_QUEUE_FULL -161
+#define CLK_INVALID_ARG_SIZE -51
+#define CLK_EVENT_ALLOCATION_FAILURE -100
+#define CLK_OUT_OF_RESOURCES -5
+
+#define CLK_NULL_QUEUE 0
+#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))
+
+// execution model related definitions
+#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0
+#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1
+#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2
+
+typedef int kernel_enqueue_flags_t;
+typedef int clk_profiling_info;
+
+// Profiling info name (see capture_event_profiling_info)
+#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
+
+#define MAX_WORK_DIM 3
+
+// TODO: Remove the definition of ndrange_t as an opaque type in Clang and add
+// back the following ndrange_t definition.
+#if 0
+typedef struct {
+ unsigned int workDimension;
+ size_t globalWorkOffset[MAX_WORK_DIM];
+ size_t globalWorkSize[MAX_WORK_DIM];
+ size_t localWorkSize[MAX_WORK_DIM];
+} ndrange_t;
+#endif
+
+ndrange_t __ovld ndrange_1D(size_t);
+ndrange_t __ovld ndrange_1D(size_t, size_t);
+ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);
+
+ndrange_t __ovld ndrange_2D(const size_t[2]);
+ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]);
+ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]);
+
+ndrange_t __ovld ndrange_3D(const size_t[3]);
+ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]);
+ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]);
+
+int __ovld enqueue_marker(queue_t, uint, const __private clk_event_t*, __private clk_event_t*);
+
+void __ovld retain_event(clk_event_t);
+
+void __ovld release_event(clk_event_t);
+
+clk_event_t __ovld create_user_event(void);
+
+void __ovld set_user_event_status(clk_event_t e, int state);
+
+bool __ovld is_valid_event(clk_event_t event);
+
+void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);
+
+queue_t __ovld get_default_queue(void);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
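+
+// Illustrative sketch (hypothetical parent kernel; enqueue_kernel is handled
+// as a compiler builtin rather than declared in this header):
+//   kernel void parent(global int *buf) {
+//     ndrange_t r = ndrange_1D(64);
+//     enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_NO_WAIT, r,
+//                    ^{ buf[get_global_id(0)] = 0; });
+//   }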
+
+// OpenCL Extension v2.0 s9.17 - Sub-groups
+
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+// Shared Sub Group Functions
+uint __ovld get_sub_group_size(void);
+uint __ovld get_max_sub_group_size(void);
+uint __ovld get_num_sub_groups(void);
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+uint __ovld get_enqueued_num_sub_groups(void);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+uint __ovld get_sub_group_id(void);
+uint __ovld get_sub_group_local_id(void);
+
+void __ovld sub_group_barrier(cl_mem_fence_flags flags);
+#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
+void __ovld sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
+#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+
+int __ovld sub_group_all(int predicate);
+int __ovld sub_group_any(int predicate);
+
+int __ovld sub_group_broadcast(int x, uint sub_group_local_id);
+uint __ovld sub_group_broadcast(uint x, uint sub_group_local_id);
+long __ovld sub_group_broadcast(long x, uint sub_group_local_id);
+ulong __ovld sub_group_broadcast(ulong x, uint sub_group_local_id);
+float __ovld sub_group_broadcast(float x, uint sub_group_local_id);
+
+int __ovld sub_group_reduce_add(int x);
+uint __ovld sub_group_reduce_add(uint x);
+long __ovld sub_group_reduce_add(long x);
+ulong __ovld sub_group_reduce_add(ulong x);
+float __ovld sub_group_reduce_add(float x);
+int __ovld sub_group_reduce_min(int x);
+uint __ovld sub_group_reduce_min(uint x);
+long __ovld sub_group_reduce_min(long x);
+ulong __ovld sub_group_reduce_min(ulong x);
+float __ovld sub_group_reduce_min(float x);
+int __ovld sub_group_reduce_max(int x);
+uint __ovld sub_group_reduce_max(uint x);
+long __ovld sub_group_reduce_max(long x);
+ulong __ovld sub_group_reduce_max(ulong x);
+float __ovld sub_group_reduce_max(float x);
+
+int __ovld sub_group_scan_exclusive_add(int x);
+uint __ovld sub_group_scan_exclusive_add(uint x);
+long __ovld sub_group_scan_exclusive_add(long x);
+ulong __ovld sub_group_scan_exclusive_add(ulong x);
+float __ovld sub_group_scan_exclusive_add(float x);
+int __ovld sub_group_scan_exclusive_min(int x);
+uint __ovld sub_group_scan_exclusive_min(uint x);
+long __ovld sub_group_scan_exclusive_min(long x);
+ulong __ovld sub_group_scan_exclusive_min(ulong x);
+float __ovld sub_group_scan_exclusive_min(float x);
+int __ovld sub_group_scan_exclusive_max(int x);
+uint __ovld sub_group_scan_exclusive_max(uint x);
+long __ovld sub_group_scan_exclusive_max(long x);
+ulong __ovld sub_group_scan_exclusive_max(ulong x);
+float __ovld sub_group_scan_exclusive_max(float x);
+
+int __ovld sub_group_scan_inclusive_add(int x);
+uint __ovld sub_group_scan_inclusive_add(uint x);
+long __ovld sub_group_scan_inclusive_add(long x);
+ulong __ovld sub_group_scan_inclusive_add(ulong x);
+float __ovld sub_group_scan_inclusive_add(float x);
+int __ovld sub_group_scan_inclusive_min(int x);
+uint __ovld sub_group_scan_inclusive_min(uint x);
+long __ovld sub_group_scan_inclusive_min(long x);
+ulong __ovld sub_group_scan_inclusive_min(ulong x);
+float __ovld sub_group_scan_inclusive_min(float x);
+int __ovld sub_group_scan_inclusive_max(int x);
+uint __ovld sub_group_scan_inclusive_max(uint x);
+long __ovld sub_group_scan_inclusive_max(long x);
+ulong __ovld sub_group_scan_inclusive_max(ulong x);
+float __ovld sub_group_scan_inclusive_max(float x);
+
+#ifdef cl_khr_fp16
+half __ovld sub_group_broadcast(half x, uint sub_group_local_id);
+half __ovld sub_group_reduce_add(half x);
+half __ovld sub_group_reduce_min(half x);
+half __ovld sub_group_reduce_max(half x);
+half __ovld sub_group_scan_exclusive_add(half x);
+half __ovld sub_group_scan_exclusive_min(half x);
+half __ovld sub_group_scan_exclusive_max(half x);
+half __ovld sub_group_scan_inclusive_add(half x);
+half __ovld sub_group_scan_inclusive_min(half x);
+half __ovld sub_group_scan_inclusive_max(half x);
+#endif //cl_khr_fp16
+
+#ifdef cl_khr_fp64
+double __ovld sub_group_broadcast(double x, uint sub_group_local_id);
+double __ovld sub_group_reduce_add(double x);
+double __ovld sub_group_reduce_min(double x);
+double __ovld sub_group_reduce_max(double x);
+double __ovld sub_group_scan_exclusive_add(double x);
+double __ovld sub_group_scan_exclusive_min(double x);
+double __ovld sub_group_scan_exclusive_max(double x);
+double __ovld sub_group_scan_inclusive_add(double x);
+double __ovld sub_group_scan_inclusive_min(double x);
+double __ovld sub_group_scan_inclusive_max(double x);
+#endif //cl_khr_fp64
+
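+// Illustrative sketch (hypothetical kernel): an exclusive prefix sum within
+// each sub-group; work-item i receives the sum of the values of work-items
+// 0..i-1 in its sub-group.
+//   kernel void sg_scan(global const int *in, global int *out) {
+//     size_t i = get_global_id(0);
+//     out[i] = sub_group_scan_exclusive_add(in[i]);
+//   }
+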
+#endif //defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+
+// Disable any extensions we may have enabled previously.
+#pragma OPENCL EXTENSION all : disable
+
+#undef __cnfn
+#undef __ovld
+#endif //_OPENCL_H_
diff --git a/contrib/llvm/tools/clang/lib/Headers/pkuintrin.h b/contrib/llvm/tools/clang/lib/Headers/pkuintrin.h
index ad123481cf1e..9e5459450b62 100644
--- a/contrib/llvm/tools/clang/lib/Headers/pkuintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/pkuintrin.h
@@ -38,9 +38,9 @@ _rdpkru_u32(void)
}
static __inline__ void __DEFAULT_FN_ATTRS
-_wrpkru(unsigned int val)
+_wrpkru(unsigned int __val)
{
- return __builtin_ia32_wrpkru(val);
+ return __builtin_ia32_wrpkru(__val);
}
#undef __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
index 0ff940912483..5b1058069c44 100644
--- a/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/pmmintrin.h
@@ -27,68 +27,235 @@
#include <emmintrin.h>
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
+/// \brief Loads data from an unaligned memory location to elements in a 128-bit
+/// vector. If the address of the data is not 16-byte aligned, the
+/// instruction may read two adjacent aligned blocks of memory to retrieve
+/// the requested data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VLDDQU instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit integer vector containing integer values.
+/// \returns A 128-bit vector containing the moved values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_lddqu_si128(__m128i const *__p)
{
return (__m128i)__builtin_ia32_lddqu((char const *)__p);
}
+/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
+/// two 128-bit vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDSUBPS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the left source operand.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing the right source operand.
+/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
+/// differences of both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_addsub_ps(__m128 __a, __m128 __b)
{
- return __builtin_ia32_addsubps(__a, __b);
+ return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in two
+/// 128-bit vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHADDPS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
+/// both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_hadd_ps(__m128 __a, __m128 __b)
{
- return __builtin_ia32_haddps(__a, __b);
+ return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
}
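+
+// For illustration: _mm_hadd_ps({a0, a1, a2, a3}, {b0, b1, b2, b3}) returns
+// {a0+a1, a2+a3, b0+b1, b2+b3}.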
+/// \brief Horizontally subtracts the adjacent pairs of values contained in two
+/// 128-bit vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHSUBPS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The horizontal differences between the values are stored in the lower
+/// bits of the destination.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The horizontal differences between the values are stored in the upper
+/// bits of the destination.
+/// \returns A 128-bit vector of [4 x float] containing the horizontal
+/// differences of both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_hsub_ps(__m128 __a, __m128 __b)
{
- return __builtin_ia32_hsubps(__a, __b);
+ return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Moves and duplicates high-order (odd-indexed) values from a 128-bit
+/// vector of [4 x float] to float values stored in a 128-bit vector of
+/// [4 x float].
+/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of
+/// the destination.
+/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSHDUP instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
+/// values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movehdup_ps(__m128 __a)
{
- return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
}
+/// \brief Duplicates low-order (even-indexed) values from a 128-bit
+/// vector of [4 x float] to float values stored in a 128-bit vector of
+/// [4 x float].
+/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of
+/// the destination.
+/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSLDUP instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
+/// values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_moveldup_ps(__m128 __a)
{
- return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
}
+/// \brief Adds the even-indexed values and subtracts the odd-indexed values of
+/// two 128-bit vectors of [2 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDSUBPD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double] containing the left source operand.
+/// \param __b
+/// A 128-bit vector of [2 x double] containing the right source operand.
+/// \returns A 128-bit vector of [2 x double] containing the alternating sums
+/// and differences of both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_addsub_pd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_addsubpd(__a, __b);
+ return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
+/// \brief Horizontally adds the pairs of values contained in two 128-bit
+/// vectors of [2 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHADDPD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double] containing one of the source operands.
+/// The horizontal sum of the values is stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 128-bit vector of [2 x double] containing one of the source operands.
+/// The horizontal sum of the values is stored in the upper bits of the
+/// destination.
+/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
+/// both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_hadd_pd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_haddpd(__a, __b);
+ return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
}
+/// \brief Horizontally subtracts the pairs of values contained in two 128-bit
+/// vectors of [2 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VHSUBPD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double] containing one of the source operands.
+/// The horizontal difference of the values is stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 128-bit vector of [2 x double] containing one of the source operands.
+/// The horizontal difference of the values is stored in the upper bits of
+/// the destination.
+/// \returns A 128-bit vector of [2 x double] containing the horizontal
+/// differences of both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_hsub_pd(__m128d __a, __m128d __b)
{
- return __builtin_ia32_hsubpd(__a, __b);
+ return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
}
+/// \brief Moves and duplicates one double-precision value to double-precision
+/// values stored in a 128-bit vector of [2 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_loaddup_pd(double const * dp);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VMOVDDUP instruction.
+///
+/// \param dp
+/// A pointer to a double-precision value to be moved and duplicated.
+/// \returns A 128-bit vector of [2 x double] containing the moved and
+/// duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
+/// \brief Moves and duplicates the double-precision value in the lower bits of
+/// a 128-bit vector of [2 x double] to double-precision values stored in a
+/// 128-bit vector of [2 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVDDUP instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
+/// [127:64] and [63:0] of the destination.
+/// \returns A 128-bit vector of [2 x double] containing the moved and
+/// duplicated values.
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_movedup_pd(__m128d __a)
{
- return __builtin_shufflevector(__a, __a, 0, 0);
+ return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
}
#define _MM_DENORMALS_ZERO_ON (0x0040)
@@ -99,12 +266,40 @@ _mm_movedup_pd(__m128d __a)
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
+/// \brief Establishes a linear address memory range to be monitored and puts
+/// the processor in the monitor event pending state. Data stored in the
+/// monitored address range causes the processor to exit the pending state.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c MONITOR instruction.
+///
+/// \param __p
+/// The memory range to be monitored. The size of the range is determined by
+/// CPUID function 0000_0005h.
+/// \param __extensions
+/// Optional extensions for the monitoring state.
+/// \param __hints
+/// Optional hints for the monitoring state.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
{
__builtin_ia32_monitor((void *)__p, __extensions, __hints);
}
+/// \brief Used with the MONITOR instruction to wait while the processor is in
+/// the monitor event pending state. Data stored in the monitored address
+/// range causes the processor to exit the pending state.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c MWAIT instruction.
+///
+/// \param __extensions
+/// Optional extensions for the monitoring state, which may vary by
+/// processor.
+/// \param __hints
+/// Optional hints for the monitoring state, which may vary by processor.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mwait(unsigned __extensions, unsigned __hints)
{
diff --git a/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h b/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
index 6fcda65c7807..7e2f1670805f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/popcntintrin.h
@@ -27,12 +27,32 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))
+/// \brief Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POPCNT instruction.
+///
+/// \param __A
+/// An unsigned 32-bit integer operand.
+/// \returns A 32-bit integer containing the number of bits with value 1 in the
+/// source operand.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_popcnt_u32(unsigned int __A)
{
return __builtin_popcount(__A);
}
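+
+// For illustration: _mm_popcnt_u32(0xF0) == 4, since 0xF0 has four set bits.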
+/// \brief Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POPCNT instruction.
+///
+/// \param __A
+/// A signed 32-bit integer operand.
+/// \returns A 32-bit integer containing the number of bits with value 1 in the
+/// source operand.
static __inline__ int __DEFAULT_FN_ATTRS
_popcnt32(int __A)
{
@@ -40,12 +60,32 @@ _popcnt32(int __A)
}
#ifdef __x86_64__
+/// \brief Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POPCNT instruction.
+///
+/// \param __A
+/// An unsigned 64-bit integer operand.
+/// \returns A 64-bit integer containing the number of bits with value 1 in the
+/// source operand.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_popcnt_u64(unsigned long long __A)
{
return __builtin_popcountll(__A);
}
+/// \brief Counts the number of bits in the source operand having a value of 1.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c POPCNT instruction.
+///
+/// \param __A
+/// A signed 64-bit integer operand.
+/// \returns A 64-bit integer containing the number of bits with value 1 in the
+/// source operand.
static __inline__ long long __DEFAULT_FN_ATTRS
_popcnt64(long long __A)
{
diff --git a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
index 69ad07f42ad6..e48ab034f46f 100644
--- a/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/smmintrin.h
@@ -121,7 +121,7 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mullo_epi32 (__m128i __V1, __m128i __V2)
{
- return (__m128i) ((__v4si)__V1 * (__v4si)__V2);
+ return (__m128i) ((__v4su)__V1 * (__v4su)__V2);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -220,16 +220,16 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
#define _mm_insert_epi8(X, I, N) (__extension__ \
({ __v16qi __a = (__v16qi)(__m128i)(X); \
__a[(N) & 15] = (I); \
- __a;}))
+ (__m128i)__a;}))
#define _mm_insert_epi32(X, I, N) (__extension__ \
({ __v4si __a = (__v4si)(__m128i)(X); \
__a[(N) & 3] = (I); \
- __a;}))
+ (__m128i)__a;}))
#ifdef __x86_64__
#define _mm_insert_epi64(X, I, N) (__extension__ \
({ __v2di __a = (__v2di)(__m128i)(X); \
__a[(N) & 1] = (I); \
- __a;}))
+ (__m128i)__a;}))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
@@ -299,7 +299,6 @@ _mm_cvtepi8_epi64(__m128i __V)
{
/* This function always performs a signed extension, but __v16qi is a char
which may be signed or unsigned, so use __v16qs. */
- typedef signed char __v16qs __attribute__((__vector_size__(16)));
return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);
}
@@ -325,37 +324,37 @@ _mm_cvtepi32_epi64(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi16(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi32(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu8_epi64(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu16_epi32(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu16_epi64(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cvtepu32_epi64(__m128i __V)
{
- return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
+ return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);
}
/* SSE4 Pack with Unsigned Saturation. */
diff --git a/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
index 785961c6ab86..1d0d746a824a 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tbmintrin.h
@@ -36,57 +36,57 @@
(unsigned int)(b)))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blcfill_u32(unsigned int a)
+__blcfill_u32(unsigned int __a)
{
- return a & (a + 1);
+ return __a & (__a + 1);
}
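+
+// For illustration: __blcfill_u32 clears the trailing run of set bits, e.g.
+// __blcfill_u32(0xB) == (0xB & 0xC) == 0x8.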
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blci_u32(unsigned int a)
+__blci_u32(unsigned int __a)
{
- return a | ~(a + 1);
+ return __a | ~(__a + 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blcic_u32(unsigned int a)
+__blcic_u32(unsigned int __a)
{
- return ~a & (a + 1);
+ return ~__a & (__a + 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blcmsk_u32(unsigned int a)
+__blcmsk_u32(unsigned int __a)
{
- return a ^ (a + 1);
+ return __a ^ (__a + 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blcs_u32(unsigned int a)
+__blcs_u32(unsigned int __a)
{
- return a | (a + 1);
+ return __a | (__a + 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blsfill_u32(unsigned int a)
+__blsfill_u32(unsigned int __a)
{
- return a | (a - 1);
+ return __a | (__a - 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__blsic_u32(unsigned int a)
+__blsic_u32(unsigned int __a)
{
- return ~a | (a - 1);
+ return ~__a | (__a - 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__t1mskc_u32(unsigned int a)
+__t1mskc_u32(unsigned int __a)
{
- return ~a | (a + 1);
+ return ~__a | (__a + 1);
}
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-__tzmsk_u32(unsigned int a)
+__tzmsk_u32(unsigned int __a)
{
- return ~a & (a - 1);
+ return ~__a & (__a - 1);
}
#ifdef __x86_64__
@@ -95,57 +95,57 @@ __tzmsk_u32(unsigned int a)
(unsigned long long)(b)))
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blcfill_u64(unsigned long long a)
+__blcfill_u64(unsigned long long __a)
{
- return a & (a + 1);
+ return __a & (__a + 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blci_u64(unsigned long long a)
+__blci_u64(unsigned long long __a)
{
- return a | ~(a + 1);
+ return __a | ~(__a + 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blcic_u64(unsigned long long a)
+__blcic_u64(unsigned long long __a)
{
- return ~a & (a + 1);
+ return ~__a & (__a + 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blcmsk_u64(unsigned long long a)
+__blcmsk_u64(unsigned long long __a)
{
- return a ^ (a + 1);
+ return __a ^ (__a + 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blcs_u64(unsigned long long a)
+__blcs_u64(unsigned long long __a)
{
- return a | (a + 1);
+ return __a | (__a + 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blsfill_u64(unsigned long long a)
+__blsfill_u64(unsigned long long __a)
{
- return a | (a - 1);
+ return __a | (__a - 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__blsic_u64(unsigned long long a)
+__blsic_u64(unsigned long long __a)
{
- return ~a | (a - 1);
+ return ~__a | (__a - 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__t1mskc_u64(unsigned long long a)
+__t1mskc_u64(unsigned long long __a)
{
- return ~a | (a + 1);
+ return ~__a | (__a + 1);
}
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-__tzmsk_u64(unsigned long long a)
+__tzmsk_u64(unsigned long long __a)
{
- return ~a & (a - 1);
+ return ~__a & (__a - 1);
}
#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
index 0002890c1393..a72796ba4a68 100644
--- a/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/tmmintrin.h
@@ -29,187 +29,739 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
+/// \brief Computes the absolute value of each of the packed 8-bit signed
+/// integers in the source operand and stores the 8-bit unsigned integer
+/// results in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PABSB instruction.
+///
+/// \param __a
+/// A 64-bit vector of [8 x i8].
+/// \returns A 64-bit integer vector containing the absolute values of the
+/// elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi8(__m64 __a)
{
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
}
+/// \brief Computes the absolute value of each of the packed 8-bit signed
+/// integers in the source operand and stores the 8-bit unsigned integer
+/// results in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPABSB instruction.
+///
+/// \param __a
+/// A 128-bit vector of [16 x i8].
+/// \returns A 128-bit integer vector containing the absolute values of the
+/// elements in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi8(__m128i __a)
{
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
}
+/// \brief Computes the absolute value of each of the packed 16-bit signed
+/// integers in the source operand and stores the 16-bit unsigned integer
+/// results in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PABSW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16].
+/// \returns A 64-bit integer vector containing the absolute values of the
+/// elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi16(__m64 __a)
{
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
}
+/// \brief Computes the absolute value of each of the packed 16-bit signed
+/// integers in the source operand and stores the 16-bit unsigned integer
+/// results in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPABSW instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x i16].
+/// \returns A 128-bit integer vector containing the absolute values of the
+/// elements in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi16(__m128i __a)
{
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
}
+/// \brief Computes the absolute value of each of the packed 32-bit signed
+/// integers in the source operand and stores the 32-bit unsigned integer
+/// results in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PABSD instruction.
+///
+/// \param __a
+/// A 64-bit vector of [2 x i32].
+/// \returns A 64-bit integer vector containing the absolute values of the
+/// elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_abs_pi32(__m64 __a)
{
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
}
+/// \brief Computes the absolute value of each of the packed 32-bit signed
+/// integers in the source operand and stores the 32-bit unsigned integer
+/// results in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPABSD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x i32].
+/// \returns A 128-bit integer vector containing the absolute values of the
+/// elements in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi32(__m128i __a)
{
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
}
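A short usage sketch for the abs family (assumes SSSE3 is enabled, e.g. -mssse3; values illustrative):
#include <x86intrin.h>
static inline __m128i abs_demo(void) {
  __m128i v = _mm_set1_epi8(-5);
  return _mm_abs_epi8(v);  /* every byte becomes 5 */
}
/* Edge case: -128 has no positive 8-bit counterpart, so its "absolute
   value" is 0x80, i.e. 128 when read as unsigned. */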
+/// \brief Concatenates the two 128-bit integer vector operands, and
+/// right-shifts the result by the number of bytes specified in the immediate
+/// operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c PALIGNR instruction.
+///
+/// \param a
+/// A 128-bit vector of [16 x i8] containing one of the source operands.
+/// \param b
+/// A 128-bit vector of [16 x i8] containing one of the source operands.
+/// \param n
+/// An immediate operand specifying how many bytes to right-shift the result.
+/// \returns A 128-bit integer vector containing the concatenated right-shifted
+/// value.
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
(__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
(__v16qi)(__m128i)(b), (n)); })
+/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
+/// the result by the number of bytes specified in the immediate operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c PALIGNR instruction.
+///
+/// \param a
+/// A 64-bit vector of [8 x i8] containing one of the source operands.
+/// \param b
+/// A 64-bit vector of [8 x i8] containing one of the source operands.
+/// \param n
+/// An immediate operand specifying how many bytes to right-shift the result.
+/// \returns A 64-bit integer vector containing the concatenated right-shifted
+/// value.
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
(__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })
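The index math behind the macros: the concatenation places a in the high half and b in the low half, so with n == 4 the result is b shifted down by four bytes with the low four bytes of a shifted in at the top. A hedged sketch (function name illustrative):
#include <x86intrin.h>
static inline __m128i window4(__m128i a, __m128i b) {
  /* r[i] = (i < 12) ? b[i + 4] : a[i - 12] */
  return _mm_alignr_epi8(a, b, 4);
}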
+/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of [8 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPHADDW instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
+/// both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of [4 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPHADDD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x i32] containing one of the source operands. The
+/// horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 128-bit vector of [4 x i32] containing one of the source operands. The
+/// horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
+/// both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 64-bit vectors of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PHADDW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
+/// operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 64-bit vectors of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PHADDD instruction.
+///
+/// \param __a
+/// A 64-bit vector of [2 x i32] containing one of the source operands. The
+/// horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 64-bit vector of [2 x i32] containing one of the source operands. The
+/// horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
+/// operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
+/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPHADDSW instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
+/// sums of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadds_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
+/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PHADDSW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the lower bits of the
+/// destination.
+/// \param __b
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal sums of the values are stored in the upper bits of the
+/// destination.
+/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
+/// sums of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
}
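Concretely, each result lane is the sum of one adjacent pair, with __a feeding the low half of the destination and __b the high half. A small sketch (assumes -mssse3; values illustrative):
#include <x86intrin.h>
static inline __m128i hadd_demo(void) {
  __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
  __m128i b = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
  /* result lanes: {3, 7, 11, 15, 30, 70, 110, 150} */
  return _mm_hadd_epi16(a, b);
}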
+/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of [8 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPHSUBW instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the upper bits of
+/// the destination.
+/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
+/// of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of [4 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPHSUBD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x i32] containing one of the source operands. The
+/// horizontal differences between the values are stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 128-bit vector of [4 x i32] containing one of the source operands. The
+/// horizontal differences between the values are stored in the upper bits of
+/// the destination.
+/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
+/// of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
}
+/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 64-bit vectors of [4 x i16].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PHSUBW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the upper bits of
+/// the destination.
+/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
+/// of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
}
+/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 64-bit vectors of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PHSUBD instruction.
+///
+/// \param __a
+/// A 64-bit vector of [2 x i32] containing one of the source operands. The
+/// horizontal differences between the values are stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 64-bit vector of [2 x i32] containing one of the source operands. The
+/// horizontal differences between the values are stored in the upper bits of
+/// the destination.
+/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
+/// of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
}
+/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
+/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
+/// saturated to 8000h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPHSUBSW instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 128-bit vector of [8 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the upper bits of
+/// the destination.
+/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
+/// differences of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
+/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
+/// saturated to 8000h.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PHSUBSW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the lower bits of
+/// the destination.
+/// \param __b
+/// A 64-bit vector of [4 x i16] containing one of the source operands. The
+/// horizontal differences between the values are stored in the upper bits of
+/// the destination.
+/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
+/// differences of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
}
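The subtraction runs low-minus-high within each pair (lane 0 is __a0 - __a1), and the saturating form clamps at the 16-bit limits. Sketch (assumes -mssse3):
#include <x86intrin.h>
static inline __m128i hsubs_demo(void) {
  __m128i a = _mm_setr_epi16(-32768, 1, 0, 0, 0, 0, 0, 0);
  /* lane 0: -32768 - 1 = -32769, saturated back to -32768 (0x8000) */
  return _mm_hsubs_epi16(a, a);
}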
+/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
+/// values contained in the first source operand and packed 8-bit signed
+/// integer values contained in the second source operand, adds pairs of
+/// contiguous products with signed saturation, and writes the 16-bit sums to
+/// the corresponding bits in the destination. For example, bits [7:0] of
+/// both operands are multiplied, bits [15:8] of both operands are
+/// multiplied, and the sum of both results is written to bits [15:0] of the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the first source operand.
+/// \param __b
+/// A 128-bit integer vector containing the second source operand.
+/// \returns A 128-bit integer vector containing the sums of products of both
+/// operands:
+/// R0 := (__a0 * __b0) + (__a1 * __b1)
+/// R1 := (__a2 * __b2) + (__a3 * __b3)
+/// R2 := (__a4 * __b4) + (__a5 * __b5)
+/// R3 := (__a6 * __b6) + (__a7 * __b7)
+/// R4 := (__a8 * __b8) + (__a9 * __b9)
+/// R5 := (__a10 * __b10) + (__a11 * __b11)
+/// R6 := (__a12 * __b12) + (__a13 * __b13)
+/// R7 := (__a14 * __b14) + (__a15 * __b15)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddubs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
}
+/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
+/// values contained in the first source operand and packed 8-bit signed
+/// integer values contained in the second source operand, adds pairs of
+/// contiguous products with signed saturation, and writes the 16-bit sums to
+/// the corresponding bits in the destination. For example, bits [7:0] of
+/// both operands are multiplied, bits [15:8] of both operands are
+/// multiplied, and the sum of both results is written to bits [15:0] of the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMADDUBSW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the first source operand.
+/// \param __b
+/// A 64-bit integer vector containing the second source operand.
+/// \returns A 64-bit integer vector containing the sums of products of both
+/// operands:
+/// R0 := (__a0 * __b0) + (__a1 * __b1)
+/// R1 := (__a2 * __b2) + (__a3 * __b3)
+/// R2 := (__a4 * __b4) + (__a5 * __b5)
+/// R3 := (__a6 * __b6) + (__a7 * __b7)
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
}
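Note the asymmetry: __a is read as unsigned bytes, __b as signed bytes, and only the pairwise 16-bit sums saturate. A sketch showing the clamp (assumes -mssse3):
#include <x86intrin.h>
static inline __m128i maddubs_demo(void) {
  __m128i a = _mm_set1_epi8(-1);   /* 0xFF, read as unsigned 255 */
  __m128i b = _mm_set1_epi8(127);
  /* each lane: 255*127 + 255*127 = 64770, saturated to 32767 */
  return _mm_maddubs_epi16(a, b);
}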
+/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
+/// products to the 18 most significant bits by right-shifting, rounds the
+/// truncated value by adding 1, and writes bits [16:1] to the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPMULHRSW instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x i16] containing one of the source operands.
+/// \param __b
+/// A 128-bit vector of [8 x i16] containing one of the source operands.
+/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
+/// products of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
+/// products to the 18 most significant bits by right-shifting, rounds the
+/// truncated value by adding 1, and writes bits [16:1] to the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMULHRSW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16] containing one of the source operands.
+/// \param __b
+/// A 64-bit vector of [4 x i16] containing one of the source operands.
+/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
+/// products of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
}
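In fixed-point terms this is a rounded Q15 multiply: r = ((a*b >> 14) + 1) >> 1. Sketch (assumes -mssse3):
#include <x86intrin.h>
static inline __m128i q15_mul_demo(void) {
  __m128i a = _mm_set1_epi16(0x4000);  /* 0.5 in Q15 */
  __m128i b = _mm_set1_epi16(0x2000);  /* 0.25 in Q15 */
  return _mm_mulhrs_epi16(a, b);       /* 0x1000 = 0.125 in Q15 */
}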
+/// \brief Copies the 8-bit integers from a 128-bit integer vector to the
+/// destination or clears 8-bit values in the destination, as specified by
+/// the second source operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSHUFB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 128-bit integer vector containing control bytes corresponding to
+/// positions in the destination:
+/// Bit 7:
+/// 1: Clear the corresponding byte in the destination.
+/// 0: Copy the selected source byte to the corresponding byte in the
+/// destination.
+/// Bits [6:4] Reserved.
+/// Bits [3:0] select the source byte to be copied.
+/// \returns A 128-bit integer vector containing the copied or cleared values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shuffle_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
}
+/// \brief Copies the 8-bit integers from a 64-bit integer vector to the
+/// destination or clears 8-bit values in the destination, as specified by
+/// the second source operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSHUFB instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 64-bit integer vector containing control bytes corresponding to
+/// positions in the destination:
+/// Bit 7:
+/// 1: Clear the corresponding byte in the destination.
+/// 0: Copy the selected source byte to the corresponding byte in the
+/// destination.
+/// Bits [2:0] select the source byte to be copied.
+/// \returns A 64-bit integer vector containing the copied or cleared values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
}
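A classic use is a byte permutation expressed as data: with the control vector below every byte of v is reversed, and any control byte with bit 7 set would zero its lane instead. Sketch (assumes -mssse3):
#include <x86intrin.h>
static inline __m128i byte_reverse(__m128i v) {
  __m128i rev = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                              7, 6, 5, 4, 3, 2, 1, 0);
  return _mm_shuffle_epi8(v, rev);
}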
+/// \brief For each 8-bit integer in the first source operand, perform one of
+/// the following actions as specified by the second source operand: If the
+/// byte in the second source is negative, calculate the two's complement of
+/// the corresponding byte in the first source, and write that value to the
+/// destination. If the byte in the second source is positive, copy the
+/// corresponding byte from the first source to the destination. If the byte
+/// in the second source is zero, clear the corresponding byte in the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSIGNB instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 128-bit integer vector containing control bytes corresponding to
+/// positions in the destination.
+/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}
+/// \brief For each 16-bit integer in the first source operand, perform one of
+/// the following actions as specified by the second source operand: If the
+/// word in the second source is negative, calculate the two's complement of
+/// the corresponding word in the first source, and write that value to the
+/// destination. If the word in the second source is positive, copy the
+/// corresponding word from the first source to the destination. If the word
+/// in the second source is zero, clear the corresponding word in the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSIGNW instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 128-bit integer vector containing control words corresponding to
+/// positions in the destination.
+/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}
+/// \brief For each 32-bit integer in the first source operand, perform one of
+/// the following actions as specified by the second source operand: If the
+/// doubleword in the second source is negative, calculate the two's
+/// complement of the corresponding doubleword in the first source, and
+/// write that value to the destination. If the doubleword in the second
+/// source is positive, copy the corresponding doubleword from the first
+/// source to the destination. If the doubleword in the second source is
+/// zero, clear the corresponding doubleword in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPSIGND instruction.
+///
+/// \param __a
+/// A 128-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 128-bit integer vector containing control doublewords corresponding to
+/// positions in the destination.
+/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a, __m128i __b)
{
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}
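In effect r[i] = x[i] * sign(y[i]) with an extra zeroing rule, which makes the psign family handy for applying a precomputed sign mask without a multiply. Sketch (assumes -mssse3; note -32768 has no positive two's complement and stays -32768):
#include <x86intrin.h>
static inline __m128i apply_sign(__m128i x, __m128i y) {
  /* r[i] = y[i] < 0 ? -x[i] : (y[i] == 0 ? 0 : x[i]) */
  return _mm_sign_epi16(x, y);
}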
+/// \brief For each 8-bit integer in the first source operand, perform one of
+/// the following actions as specified by the second source operand: If the
+/// byte in the second source is negative, calculate the two's complement of
+/// the corresponding byte in the first source, and write that value to the
+/// destination. If the byte in the second source is positive, copy the
+/// corresponding byte from the first source to the destination. If the byte
+/// in the second source is zero, clear the corresponding byte in the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSIGNB instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 64-bit integer vector containing control bytes corresponding to
+/// positions in the destination.
+/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
}
+/// \brief For each 16-bit integer in the first source operand, perform one of
+/// the following actions as specified by the second source operand: If the
+/// word in the second source is negative, calculate the two's complement of
+/// the corresponding word in the first source, and write that value to the
+/// destination. If the word in the second source is positive, copy the
+/// corresponding word from the first source to the destination. If the word
+/// in the second source is zero, clear the corresponding word in the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSIGNW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 64-bit integer vector containing control words corresponding to
+/// positions in the destination.
+/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
}
+/// \brief For each 32-bit integer in the first source operand, perform one of
+/// the following actions as specified by the second source operand: If the
+/// doubleword in the second source is negative, calculate the two's
+/// complement of the corresponding doubleword in the first source, and
+/// write that value to the destination. If the doubleword in the second
+/// source is positive, copy the corresponding doubleword from the first
+/// source to the destination. If the doubleword in the second source is
+/// zero, clear the corresponding doubleword in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSIGND instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the values to be copied.
+/// \param __b
+/// A 64-bit integer vector containing two control doublewords corresponding
+/// to positions in the destination.
+/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sign_pi32(__m64 __a, __m64 __b)
{
diff --git a/contrib/llvm/tools/clang/lib/Headers/unwind.h b/contrib/llvm/tools/clang/lib/Headers/unwind.h
index 303d79288aee..4f74a3478740 100644
--- a/contrib/llvm/tools/clang/lib/Headers/unwind.h
+++ b/contrib/llvm/tools/clang/lib/Headers/unwind.h
@@ -79,6 +79,10 @@ struct _Unwind_Context;
struct _Unwind_Exception;
typedef enum {
_URC_NO_REASON = 0,
+#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
+ !defined(__ARM_DWARF_EH__)
+ _URC_OK = 0, /* used by ARM EHABI */
+#endif
_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
_URC_FATAL_PHASE2_ERROR = 2,
@@ -88,7 +92,11 @@ typedef enum {
_URC_END_OF_STACK = 5,
_URC_HANDLER_FOUND = 6,
_URC_INSTALL_CONTEXT = 7,
- _URC_CONTINUE_UNWIND = 8
+ _URC_CONTINUE_UNWIND = 8,
+#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \
+ !defined(__ARM_DWARF_EH__)
+ _URC_FAILURE = 9 /* used by ARM EHABI */
+#endif
} _Unwind_Reason_Code;
typedef enum {
@@ -150,6 +158,15 @@ typedef enum {
_UVRSR_FAILED = 2
} _Unwind_VRS_Result;
+#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__)
+typedef uint32_t _Unwind_State;
+#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)
+#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)
+#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)
+#define _US_ACTION_MASK ((_Unwind_State)3)
+#define _US_FORCE_UNWIND ((_Unwind_State)8)
+#endif
+
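These _US_* values reach the ARM EHABI personality routine in place of the DWARF _Unwind_Action bitmask. A hedged sketch of how a personality might dispatch on them (the routine name is illustrative, and struct _Unwind_Exception stands in here for the EHABI control block):
static _Unwind_Reason_Code
demo_personality(_Unwind_State __state, struct _Unwind_Exception *__exc,
                 struct _Unwind_Context *__ctx) {
  (void)__exc; (void)__ctx;
  switch (__state & _US_ACTION_MASK) {
  case _US_VIRTUAL_UNWIND_FRAME:  /* phase 1: search for a handler */
  case _US_UNWIND_FRAME_STARTING: /* phase 2: run cleanups */
  case _US_UNWIND_FRAME_RESUME:   /* phase 2: resume after a cleanup */
  default:
    return _URC_CONTINUE_UNWIND;
  }
}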
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
uint32_t __regno,
diff --git a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
index 4d8077e38291..81a404f55d01 100644
--- a/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/x86intrin.h
@@ -28,29 +28,57 @@
#include <immintrin.h>
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)
#include <mm3dnow.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)
#include <ammintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)
#include <fma4intrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)
#include <xopintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)
#include <tbmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)
+#include <mwaitxintrin.h>
+#endif
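The pattern is uniform: outside MSVC-compatible mode (or with modules) everything is included as before, while under -fms-compatibility each ISA header is pulled in only when its feature macro is defined, presumably so that reaching <x86intrin.h> via MSVC's intrin.h does not drag in intrinsics the target has not enabled. For example:
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)
#include <tbmintrin.h>   /* skipped in MSVC mode unless -mtbm defined __TBM__ */
#endif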
/* FIXME: LWP */
diff --git a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
index ae0b2cd1b26e..99cddb0fac82 100644
--- a/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/xmmintrin.h
@@ -30,6 +30,9 @@ typedef int __v4si __attribute__((__vector_size__(16)));
typedef float __v4sf __attribute__((__vector_size__(16)));
typedef float __m128 __attribute__((__vector_size__(16)));
+/* Unsigned types */
+typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+
/* This header should only be included in a hosted environment as it depends on
* a standard library to provide allocation routines. */
#if __STDC_HOSTED__
@@ -39,6 +42,21 @@ typedef float __m128 __attribute__((__vector_size__(16)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
+/// \brief Adds the 32-bit float values in the low-order bits of the operands.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDSS / ADDSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The lower 32 bits of this operand are used in the calculation.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The lower 32 bits of this operand are used in the calculation.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum
+/// of the lower 32 bits of both operands. The upper 96 bits are copied from
+/// the upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_add_ss(__m128 __a, __m128 __b)
{
@@ -46,12 +64,41 @@ _mm_add_ss(__m128 __a, __m128 __b)
return __a;
}
+/// \brief Adds two 128-bit vectors of [4 x float], and returns the results of
+/// the addition.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VADDPS / ADDPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \returns A 128-bit vector of [4 x float] containing the sums of both
+/// operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_add_ps(__m128 __a, __m128 __b)
{
- return __a + __b;
-}
-
+ return (__m128)((__v4sf)__a + (__v4sf)__b);
+}
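The ss form touches only lane 0 and passes lanes 1-3 through from __a, while the ps form is fully elementwise; the new (__v4sf) casts do not change the arithmetic, they only make the element type explicit instead of applying operators to __m128 directly. The same pairing holds for the sub, mul, and div intrinsics below. Sketch:
#include <x86intrin.h>
static inline void add_demo(float out[4]) {
  __m128 a = _mm_setr_ps(1, 2, 3, 4);
  __m128 b = _mm_setr_ps(10, 20, 30, 40);
  _mm_storeu_ps(out, _mm_add_ss(a, b)); /* {11, 2, 3, 4} */
  _mm_storeu_ps(out, _mm_add_ps(a, b)); /* {11, 22, 33, 44} */
}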
+
+/// \brief Subtracts the 32-bit float value in the low-order bits of the second
+/// operand from the corresponding value in the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSUBSS / SUBSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits
+/// of this operand are used in the calculation.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32
+/// bits of this operand are used in the calculation.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
+/// difference of the lower 32 bits of both operands. The upper 96 bits are
+/// copied from the upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_sub_ss(__m128 __a, __m128 __b)
{
@@ -59,12 +106,42 @@ _mm_sub_ss(__m128 __a, __m128 __b)
return __a;
}
+/// \brief Subtracts each of the values of the second operand from the first
+/// operand, both of which are 128-bit vectors of [4 x float], and returns
+/// the results of the subtraction.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSUBPS / SUBPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the minuend.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing the subtrahend.
+/// \returns A 128-bit vector of [4 x float] containing the differences between
+/// both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_sub_ps(__m128 __a, __m128 __b)
{
- return __a - __b;
-}
-
+ return (__m128)((__v4sf)__a - (__v4sf)__b);
+}
+
+/// \brief Multiplies two 32-bit float values in the low-order bits of the
+/// operands.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMULSS / MULSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The lower 32 bits of this operand are used in the calculation.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// The lower 32 bits of this operand are used in the calculation.
+/// \returns A 128-bit vector of [4 x float] containing the product of the lower
+/// 32 bits of both operands. The upper 96 bits are copied from the upper 96
+/// bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mul_ss(__m128 __a, __m128 __b)
{
@@ -72,12 +149,41 @@ _mm_mul_ss(__m128 __a, __m128 __b)
return __a;
}
+/// \brief Multiplies two 128-bit vectors of [4 x float] and returns the
+/// results of the multiplication.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMULPS / MULPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \returns A 128-bit vector of [4 x float] containing the products of both
+/// operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mul_ps(__m128 __a, __m128 __b)
{
- return __a * __b;
-}
-
+ return (__m128)((__v4sf)__a * (__v4sf)__b);
+}
+
+/// \brief Divides the value in the low-order 32 bits of the first operand by
+/// the corresponding value in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VDIVSS / DIVSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the dividend. The lower 32
+/// bits of this operand are used in the calculation.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits
+/// of this operand are used in the calculation.
+/// \returns A 128-bit vector of [4 x float] containing the quotients of the
+/// lower 32 bits of both operands. The upper 96 bits are copied from the
+/// upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_div_ss(__m128 __a, __m128 __b)
{
@@ -85,329 +191,1091 @@ _mm_div_ss(__m128 __a, __m128 __b)
return __a;
}
+/// \brief Divides two 128-bit vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VDIVPS / DIVPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the dividend.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing the divisor.
+/// \returns A 128-bit vector of [4 x float] containing the quotients of both
+/// operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_div_ps(__m128 __a, __m128 __b)
{
- return __a / __b;
+ return (__m128)((__v4sf)__a / (__v4sf)__b);
}
+/// \brief Calculates the square root of the value stored in the low-order bits
+/// of a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSQRTSS / SQRTSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the calculation.
+/// \returns A 128-bit vector of [4 x float] containing the square root of the
+/// value in the low-order bits of the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_sqrt_ss(__m128 __a)
{
- __m128 __c = __builtin_ia32_sqrtss(__a);
+ __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a);
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
}
+/// \brief Calculates the square roots of the values stored in a 128-bit vector
+/// of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSQRTPS / SQRTPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the square roots of the
+/// values in the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_sqrt_ps(__m128 __a)
{
- return __builtin_ia32_sqrtps(__a);
+ return __builtin_ia32_sqrtps((__v4sf)__a);
}
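The compound literal { __c[0], __a[1], __a[2], __a[3] } is what merges the scalar result back into lane 0 while leaving the upper lanes of __a untouched. Sketch:
#include <x86intrin.h>
static inline __m128 sqrt_lane0(void) {
  __m128 v = _mm_setr_ps(9.0f, -1.0f, 2.0f, 3.0f);
  return _mm_sqrt_ss(v);  /* {3.0, -1.0, 2.0, 3.0}: lanes 1-3 preserved */
}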
+/// \brief Calculates the approximate reciprocal of the value stored in the
+/// low-order bits of a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VRCPSS / RCPSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the calculation.
+/// \returns A 128-bit vector of [4 x float] containing the approximate
+/// reciprocal of the value in the low-order bits of the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp_ss(__m128 __a)
{
- __m128 __c = __builtin_ia32_rcpss(__a);
+ __m128 __c = __builtin_ia32_rcpss((__v4sf)__a);
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
}
+/// \brief Calculates the approximate reciprocals of the values stored in a
+/// 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VRCPPS / RCPPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the approximate
+/// reciprocals of the values in the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp_ps(__m128 __a)
{
- return __builtin_ia32_rcpps(__a);
+ return __builtin_ia32_rcpps((__v4sf)__a);
}
+/// \brief Calculates the approximate reciprocal of the square root of the value
+/// stored in the low-order bits of a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VRSQRTSS / RSQRTSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the calculation.
+/// \returns A 128-bit vector of [4 x float] containing the approximate
+/// reciprocal of the square root of the value in the low-order bits of the
+/// operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt_ss(__m128 __a)
{
- __m128 __c = __builtin_ia32_rsqrtss(__a);
+ __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a);
return (__m128) { __c[0], __a[1], __a[2], __a[3] };
}
+/// \brief Calculates the approximate reciprocals of the square roots of the
+/// values stored in a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VRSQRTPS / RSQRTPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the approximate
+/// reciprocals of the square roots of the values in the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt_ps(__m128 __a)
{
- return __builtin_ia32_rsqrtps(__a);
-}
-
+ return __builtin_ia32_rsqrtps((__v4sf)__a);
+}
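These estimates carry roughly 12 bits of relative accuracy (Intel documents a bound of 1.5 * 2^-12), so a common follow-up, not part of this header, is one Newton-Raphson step to recover near-single precision:
#include <x86intrin.h>
static inline __m128 rsqrt_refined(__m128 a) {
  __m128 e = _mm_rsqrt_ps(a);
  /* e' = 0.5 * e * (3 - a * e * e) */
  return _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), e),
                    _mm_sub_ps(_mm_set1_ps(3.0f),
                               _mm_mul_ps(a, _mm_mul_ps(e, e))));
}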
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands and returns the lesser value in the low-order bits of the
+/// vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMINSS / MINSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
+/// minimum value between both operands. The upper 96 bits are copied from
+/// the upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_min_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_minss(__a, __b);
+ return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares two 128-bit vectors of [4 x float] and returns the
+/// lesser of each pair of values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMINPS / MINPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands.
+/// \returns A 128-bit vector of [4 x float] containing the minimum values
+/// between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_min_ps(__m128 __a, __m128 __b)
{
- return __builtin_ia32_minps(__a, __b);
-}
-
+ return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands and returns the greater value in the low-order bits of
+/// a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMAXSS / MAXSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
+/// maximum value between both operands. The upper 96 bits are copied from
+/// the upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_max_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_maxss(__a, __b);
+ return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares two 128-bit vectors of [4 x float] and returns the greater
+/// of each pair of values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMAXPS / MAXPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands.
+/// \returns A 128-bit vector of [4 x float] containing the maximum values
+/// between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_max_ps(__m128 __a, __m128 __b)
{
- return __builtin_ia32_maxps(__a, __b);
+ return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
}
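Worth noting for both families: MINPS/MAXPS are not symmetric in the presence of NaNs; when either input of a lane is NaN, the second operand is returned, so argument order matters. Sketch:
#include <x86intrin.h>
static inline void nan_demo(void) {
  __m128 n = _mm_set1_ps(__builtin_nanf(""));
  __m128 one = _mm_set1_ps(1.0f);
  __m128 r1 = _mm_min_ps(n, one);  /* 1.0 in every lane */
  __m128 r2 = _mm_min_ps(one, n);  /* NaN in every lane */
  (void)r1; (void)r2;
}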
+/// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VANDPS / ANDPS instructions.
+///
+/// \param __a
+/// A 128-bit vector containing one of the source operands.
+/// \param __b
+/// A 128-bit vector containing one of the source operands.
+/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
+/// values between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_and_ps(__m128 __a, __m128 __b)
{
- return (__m128)((__v4si)__a & (__v4si)__b);
-}
-
+ return (__m128)((__v4su)__a & (__v4su)__b);
+}
+
+/// \brief Performs a bitwise AND of two 128-bit vectors of [4 x float], using
+/// the one's complement of the values contained in the first source
+/// operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VANDNPS / ANDNPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the first source operand. The
+/// one's complement of this value is used in the bitwise AND.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing the second source operand.
+/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
+/// one's complement of the first operand and the values in the second
+/// operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_andnot_ps(__m128 __a, __m128 __b)
{
- return (__m128)(~(__v4si)__a & (__v4si)__b);
+ return (__m128)(~(__v4su)__a & (__v4su)__b);
}
+/// \brief Performs a bitwise OR of two 128-bit vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VORPS / ORPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the
+/// values between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_or_ps(__m128 __a, __m128 __b)
{
- return (__m128)((__v4si)__a | (__v4si)__b);
+ return (__m128)((__v4su)__a | (__v4su)__b);
}
+/// \brief Performs a bitwise exclusive OR of two 128-bit vectors of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VXORPS / XORPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the source operands.
+/// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR
+/// of the values between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_xor_ps(__m128 __a, __m128 __b)
{
- return (__m128)((__v4si)__a ^ (__v4si)__b);
-}
-
+ return (__m128)((__v4su)__a ^ (__v4su)__b);
+}
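The switch from __v4si to __v4su changes only the element type the operators see, not the bits produced. These ops are the usual sign-bit toolbox for floats:
#include <x86intrin.h>
static inline __m128 fabs_demo(__m128 x) {
  __m128 sign = _mm_set1_ps(-0.0f);   /* 0x80000000 in each lane */
  /* _mm_xor_ps(sign, x) would instead negate each lane */
  return _mm_andnot_ps(sign, x);      /* clear sign bits: per-lane fabsf */
}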
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands for equality and returns the result of the comparison in the
+/// low-order bits of a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPEQSS / CMPEQSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpeq_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpeqss(__a, __b);
+ return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] for equality.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPEQPS / CMPEQPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpeq_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpeqps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is less than the
+/// corresponding value in the second operand and returns the result of the
+/// comparison in the low-order bits of a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLTSS / CMPLTSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmplt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpltss(__a, __b);
+ return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are less than those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLTPS / CMPLTPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmplt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpltps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);
+}
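Each comparison lane comes back as all-ones (true) or all-zeros (false), which composes with the bitwise ops above into a branchless select. Sketch:
#include <x86intrin.h>
static inline __m128 select_lt(__m128 a, __m128 b) {
  __m128 m = _mm_cmplt_ps(a, b);             /* 0xFFFFFFFF where a < b */
  return _mm_or_ps(_mm_and_ps(m, a),         /* take a where true */
                   _mm_andnot_ps(m, b));     /* take b where false */
}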
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is less than or
+/// equal to the corresponding value in the second operand and returns the
+/// result of the comparison in the low-order bits of a vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLESS / CMPLESS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmple_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpless(__a, __b);
+ return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are less than or equal to those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLEPS / CMPLEPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmple_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpleps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is greater than
+/// the corresponding value in the second operand and returns the result of
+/// the comparison in the low-order bits of a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLTSS / CMPLTSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpgt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpltss(__b, __a),
+ return (__m128)__builtin_shufflevector((__v4sf)__a,
+ (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
}
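SSE has no dedicated greater-than compare, so the header evaluates cmplt with the operands swapped; in __builtin_shufflevector, indices 0-3 address the first vector and 4-7 the second, so (4, 1, 2, 3) writes the compare result into lane 0 only and keeps lanes 1-3 of __a:
/* __builtin_shufflevector(a, c, 4, 1, 2, 3) == { c[0], a[1], a[2], a[3] } */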
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are greater than those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLTPS / CMPLTPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpgt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpltps(__b, __a);
-}
-
+ return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is greater than
+/// or equal to the corresponding value in the second operand and returns
+/// the result of the comparison in the low-order bits of a vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLESS / CMPLESS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpge_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpless(__b, __a),
+ return (__m128)__builtin_shufflevector((__v4sf)__a,
+ (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are greater than or equal to those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPLEPS / CMPLEPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpge_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpleps(__b, __a);
-}
-
+ return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands for inequality and returns the result of the comparison in the
+/// low-order bits of a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNEQSS / CMPNEQSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpneq_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpneqss(__a, __b);
+ return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] for inequality.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNEQPS / CMPNEQPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpneq_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpneqps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is not less than
+/// the corresponding value in the second operand and returns the result of
+/// the comparison in the low-order bits of a vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLTSS / CMPNLTSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpnlt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpnltss(__a, __b);
+ return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are not less than those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLTPS / CMPNLTPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpnlt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpnltps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is not less than
+/// or equal to the corresponding value in the second operand and returns
+/// the result of the comparison in the low-order bits of a vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLESS / CMPNLESS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpnle_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpnless(__a, __b);
+ return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are not less than or equal to those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLEPS / CMPNLEPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpnle_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpnleps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is not greater
+/// than the corresponding value in the second operand and returns the
+/// result of the comparison in the low-order bits of a vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLTSS / CMPNLTSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpngt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpnltss(__b, __a),
+ return (__m128)__builtin_shufflevector((__v4sf)__a,
+ (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are not greater than those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLTPS / CMPNLTPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpngt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpnltps(__b, __a);
-}
-
+ return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is not greater
+/// than or equal to the corresponding value in the second operand and
+/// returns the result of the comparison in the low-order bits of a vector
+/// of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLESS / CMPNLESS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpnge_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpnless(__b, __a),
+ return (__m128)__builtin_shufflevector((__v4sf)__a,
+ (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),
4, 1, 2, 3);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are not greater than or equal to those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPNLEPS / CMPNLEPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpnge_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpnleps(__b, __a);
-}
-
+ return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is ordered with
+/// respect to the corresponding value in the second operand and returns the
+/// result of the comparison in the low-order bits of a vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPORDSS / CMPORDSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpord_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpordss(__a, __b);
+ return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are ordered with respect to those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPORDPS / CMPORDPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpord_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpordps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the value in the first operand is unordered
+/// with respect to the corresponding value in the second operand and
+/// returns the result of the comparison in the low-order bits of a vector
+/// of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPUNORDSS / CMPUNORDSS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float] containing one of the operands. The lower
+/// 32 bits of this operand are used in the comparison.
+/// \returns A 128-bit vector of [4 x float] containing the comparison results
+/// in the low-order bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpunord_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpunordss(__a, __b);
+ return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);
}
+/// \brief Compares each of the corresponding 32-bit float values of the
+/// 128-bit vectors of [4 x float] to determine if the values in the first
+/// operand are unordered with respect to those in the second operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCMPUNORDPS / CMPUNORDPS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cmpunord_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpunordps(__a, __b);
-}
-
+ return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);
+}
+
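"Ordered" is true only when neither input is a NaN; "unordered" is its complement. A small sketch (hypothetical values) of both packed forms:

#include <xmmintrin.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ps(1.0f, NAN, 2.0f, NAN);  /* lanes 3..0 */
    __m128 b = _mm_setzero_ps();
    int ord   = _mm_movemask_ps(_mm_cmpord_ps(a, b));    /* 0xa: lanes 1, 3 */
    int unord = _mm_movemask_ps(_mm_cmpunord_ps(a, b));  /* 0x5: lanes 0, 2 */
    printf("ord=%#x unord=%#x\n", ord, unord);
    return 0;
}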
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands for equality and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCOMISS / COMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comieq_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_comieq(__a, __b);
-}
-
+ return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the first operand is less than the second
+/// operand and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCOMISS / COMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comilt_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_comilt(__a, __b);
-}
-
+ return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the first operand is less than or equal to the
+/// second operand and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCOMISS / COMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comile_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_comile(__a, __b);
-}
-
+ return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the first operand is greater than the second
+/// operand and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCOMISS / COMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comigt_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_comigt(__a, __b);
-}
-
+ return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the first operand is greater than or equal to
+/// the second operand and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCOMISS / COMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comige_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_comige(__a, __b);
-}
-
+ return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Compares two 32-bit float values in the low-order bits of both
+/// operands to determine if the first operand is not equal to the second
+/// operand and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCOMISS / COMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_comineq_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_comineq(__a, __b);
-}
-
+ return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Performs an unordered comparison of two 32-bit float values using
+/// the low-order bits of both operands to determine equality and returns
+/// the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUCOMISS / UCOMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomieq_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_ucomieq(__a, __b);
-}
-
+ return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Performs an unordered comparison of two 32-bit float values using
+/// the low-order bits of both operands to determine if the first operand is
+/// less than the second operand and returns the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUCOMISS / UCOMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomilt_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_ucomilt(__a, __b);
-}
-
+ return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Performs an unordered comparison of two 32-bit float values using
+/// the low-order bits of both operands to determine if the first operand
+/// is less than or equal to the second operand and returns the result of
+/// the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUCOMISS / UCOMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomile_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_ucomile(__a, __b);
-}
-
+ return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Performs an unordered comparison of two 32-bit float values using
+/// the low-order bits of both operands to determine if the first operand
+/// is greater than the second operand and returns the result of the
+/// comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUCOMISS / UCOMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomigt_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_ucomigt(__a, __b);
-}
-
+ return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Performs an unordered comparison of two 32-bit float values using
+/// the low-order bits of both operands to determine if the first operand is
+/// greater than or equal to the second operand and returns the result of
+/// the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUCOMISS / UCOMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomige_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_ucomige(__a, __b);
-}
-
+ return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
+}
+
+/// \brief Performs an unordered comparison of two 32-bit float values using
+/// the low-order bits of both operands to determine inequality and returns
+/// the result of the comparison.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUCOMISS / UCOMISS instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \param __b
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the comparison.
+/// \returns An integer containing the comparison results.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_ucomineq_ss(__m128 __a, __m128 __b)
{
- return __builtin_ia32_ucomineq(__a, __b);
+ return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
}
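Both families return plain 0/1 integers; they differ only in exception behavior (the comi* forms raise the invalid exception on quiet NaNs, the ucomi* forms only on signaling NaNs). A minimal sketch with hypothetical values:

#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ss(3.0f);
    __m128 b = _mm_set_ss(4.0f);
    printf("eq=%d lt=%d uge=%d\n",
           _mm_comieq_ss(a, b),    /* 0 */
           _mm_comilt_ss(a, b),    /* 1 */
           _mm_ucomige_ss(a, b));  /* 0 */
    return 0;
}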
+/// \brief Converts a float value contained in the lower 32 bits of a vector of
+/// [4 x float] into a 32-bit integer.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSS2SI / CVTSS2SI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the conversion.
+/// \returns A 32-bit integer containing the converted value.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtss_si32(__m128 __a)
{
- return __builtin_ia32_cvtss2si(__a);
+ return __builtin_ia32_cvtss2si((__v4sf)__a);
}
+/// \brief Converts a float value contained in the lower 32 bits of a vector of
+/// [4 x float] into a 32-bit integer.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSS2SI / CVTSS2SI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the conversion.
+/// \returns A 32-bit integer containing the converted value.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvt_ss2si(__m128 __a)
{
@@ -416,56 +1284,161 @@ _mm_cvt_ss2si(__m128 __a)
#ifdef __x86_64__
+/// \brief Converts a float value contained in the lower 32 bits of a vector of
+/// [4 x float] into a 64-bit integer.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSS2SI / CVTSS2SI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the conversion.
+/// \returns A 64-bit integer containing the converted value.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvtss_si64(__m128 __a)
{
- return __builtin_ia32_cvtss2si64(__a);
+ return __builtin_ia32_cvtss2si64((__v4sf)__a);
}
#endif
+/// \brief Converts two low-order float values in a 128-bit vector of
+/// [4 x float] into a 64-bit vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPS2PI instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 64-bit integer vector containing the converted values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtps_pi32(__m128 __a)
{
- return (__m64)__builtin_ia32_cvtps2pi(__a);
+ return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);
}
+/// \brief Converts two low-order float values in a 128-bit vector of
+/// [4 x float] into a 64-bit vector of [2 x i32].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPS2PI instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 64-bit integer vector containing the converted values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvt_ps2pi(__m128 __a)
{
return _mm_cvtps_pi32(__a);
}
+/// \brief Converts a float value contained in the lower 32 bits of a vector of
+/// [4 x float] into a 32-bit integer, truncating the result when it is
+/// inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTTSS2SI / CVTTSS2SI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the conversion.
+/// \returns A 32-bit integer containing the converted value.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_si32(__m128 __a)
{
- return __a[0];
-}
-
+ return __builtin_ia32_cvttss2si((__v4sf)__a);
+}
+
+/// \brief Converts a float value contained in the lower 32 bits of a vector of
+/// [4 x float] into a 32-bit integer, truncating the result when it is
+/// inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTTSS2SI / CVTTSS2SI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the conversion.
+/// \returns A 32-bit integer containing the converted value.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvtt_ss2si(__m128 __a)
{
return _mm_cvttss_si32(__a);
}
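The cvt* conversions round according to the current MXCSR rounding mode (round-to-nearest-even by default), while the cvtt* conversions always truncate toward zero. A sketch of the difference:

#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    printf("%d %d\n", _mm_cvtss_si32(_mm_set_ss(2.5f)),
                      _mm_cvttss_si32(_mm_set_ss(2.5f)));  /* 2 2 */
    printf("%d %d\n", _mm_cvtss_si32(_mm_set_ss(3.5f)),
                      _mm_cvttss_si32(_mm_set_ss(3.5f)));  /* 4 3 */
    return 0;
}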
+/// \brief Converts a float value contained in the lower 32 bits of a vector of
+/// [4 x float] into a 64-bit integer, truncating the result when it is
+/// inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTTSS2SI / CVTTSS2SI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the conversion.
+/// \returns A 64-bit integer containing the converted value.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_si64(__m128 __a)
{
- return __a[0];
+ return __builtin_ia32_cvttss2si64((__v4sf)__a);
}
+/// \brief Converts two low-order float values in a 128-bit vector of
+/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result
+/// when it is inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTTPS2PI / CVTTPS2PI instructions.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 64-bit integer vector containing the converted values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvttps_pi32(__m128 __a)
{
- return (__m64)__builtin_ia32_cvttps2pi(__a);
+ return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
}
+/// \brief Converts two low-order float values in a 128-bit vector of [4 x
+/// float] into a 64-bit vector of [2 x i32], truncating the result when it
+/// is inexact.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTTPS2PI instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \returns A 64-bit integer vector containing the converted values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtt_ps2pi(__m128 __a)
{
return _mm_cvttps_pi32(__a);
}
+/// \brief Converts a 32-bit signed integer value into a floating point value
+/// and writes it to the lower 32 bits of the destination. The remaining
+/// higher order elements of the destination vector are copied from the
+/// corresponding elements in the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSI2SS / CVTSI2SS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 32-bit signed integer operand containing the value to be converted.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
+/// converted value of the second operand. The upper 96 bits are copied from
+/// the upper 96 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtsi32_ss(__m128 __a, int __b)
{
@@ -473,6 +1446,22 @@ _mm_cvtsi32_ss(__m128 __a, int __b)
return __a;
}
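Only lane 0 is replaced by the converted integer. A sketch (hypothetical values) showing the upper three lanes passing through from the first operand:

#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);
    float r[4];
    _mm_storeu_ps(r, _mm_cvtsi32_ss(a, 7));
    printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 7 20 30 40 */
    return 0;
}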
+/// \brief Converts a 32-bit signed integer value into a floating point value
+/// and writes it to the lower 32 bits of the destination. The remaining
+/// higher order elements of the destination are copied from the
+/// corresponding elements in the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSI2SS / CVTSI2SS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 32-bit signed integer operand containing the value to be converted.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
+/// converted value of the second operand. The upper 96 bits are copied from
+/// the upper 96 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvt_si2ss(__m128 __a, int __b)
{
@@ -481,6 +1470,22 @@ _mm_cvt_si2ss(__m128 __a, int __b)
#ifdef __x86_64__
+/// \brief Converts a 64-bit signed integer value into a floating point value
+/// and writes it to the lower 32 bits of the destination. The remaining
+/// higher order elements of the destination are copied from the
+/// corresponding elements in the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VCVTSI2SS / CVTSI2SS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 64-bit signed integer operand containing the value to be converted.
+/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
+/// converted value of the second operand. The upper 96 bits are copied from
+/// the upper 96 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtsi64_ss(__m128 __a, long long __b)
{
@@ -490,24 +1495,84 @@ _mm_cvtsi64_ss(__m128 __a, long long __b)
#endif
+/// \brief Converts two elements of a 64-bit vector of [2 x i32] into two
+/// floating point values and writes them to the lower 64-bits of the
+/// destination. The remaining higher order elements of the destination are
+/// copied from the corresponding elements in the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 64-bit vector of [2 x i32]. The elements in this vector are converted
+/// and written to the corresponding low-order elements in the destination.
+/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
+/// converted value of the second operand. The upper 64 bits are copied from
+/// the upper 64 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpi32_ps(__m128 __a, __m64 __b)
{
- return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
-}
-
+ return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);
+}
+
+/// \brief Converts two elements of a 64-bit vector of [2 x i32] into two
+/// floating point values and writes them to the lower 64-bits of the
+/// destination. The remaining higher order elements of the destination are
+/// copied from the corresponding elements in the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// \param __b
+/// A 64-bit vector of [2 x i32]. The elements in this vector are converted
+/// and written to the corresponding low-order elements in the destination.
+/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
+/// converted value from the second operand. The upper 64 bits are copied
+/// from the upper 64 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvt_pi2ps(__m128 __a, __m64 __b)
{
return _mm_cvtpi32_ps(__a, __b);
}
+/// \brief Extracts a float value contained in the lower 32 bits of a vector of
+/// [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSS / MOVSS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
+/// used in the extraction.
+/// \returns A 32-bit float containing the extracted value.
static __inline__ float __DEFAULT_FN_ATTRS
_mm_cvtss_f32(__m128 __a)
{
return __a[0];
}
+/// \brief Loads two packed float values from the address __p into the
+/// high-order bits of a 128-bit vector of [4 x float]. The low-order bits
+/// are copied from the low-order bits of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVHPD / MOVHPD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]
+/// of the destination.
+/// \param __p
+/// A pointer to two packed float values. Bits [63:0] are written to bits
+/// [127:64] of the destination.
+/// \returns A 128-bit vector of [4 x float] containing the moved values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_loadh_pi(__m128 __a, const __m64 *__p)
{
@@ -520,6 +1585,21 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p)
return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
}
+/// \brief Loads two packed float values from the address __p into the low-order
+/// bits of a 128-bit vector of [4 x float]. The high-order bits are copied
+/// from the high-order bits of the first operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVLPD / MOVLPD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits
+/// [127:64] of the destination.
+/// \param __p
+/// A pointer to two packed float values. Bits [63:0] are written to bits
+/// [63:0] of the destination.
+/// \returns A 128-bit vector of [4 x float] containing the moved values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_loadl_pi(__m128 __a, const __m64 *__p)
{
@@ -532,6 +1612,21 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p)
return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
}
+/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower
+/// 32 bits of the vector are initialized with the single-precision
+/// floating-point value loaded from a specified memory location. The upper
+/// 96 bits are set to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSS / MOVSS instruction.
+///
+/// \param __p
+/// A pointer to a 32-bit memory location containing a single-precision
+/// floating-point value.
+/// \returns An initialized 128-bit floating-point vector of [4 x float]. The
+/// lower 32 bits contain the value loaded from the memory location. The
+/// upper 96 bits are set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_load_ss(const float *__p)
{
@@ -542,6 +1637,18 @@ _mm_load_ss(const float *__p)
return (__m128){ __u, 0, 0, 0 };
}
+/// \brief Loads a 32-bit float value and duplicates it to all four vector
+/// elements of a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSS / MOVSS + \c shuffling
+/// instruction.
+///
+/// \param __p
+/// A pointer to a float value to be loaded and duplicated.
+/// \returns A 128-bit vector of [4 x float] containing the loaded
+/// and duplicated values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_load1_ps(const float *__p)
{
@@ -554,12 +1661,34 @@ _mm_load1_ps(const float *__p)
#define _mm_load_ps1(p) _mm_load1_ps(p)
+/// \brief Loads a 128-bit floating-point vector of [4 x float] from an aligned
+/// memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVAPS / MOVAPS instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location has to be 128-bit aligned.
+/// \returns A 128-bit vector of [4 x float] containing the loaded values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_load_ps(const float *__p)
{
return *(__m128*)__p;
}
+/// \brief Loads a 128-bit floating-point vector of [4 x float] from an
+/// unaligned memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVUPS / MOVUPS instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \returns A 128-bit vector of [4 x float] containing the loaded values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_loadu_ps(const float *__p)
{
@@ -569,25 +1698,72 @@ _mm_loadu_ps(const float *__p)
return ((struct __loadu_ps*)__p)->__v;
}
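_mm_load_ps requires a 16-byte-aligned pointer, while _mm_loadu_ps accepts any address. A sketch, assuming a GCC/Clang-style alignment attribute:

#include <xmmintrin.h>
#include <stdio.h>

static float buf[8] __attribute__((aligned(16))) =
    {1, 2, 3, 4, 5, 6, 7, 8};

int main(void) {
    __m128 a = _mm_load_ps(buf);       /* buf is 16-byte aligned */
    __m128 u = _mm_loadu_ps(buf + 1);  /* deliberately misaligned */
    printf("%g %g\n", _mm_cvtss_f32(a), _mm_cvtss_f32(u));  /* 1 2 */
    return 0;
}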
+/// \brief Loads four packed float values, in reverse order, from an aligned
+/// memory location to 32-bit elements in a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVAPS / MOVAPS + \c shuffling
+/// instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location has to be 128-bit aligned.
+/// \returns A 128-bit vector of [4 x float] containing the moved values, loaded
+/// in reverse order.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_loadr_ps(const float *__p)
{
__m128 __a = _mm_load_ps(__p);
- return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
}
+/// \brief Creates a 128-bit vector of [4 x float] with undefined values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic has no corresponding instruction.
+///
+/// \returns A 128-bit vector of [4 x float] containing undefined values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_undefined_ps()
+_mm_undefined_ps(void)
{
return (__m128)__builtin_ia32_undef128();
}
+/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower
+/// 32 bits of the vector are initialized with the specified single-precision
+/// floating-point value. The upper 96 bits are set to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSS / MOVSS instruction.
+///
+/// \param __w
+/// A single-precision floating-point value used to initialize the lower 32
+/// bits of the result.
+/// \returns An initialized 128-bit floating-point vector of [4 x float]. The
+/// lower 32 bits contain the value provided in the source operand. The
+/// upper 96 bits are set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ss(float __w)
{
return (__m128){ __w, 0, 0, 0 };
}
+/// \brief Constructs a 128-bit floating-point vector of [4 x float], with each
+/// of the four single-precision floating-point vector elements set to the
+/// specified single-precision floating-point value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
+///
+/// \param __w
+/// A single-precision floating-point value used to initialize each vector
+/// element of the result.
+/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set1_ps(float __w)
{
@@ -595,42 +1771,139 @@ _mm_set1_ps(float __w)
}
/* Microsoft specific. */
+/// \brief Constructs a 128-bit floating-point vector of [4 x float], with each
+/// of the four single-precision floating-point vector elements set to the
+/// specified single-precision floating-point value.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPERMILPS / PERMILPS instruction.
+///
+/// \param __w
+/// A single-precision floating-point value used to initialize each vector
+/// element of the result.
+/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ps1(float __w)
{
return _mm_set1_ps(__w);
}
+/// \brief Constructs a 128-bit floating-point vector of [4 x float]
+/// initialized with the specified single-precision floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __z
+/// A single-precision floating-point value used to initialize bits [127:96]
+/// of the result.
+/// \param __y
+/// A single-precision floating-point value used to initialize bits [95:64]
+/// of the result.
+/// \param __x
+/// A single-precision floating-point value used to initialize bits [63:32]
+/// of the result.
+/// \param __w
+/// A single-precision floating-point value used to initialize bits [31:0]
+/// of the result.
+/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ps(float __z, float __y, float __x, float __w)
{
return (__m128){ __w, __x, __y, __z };
}
+/// \brief Constructs a 128-bit floating-point vector of [4 x float],
+/// initialized in reverse order with the specified 32-bit single-precision
+/// floating-point values.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic is a utility function and does not correspond to a specific
+/// instruction.
+///
+/// \param __z
+/// A single-precision floating-point value used to initialize bits [31:0]
+/// of the result.
+/// \param __y
+/// A single-precision floating-point value used to initialize bits [63:32]
+/// of the result.
+/// \param __x
+/// A single-precision floating-point value used to initialize bits [95:64]
+/// of the result.
+/// \param __w
+/// A single-precision floating-point value used to initialize bits [127:96]
+/// of the result.
+/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_setr_ps(float __z, float __y, float __x, float __w)
{
return (__m128){ __z, __y, __x, __w };
}
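The argument order is the usual trap here: _mm_set_ps lists elements high-to-low, _mm_setr_ps low-to-high. A quick sketch:

#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    float r[4];
    _mm_storeu_ps(r, _mm_set_ps(4, 3, 2, 1));
    printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 1 2 3 4 */
    _mm_storeu_ps(r, _mm_setr_ps(4, 3, 2, 1));
    printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* 4 3 2 1 */
    return 0;
}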
+/// \brief Constructs a 128-bit floating-point vector of [4 x float] initialized
+/// to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VXORPS / XORPS instruction.
+///
+/// \returns An initialized 128-bit floating-point vector of [4 x float] with
+/// all elements set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_setzero_ps(void)
{
return (__m128){ 0, 0, 0, 0 };
}
+/// \brief Stores the upper 64 bits of a 128-bit vector of [4 x float] to a
+/// memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPEXTRQ / MOVQ instruction.
+///
+/// \param __p
+/// A pointer to a 64-bit memory location.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeh_pi(__m64 *__p, __m128 __a)
{
- __builtin_ia32_storehps((__v2si *)__p, __a);
+ __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);
}
+/// \brief Stores the lower 64 bits of a 128-bit vector of [4 x float] to a
+/// memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVLPS / MOVLPS instruction.
+///
+/// \param __p
+/// A pointer to a memory location that will receive the float values.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storel_pi(__m64 *__p, __m128 __a)
{
- __builtin_ia32_storelps((__v2si *)__p, __a);
+ __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);
}
+/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] to a
+/// memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSS / MOVSS instruction.
+///
+/// \param __p
+/// A pointer to a 32-bit memory location.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_store_ss(float *__p, __m128 __a)
{
@@ -640,35 +1913,101 @@ _mm_store_ss(float *__p, __m128 __a)
((struct __mm_store_ss_struct*)__p)->__u = __a[0];
}
+/// \brief Stores float values from a 128-bit vector of [4 x float] to an
+/// unaligned memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVUPS / MOVUPS instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeu_ps(float *__p, __m128 __a)
{
- __builtin_ia32_storeups(__p, __a);
-}
-
+ struct __storeu_ps {
+ __m128 __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_ps*)__p)->__v = __a;
+}
+
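The packed, may_alias wrapper replaces the old storeups builtin and lets the compiler emit an unaligned store for any byte address. A sketch (the odd offset is deliberate):

#include <xmmintrin.h>
#include <string.h>
#include <stdio.h>

int main(void) {
    unsigned char raw[32];
    _mm_storeu_ps((float *)(raw + 1), _mm_set1_ps(1.0f));  /* odd address */
    float f;
    memcpy(&f, raw + 1, sizeof f);
    printf("%g\n", f);  /* 1 */
    return 0;
}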
+/// \brief Stores a 128-bit vector of [4 x float] into an aligned memory
+/// location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVAPS / MOVAPS instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location has to be 128-bit aligned.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
{
- __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
- _mm_storeu_ps(__p, __a);
-}
-
+ *(__m128*)__p = __a;
+}
+
+/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] into
+/// four contiguous elements in an aligned memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to \c VMOVAPS / MOVAPS + \c shuffling
+/// instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location has to be 128-bit aligned.
+/// \param __a
+/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
+/// of the four contiguous elements pointed to by __p.
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
{
- return _mm_store1_ps(__p, __a);
+ __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+ _mm_store_ps(__p, __a);
}
+/// \brief Stores the lower 32 bits of a 128-bit vector of [4 x float] into
+/// four contiguous elements in an aligned memory location.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to \c VMOVAPS / MOVAPS + \c shuffling
+/// instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location has to be 128-bit aligned.
+/// \param __a
+/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each
+/// of the four contiguous elements pointed to by __p.
static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
{
- *(__m128 *)__p = __a;
-}
-
+ return _mm_store1_ps(__p, __a);
+}
+
+/// \brief Stores float values from a 128-bit vector of [4 x float] to an
+/// aligned memory location in reverse order.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVAPS / MOVAPS + \c shuffling
+/// instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit memory location. The address of the memory
+/// location has to be 128-bit aligned.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storer_ps(float *__p, __m128 __a)
{
- __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
+ __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
_mm_store_ps(__p, __a);
}
@@ -681,153 +2020,599 @@ _mm_storer_ps(float *__p, __m128 __a)
/* FIXME: We have to #define this because "sel" must be a constant integer, and
Sema doesn't do any form of constant propagation yet. */
+/// \brief Loads one cache line of data from the specified address to a location
+/// closer to the processor.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// void _mm_prefetch(const void * a, const int sel);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c PREFETCHNTA instruction.
+///
+/// \param a
+/// A pointer to a memory location containing a cache line of data.
+/// \param sel
+/// A predefined integer constant specifying the type of prefetch operation:
+/// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint.
+/// The PREFETCHNTA instruction will be generated.
+/// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will
+/// be generated.
+/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will
+/// be generated.
+/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
+/// be generated.
#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
#endif
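A sketch of how the macro is typically used to prefetch ahead of a streaming read; the lookahead distance of 64 floats and the once-per-16-iterations cadence are arbitrary illustrations, not tuned values:

#include <xmmintrin.h>

float sum_with_prefetch(const float *p, int n) {
    float s = 0.0f;
    for (int i = 0; i < n; ++i) {
        if ((i & 15) == 0)  /* one hint per cache line's worth of work */
            _mm_prefetch((const char *)(p + i + 64), _MM_HINT_T0);
        s += p[i];
    }
    return s;
}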
+/// \brief Stores a 64-bit integer in the specified aligned memory location. To
+/// minimize caching, the data is flagged as non-temporal (unlikely to be
+/// used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c MOVNTQ instruction.
+///
+/// \param __p
+/// A pointer to an aligned memory location used to store the register value.
+/// \param __a
+/// A 64-bit integer containing the value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_pi(__m64 *__p, __m64 __a)
{
__builtin_ia32_movntq(__p, __a);
}
+/// \brief Moves packed float values from a 128-bit vector of [4 x float] to a
+/// 128-bit aligned memory location. To minimize caching, the data is flagged
+/// as non-temporal (unlikely to be used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVNTPS / MOVNTPS instruction.
+///
+/// \param __p
+/// A pointer to a 128-bit aligned memory location that will receive the
+/// float values.
+/// \param __a
+/// A 128-bit vector of [4 x float] containing the values to be moved.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ps(float *__p, __m128 __a)
{
- __builtin_ia32_movntps(__p, __a);
+ __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
}
+/// \brief Forces strong memory ordering (serialization) between store
+/// instructions preceding this instruction and store instructions following
+/// this instruction, ensuring the system completes all previous stores
+/// before executing subsequent stores.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c SFENCE instruction.
+///
static __inline__ void __DEFAULT_FN_ATTRS
_mm_sfence(void)
{
__builtin_ia32_sfence();
}
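Non-temporal stores are weakly ordered, so producers typically pair them with an sfence before publishing the data. A sketch, assuming dst is 16-byte aligned and n is a multiple of 4:

#include <xmmintrin.h>

void fill_nt(float *dst, int n, float v) {  /* dst 16-byte aligned, n % 4 == 0 */
    __m128 x = _mm_set1_ps(v);
    for (int i = 0; i < n; i += 4)
        _mm_stream_ps(dst + i, x);          /* bypasses the cache */
    _mm_sfence();                           /* make the stores globally visible */
}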
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_extract_pi16(__m64 __a, int __n)
-{
- __v4hi __b = (__v4hi)__a;
- return (unsigned short)__b[__n & 3];
-}
-
-static __inline__ __m64 __DEFAULT_FN_ATTRS
-_mm_insert_pi16(__m64 __a, int __d, int __n)
-{
- __v4hi __b = (__v4hi)__a;
- __b[__n & 3] = __d;
- return (__m64)__b;
-}
-
+/// \brief Extracts a 16-bit element from a 64-bit vector of [4 x i16] and
+/// returns it, as specified by the immediate integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPEXTRW / PEXTRW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16].
+/// \param __n
+/// An immediate integer operand that determines which bits are extracted:
+/// 0: Bits [15:0] are copied to the destination.
+/// 1: Bits [31:16] are copied to the destination.
+/// 2: Bits [47:32] are copied to the destination.
+/// 3: Bits [63:48] are copied to the destination.
+/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
+#define _mm_extract_pi16(a, n) __extension__ ({ \
+ (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n); })
+
+/// \brief Copies data from the 64-bit vector of [4 x i16] to the destination,
+/// and inserts the lower 16 bits of an integer operand at the 16-bit offset
+/// specified by the immediate operand __n.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VPINSRW / PINSRW instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16].
+/// \param __d
+/// An integer. The lower 16-bit value from this operand is written to the
+/// destination at the offset specified by operand __n.
+/// \param __n
+/// An immediate integer operand that determines which bits in the
+/// destination are used:
+/// 0: Bits [15:0] are copied to the destination.
+/// 1: Bits [31:16] are copied to the destination.
+/// 2: Bits [47:32] are copied to the destination.
+/// 3: Bits [63:48] are copied to the destination.
+/// The remaining bits in the destination are copied from the corresponding
+/// bits in operand __a.
+/// \returns A 64-bit integer vector containing the copied packed data from the
+/// operands.
+#define _mm_insert_pi16(a, d, n) __extension__ ({ \
+ (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n); })
+
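These are now macros so that n reaches the builtin as a compile-time constant. A usage sketch; remember to leave MMX state with _mm_empty before touching x87 floating point:

#include <xmmintrin.h>
#include <stdio.h>

int main(void) {
    __m64 v = _mm_set_pi16(4, 3, 2, 1);   /* lanes 3..0 */
    __m64 w = _mm_insert_pi16(v, 99, 2);  /* lane 2 := 99 */
    int   x = _mm_extract_pi16(w, 2);
    _mm_empty();                          /* EMMS: exit MMX state */
    printf("%d\n", x);                    /* 99 */
    return 0;
}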
+/// \brief Compares each of the corresponding packed 16-bit integer values of
+/// the 64-bit integer vectors, and writes the greater value to the
+/// corresponding bits in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMAXSW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the comparison results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_max_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
}
+/// \brief Compares each of the corresponding packed 8-bit unsigned integer
+/// values of the 64-bit integer vectors, and writes the greater value to the
+/// corresponding bits in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMAXUB instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the comparison results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_max_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
}
+/// \brief Compares each of the corresponding packed 16-bit integer values of
+/// the 64-bit integer vectors, and writes the lesser value to the
+/// corresponding bits in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMINSW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the comparison results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_min_pi16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
}
+/// \brief Compares each of the corresponding packed 8-bit unsigned integer
+/// values of the 64-bit integer vectors, and writes the lesser value to the
+/// corresponding bits in the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMINUB instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the comparison results.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_min_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
}
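A hedged sketch of the min/max intrinsics above in use (values illustrative):

__m64 a = _mm_set_pi16(-1, 5, -3, 9);
__m64 b = _mm_set_pi16( 2, -8, 4, 0);
__m64 mx = _mm_max_pi16(a, b);        /* signed, per element (low to high): 9, 4, 5, 2 */
__m64 mn = _mm_min_pu8(a, b);         /* unsigned, per byte */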
+/// \brief Takes the most significant bit from each 8-bit element in a 64-bit
+/// integer vector to create an 8-bit mask value, zero-extends it to a 32-bit
+/// integer, and writes it to the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMOVMSKB instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing the values with bits to be extracted.
+/// \returns The most significant bit from each 8-bit element in the operand,
+/// written to bits [7:0]; bits [31:8] are set to zero.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_pi8(__m64 __a)
{
return __builtin_ia32_pmovmskb((__v8qi)__a);
}
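Example (inputs illustrative): the mask gathers each byte's sign bit into the low byte of the result:

__m64 v = _mm_set_pi8(-1, 0, -1, 0, 0, 0, 0, -1); /* MSB set in bytes 7, 5, 0 */
int m = _mm_movemask_pi8(v);                      /* m == 0xA1 */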
+/// \brief Multiplies packed 16-bit unsigned integer values and writes the
+/// high-order 16 bits of each 32-bit product to the corresponding bits in
+/// the destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PMULHUW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the products of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_mulhi_pu16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
}
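A small worked case showing why the high half of the product matters:

__m64 a = _mm_set1_pi16((short)0xFFFF);  /* 65535, unsigned, in each element */
__m64 b = _mm_set1_pi16(2);
__m64 h = _mm_mulhi_pu16(a, b);          /* 65535 * 2 = 0x1FFFE -> high word 0x0001 */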
+/// \brief Shuffles the 4 16-bit integers from a 64-bit integer vector to the
+/// destination, as specified by the immediate value operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSHUFW instruction.
+///
+/// \code
+/// __m64 _mm_shuffle_pi16(__m64 a, const int n);
+/// \endcode
+///
+/// \param a
+/// A 64-bit integer vector containing the values to be shuffled.
+/// \param n
+/// An immediate value containing an 8-bit value specifying which elements to
+/// copy from a. The destination elements within the 64-bit result are
+/// assigned values as follows:
+/// Bits [1:0] are used to assign values to bits [15:0] in the destination.
+/// Bits [3:2] are used to assign values to bits [31:16] in the destination.
+/// Bits [5:4] are used to assign values to bits [47:32] in the destination.
+/// Bits [7:6] are used to assign values to bits [63:48] in the destination.
+/// Bit value assignments:
+/// 00: assigned from bits [15:0] of a.
+/// 01: assigned from bits [31:16] of a.
+/// 10: assigned from bits [47:32] of a.
+/// 11: assigned from bits [63:48] of a.
+/// \returns A 64-bit integer vector containing the shuffled values.
#define _mm_shuffle_pi16(a, n) __extension__ ({ \
(__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)); })
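The _MM_SHUFFLE helper (defined later in this header) composes the 8-bit selector; for example, reversing the four elements:

__m64 v = _mm_set_pi16(3, 2, 1, 0);                     /* element i holds i */
__m64 r = _mm_shuffle_pi16(v, _MM_SHUFFLE(0, 1, 2, 3)); /* 3, 2, 1, 0 (reversed) */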
+/// \brief Conditionally copies the values from each 8-bit element in the first
+/// 64-bit integer vector operand to the specified memory location, as
+/// specified by the most significant bit in the corresponding element in the
+/// second 64-bit integer vector operand. To minimize caching, the data is
+/// flagged as non-temporal (unlikely to be used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c MASKMOVQ instruction.
+///
+/// \param __d
+/// A 64-bit integer vector containing the values with elements to be copied.
+/// \param __n
+/// A 64-bit integer vector operand. The most significant bit from each 8-bit
+/// element determines whether the corresponding element in operand __d is
+/// copied. If the most significant bit of a given element is 1, the
+/// corresponding element in operand __d is copied.
+/// \param __p
+/// A pointer to a 64-bit memory location that will receive the conditionally
+/// copied integer values. The address of the memory location does not have
+/// to be aligned.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
{
__builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
}
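A sketch of the masked byte store (buffer and values illustrative):

char buf[8] = {0};
__m64 data = _mm_set1_pi8(0x55);
__m64 mask = _mm_set_pi8(-128, 0, 0, 0, 0, -128, 0, -128); /* select bytes 7, 2, 0 */
_mm_maskmove_si64(data, mask, buf);   /* only buf[0], buf[2], buf[7] are written */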
+/// \brief Computes the rounded averages of the packed unsigned 8-bit integer
+/// values and writes the averages to the corresponding bits in the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PAVGB instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the averages of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_avg_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
}
+/// \brief Computes the rounded averages of the packed unsigned 16-bit integer
+/// values and writes the averages to the corresponding bits in the
+/// destination.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PAVGW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector containing the averages of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_avg_pu16(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
}
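The averages round upward on ties, as a quick check shows:

__m64 a = _mm_set1_pi8(1), b = _mm_set1_pi8(2);
__m64 r = _mm_avg_pu8(a, b);          /* (1 + 2 + 1) >> 1 = 2 in every byte */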
+/// \brief Subtracts the corresponding 8-bit unsigned integer values of the two
+/// 64-bit vector operands and computes the absolute value of each
+/// difference. The sum of the eight absolute differences is then written to
+/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PSADBW instruction.
+///
+/// \param __a
+/// A 64-bit integer vector containing one of the source operands.
+/// \param __b
+/// A 64-bit integer vector containing one of the source operands.
+/// \returns A 64-bit integer vector whose lower 16 bits contain the sum of the
+/// absolute differences between the operands. The upper 48 bits are
+/// cleared.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_sad_pu8(__m64 __a, __m64 __b)
{
return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
}
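A sketch of the sum-of-absolute-differences result (low word of the __m64):

__m64 a = _mm_set1_pi8(10), b = _mm_set1_pi8(7);
int sad = _mm_cvtsi64_si32(_mm_sad_pu8(a, b));  /* 8 * |10 - 7| = 24 */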
+/// \brief Returns the contents of the MXCSR register as a 32-bit unsigned
+/// integer value. There are several groups of macros associated with this
+/// intrinsic, including:
+/// * For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,
+/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,
+/// _MM_EXCEPT_INEXACT. There is a convenience wrapper
+/// _MM_GET_EXCEPTION_STATE().
+/// * For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,
+/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.
+/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().
+/// * For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,
+/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper
+/// _MM_GET_ROUNDING_MODE().
+/// * For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.
+/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().
+/// * For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,
+/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper
+/// _MM_GET_DENORMALS_ZERO_MODE().
+///
+/// For example, the expression below checks if an overflow exception has
+/// occurred:
+/// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )
+///
+/// The following example gets the current rounding mode:
+/// _MM_GET_ROUNDING_MODE()
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VSTMXCSR / STMXCSR instruction.
+///
+/// \returns A 32-bit unsigned integer containing the contents of the MXCSR
+/// register.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm_getcsr(void)
{
return __builtin_ia32_stmxcsr();
}
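Putting the wrappers mentioned above together (a hedged sketch):

if (_mm_getcsr() & _MM_EXCEPT_OVERFLOW) {
  /* an overflow has been recorded since the sticky flag was last cleared */
}
unsigned rm = _MM_GET_ROUNDING_MODE();  /* e.g. compares equal to _MM_ROUND_NEAREST */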
+/// \brief Sets the MXCSR register with the 32-bit unsigned integer value. There
+/// are several groups of macros associated with this intrinsic, including:
+/// * For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,
+/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,
+/// _MM_EXCEPT_INEXACT. There is a convenience wrapper
+/// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.
+/// * For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,
+/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.
+/// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one
+/// of these macros.
+/// * For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,
+/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper
+/// _MM_SET_ROUNDING_MODE(x) where x is one of these macros.
+/// * For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.
+/// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is
+/// one of these macros.
+/// * For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,
+/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper
+/// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.
+///
+/// For example, the following expression causes subsequent floating-point
+/// operations to round up:
+/// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)
+///
+/// The following example sets the DAZ and FTZ flags:
+/// void setFlags() {
+/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+/// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+/// }
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VLDMXCSR / LDMXCSR instruction.
+///
+/// \param __i
+/// A 32-bit unsigned integer value to be written to the MXCSR register.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_setcsr(unsigned int __i)
{
__builtin_ia32_ldmxcsr(__i);
}
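A corresponding write-side sketch using the convenience wrappers:

_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);         /* FTZ on */
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); /* DAZ on */
_MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);       /* truncate */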
+/// \brief Selects 4 float values from the two 128-bit operands of [4 x float], as
+/// specified by the immediate value operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VSHUFPS / SHUFPS instruction.
+///
+/// \param a
+/// A 128-bit vector of [4 x float].
+/// \param b
+/// A 128-bit vector of [4 x float].
+/// \param mask
+/// An immediate value containing an 8-bit value specifying which elements to
+/// copy from a and b.
+/// Bits [3:0] specify the values copied from operand a.
+/// Bits [7:4] specify the values copied from operand b. The destinations
+/// within the 128-bit destination are assigned values as follows:
+/// Bits [1:0] are used to assign values to bits [31:0] in the destination.
+/// Bits [3:2] are used to assign values to bits [63:32] in the destination.
+/// Bits [5:4] are used to assign values to bits [95:64] in the destination.
+/// Bits [7:6] are used to assign values to bits [127:96] in the destination.
+/// Bit value assignments:
+/// 00: Bits [31:0] copied from the specified operand.
+/// 01: Bits [63:32] copied from the specified operand.
+/// 10: Bits [95:64] copied from the specified operand.
+/// 11: Bits [127:96] copied from the specified operand.
+/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
(__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
- (mask) & 0x3, ((mask) & 0xc) >> 2, \
- (((mask) & 0x30) >> 4) + 4, \
- (((mask) & 0xc0) >> 6) + 4); })
-
+ 0 + (((mask) >> 0) & 0x3), \
+ 0 + (((mask) >> 2) & 0x3), \
+ 4 + (((mask) >> 4) & 0x3), \
+ 4 + (((mask) >> 6) & 0x3)); })
+
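The rewritten selector arithmetic above is easiest to read through _MM_SHUFFLE; an illustrative call:

__m128 a = _mm_setr_ps(0.f, 1.f, 2.f, 3.f);
__m128 b = _mm_setr_ps(4.f, 5.f, 6.f, 7.f);
/* _MM_SHUFFLE(3, 1, 2, 0): r = { a[0], a[2], b[1], b[3] } = { 0, 2, 5, 7 } */
__m128 r = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 2, 0));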
+/// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of
+/// [4 x float] and interleaves them into a 128-bit vector of [4 x
+/// float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUNPCKHPS / UNPCKHPS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// Bits [95:64] are written to bits [31:0] of the destination.
+/// Bits [127:96] are written to bits [95:64] of the destination.
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// Bits [95:64] are written to bits [63:32] of the destination.
+/// Bits [127:96] are written to bits [127:96] of the destination.
+/// \returns A 128-bit vector of [4 x float] containing the interleaved values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_unpackhi_ps(__m128 __a, __m128 __b)
{
- return __builtin_shufflevector(__a, __b, 2, 6, 3, 7);
-}
-
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);
+}
+
+/// \brief Unpacks the low-order (index 0,1) values from two 128-bit vectors of
+/// [4 x float] and interleaves them into a 128-bit vector of [4 x
+/// float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUNPCKLPS / UNPCKLPS instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x float].
+/// Bits [31:0] are written to bits [31:0] of the destination.
+/// Bits [63:32] are written to bits [95:64] of the destination.
+/// \param __b
+/// A 128-bit vector of [4 x float].
+/// Bits [31:0] are written to bits [63:32] of the destination.
+/// Bits [63:32] are written to bits [127:96] of the destination.
+/// \returns A 128-bit vector of [4 x float] containing the interleaved values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_unpacklo_ps(__m128 __a, __m128 __b)
{
- return __builtin_shufflevector(__a, __b, 0, 4, 1, 5);
-}
-
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);
+}
+
+/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower
+/// 32 bits are set to the lower 32 bits of the second parameter. The upper
+/// 96 bits are set to the upper 96 bits of the first parameter.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVSS / MOVSS instruction.
+///
+/// \param __a
+/// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are
+/// written to the upper 96 bits of the result.
+/// \param __b
+/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are
+/// written to the lower 32 bits of the result.
+/// \returns A 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_move_ss(__m128 __a, __m128 __b)
{
- return __builtin_shufflevector(__a, __b, 4, 1, 2, 3);
-}
-
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 4, 1, 2, 3);
+}
+
+/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower
+/// 64 bits are set to the upper 64 bits of the second parameter. The upper
+/// 64 bits are set to the upper 64 bits of the first parameter.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUNPCKHPD / UNPCKHPD instruction.
+///
+/// \param __a
+/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
+/// written to the upper 64 bits of the result.
+/// \param __b
+/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
+/// written to the lower 64 bits of the result.
+/// \returns A 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movehl_ps(__m128 __a, __m128 __b)
{
- return __builtin_shufflevector(__a, __b, 6, 7, 2, 3);
-}
-
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);
+}
+
+/// \brief Constructs a 128-bit floating-point vector of [4 x float]. The lower
+/// 64 bits are set to the lower 64 bits of the first parameter. The upper
+/// 64 bits are set to the lower 64 bits of the second parameter.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VUNPCKLPD / UNPCKLPD instruction.
+///
+/// \param __a
+/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
+/// written to the lower 64 bits of the result.
+/// \param __b
+/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
+/// written to the upper 64 bits of the result.
+/// \returns A 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movelh_ps(__m128 __a, __m128 __b)
{
- return __builtin_shufflevector(__a, __b, 0, 1, 4, 5);
+ return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);
}
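The two half-moving intrinsics above, on illustrative inputs:

__m128 a = _mm_setr_ps(0.f, 1.f, 2.f, 3.f);
__m128 b = _mm_setr_ps(4.f, 5.f, 6.f, 7.f);
__m128 hl = _mm_movehl_ps(a, b);  /* { 6, 7, 2, 3 }: high half of b, high half of a */
__m128 lh = _mm_movelh_ps(a, b);  /* { 0, 1, 4, 5 }: low half of a, low half of b */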
+/// \brief Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x
+/// float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 64-bit vector of [4 x i16]. The elements of the destination are copied
+/// from the corresponding elements in this operand.
+/// \returns A 128-bit vector of [4 x float] containing the copied and converted
+/// values from the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpi16_ps(__m64 __a)
{
@@ -846,6 +2631,18 @@ _mm_cvtpi16_ps(__m64 __a)
return __r;
}
+/// \brief Converts a 64-bit vector of 16-bit unsigned integer values into a
+/// 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 64-bit vector of 16-bit unsigned integer values. The elements of the
+/// destination are copied from the corresponding elements in this operand.
+/// \returns A 128-bit vector of [4 x float] containing the copied and converted
+/// values from the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpu16_ps(__m64 __a)
{
@@ -863,6 +2660,18 @@ _mm_cvtpu16_ps(__m64 __a)
return __r;
}
+/// \brief Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
+/// into a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 64-bit vector of [8 x i8]. The elements of the destination are copied
+/// from the corresponding lower 4 elements in this operand.
+/// \returns A 128-bit vector of [4 x float] containing the copied and converted
+/// values from the operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpi8_ps(__m64 __a)
{
@@ -875,6 +2684,19 @@ _mm_cvtpi8_ps(__m64 __a)
return _mm_cvtpi16_ps(__b);
}
+/// \brief Converts the lower four unsigned 8-bit integer values from a 64-bit
+/// vector of [8 x u8] into a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 64-bit vector of unsigned 8-bit integer values. The elements of the
+/// destination are copied from the corresponding lower 4 elements in this
+/// operand.
+/// \returns A 128-bit vector of [4 x float] containing the copied and converted
+/// values from the source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpu8_ps(__m64 __a)
{
@@ -886,6 +2708,22 @@ _mm_cvtpu8_ps(__m64 __a)
return _mm_cvtpi16_ps(__b);
}
+/// \brief Converts the two 32-bit signed integer values from each 64-bit vector
+/// operand of [2 x i32] into a 128-bit vector of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPI2PS + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 64-bit vector of [2 x i32]. The lower elements of the destination are
+/// copied from the elements in this operand.
+/// \param __b
+/// A 64-bit vector of [2 x i32]. The upper elements of the destination are
+/// copied from the elements in this operand.
+/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
+/// copied and converted values from the first operand. The upper 64 bits
+/// contain the copied and converted values from the second operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
{
@@ -898,6 +2736,22 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
return _mm_cvtpi32_ps(__c, __a);
}
+/// \brief Converts each single-precision floating-point element of a 128-bit
+/// floating-point vector of [4 x float] into a 16-bit signed integer, and
+/// packs the results into a 64-bit integer vector of [4 x i16]. If the
+/// floating-point element is NaN or infinity, or if the floating-point
+/// element is greater than 0x7FFFFFFF or less than -0x8000, it is converted
+/// to 0x8000. Otherwise, if the floating-point element is greater
+/// than 0x7FFF, it is converted to 0x7FFF.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPS2PI + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 128-bit floating-point vector of [4 x float].
+/// \returns A 64-bit integer vector of [4 x i16] containing the converted
+/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtps_pi16(__m128 __a)
{
@@ -910,6 +2764,23 @@ _mm_cvtps_pi16(__m128 __a)
return _mm_packs_pi32(__b, __c);
}
+/// \brief Converts each single-precision floating-point element of a 128-bit
+/// floating-point vector of [4 x float] into an 8-bit signed integer, and
+/// packs the results into the lower 32 bits of a 64-bit integer vector of
+/// [8 x i8]. The upper 32 bits of the vector are set to 0. If the
+/// floating-point element is NaN or infinity, or if the floating-point
+/// element is greater than 0x7FFFFFFF or less than -0x80, it is converted
+/// to 0x80. Otherwise, if the floating-point element is greater
+/// than 0x7F, it is converted to 0x7F.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c CVTPS2PI + \c COMPOSITE instruction.
+///
+/// \param __a
+/// A 128-bit floating-point vector of [4 x float].
+/// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the
+/// converted values and the upper 32 bits are set to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS
_mm_cvtps_pi8(__m128 __a)
{
@@ -921,16 +2792,28 @@ _mm_cvtps_pi8(__m128 __a)
return _mm_packs_pi16(__b, __c);
}
+/// \brief Extracts the sign bits from each single-precision floating-point
+/// element of a 128-bit floating-point vector of [4 x float] and returns the
+/// sign bits in bits [3:0] of the result. Bits [31:4] of the result are set
+/// to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c VMOVMSKPS / MOVMSKPS instruction.
+///
+/// \param __a
+/// A 128-bit floating-point vector of [4 x float].
+/// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
+/// single-precision floating-point element of the parameter. Bits [31:4] are
+/// set to zero.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_ps(__m128 __a)
{
- return __builtin_ia32_movmskps(__a);
+ return __builtin_ia32_movmskps((__v4sf)__a);
}
-#ifdef _MSC_VER
-#define _MM_ALIGN16 __declspec(align(16))
-#endif
+#define _MM_ALIGN16 __attribute__((aligned(16)))
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
@@ -1003,7 +2886,7 @@ do { \
#undef __DEFAULT_FN_ATTRS
/* Ugly hack for backwards-compatibility (compatible with gcc) */
-#if defined(__SSE2__) && !__has_feature(modules)
+#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)
#include <emmintrin.h>
#endif
diff --git a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
index f07f51c27515..bdf0cec32645 100644
--- a/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
+++ b/contrib/llvm/tools/clang/lib/Headers/xopintrin.h
@@ -198,13 +198,13 @@ _mm_hsubq_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
+ return (__m128i)__builtin_ia32_vpcmov((__v2di)__A, (__v2di)__B, (__v2di)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
+ return (__m256i)__builtin_ia32_vpcmov_256((__v4di)__A, (__v4di)__B, (__v4di)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
diff --git a/contrib/llvm/tools/clang/lib/Index/CodegenNameGenerator.cpp b/contrib/llvm/tools/clang/lib/Index/CodegenNameGenerator.cpp
new file mode 100644
index 000000000000..92740b05703b
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/CodegenNameGenerator.cpp
@@ -0,0 +1,195 @@
+//===- CodegenNameGenerator.cpp - Codegen name generation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Determines the name that the symbol will get for code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Index/CodegenNameGenerator.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/Mangle.h"
+#include "clang/AST/VTableBuilder.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace clang::index;
+
+struct CodegenNameGenerator::Implementation {
+ std::unique_ptr<MangleContext> MC;
+ llvm::DataLayout DL;
+
+ Implementation(ASTContext &Ctx)
+ : MC(Ctx.createMangleContext()),
+ DL(Ctx.getTargetInfo().getDataLayout()) {}
+
+ bool writeName(const Decl *D, raw_ostream &OS) {
+ // First apply frontend mangling.
+ SmallString<128> FrontendBuf;
+ llvm::raw_svector_ostream FrontendBufOS(FrontendBuf);
+ if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+ if (FD->isDependentContext())
+ return true;
+ if (writeFuncOrVarName(FD, FrontendBufOS))
+ return true;
+ } else if (auto *VD = dyn_cast<VarDecl>(D)) {
+ if (writeFuncOrVarName(VD, FrontendBufOS))
+ return true;
+ } else if (auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
+ MC->mangleObjCMethodNameWithoutSize(MD, OS);
+ return false;
+ } else if (auto *ID = dyn_cast<ObjCInterfaceDecl>(D)) {
+ writeObjCClassName(ID, FrontendBufOS);
+ } else {
+ return true;
+ }
+
+ // Now apply backend mangling.
+ llvm::Mangler::getNameWithPrefix(OS, FrontendBufOS.str(), DL);
+ return false;
+ }
+
+ std::string getName(const Decl *D) {
+ std::string Name;
+ {
+ llvm::raw_string_ostream OS(Name);
+ writeName(D, OS);
+ }
+ return Name;
+ }
+
+ std::vector<std::string> getAllManglings(const Decl *D) {
+ if (!(isa<CXXRecordDecl>(D) || isa<CXXMethodDecl>(D)))
+ return {};
+
+ const NamedDecl *ND = cast<NamedDecl>(D);
+
+ ASTContext &Ctx = ND->getASTContext();
+ std::unique_ptr<MangleContext> M(Ctx.createMangleContext());
+
+ std::vector<std::string> Manglings;
+
+ auto hasDefaultCXXMethodCC = [](ASTContext &C, const CXXMethodDecl *MD) {
+ auto DefaultCC = C.getDefaultCallingConvention(/*IsVariadic=*/false,
+ /*IsCXXMethod=*/true);
+ auto CC = MD->getType()->getAs<FunctionProtoType>()->getCallConv();
+ return CC == DefaultCC;
+ };
+
+ if (const auto *CD = dyn_cast_or_null<CXXConstructorDecl>(ND)) {
+ Manglings.emplace_back(getMangledStructor(CD, Ctor_Base));
+
+ if (Ctx.getTargetInfo().getCXXABI().isItaniumFamily())
+ if (!CD->getParent()->isAbstract())
+ Manglings.emplace_back(getMangledStructor(CD, Ctor_Complete));
+
+ if (Ctx.getTargetInfo().getCXXABI().isMicrosoft())
+ if (CD->hasAttr<DLLExportAttr>() && CD->isDefaultConstructor())
+ if (!(hasDefaultCXXMethodCC(Ctx, CD) && CD->getNumParams() == 0))
+ Manglings.emplace_back(getMangledStructor(CD, Ctor_DefaultClosure));
+ } else if (const auto *DD = dyn_cast_or_null<CXXDestructorDecl>(ND)) {
+ Manglings.emplace_back(getMangledStructor(DD, Dtor_Base));
+ if (Ctx.getTargetInfo().getCXXABI().isItaniumFamily()) {
+ Manglings.emplace_back(getMangledStructor(DD, Dtor_Complete));
+ if (DD->isVirtual())
+ Manglings.emplace_back(getMangledStructor(DD, Dtor_Deleting));
+ }
+ } else if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(ND)) {
+ Manglings.emplace_back(getName(ND));
+ if (MD->isVirtual())
+ if (const auto *TIV = Ctx.getVTableContext()->getThunkInfo(MD))
+ for (const auto &T : *TIV)
+ Manglings.emplace_back(getMangledThunk(MD, T));
+ }
+
+ return Manglings;
+ }
+
+private:
+ bool writeFuncOrVarName(const NamedDecl *D, raw_ostream &OS) {
+ if (MC->shouldMangleDeclName(D)) {
+ if (const auto *CtorD = dyn_cast<CXXConstructorDecl>(D))
+ MC->mangleCXXCtor(CtorD, Ctor_Complete, OS);
+ else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
+ MC->mangleCXXDtor(DtorD, Dtor_Complete, OS);
+ else
+ MC->mangleName(D, OS);
+ return false;
+ } else {
+ IdentifierInfo *II = D->getIdentifier();
+ if (!II)
+ return true;
+ OS << II->getName();
+ return false;
+ }
+ }
+
+ void writeObjCClassName(const ObjCInterfaceDecl *D, raw_ostream &OS) {
+ OS << getClassSymbolPrefix();
+ OS << D->getObjCRuntimeNameAsString();
+ }
+
+ static StringRef getClassSymbolPrefix() {
+ return "OBJC_CLASS_$_";
+ }
+
+ std::string getMangledStructor(const NamedDecl *ND, unsigned StructorType) {
+ std::string FrontendBuf;
+ llvm::raw_string_ostream FOS(FrontendBuf);
+
+ if (const auto *CD = dyn_cast_or_null<CXXConstructorDecl>(ND))
+ MC->mangleCXXCtor(CD, static_cast<CXXCtorType>(StructorType), FOS);
+ else if (const auto *DD = dyn_cast_or_null<CXXDestructorDecl>(ND))
+ MC->mangleCXXDtor(DD, static_cast<CXXDtorType>(StructorType), FOS);
+
+ std::string BackendBuf;
+ llvm::raw_string_ostream BOS(BackendBuf);
+
+ llvm::Mangler::getNameWithPrefix(BOS, FOS.str(), DL);
+
+ return BOS.str();
+ }
+
+ std::string getMangledThunk(const CXXMethodDecl *MD, const ThunkInfo &T) {
+ std::string FrontendBuf;
+ llvm::raw_string_ostream FOS(FrontendBuf);
+
+ MC->mangleThunk(MD, T, FOS);
+
+ std::string BackendBuf;
+ llvm::raw_string_ostream BOS(BackendBuf);
+
+ llvm::Mangler::getNameWithPrefix(BOS, FOS.str(), DL);
+
+ return BOS.str();
+ }
+};
+
+CodegenNameGenerator::CodegenNameGenerator(ASTContext &Ctx)
+ : Impl(new Implementation(Ctx)) {
+}
+
+CodegenNameGenerator::~CodegenNameGenerator() {
+}
+
+bool CodegenNameGenerator::writeName(const Decl *D, raw_ostream &OS) {
+ return Impl->writeName(D, OS);
+}
+
+std::string CodegenNameGenerator::getName(const Decl *D) {
+ return Impl->getName(D);
+}
+
+std::vector<std::string> CodegenNameGenerator::getAllManglings(const Decl *D) {
+ return Impl->getAllManglings(D);
+}
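A hypothetical caller of the new interface (Ctx and D are placeholder names for an ASTContext and a Decl already in scope; only members defined above are used):

clang::index::CodegenNameGenerator NameGen(Ctx);
std::string Linkage = NameGen.getName(D);       // empty if the decl cannot be mangled
std::vector<std::string> All = NameGen.getAllManglings(D);  // C++ records/methods only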
diff --git a/contrib/llvm/tools/clang/lib/Index/CommentToXML.cpp b/contrib/llvm/tools/clang/lib/Index/CommentToXML.cpp
index 15f1696cbe91..c4beef249466 100644
--- a/contrib/llvm/tools/clang/lib/Index/CommentToXML.cpp
+++ b/contrib/llvm/tools/clang/lib/Index/CommentToXML.cpp
@@ -592,9 +592,8 @@ void getSourceTextOfDeclaration(const DeclInfo *ThisDecl,
void CommentASTToXMLConverter::formatTextOfDeclaration(
const DeclInfo *DI, SmallString<128> &Declaration) {
- // FIXME. formatting API expects null terminated input string.
- // There might be more efficient way of doing this.
- std::string StringDecl = Declaration.str();
+ // Formatting API expects null terminated input string.
+ StringRef StringDecl(Declaration.c_str(), Declaration.size());
// Formatter specific code.
// Form a unique in memory buffer name.
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp b/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp
new file mode 100644
index 000000000000..4908d852e896
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp
@@ -0,0 +1,359 @@
+//===- IndexBody.cpp - Indexing statements --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexingContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+
+using namespace clang;
+using namespace clang::index;
+
+namespace {
+
+class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
+ IndexingContext &IndexCtx;
+ const NamedDecl *Parent;
+ const DeclContext *ParentDC;
+ SmallVector<Stmt*, 16> StmtStack;
+
+ typedef RecursiveASTVisitor<BodyIndexer> base;
+public:
+ BodyIndexer(IndexingContext &indexCtx,
+ const NamedDecl *Parent, const DeclContext *DC)
+ : IndexCtx(indexCtx), Parent(Parent), ParentDC(DC) { }
+
+ bool shouldWalkTypesOfTypeLocs() const { return false; }
+
+ bool dataTraverseStmtPre(Stmt *S) {
+ StmtStack.push_back(S);
+ return true;
+ }
+
+ bool dataTraverseStmtPost(Stmt *S) {
+ assert(StmtStack.back() == S);
+ StmtStack.pop_back();
+ return true;
+ }
+
+ bool TraverseTypeLoc(TypeLoc TL) {
+ IndexCtx.indexTypeLoc(TL, Parent, ParentDC);
+ return true;
+ }
+
+ bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
+ IndexCtx.indexNestedNameSpecifierLoc(NNS, Parent, ParentDC);
+ return true;
+ }
+
+ SymbolRoleSet getRolesForRef(const Expr *E,
+ SmallVectorImpl<SymbolRelation> &Relations) {
+ SymbolRoleSet Roles{};
+ assert(!StmtStack.empty() && E == StmtStack.back());
+ if (StmtStack.size() == 1)
+ return Roles;
+ auto It = StmtStack.end()-2;
+ while (isa<CastExpr>(*It) || isa<ParenExpr>(*It)) {
+ if (auto ICE = dyn_cast<ImplicitCastExpr>(*It)) {
+ if (ICE->getCastKind() == CK_LValueToRValue)
+ Roles |= (unsigned)SymbolRole::Read;
+ }
+ if (It == StmtStack.begin())
+ break;
+ --It;
+ }
+ const Stmt *Parent = *It;
+
+ if (auto BO = dyn_cast<BinaryOperator>(Parent)) {
+ if (BO->getOpcode() == BO_Assign && BO->getLHS()->IgnoreParenCasts() == E)
+ Roles |= (unsigned)SymbolRole::Write;
+
+ } else if (auto UO = dyn_cast<UnaryOperator>(Parent)) {
+ if (UO->isIncrementDecrementOp()) {
+ Roles |= (unsigned)SymbolRole::Read;
+ Roles |= (unsigned)SymbolRole::Write;
+ } else if (UO->getOpcode() == UO_AddrOf) {
+ Roles |= (unsigned)SymbolRole::AddressOf;
+ }
+
+ } else if (auto CA = dyn_cast<CompoundAssignOperator>(Parent)) {
+ if (CA->getLHS()->IgnoreParenCasts() == E) {
+ Roles |= (unsigned)SymbolRole::Read;
+ Roles |= (unsigned)SymbolRole::Write;
+ }
+
+ } else if (auto CE = dyn_cast<CallExpr>(Parent)) {
+ if (CE->getCallee()->IgnoreParenCasts() == E) {
+ addCallRole(Roles, Relations);
+ if (auto *ME = dyn_cast<MemberExpr>(E)) {
+ if (auto *CXXMD = dyn_cast_or_null<CXXMethodDecl>(ME->getMemberDecl()))
+ if (CXXMD->isVirtual() && !ME->hasQualifier()) {
+ Roles |= (unsigned)SymbolRole::Dynamic;
+ auto BaseTy = ME->getBase()->IgnoreImpCasts()->getType();
+ if (!BaseTy.isNull())
+ if (auto *CXXRD = BaseTy->getPointeeCXXRecordDecl())
+ Relations.emplace_back((unsigned)SymbolRole::RelationReceivedBy,
+ CXXRD);
+ }
+ }
+ } else if (auto CXXOp = dyn_cast<CXXOperatorCallExpr>(CE)) {
+ if (CXXOp->getNumArgs() > 0 && CXXOp->getArg(0)->IgnoreParenCasts() == E) {
+ OverloadedOperatorKind Op = CXXOp->getOperator();
+ if (Op == OO_Equal) {
+ Roles |= (unsigned)SymbolRole::Write;
+ } else if ((Op >= OO_PlusEqual && Op <= OO_PipeEqual) ||
+ Op == OO_LessLessEqual || Op == OO_GreaterGreaterEqual ||
+ Op == OO_PlusPlus || Op == OO_MinusMinus) {
+ Roles |= (unsigned)SymbolRole::Read;
+ Roles |= (unsigned)SymbolRole::Write;
+ } else if (Op == OO_Amp) {
+ Roles |= (unsigned)SymbolRole::AddressOf;
+ }
+ }
+ }
+ }
+
+ return Roles;
+ }
+
+ void addCallRole(SymbolRoleSet &Roles,
+ SmallVectorImpl<SymbolRelation> &Relations) {
+ Roles |= (unsigned)SymbolRole::Call;
+ if (auto *FD = dyn_cast<FunctionDecl>(ParentDC))
+ Relations.emplace_back((unsigned)SymbolRole::RelationCalledBy, FD);
+ else if (auto *MD = dyn_cast<ObjCMethodDecl>(ParentDC))
+ Relations.emplace_back((unsigned)SymbolRole::RelationCalledBy, MD);
+ }
+
+ bool VisitDeclRefExpr(DeclRefExpr *E) {
+ SmallVector<SymbolRelation, 4> Relations;
+ SymbolRoleSet Roles = getRolesForRef(E, Relations);
+ return IndexCtx.handleReference(E->getDecl(), E->getLocation(),
+ Parent, ParentDC, Roles, Relations, E);
+ }
+
+ bool VisitMemberExpr(MemberExpr *E) {
+ SourceLocation Loc = E->getMemberLoc();
+ if (Loc.isInvalid())
+ Loc = E->getLocStart();
+ SmallVector<SymbolRelation, 4> Relations;
+ SymbolRoleSet Roles = getRolesForRef(E, Relations);
+ return IndexCtx.handleReference(E->getMemberDecl(), Loc,
+ Parent, ParentDC, Roles, Relations, E);
+ }
+
+ bool VisitDesignatedInitExpr(DesignatedInitExpr *E) {
+ for (DesignatedInitExpr::Designator &D : llvm::reverse(E->designators())) {
+ if (D.isFieldDesignator())
+ return IndexCtx.handleReference(D.getField(), D.getFieldLoc(), Parent,
+ ParentDC, SymbolRoleSet(), {}, E);
+ }
+ return true;
+ }
+
+ bool VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) {
+ SmallVector<SymbolRelation, 4> Relations;
+ SymbolRoleSet Roles = getRolesForRef(E, Relations);
+ return IndexCtx.handleReference(E->getDecl(), E->getLocation(),
+ Parent, ParentDC, Roles, Relations, E);
+ }
+
+ bool VisitObjCMessageExpr(ObjCMessageExpr *E) {
+ auto isDynamic = [](const ObjCMessageExpr *MsgE)->bool {
+ if (MsgE->getReceiverKind() != ObjCMessageExpr::Instance)
+ return false;
+ if (auto *RecE = dyn_cast<ObjCMessageExpr>(
+ MsgE->getInstanceReceiver()->IgnoreParenCasts())) {
+ if (RecE->getMethodFamily() == OMF_alloc)
+ return false;
+ }
+ return true;
+ };
+
+ if (ObjCMethodDecl *MD = E->getMethodDecl()) {
+ SymbolRoleSet Roles{};
+ SmallVector<SymbolRelation, 2> Relations;
+ addCallRole(Roles, Relations);
+ if (E->isImplicit())
+ Roles |= (unsigned)SymbolRole::Implicit;
+
+ if (isDynamic(E)) {
+ Roles |= (unsigned)SymbolRole::Dynamic;
+ if (auto *RecD = E->getReceiverInterface())
+ Relations.emplace_back((unsigned)SymbolRole::RelationReceivedBy, RecD);
+ }
+
+ return IndexCtx.handleReference(MD, E->getSelectorStartLoc(),
+ Parent, ParentDC, Roles, Relations, E);
+ }
+ return true;
+ }
+
+ bool VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
+ if (E->isExplicitProperty())
+ return IndexCtx.handleReference(E->getExplicitProperty(), E->getLocation(),
+ Parent, ParentDC, SymbolRoleSet(), {}, E);
+
+ // No need to do a handleReference for the objc method, because there will
+ // be a message expr as part of PseudoObjectExpr.
+ return true;
+ }
+
+ bool VisitMSPropertyRefExpr(MSPropertyRefExpr *E) {
+ return IndexCtx.handleReference(E->getPropertyDecl(), E->getMemberLoc(),
+ Parent, ParentDC, SymbolRoleSet(), {}, E);
+ }
+
+ bool VisitObjCProtocolExpr(ObjCProtocolExpr *E) {
+ return IndexCtx.handleReference(E->getProtocol(), E->getProtocolIdLoc(),
+ Parent, ParentDC, SymbolRoleSet(), {}, E);
+ }
+
+ bool passObjCLiteralMethodCall(const ObjCMethodDecl *MD, const Expr *E) {
+ SymbolRoleSet Roles{};
+ SmallVector<SymbolRelation, 2> Relations;
+ addCallRole(Roles, Relations);
+ Roles |= (unsigned)SymbolRole::Implicit;
+ return IndexCtx.handleReference(MD, E->getLocStart(),
+ Parent, ParentDC, Roles, Relations, E);
+ }
+
+ bool VisitObjCBoxedExpr(ObjCBoxedExpr *E) {
+ if (ObjCMethodDecl *MD = E->getBoxingMethod()) {
+ return passObjCLiteralMethodCall(MD, E);
+ }
+ return true;
+ }
+
+ bool VisitObjCDictionaryLiteral(ObjCDictionaryLiteral *E) {
+ if (ObjCMethodDecl *MD = E->getDictWithObjectsMethod()) {
+ return passObjCLiteralMethodCall(MD, E);
+ }
+ return true;
+ }
+
+ bool VisitObjCArrayLiteral(ObjCArrayLiteral *E) {
+ if (ObjCMethodDecl *MD = E->getArrayWithObjectsMethod()) {
+ return passObjCLiteralMethodCall(MD, E);
+ }
+ return true;
+ }
+
+ bool VisitCXXConstructExpr(CXXConstructExpr *E) {
+ SymbolRoleSet Roles{};
+ SmallVector<SymbolRelation, 2> Relations;
+ addCallRole(Roles, Relations);
+ return IndexCtx.handleReference(E->getConstructor(), E->getLocation(),
+ Parent, ParentDC, Roles, Relations, E);
+ }
+
+ bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *E,
+ DataRecursionQueue *Q = nullptr) {
+ if (E->getOperatorLoc().isInvalid())
+ return true; // implicit.
+ return base::TraverseCXXOperatorCallExpr(E, Q);
+ }
+
+ bool VisitDeclStmt(DeclStmt *S) {
+ if (IndexCtx.shouldIndexFunctionLocalSymbols()) {
+ IndexCtx.indexDeclGroupRef(S->getDeclGroup());
+ return true;
+ }
+
+ DeclGroupRef DG = S->getDeclGroup();
+ for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I) {
+ const Decl *D = *I;
+ if (!D)
+ continue;
+ if (!IndexCtx.isFunctionLocalDecl(D))
+ IndexCtx.indexTopLevelDecl(D);
+ }
+
+ return true;
+ }
+
+ bool TraverseLambdaCapture(LambdaExpr *LE, const LambdaCapture *C) {
+ if (C->capturesThis() || C->capturesVLAType())
+ return true;
+
+ if (C->capturesVariable() && IndexCtx.shouldIndexFunctionLocalSymbols())
+ return IndexCtx.handleReference(C->getCapturedVar(), C->getLocation(),
+ Parent, ParentDC, SymbolRoleSet());
+
+ // FIXME: Lambda init-captures.
+ return true;
+ }
+
+ // RecursiveASTVisitor visits both syntactic and semantic forms, duplicating
+ // the things that we visit. Make sure to only visit the semantic form.
+ // Also visit things that are in the syntactic form but not the semantic one,
+ // for example the indices in DesignatedInitExprs.
+ bool TraverseInitListExpr(InitListExpr *S, DataRecursionQueue *Q = nullptr) {
+
+ class SyntacticFormIndexer :
+ public RecursiveASTVisitor<SyntacticFormIndexer> {
+ IndexingContext &IndexCtx;
+ const NamedDecl *Parent;
+ const DeclContext *ParentDC;
+
+ public:
+ SyntacticFormIndexer(IndexingContext &indexCtx,
+ const NamedDecl *Parent, const DeclContext *DC)
+ : IndexCtx(indexCtx), Parent(Parent), ParentDC(DC) { }
+
+ bool shouldWalkTypesOfTypeLocs() const { return false; }
+
+ bool VisitDesignatedInitExpr(DesignatedInitExpr *E) {
+ for (DesignatedInitExpr::Designator &D : llvm::reverse(E->designators())) {
+ if (D.isFieldDesignator())
+ return IndexCtx.handleReference(D.getField(), D.getFieldLoc(),
+ Parent, ParentDC, SymbolRoleSet(),
+ {}, E);
+ }
+ return true;
+ }
+ };
+
+ auto visitForm = [&](InitListExpr *Form) {
+ for (Stmt *SubStmt : Form->children()) {
+ if (!TraverseStmt(SubStmt, Q))
+ return false;
+ }
+ return true;
+ };
+
+ InitListExpr *SemaForm = S->isSemanticForm() ? S : S->getSemanticForm();
+ InitListExpr *SyntaxForm = S->isSemanticForm() ? S->getSyntacticForm() : S;
+
+ if (SemaForm) {
+ // Visit things present in syntactic form but not the semantic form.
+ if (SyntaxForm) {
+ SyntacticFormIndexer(IndexCtx, Parent, ParentDC).TraverseStmt(SyntaxForm);
+ }
+ return visitForm(SemaForm);
+ }
+
+ // No semantic, try the syntactic.
+ if (SyntaxForm) {
+ return visitForm(SyntaxForm);
+ }
+
+ return true;
+ }
+};
+
+} // anonymous namespace
+
+void IndexingContext::indexBody(const Stmt *S, const NamedDecl *Parent,
+ const DeclContext *DC) {
+ if (!S)
+ return;
+
+ if (!DC)
+ DC = Parent->getLexicalDeclContext();
+ BodyIndexer(*this, Parent, DC).TraverseStmt(const_cast<Stmt*>(S));
+}
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
new file mode 100644
index 000000000000..eb3e15114735
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexDecl.cpp
@@ -0,0 +1,454 @@
+//===- IndexDecl.cpp - Indexing declarations ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexingContext.h"
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/AST/DeclVisitor.h"
+
+using namespace clang;
+using namespace index;
+
+#define TRY_TO(CALL_EXPR) \
+ do { \
+ if (!CALL_EXPR) \
+ return false; \
+ } while (0)
+
+namespace {
+
+class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
+ IndexingContext &IndexCtx;
+
+public:
+ explicit IndexingDeclVisitor(IndexingContext &indexCtx)
+ : IndexCtx(indexCtx) { }
+
+ bool Handled = true;
+
+ bool VisitDecl(const Decl *D) {
+ Handled = false;
+ return true;
+ }
+
+ /// \brief Returns true if the given method has been defined explicitly by the
+ /// user.
+ static bool hasUserDefined(const ObjCMethodDecl *D,
+ const ObjCImplDecl *Container) {
+ const ObjCMethodDecl *MD = Container->getMethod(D->getSelector(),
+ D->isInstanceMethod());
+ return MD && !MD->isImplicit() && MD->isThisDeclarationADefinition();
+ }
+
+ void handleDeclarator(const DeclaratorDecl *D,
+ const NamedDecl *Parent = nullptr) {
+ if (!Parent) Parent = D;
+
+ IndexCtx.indexTypeSourceInfo(D->getTypeSourceInfo(), Parent);
+ IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), Parent);
+ if (IndexCtx.shouldIndexFunctionLocalSymbols()) {
+ // Only index parameters in definitions, parameters in declarations are
+ // not useful.
+ if (const ParmVarDecl *Parm = dyn_cast<ParmVarDecl>(D)) {
+ auto *DC = Parm->getDeclContext();
+ if (auto *FD = dyn_cast<FunctionDecl>(DC)) {
+ if (FD->isThisDeclarationADefinition())
+ IndexCtx.handleDecl(Parm);
+ } else if (auto *MD = dyn_cast<ObjCMethodDecl>(DC)) {
+ if (MD->isThisDeclarationADefinition())
+ IndexCtx.handleDecl(Parm);
+ } else {
+ IndexCtx.handleDecl(Parm);
+ }
+ } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ if (FD->isThisDeclarationADefinition()) {
+ for (auto PI : FD->parameters()) {
+ IndexCtx.handleDecl(PI);
+ }
+ }
+ }
+ }
+ }
+
+ bool handleObjCMethod(const ObjCMethodDecl *D) {
+ if (!IndexCtx.handleDecl(D, (unsigned)SymbolRole::Dynamic))
+ return false;
+ IndexCtx.indexTypeSourceInfo(D->getReturnTypeSourceInfo(), D);
+ for (const auto *I : D->parameters())
+ handleDeclarator(I, D);
+
+ if (D->isThisDeclarationADefinition()) {
+ const Stmt *Body = D->getBody();
+ if (Body) {
+ IndexCtx.indexBody(Body, D, D);
+ }
+ }
+ return true;
+ }
+
+ bool VisitFunctionDecl(const FunctionDecl *D) {
+ if (D->isDeleted())
+ return true;
+
+ SymbolRoleSet Roles{};
+ SmallVector<SymbolRelation, 4> Relations;
+ if (auto *CXXMD = dyn_cast<CXXMethodDecl>(D)) {
+ if (CXXMD->isVirtual())
+ Roles |= (unsigned)SymbolRole::Dynamic;
+ for (auto I = CXXMD->begin_overridden_methods(),
+ E = CXXMD->end_overridden_methods(); I != E; ++I) {
+ Relations.emplace_back((unsigned)SymbolRole::RelationOverrideOf, *I);
+ }
+ }
+
+ if (!IndexCtx.handleDecl(D, Roles, Relations))
+ return false;
+ handleDeclarator(D);
+
+ if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(D)) {
+ // Constructor initializers.
+ for (const auto *Init : Ctor->inits()) {
+ if (Init->isWritten()) {
+ IndexCtx.indexTypeSourceInfo(Init->getTypeSourceInfo(), D);
+ if (const FieldDecl *Member = Init->getAnyMember())
+ IndexCtx.handleReference(Member, Init->getMemberLocation(), D, D,
+ (unsigned)SymbolRole::Write);
+ IndexCtx.indexBody(Init->getInit(), D, D);
+ }
+ }
+ }
+
+ if (D->isThisDeclarationADefinition()) {
+ const Stmt *Body = D->getBody();
+ if (Body) {
+ IndexCtx.indexBody(Body, D, D);
+ }
+ }
+ return true;
+ }
+
+ bool VisitVarDecl(const VarDecl *D) {
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ handleDeclarator(D);
+ IndexCtx.indexBody(D->getInit(), D);
+ return true;
+ }
+
+ bool VisitFieldDecl(const FieldDecl *D) {
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ handleDeclarator(D);
+ if (D->isBitField())
+ IndexCtx.indexBody(D->getBitWidth(), D);
+ else if (D->hasInClassInitializer())
+ IndexCtx.indexBody(D->getInClassInitializer(), D);
+ return true;
+ }
+
+ bool VisitObjCIvarDecl(const ObjCIvarDecl *D) {
+ if (D->getSynthesize()) {
+ // For synthesized ivars, use the location of the ObjC implementation,
+ // not the location of the property.
+ // Otherwise the header file containing the @interface will have different
+ // indexing contents based on whether the @implementation was present or
+ // not in the translation unit.
+ return IndexCtx.handleDecl(D,
+ cast<Decl>(D->getDeclContext())->getLocation(),
+ (unsigned)SymbolRole::Implicit);
+ }
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ handleDeclarator(D);
+ return true;
+ }
+
+ bool VisitMSPropertyDecl(const MSPropertyDecl *D) {
+ handleDeclarator(D);
+ return true;
+ }
+
+ bool VisitEnumConstantDecl(const EnumConstantDecl *D) {
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ IndexCtx.indexBody(D->getInitExpr(), D);
+ return true;
+ }
+
+ bool VisitTypedefNameDecl(const TypedefNameDecl *D) {
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ IndexCtx.indexTypeSourceInfo(D->getTypeSourceInfo(), D);
+ return true;
+ }
+
+ bool VisitTagDecl(const TagDecl *D) {
+ // Non-free standing tags are handled in indexTypeSourceInfo.
+ if (D->isFreeStanding()) {
+ if (D->isThisDeclarationADefinition()) {
+ IndexCtx.indexTagDecl(D);
+ } else {
+ auto *Parent = dyn_cast<NamedDecl>(D->getDeclContext());
+ return IndexCtx.handleReference(D, D->getLocation(), Parent,
+ D->getLexicalDeclContext(),
+ SymbolRoleSet());
+ }
+ }
+ return true;
+ }
+
+ bool handleReferencedProtocols(const ObjCProtocolList &ProtList,
+ const ObjCContainerDecl *ContD) {
+ ObjCInterfaceDecl::protocol_loc_iterator LI = ProtList.loc_begin();
+ for (ObjCInterfaceDecl::protocol_iterator
+ I = ProtList.begin(), E = ProtList.end(); I != E; ++I, ++LI) {
+ SourceLocation Loc = *LI;
+ ObjCProtocolDecl *PD = *I;
+ TRY_TO(IndexCtx.handleReference(PD, Loc, ContD, ContD,
+ SymbolRoleSet(),
+ SymbolRelation{(unsigned)SymbolRole::RelationBaseOf, ContD}));
+ }
+ return true;
+ }
+
+ bool VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D) {
+ if (D->isThisDeclarationADefinition()) {
+ TRY_TO(IndexCtx.handleDecl(D));
+ if (auto *SuperD = D->getSuperClass()) {
+ TRY_TO(IndexCtx.handleReference(SuperD, D->getSuperClassLoc(), D, D,
+ SymbolRoleSet(),
+ SymbolRelation{(unsigned)SymbolRole::RelationBaseOf, D}));
+ }
+ TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D));
+ TRY_TO(IndexCtx.indexDeclContext(D));
+ } else {
+ return IndexCtx.handleReference(D, D->getLocation(), nullptr,
+ D->getDeclContext(), SymbolRoleSet());
+ }
+ return true;
+ }
+
+ bool VisitObjCProtocolDecl(const ObjCProtocolDecl *D) {
+ if (D->isThisDeclarationADefinition()) {
+ TRY_TO(IndexCtx.handleDecl(D));
+ TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D));
+ TRY_TO(IndexCtx.indexDeclContext(D));
+ } else {
+ return IndexCtx.handleReference(D, D->getLocation(), nullptr,
+ D->getDeclContext(), SymbolRoleSet());
+ }
+ return true;
+ }
+
+ bool VisitObjCImplementationDecl(const ObjCImplementationDecl *D) {
+ const ObjCInterfaceDecl *Class = D->getClassInterface();
+ if (!Class)
+ return true;
+
+ if (Class->isImplicitInterfaceDecl())
+ IndexCtx.handleDecl(Class);
+
+ if (!IndexCtx.handleDecl(D))
+ return false;
+
+ // Index the ivars first to make sure the synthesized ivars are indexed
+ // before indexing the methods that can reference them.
+ for (const auto *IvarI : D->ivars())
+ IndexCtx.indexDecl(IvarI);
+ for (const auto *I : D->decls()) {
+ if (!isa<ObjCIvarDecl>(I))
+ IndexCtx.indexDecl(I);
+ }
+
+ return true;
+ }
+
+ bool VisitObjCCategoryDecl(const ObjCCategoryDecl *D) {
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ IndexCtx.indexDeclContext(D);
+ return true;
+ }
+
+ bool VisitObjCCategoryImplDecl(const ObjCCategoryImplDecl *D) {
+ const ObjCCategoryDecl *Cat = D->getCategoryDecl();
+ if (!Cat)
+ return true;
+
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ IndexCtx.indexDeclContext(D);
+ return true;
+ }
+
+ bool VisitObjCMethodDecl(const ObjCMethodDecl *D) {
+ // Methods associated with a property, even user-declared ones, are
+ // handled when we handle the property.
+ if (D->isPropertyAccessor())
+ return true;
+
+ handleObjCMethod(D);
+ return true;
+ }
+
+ bool VisitObjCPropertyDecl(const ObjCPropertyDecl *D) {
+ if (ObjCMethodDecl *MD = D->getGetterMethodDecl())
+ if (MD->getLexicalDeclContext() == D->getLexicalDeclContext())
+ handleObjCMethod(MD);
+ if (ObjCMethodDecl *MD = D->getSetterMethodDecl())
+ if (MD->getLexicalDeclContext() == D->getLexicalDeclContext())
+ handleObjCMethod(MD);
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ IndexCtx.indexTypeSourceInfo(D->getTypeSourceInfo(), D);
+ return true;
+ }
+
+ bool VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D) {
+ ObjCPropertyDecl *PD = D->getPropertyDecl();
+ if (!IndexCtx.handleReference(PD, D->getLocation(),
+ /*Parent=*/cast<NamedDecl>(D->getDeclContext()),
+ D->getDeclContext(), SymbolRoleSet(), {},
+ /*RefE=*/nullptr, D))
+ return false;
+
+ if (D->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
+ return true;
+ assert(D->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize);
+
+ if (ObjCIvarDecl *IvarD = D->getPropertyIvarDecl()) {
+ if (!IvarD->getSynthesize())
+ IndexCtx.handleReference(IvarD, D->getPropertyIvarDeclLoc(), nullptr,
+ D->getDeclContext(), SymbolRoleSet());
+ }
+
+ auto *ImplD = cast<ObjCImplDecl>(D->getDeclContext());
+ if (ObjCMethodDecl *MD = PD->getGetterMethodDecl()) {
+ if (MD->isPropertyAccessor() &&
+ !hasUserDefined(MD, ImplD))
+ IndexCtx.handleDecl(MD, D->getLocation(), SymbolRoleSet(), {}, ImplD);
+ }
+ if (ObjCMethodDecl *MD = PD->getSetterMethodDecl()) {
+ if (MD->isPropertyAccessor() &&
+ !hasUserDefined(MD, ImplD))
+ IndexCtx.handleDecl(MD, D->getLocation(), SymbolRoleSet(), {}, ImplD);
+ }
+ return true;
+ }
+
+ bool VisitNamespaceDecl(const NamespaceDecl *D) {
+ if (!IndexCtx.handleDecl(D))
+ return false;
+ IndexCtx.indexDeclContext(D);
+ return true;
+ }
+
+ bool VisitUsingDecl(const UsingDecl *D) {
+ const DeclContext *DC = D->getDeclContext()->getRedeclContext();
+ const NamedDecl *Parent = dyn_cast<NamedDecl>(DC);
+
+ IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), Parent,
+ D->getLexicalDeclContext());
+ for (const auto *I : D->shadows())
+ IndexCtx.handleReference(I->getUnderlyingDecl(), D->getLocation(), Parent,
+ D->getLexicalDeclContext(), SymbolRoleSet());
+ return true;
+ }
+
+ bool VisitUsingDirectiveDecl(const UsingDirectiveDecl *D) {
+ const DeclContext *DC = D->getDeclContext()->getRedeclContext();
+ const NamedDecl *Parent = dyn_cast<NamedDecl>(DC);
+
+ IndexCtx.indexNestedNameSpecifierLoc(D->getQualifierLoc(), Parent,
+ D->getLexicalDeclContext());
+ return IndexCtx.handleReference(D->getNominatedNamespaceAsWritten(),
+ D->getLocation(), Parent,
+ D->getLexicalDeclContext(),
+ SymbolRoleSet());
+ }
+
+ bool VisitClassTemplateSpecializationDecl(const
+ ClassTemplateSpecializationDecl *D) {
+ // FIXME: Notify subsequent callbacks if info comes from implicit
+ // instantiation.
+ if (D->isThisDeclarationADefinition())
+ IndexCtx.indexTagDecl(D);
+ return true;
+ }
+
+ bool VisitTemplateDecl(const TemplateDecl *D) {
+ // FIXME: Template parameters.
+ return Visit(D->getTemplatedDecl());
+ }
+
+ bool VisitFriendDecl(const FriendDecl *D) {
+ if (auto ND = D->getFriendDecl()) {
+ // FIXME: Ignore a class template in a dependent context, these are not
+ // linked properly with their redeclarations, ending up with duplicate
+ // USRs.
+ // See comment "Friend templates are visible in fairly strange ways." in
+ // SemaTemplate.cpp which precedes code that prevents the friend template
+ // from becoming visible from the enclosing context.
+ if (isa<ClassTemplateDecl>(ND) && D->getDeclContext()->isDependentContext())
+ return true;
+ return Visit(ND);
+ }
+ if (auto Ty = D->getFriendType()) {
+ IndexCtx.indexTypeSourceInfo(Ty, cast<NamedDecl>(D->getDeclContext()));
+ }
+ return true;
+ }
+
+ bool VisitImportDecl(const ImportDecl *D) {
+ return IndexCtx.importedModule(D);
+ }
+};
+
+} // anonymous namespace
+
+bool IndexingContext::indexDecl(const Decl *D) {
+ if (D->isImplicit() && shouldIgnoreIfImplicit(D))
+ return true;
+
+ if (isTemplateImplicitInstantiation(D))
+ return true;
+
+ IndexingDeclVisitor Visitor(*this);
+ bool ShouldContinue = Visitor.Visit(D);
+ if (!ShouldContinue)
+ return false;
+
+ if (!Visitor.Handled && isa<DeclContext>(D))
+ return indexDeclContext(cast<DeclContext>(D));
+
+ return true;
+}
+
+bool IndexingContext::indexDeclContext(const DeclContext *DC) {
+ for (const auto *I : DC->decls())
+ if (!indexDecl(I))
+ return false;
+ return true;
+}
+
+bool IndexingContext::indexTopLevelDecl(const Decl *D) {
+ if (D->getLocation().isInvalid())
+ return true;
+
+ if (isa<ObjCMethodDecl>(D))
+ return true; // Wait for the objc container.
+
+ return indexDecl(D);
+}
+
+bool IndexingContext::indexDeclGroupRef(DeclGroupRef DG) {
+ for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I)
+ if (!indexTopLevelDecl(*I))
+ return false;
+ return true;
+}
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp b/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp
new file mode 100644
index 000000000000..13a845230072
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexSymbol.cpp
@@ -0,0 +1,385 @@
+//===--- IndexSymbol.cpp - Types and functions for indexing symbols -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Index/IndexSymbol.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/PrettyPrinter.h"
+
+using namespace clang;
+using namespace clang::index;
+
+/// \returns true if \c D is a subclass of 'XCTestCase'.
+static bool isUnitTestCase(const ObjCInterfaceDecl *D) {
+ if (!D)
+ return false;
+ while (const ObjCInterfaceDecl *SuperD = D->getSuperClass()) {
+ if (SuperD->getName() == "XCTestCase")
+ return true;
+ D = SuperD;
+ }
+ return false;
+}
+
+/// \returns true if \c D is declared in a subclass of 'XCTestCase', returns
+/// void, takes no parameters, and has a name that starts with 'test'.
+static bool isUnitTest(const ObjCMethodDecl *D) {
+ if (!D->parameters().empty())
+ return false;
+ if (!D->getReturnType()->isVoidType())
+ return false;
+ if (!D->getSelector().getNameForSlot(0).startswith("test"))
+ return false;
+ return isUnitTestCase(D->getClassInterface());
+}
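+
+// Illustration (hypothetical declarations, not part of this file): given a
+// subclass of XCTestCase declaring
+//   - (void)testExample;        // void return, no parameters, 'test' prefix
+//   - (int)testReturnsInt;      // rejected: non-void return type
+//   - (void)testWithArg:(int)x; // rejected: takes a parameter
+// isUnitTest() returns true only for the first method.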
+
+static void checkForIBOutlets(const Decl *D, SymbolSubKindSet &SubKindSet) {
+ if (D->hasAttr<IBOutletAttr>()) {
+ SubKindSet |= (unsigned)SymbolSubKind::IBAnnotated;
+ } else if (D->hasAttr<IBOutletCollectionAttr>()) {
+ SubKindSet |= (unsigned)SymbolSubKind::IBAnnotated;
+ SubKindSet |= (unsigned)SymbolSubKind::IBOutletCollection;
+ }
+}
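+
+// For example (hypothetical declarations, not part of this file), a property
+// marked 'IBOutlet' gets IBAnnotated, while one marked
+// 'IBOutletCollection(NSView)' gets both IBAnnotated and IBOutletCollection.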
+
+SymbolInfo index::getSymbolInfo(const Decl *D) {
+ assert(D);
+ SymbolInfo Info;
+ Info.Kind = SymbolKind::Unknown;
+ Info.SubKinds = SymbolSubKindSet();
+ Info.Lang = SymbolLanguage::C;
+
+ if (const TagDecl *TD = dyn_cast<TagDecl>(D)) {
+ switch (TD->getTagKind()) {
+ case TTK_Struct:
+ Info.Kind = SymbolKind::Struct; break;
+ case TTK_Union:
+ Info.Kind = SymbolKind::Union; break;
+ case TTK_Class:
+ Info.Kind = SymbolKind::Class;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case TTK_Interface:
+ Info.Kind = SymbolKind::Protocol;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case TTK_Enum:
+ Info.Kind = SymbolKind::Enum; break;
+ }
+
+ if (const CXXRecordDecl *CXXRec = dyn_cast<CXXRecordDecl>(D))
+ if (!CXXRec->isCLike())
+ Info.Lang = SymbolLanguage::CXX;
+
+ if (isa<ClassTemplatePartialSpecializationDecl>(D)) {
+ Info.SubKinds |= (unsigned)SymbolSubKind::Generic;
+ Info.SubKinds |= (unsigned)SymbolSubKind::TemplatePartialSpecialization;
+ } else if (isa<ClassTemplateSpecializationDecl>(D)) {
+ Info.SubKinds |= (unsigned)SymbolSubKind::Generic;
+ Info.SubKinds |= (unsigned)SymbolSubKind::TemplateSpecialization;
+ }
+
+ } else {
+ switch (D->getKind()) {
+ case Decl::Import:
+ Info.Kind = SymbolKind::Module;
+ break;
+ case Decl::Typedef:
+ Info.Kind = SymbolKind::TypeAlias; break; // Lang = C
+ case Decl::Function:
+ Info.Kind = SymbolKind::Function;
+ break;
+ case Decl::ParmVar:
+ Info.Kind = SymbolKind::Variable;
+ break;
+ case Decl::Var:
+ Info.Kind = SymbolKind::Variable;
+ if (isa<CXXRecordDecl>(D->getDeclContext())) {
+ Info.Kind = SymbolKind::StaticProperty;
+ Info.Lang = SymbolLanguage::CXX;
+ }
+ break;
+ case Decl::Field:
+ Info.Kind = SymbolKind::Field;
+ if (const CXXRecordDecl *
+ CXXRec = dyn_cast<CXXRecordDecl>(D->getDeclContext())) {
+ if (!CXXRec->isCLike())
+ Info.Lang = SymbolLanguage::CXX;
+ }
+ break;
+ case Decl::EnumConstant:
+ Info.Kind = SymbolKind::EnumConstant; break;
+ case Decl::ObjCInterface:
+ case Decl::ObjCImplementation: {
+ Info.Kind = SymbolKind::Class;
+ Info.Lang = SymbolLanguage::ObjC;
+ const ObjCInterfaceDecl *ClsD = dyn_cast<ObjCInterfaceDecl>(D);
+ if (!ClsD)
+ ClsD = cast<ObjCImplementationDecl>(D)->getClassInterface();
+ if (isUnitTestCase(ClsD))
+ Info.SubKinds |= (unsigned)SymbolSubKind::UnitTest;
+ break;
+ }
+ case Decl::ObjCProtocol:
+ Info.Kind = SymbolKind::Protocol;
+ Info.Lang = SymbolLanguage::ObjC;
+ break;
+ case Decl::ObjCCategory:
+ case Decl::ObjCCategoryImpl:
+ Info.Kind = SymbolKind::Extension;
+ Info.Lang = SymbolLanguage::ObjC;
+ break;
+ case Decl::ObjCMethod:
+ if (cast<ObjCMethodDecl>(D)->isInstanceMethod())
+ Info.Kind = SymbolKind::InstanceMethod;
+ else
+ Info.Kind = SymbolKind::ClassMethod;
+ Info.Lang = SymbolLanguage::ObjC;
+ if (isUnitTest(cast<ObjCMethodDecl>(D)))
+ Info.SubKinds |= (unsigned)SymbolSubKind::UnitTest;
+ if (D->hasAttr<IBActionAttr>())
+ Info.SubKinds |= (unsigned)SymbolSubKind::IBAnnotated;
+ break;
+ case Decl::ObjCProperty:
+ Info.Kind = SymbolKind::InstanceProperty;
+ Info.Lang = SymbolLanguage::ObjC;
+ checkForIBOutlets(D, Info.SubKinds);
+ break;
+ case Decl::ObjCIvar:
+ Info.Kind = SymbolKind::Field;
+ Info.Lang = SymbolLanguage::ObjC;
+ checkForIBOutlets(D, Info.SubKinds);
+ break;
+ case Decl::Namespace:
+ Info.Kind = SymbolKind::Namespace;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case Decl::NamespaceAlias:
+ Info.Kind = SymbolKind::NamespaceAlias;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case Decl::CXXConstructor:
+ Info.Kind = SymbolKind::Constructor;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case Decl::CXXDestructor:
+ Info.Kind = SymbolKind::Destructor;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case Decl::CXXConversion:
+ Info.Kind = SymbolKind::ConversionFunction;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case Decl::CXXMethod: {
+ const CXXMethodDecl *MD = cast<CXXMethodDecl>(D);
+ if (MD->isStatic())
+ Info.Kind = SymbolKind::StaticMethod;
+ else
+ Info.Kind = SymbolKind::InstanceMethod;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ }
+ case Decl::ClassTemplate:
+ Info.Kind = SymbolKind::Class;
+ Info.SubKinds |= (unsigned)SymbolSubKind::Generic;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ case Decl::FunctionTemplate:
+ Info.Kind = SymbolKind::Function;
+ Info.SubKinds |= (unsigned)SymbolSubKind::Generic;
+ Info.Lang = SymbolLanguage::CXX;
+ if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(
+ cast<FunctionTemplateDecl>(D)->getTemplatedDecl())) {
+ if (isa<CXXConstructorDecl>(MD))
+ Info.Kind = SymbolKind::Constructor;
+ else if (isa<CXXDestructorDecl>(MD))
+ Info.Kind = SymbolKind::Destructor;
+ else if (isa<CXXConversionDecl>(MD))
+ Info.Kind = SymbolKind::ConversionFunction;
+ else {
+ if (MD->isStatic())
+ Info.Kind = SymbolKind::StaticMethod;
+ else
+ Info.Kind = SymbolKind::InstanceMethod;
+ }
+ }
+ break;
+ case Decl::TypeAliasTemplate:
+ Info.Kind = SymbolKind::TypeAlias;
+ Info.Lang = SymbolLanguage::CXX;
+ Info.SubKinds |= (unsigned)SymbolSubKind::Generic;
+ break;
+ case Decl::TypeAlias:
+ Info.Kind = SymbolKind::TypeAlias;
+ Info.Lang = SymbolLanguage::CXX;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (Info.Kind == SymbolKind::Unknown)
+ return Info;
+
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ if (FD->getTemplatedKind() ==
+ FunctionDecl::TK_FunctionTemplateSpecialization) {
+ Info.SubKinds |= (unsigned)SymbolSubKind::Generic;
+ Info.SubKinds |= (unsigned)SymbolSubKind::TemplateSpecialization;
+ }
+ }
+
+ if (Info.SubKinds & (unsigned)SymbolSubKind::Generic)
+ Info.Lang = SymbolLanguage::CXX;
+
+ return Info;
+}
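+
+// Minimal usage sketch (assumes 'D' is a valid Decl* obtained elsewhere; the
+// setup around the call is hypothetical):
+//   SymbolInfo Info = index::getSymbolInfo(D);
+//   llvm::errs() << index::getSymbolKindString(Info.Kind) << ' '
+//                << index::getSymbolLanguageString(Info.Lang) << '\n';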
+
+void index::applyForEachSymbolRole(SymbolRoleSet Roles,
+ llvm::function_ref<void(SymbolRole)> Fn) {
+#define APPLY_FOR_ROLE(Role) \
+ if (Roles & (unsigned)SymbolRole::Role) \
+ Fn(SymbolRole::Role)
+
+ APPLY_FOR_ROLE(Declaration);
+ APPLY_FOR_ROLE(Definition);
+ APPLY_FOR_ROLE(Reference);
+ APPLY_FOR_ROLE(Read);
+ APPLY_FOR_ROLE(Write);
+ APPLY_FOR_ROLE(Call);
+ APPLY_FOR_ROLE(Dynamic);
+ APPLY_FOR_ROLE(AddressOf);
+ APPLY_FOR_ROLE(Implicit);
+ APPLY_FOR_ROLE(RelationChildOf);
+ APPLY_FOR_ROLE(RelationBaseOf);
+ APPLY_FOR_ROLE(RelationOverrideOf);
+ APPLY_FOR_ROLE(RelationReceivedBy);
+ APPLY_FOR_ROLE(RelationCalledBy);
+
+#undef APPLY_FOR_ROLE
+}
+
+void index::printSymbolRoles(SymbolRoleSet Roles, raw_ostream &OS) {
+ bool VisitedOnce = false;
+ applyForEachSymbolRole(Roles, [&](SymbolRole Role) {
+ if (VisitedOnce)
+ OS << ',';
+ else
+ VisitedOnce = true;
+ switch (Role) {
+ case SymbolRole::Declaration: OS << "Decl"; break;
+ case SymbolRole::Definition: OS << "Def"; break;
+ case SymbolRole::Reference: OS << "Ref"; break;
+ case SymbolRole::Read: OS << "Read"; break;
+ case SymbolRole::Write: OS << "Writ"; break;
+ case SymbolRole::Call: OS << "Call"; break;
+ case SymbolRole::Dynamic: OS << "Dyn"; break;
+ case SymbolRole::AddressOf: OS << "Addr"; break;
+ case SymbolRole::Implicit: OS << "Impl"; break;
+ case SymbolRole::RelationChildOf: OS << "RelChild"; break;
+ case SymbolRole::RelationBaseOf: OS << "RelBase"; break;
+ case SymbolRole::RelationOverrideOf: OS << "RelOver"; break;
+ case SymbolRole::RelationReceivedBy: OS << "RelRec"; break;
+ case SymbolRole::RelationCalledBy: OS << "RelCall"; break;
+ }
+ });
+}
+
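+/// Prints the name of \p D to \p OS. \returns true if the name could not be
+/// printed (\p D is not a NamedDecl or its DeclarationName is empty), false
+/// on success.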
+bool index::printSymbolName(const Decl *D, const LangOptions &LO,
+ raw_ostream &OS) {
+ if (auto *ND = dyn_cast<NamedDecl>(D)) {
+ PrintingPolicy Policy(LO);
+ // Forward references can have different template argument names. Suppress
+ // the template argument names in constructors to make their names more
+ // stable.
+ Policy.SuppressTemplateArgsInCXXConstructors = true;
+ DeclarationName DeclName = ND->getDeclName();
+ if (DeclName.isEmpty())
+ return true;
+ DeclName.print(OS, Policy);
+ return false;
+ } else {
+ return true;
+ }
+}
+
+StringRef index::getSymbolKindString(SymbolKind K) {
+ switch (K) {
+ case SymbolKind::Unknown: return "<unknown>";
+ case SymbolKind::Module: return "module";
+ case SymbolKind::Namespace: return "namespace";
+ case SymbolKind::NamespaceAlias: return "namespace-alias";
+ case SymbolKind::Macro: return "macro";
+ case SymbolKind::Enum: return "enum";
+ case SymbolKind::Struct: return "struct";
+ case SymbolKind::Class: return "class";
+ case SymbolKind::Protocol: return "protocol";
+ case SymbolKind::Extension: return "extension";
+ case SymbolKind::Union: return "union";
+ case SymbolKind::TypeAlias: return "type-alias";
+ case SymbolKind::Function: return "function";
+ case SymbolKind::Variable: return "variable";
+ case SymbolKind::Field: return "field";
+ case SymbolKind::EnumConstant: return "enumerator";
+ case SymbolKind::InstanceMethod: return "instance-method";
+ case SymbolKind::ClassMethod: return "class-method";
+ case SymbolKind::StaticMethod: return "static-method";
+ case SymbolKind::InstanceProperty: return "instance-property";
+ case SymbolKind::ClassProperty: return "class-property";
+ case SymbolKind::StaticProperty: return "static-property";
+ case SymbolKind::Constructor: return "constructor";
+ case SymbolKind::Destructor: return "destructor";
+ case SymbolKind::ConversionFunction: return "conversion-func";
+ }
+ llvm_unreachable("invalid symbol kind");
+}
+
+StringRef index::getSymbolLanguageString(SymbolLanguage K) {
+ switch (K) {
+ case SymbolLanguage::C: return "C";
+ case SymbolLanguage::ObjC: return "ObjC";
+ case SymbolLanguage::CXX: return "C++";
+ }
+ llvm_unreachable("invalid symbol language kind");
+}
+
+void index::applyForEachSymbolSubKind(SymbolSubKindSet SubKinds,
+ llvm::function_ref<void(SymbolSubKind)> Fn) {
+#define APPLY_FOR_SUBKIND(K) \
+ if (SubKinds & (unsigned)SymbolSubKind::K) \
+ Fn(SymbolSubKind::K)
+
+ APPLY_FOR_SUBKIND(Generic);
+ APPLY_FOR_SUBKIND(TemplatePartialSpecialization);
+ APPLY_FOR_SUBKIND(TemplateSpecialization);
+ APPLY_FOR_SUBKIND(UnitTest);
+ APPLY_FOR_SUBKIND(IBAnnotated);
+ APPLY_FOR_SUBKIND(IBOutletCollection);
+
+#undef APPLY_FOR_SUBKIND
+}
+
+void index::printSymbolSubKinds(SymbolSubKindSet SubKinds, raw_ostream &OS) {
+ bool VisitedOnce = false;
+ applyForEachSymbolSubKind(SubKinds, [&](SymbolSubKind SubKind) {
+ if (VisitedOnce)
+ OS << ',';
+ else
+ VisitedOnce = true;
+ switch (SubKind) {
+ case SymbolSubKind::Generic: OS << "Gen"; break;
+ case SymbolSubKind::TemplatePartialSpecialization: OS << "TPS"; break;
+ case SymbolSubKind::TemplateSpecialization: OS << "TS"; break;
+ case SymbolSubKind::UnitTest: OS << "test"; break;
+ case SymbolSubKind::IBAnnotated: OS << "IB"; break;
+ case SymbolSubKind::IBOutletCollection: OS << "IBColl"; break;
+ }
+ });
+}
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp b/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp
new file mode 100644
index 000000000000..619a9a48befd
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexTypeSourceInfo.cpp
@@ -0,0 +1,202 @@
+//===- IndexTypeSourceInfo.cpp - Indexing types ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexingContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+
+using namespace clang;
+using namespace index;
+
+namespace {
+
+class TypeIndexer : public RecursiveASTVisitor<TypeIndexer> {
+ IndexingContext &IndexCtx;
+ const NamedDecl *Parent;
+ const DeclContext *ParentDC;
+ bool IsBase;
+ SmallVector<SymbolRelation, 3> Relations;
+
+ typedef RecursiveASTVisitor<TypeIndexer> base;
+
+public:
+ TypeIndexer(IndexingContext &indexCtx, const NamedDecl *parent,
+ const DeclContext *DC, bool isBase)
+ : IndexCtx(indexCtx), Parent(parent), ParentDC(DC), IsBase(isBase) {
+ if (IsBase) {
+ assert(Parent);
+ Relations.emplace_back((unsigned)SymbolRole::RelationBaseOf, Parent);
+ }
+ }
+
+ bool shouldWalkTypesOfTypeLocs() const { return false; }
+
+ bool VisitTypedefTypeLoc(TypedefTypeLoc TL) {
+ return IndexCtx.handleReference(TL.getTypedefNameDecl(), TL.getNameLoc(),
+ Parent, ParentDC, SymbolRoleSet(),
+ Relations);
+ }
+
+#define TRY_TO(CALL_EXPR) \
+ do { \
+ if (!CALL_EXPR) \
+ return false; \
+ } while (0)
+
+ bool traverseParamVarHelper(ParmVarDecl *D) {
+ TRY_TO(TraverseNestedNameSpecifierLoc(D->getQualifierLoc()));
+ if (D->getTypeSourceInfo())
+ TRY_TO(TraverseTypeLoc(D->getTypeSourceInfo()->getTypeLoc()));
+ return true;
+ }
+
+ bool TraverseParmVarDecl(ParmVarDecl *D) {
+ // Avoid visiting default arguments from the definition that were already
+ // visited in the declaration.
+ // FIXME: A free function definition can have default arguments.
+ // Avoiding double visitation of default arguments should be handled by the
+ // visitor, probably with a bit in the AST to indicate whether the attached
+ // default argument was 'inherited' or written in source.
+ if (auto FD = dyn_cast<FunctionDecl>(D->getDeclContext())) {
+ if (FD->isThisDeclarationADefinition()) {
+ return traverseParamVarHelper(D);
+ }
+ }
+
+ return base::TraverseParmVarDecl(D);
+ }
+
+ bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
+ IndexCtx.indexNestedNameSpecifierLoc(NNS, Parent, ParentDC);
+ return true;
+ }
+
+ bool VisitTagTypeLoc(TagTypeLoc TL) {
+ TagDecl *D = TL.getDecl();
+ if (D->getParentFunctionOrMethod())
+ return true;
+
+ if (TL.isDefinition()) {
+ IndexCtx.indexTagDecl(D);
+ return true;
+ }
+
+ return IndexCtx.handleReference(D, TL.getNameLoc(),
+ Parent, ParentDC, SymbolRoleSet(),
+ Relations);
+ }
+
+ bool VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
+ return IndexCtx.handleReference(TL.getIFaceDecl(), TL.getNameLoc(),
+ Parent, ParentDC, SymbolRoleSet());
+ }
+
+ bool VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
+ for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) {
+ IndexCtx.handleReference(TL.getProtocol(i), TL.getProtocolLoc(i),
+ Parent, ParentDC, SymbolRoleSet());
+ }
+ return true;
+ }
+
+ bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) {
+ if (const TemplateSpecializationType *T = TL.getTypePtr()) {
+ if (IndexCtx.shouldIndexImplicitTemplateInsts()) {
+ if (CXXRecordDecl *RD = T->getAsCXXRecordDecl())
+ IndexCtx.handleReference(RD, TL.getTemplateNameLoc(),
+ Parent, ParentDC, SymbolRoleSet(), Relations);
+ } else {
+ if (const TemplateDecl *D = T->getTemplateName().getAsTemplateDecl())
+ IndexCtx.handleReference(D, TL.getTemplateNameLoc(),
+ Parent, ParentDC, SymbolRoleSet(), Relations);
+ }
+ }
+ return true;
+ }
+
+ bool TraverseStmt(Stmt *S) {
+ IndexCtx.indexBody(S, Parent, ParentDC);
+ return true;
+ }
+};
+
+} // anonymous namespace
+
+void IndexingContext::indexTypeSourceInfo(TypeSourceInfo *TInfo,
+ const NamedDecl *Parent,
+ const DeclContext *DC,
+ bool isBase) {
+ if (!TInfo || TInfo->getTypeLoc().isNull())
+ return;
+
+ indexTypeLoc(TInfo->getTypeLoc(), Parent, DC, isBase);
+}
+
+void IndexingContext::indexTypeLoc(TypeLoc TL,
+ const NamedDecl *Parent,
+ const DeclContext *DC,
+ bool isBase) {
+ if (TL.isNull())
+ return;
+
+ if (!DC)
+ DC = Parent->getLexicalDeclContext();
+ TypeIndexer(*this, Parent, DC, isBase).TraverseTypeLoc(TL);
+}
+
+void IndexingContext::indexNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
+ const NamedDecl *Parent,
+ const DeclContext *DC) {
+ if (!NNS)
+ return;
+
+ if (NestedNameSpecifierLoc Prefix = NNS.getPrefix())
+ indexNestedNameSpecifierLoc(Prefix, Parent, DC);
+
+ if (!DC)
+ DC = Parent->getLexicalDeclContext();
+ SourceLocation Loc = NNS.getSourceRange().getBegin();
+
+ switch (NNS.getNestedNameSpecifier()->getKind()) {
+ case NestedNameSpecifier::Identifier:
+ case NestedNameSpecifier::Global:
+ case NestedNameSpecifier::Super:
+ break;
+
+ case NestedNameSpecifier::Namespace:
+ handleReference(NNS.getNestedNameSpecifier()->getAsNamespace(),
+ Loc, Parent, DC, SymbolRoleSet());
+ break;
+ case NestedNameSpecifier::NamespaceAlias:
+ handleReference(NNS.getNestedNameSpecifier()->getAsNamespaceAlias(),
+ Loc, Parent, DC, SymbolRoleSet());
+ break;
+
+ case NestedNameSpecifier::TypeSpec:
+ case NestedNameSpecifier::TypeSpecWithTemplate:
+ indexTypeLoc(NNS.getTypeLoc(), Parent, DC);
+ break;
+ }
+}
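+
+// For a qualified name such as 'ns1::ns2::T' (hypothetical example), the
+// recursion above visits the prefix first, so a reference is reported for
+// 'ns1' and then for 'ns2' before the type itself is indexed.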
+
+void IndexingContext::indexTagDecl(const TagDecl *D) {
+ if (!shouldIndexFunctionLocalSymbols() && isFunctionLocalDecl(D))
+ return;
+
+ if (handleDecl(D)) {
+ if (D->isThisDeclarationADefinition()) {
+ indexNestedNameSpecifierLoc(D->getQualifierLoc(), D);
+ if (auto CXXRD = dyn_cast<CXXRecordDecl>(D)) {
+ for (const auto &I : CXXRD->bases()) {
+ indexTypeSourceInfo(I.getTypeSourceInfo(), CXXRD, CXXRD, /*isBase=*/true);
+ }
+ }
+ indexDeclContext(D);
+ }
+ }
+}
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp b/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp
new file mode 100644
index 000000000000..d7442931523f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexingAction.cpp
@@ -0,0 +1,176 @@
+//===- IndexingAction.cpp - Frontend index action -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Index/IndexingAction.h"
+#include "clang/Index/IndexDataConsumer.h"
+#include "IndexingContext.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Frontend/MultiplexConsumer.h"
+#include "clang/Lex/Preprocessor.h"
+
+using namespace clang;
+using namespace clang::index;
+
+void IndexDataConsumer::_anchor() {}
+
+bool IndexDataConsumer::handleDeclOccurence(const Decl *D, SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations,
+ FileID FID, unsigned Offset,
+ ASTNodeInfo ASTNode) {
+ return true;
+}
+
+bool IndexDataConsumer::handleMacroOccurence(const IdentifierInfo *Name,
+ const MacroInfo *MI, SymbolRoleSet Roles,
+ FileID FID, unsigned Offset) {
+ return true;
+}
+
+bool IndexDataConsumer::handleModuleOccurence(const ImportDecl *ImportD,
+ SymbolRoleSet Roles,
+ FileID FID, unsigned Offset) {
+ return true;
+}
+
+namespace {
+
+class IndexASTConsumer : public ASTConsumer {
+ IndexingContext &IndexCtx;
+
+public:
+ IndexASTConsumer(IndexingContext &IndexCtx)
+ : IndexCtx(IndexCtx) {}
+
+protected:
+ void Initialize(ASTContext &Context) override {
+ IndexCtx.setASTContext(Context);
+ IndexCtx.getDataConsumer().initialize(Context);
+ }
+
+ bool HandleTopLevelDecl(DeclGroupRef DG) override {
+ return IndexCtx.indexDeclGroupRef(DG);
+ }
+
+ void HandleInterestingDecl(DeclGroupRef DG) override {
+ // Ignore deserialized decls.
+ }
+
+ void HandleTopLevelDeclInObjCContainer(DeclGroupRef DG) override {
+ IndexCtx.indexDeclGroupRef(DG);
+ }
+
+ void HandleTranslationUnit(ASTContext &Ctx) override {
+ }
+};
+
+class IndexActionBase {
+protected:
+ std::shared_ptr<IndexDataConsumer> DataConsumer;
+ IndexingContext IndexCtx;
+
+ IndexActionBase(std::shared_ptr<IndexDataConsumer> dataConsumer,
+ IndexingOptions Opts)
+ : DataConsumer(std::move(dataConsumer)),
+ IndexCtx(Opts, *DataConsumer) {}
+
+ std::unique_ptr<IndexASTConsumer> createIndexASTConsumer() {
+ return llvm::make_unique<IndexASTConsumer>(IndexCtx);
+ }
+
+ void finish() {
+ DataConsumer->finish();
+ }
+};
+
+class IndexAction : public ASTFrontendAction, IndexActionBase {
+public:
+ IndexAction(std::shared_ptr<IndexDataConsumer> DataConsumer,
+ IndexingOptions Opts)
+ : IndexActionBase(std::move(DataConsumer), Opts) {}
+
+protected:
+ std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+ StringRef InFile) override {
+ return createIndexASTConsumer();
+ }
+
+ void EndSourceFileAction() override {
+ FrontendAction::EndSourceFileAction();
+ finish();
+ }
+};
+
+class WrappingIndexAction : public WrapperFrontendAction, IndexActionBase {
+ bool IndexActionFailed = false;
+
+public:
+ WrappingIndexAction(std::unique_ptr<FrontendAction> WrappedAction,
+ std::shared_ptr<IndexDataConsumer> DataConsumer,
+ IndexingOptions Opts)
+ : WrapperFrontendAction(std::move(WrappedAction)),
+ IndexActionBase(std::move(DataConsumer), Opts) {}
+
+protected:
+ std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+ StringRef InFile) override;
+ void EndSourceFileAction() override;
+};
+
+} // anonymous namespace
+
+void WrappingIndexAction::EndSourceFileAction() {
+ // Invoke wrapped action's method.
+ WrapperFrontendAction::EndSourceFileAction();
+ if (!IndexActionFailed)
+ finish();
+}
+
+std::unique_ptr<ASTConsumer>
+WrappingIndexAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
+ auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile);
+ if (!OtherConsumer) {
+ IndexActionFailed = true;
+ return nullptr;
+ }
+
+ std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+ Consumers.push_back(std::move(OtherConsumer));
+ Consumers.push_back(createIndexASTConsumer());
+ return llvm::make_unique<MultiplexConsumer>(std::move(Consumers));
+}
+
+std::unique_ptr<FrontendAction>
+index::createIndexingAction(std::shared_ptr<IndexDataConsumer> DataConsumer,
+ IndexingOptions Opts,
+ std::unique_ptr<FrontendAction> WrappedAction) {
+ if (WrappedAction)
+ return llvm::make_unique<WrappingIndexAction>(std::move(WrappedAction),
+ std::move(DataConsumer),
+ Opts);
+ return llvm::make_unique<IndexAction>(std::move(DataConsumer), Opts);
+}
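+
+// Minimal client sketch (the consumer type is hypothetical; only the
+// createIndexingAction() call reflects this file):
+//   class MyConsumer : public IndexDataConsumer { /* override callbacks */ };
+//   std::unique_ptr<FrontendAction> Action = index::createIndexingAction(
+//       std::make_shared<MyConsumer>(), IndexingOptions(),
+//       /*WrappedAction=*/nullptr);
+//   // Run 'Action' via a CompilerInstance like any other FrontendAction.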
+
+static bool topLevelDeclVisitor(void *context, const Decl *D) {
+ IndexingContext &IndexCtx = *static_cast<IndexingContext*>(context);
+ return IndexCtx.indexTopLevelDecl(D);
+}
+
+static void indexTranslationUnit(ASTUnit &Unit, IndexingContext &IndexCtx) {
+ Unit.visitLocalTopLevelDecls(&IndexCtx, topLevelDeclVisitor);
+}
+
+void index::indexASTUnit(ASTUnit &Unit,
+ std::shared_ptr<IndexDataConsumer> DataConsumer,
+ IndexingOptions Opts) {
+ IndexingContext IndexCtx(Opts, *DataConsumer);
+ IndexCtx.setASTContext(Unit.getASTContext());
+ DataConsumer->initialize(Unit.getASTContext());
+ indexTranslationUnit(Unit, IndexCtx);
+}
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp
new file mode 100644
index 000000000000..bcc367c6626d
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp
@@ -0,0 +1,334 @@
+//===- IndexingContext.cpp - Indexing context data ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IndexingContext.h"
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/SourceManager.h"
+
+using namespace clang;
+using namespace index;
+
+bool IndexingContext::shouldIndexFunctionLocalSymbols() const {
+ return IndexOpts.IndexFunctionLocals;
+}
+
+bool IndexingContext::handleDecl(const Decl *D,
+ SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations) {
+ return handleDeclOccurrence(D, D->getLocation(), /*IsRef=*/false,
+ cast<Decl>(D->getDeclContext()), Roles, Relations,
+ nullptr, nullptr, D->getDeclContext());
+}
+
+bool IndexingContext::handleDecl(const Decl *D, SourceLocation Loc,
+ SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations,
+ const DeclContext *DC) {
+ if (!DC)
+ DC = D->getDeclContext();
+ return handleDeclOccurrence(D, Loc, /*IsRef=*/false, cast<Decl>(DC),
+ Roles, Relations,
+ nullptr, nullptr, DC);
+}
+
+bool IndexingContext::handleReference(const NamedDecl *D, SourceLocation Loc,
+ const NamedDecl *Parent,
+ const DeclContext *DC,
+ SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations,
+ const Expr *RefE,
+ const Decl *RefD) {
+ if (!shouldIndexFunctionLocalSymbols() && isFunctionLocalDecl(D))
+ return true;
+
+ if (isa<NonTypeTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D))
+ return true;
+
+ return handleDeclOccurrence(D, Loc, /*IsRef=*/true, Parent, Roles, Relations,
+ RefE, RefD, DC);
+}
+
+bool IndexingContext::importedModule(const ImportDecl *ImportD) {
+ SourceLocation Loc;
+ auto IdLocs = ImportD->getIdentifierLocs();
+ if (!IdLocs.empty())
+ Loc = IdLocs.front();
+ else
+ Loc = ImportD->getLocation();
+ SourceManager &SM = Ctx->getSourceManager();
+ Loc = SM.getFileLoc(Loc);
+ if (Loc.isInvalid())
+ return true;
+
+ FileID FID;
+ unsigned Offset;
+ std::tie(FID, Offset) = SM.getDecomposedLoc(Loc);
+ if (FID.isInvalid())
+ return true;
+
+ bool Invalid = false;
+ const SrcMgr::SLocEntry &SEntry = SM.getSLocEntry(FID, &Invalid);
+ if (Invalid || !SEntry.isFile())
+ return true;
+
+ if (SEntry.getFile().getFileCharacteristic() != SrcMgr::C_User) {
+ switch (IndexOpts.SystemSymbolFilter) {
+ case IndexingOptions::SystemSymbolFilterKind::None:
+ return true;
+ case IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly:
+ case IndexingOptions::SystemSymbolFilterKind::All:
+ break;
+ }
+ }
+
+ SymbolRoleSet Roles = (unsigned)SymbolRole::Declaration;
+ if (ImportD->isImplicit())
+ Roles |= (unsigned)SymbolRole::Implicit;
+
+ return DataConsumer.handleModuleOccurence(ImportD, Roles, FID, Offset);
+}
+
+bool IndexingContext::isFunctionLocalDecl(const Decl *D) {
+ assert(D);
+
+ if (isa<TemplateTemplateParmDecl>(D))
+ return true;
+
+ if (isa<ObjCTypeParamDecl>(D))
+ return true;
+
+ if (!D->getParentFunctionOrMethod())
+ return false;
+
+ if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
+ switch (ND->getFormalLinkage()) {
+ case NoLinkage:
+ case VisibleNoLinkage:
+ case InternalLinkage:
+ return true;
+ case UniqueExternalLinkage:
+ llvm_unreachable("Not a sema linkage");
+ case ExternalLinkage:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool IndexingContext::isTemplateImplicitInstantiation(const Decl *D) {
+ TemplateSpecializationKind TKind = TSK_Undeclared;
+ if (const ClassTemplateSpecializationDecl *
+ SD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
+ TKind = SD->getSpecializationKind();
+ }
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ TKind = FD->getTemplateSpecializationKind();
+ }
+ switch (TKind) {
+ case TSK_Undeclared:
+ case TSK_ExplicitSpecialization:
+ return false;
+ case TSK_ImplicitInstantiation:
+ case TSK_ExplicitInstantiationDeclaration:
+ case TSK_ExplicitInstantiationDefinition:
+ return true;
+ }
+ llvm_unreachable("invalid TemplateSpecializationKind");
+}
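+
+// Note that explicit instantiations (e.g. 'template struct S<int>;') are also
+// reported as "implicit" here, so they are skipped during indexing along with
+// true implicit instantiations.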
+
+bool IndexingContext::shouldIgnoreIfImplicit(const Decl *D) {
+ if (isa<ObjCInterfaceDecl>(D))
+ return false;
+ if (isa<ObjCCategoryDecl>(D))
+ return false;
+ if (isa<ObjCIvarDecl>(D))
+ return false;
+ if (isa<ObjCMethodDecl>(D))
+ return false;
+ if (isa<ImportDecl>(D))
+ return false;
+ return true;
+}
+
+static const Decl *adjustTemplateImplicitInstantiation(const Decl *D) {
+ if (const ClassTemplateSpecializationDecl *
+ SD = dyn_cast<ClassTemplateSpecializationDecl>(D)) {
+ return SD->getTemplateInstantiationPattern();
+ }
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+ return FD->getTemplateInstantiationPattern();
+ }
+ return nullptr;
+}
+
+static bool isDeclADefinition(const Decl *D, const DeclContext *ContainerDC, ASTContext &Ctx) {
+ if (auto VD = dyn_cast<VarDecl>(D))
+ return VD->isThisDeclarationADefinition(Ctx);
+
+ if (auto FD = dyn_cast<FunctionDecl>(D))
+ return FD->isThisDeclarationADefinition();
+
+ if (auto TD = dyn_cast<TagDecl>(D))
+ return TD->isThisDeclarationADefinition();
+
+ if (auto MD = dyn_cast<ObjCMethodDecl>(D))
+ return MD->isThisDeclarationADefinition() || isa<ObjCImplDecl>(ContainerDC);
+
+ if (isa<TypedefNameDecl>(D) ||
+ isa<EnumConstantDecl>(D) ||
+ isa<FieldDecl>(D) ||
+ isa<MSPropertyDecl>(D) ||
+ isa<ObjCImplDecl>(D) ||
+ isa<ObjCPropertyImplDecl>(D))
+ return true;
+
+ return false;
+}
+
+static const Decl *adjustParent(const Decl *Parent) {
+ if (!Parent)
+ return nullptr;
+ for (;; Parent = cast<Decl>(Parent->getDeclContext())) {
+ if (isa<TranslationUnitDecl>(Parent))
+ return nullptr;
+ if (isa<LinkageSpecDecl>(Parent) || isa<BlockDecl>(Parent))
+ continue;
+ if (auto NS = dyn_cast<NamespaceDecl>(Parent)) {
+ if (NS->isAnonymousNamespace())
+ continue;
+ } else if (auto RD = dyn_cast<RecordDecl>(Parent)) {
+ if (RD->isAnonymousStructOrUnion())
+ continue;
+ } else if (auto FD = dyn_cast<FieldDecl>(Parent)) {
+ if (FD->getDeclName().isEmpty())
+ continue;
+ }
+ return Parent;
+ }
+}
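+
+// For example, a declaration nested directly inside 'extern "C" { ... }' at
+// file scope ends up with a null parent: the LinkageSpecDecl is skipped and
+// the walk stops at the TranslationUnitDecl.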
+
+static const Decl *getCanonicalDecl(const Decl *D) {
+ D = D->getCanonicalDecl();
+ if (auto TD = dyn_cast<TemplateDecl>(D)) {
+ D = TD->getTemplatedDecl();
+ assert(D->isCanonicalDecl());
+ }
+
+ return D;
+}
+
+bool IndexingContext::handleDeclOccurrence(const Decl *D, SourceLocation Loc,
+ bool IsRef, const Decl *Parent,
+ SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations,
+ const Expr *OrigE,
+ const Decl *OrigD,
+ const DeclContext *ContainerDC) {
+ if (D->isImplicit() && !isa<ObjCMethodDecl>(D))
+ return true;
+ if (!isa<NamedDecl>(D) ||
+ (cast<NamedDecl>(D)->getDeclName().isEmpty() &&
+ !isa<TagDecl>(D) && !isa<ObjCCategoryDecl>(D)))
+ return true;
+
+ SourceManager &SM = Ctx->getSourceManager();
+ Loc = SM.getFileLoc(Loc);
+ if (Loc.isInvalid())
+ return true;
+
+ FileID FID;
+ unsigned Offset;
+ std::tie(FID, Offset) = SM.getDecomposedLoc(Loc);
+ if (FID.isInvalid())
+ return true;
+
+ bool Invalid = false;
+ const SrcMgr::SLocEntry &SEntry = SM.getSLocEntry(FID, &Invalid);
+ if (Invalid || !SEntry.isFile())
+ return true;
+
+ if (SEntry.getFile().getFileCharacteristic() != SrcMgr::C_User) {
+ switch (IndexOpts.SystemSymbolFilter) {
+ case IndexingOptions::SystemSymbolFilterKind::None:
+ return true;
+ case IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly:
+ if (IsRef)
+ return true;
+ break;
+ case IndexingOptions::SystemSymbolFilterKind::All:
+ break;
+ }
+ }
+
+ if (isTemplateImplicitInstantiation(D)) {
+ if (!IsRef)
+ return true;
+ D = adjustTemplateImplicitInstantiation(D);
+ if (!D)
+ return true;
+ assert(!isTemplateImplicitInstantiation(D));
+ }
+
+ if (!OrigD)
+ OrigD = D;
+
+ if (IsRef)
+ Roles |= (unsigned)SymbolRole::Reference;
+ else if (isDeclADefinition(D, ContainerDC, *Ctx))
+ Roles |= (unsigned)SymbolRole::Definition;
+ else
+ Roles |= (unsigned)SymbolRole::Declaration;
+
+ D = getCanonicalDecl(D);
+ if (D->isImplicit() && !isa<ObjCMethodDecl>(D) &&
+ !(isa<FunctionDecl>(D) && cast<FunctionDecl>(D)->getBuiltinID())) {
+ // operator new declarations will link to the implicit one as canonical.
+ return true;
+ }
+ Parent = adjustParent(Parent);
+ if (Parent)
+ Parent = getCanonicalDecl(Parent);
+ assert((!Parent || !Parent->isImplicit() ||
+ (isa<FunctionDecl>(Parent) &&
+ cast<FunctionDecl>(Parent)->getBuiltinID()) ||
+ isa<ObjCInterfaceDecl>(Parent) || isa<ObjCMethodDecl>(Parent)) &&
+ "unexpected implicit parent!");
+
+ SmallVector<SymbolRelation, 6> FinalRelations;
+ FinalRelations.reserve(Relations.size()+1);
+
+ auto addRelation = [&](SymbolRelation Rel) {
+ auto It = std::find_if(FinalRelations.begin(), FinalRelations.end(),
+ [&](SymbolRelation Elem)->bool {
+ return Elem.RelatedSymbol == Rel.RelatedSymbol;
+ });
+ if (It != FinalRelations.end()) {
+ It->Roles |= Rel.Roles;
+ } else {
+ FinalRelations.push_back(Rel);
+ }
+ Roles |= Rel.Roles;
+ };
+
+ if (!IsRef && Parent && !cast<DeclContext>(Parent)->isFunctionOrMethod()) {
+ addRelation(SymbolRelation{(unsigned)SymbolRole::RelationChildOf, Parent});
+ }
+ for (auto &Rel : Relations) {
+ addRelation(SymbolRelation(Rel.Roles,
+ Rel.RelatedSymbol->getCanonicalDecl()));
+ }
+
+ IndexDataConsumer::ASTNodeInfo Node{ OrigE, OrigD, Parent, ContainerDC };
+ return DataConsumer.handleDeclOccurence(D, Roles, FinalRelations, FID, Offset,
+ Node);
+}
diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingContext.h b/contrib/llvm/tools/clang/lib/Index/IndexingContext.h
new file mode 100644
index 000000000000..600fc433b58d
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Index/IndexingContext.h
@@ -0,0 +1,121 @@
+//===- IndexingContext.h - Indexing context data ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_INDEX_INDEXINGCONTEXT_H
+#define LLVM_CLANG_LIB_INDEX_INDEXINGCONTEXT_H
+
+#include "clang/Basic/LLVM.h"
+#include "clang/Index/IndexSymbol.h"
+#include "clang/Index/IndexingAction.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace clang {
+ class ASTContext;
+ class Decl;
+ class DeclGroupRef;
+ class ImportDecl;
+ class TagDecl;
+ class TypeSourceInfo;
+ class NamedDecl;
+ class ObjCMethodDecl;
+ class DeclContext;
+ class NestedNameSpecifierLoc;
+ class Stmt;
+ class Expr;
+ class TypeLoc;
+ class SourceLocation;
+
+namespace index {
+ class IndexDataConsumer;
+
+class IndexingContext {
+ IndexingOptions IndexOpts;
+ IndexDataConsumer &DataConsumer;
+ ASTContext *Ctx = nullptr;
+
+public:
+ IndexingContext(IndexingOptions IndexOpts, IndexDataConsumer &DataConsumer)
+ : IndexOpts(IndexOpts), DataConsumer(DataConsumer) {}
+
+ const IndexingOptions &getIndexOpts() const { return IndexOpts; }
+ IndexDataConsumer &getDataConsumer() { return DataConsumer; }
+
+ void setASTContext(ASTContext &ctx) { Ctx = &ctx; }
+
+ bool shouldSuppressRefs() const {
+ return false;
+ }
+
+ bool shouldIndexFunctionLocalSymbols() const;
+
+ bool shouldIndexImplicitTemplateInsts() const {
+ return false;
+ }
+
+ static bool isFunctionLocalDecl(const Decl *D);
+ static bool isTemplateImplicitInstantiation(const Decl *D);
+
+ bool handleDecl(const Decl *D, SymbolRoleSet Roles = SymbolRoleSet(),
+ ArrayRef<SymbolRelation> Relations = None);
+
+ bool handleDecl(const Decl *D, SourceLocation Loc,
+ SymbolRoleSet Roles = SymbolRoleSet(),
+ ArrayRef<SymbolRelation> Relations = None,
+ const DeclContext *DC = nullptr);
+
+ bool handleReference(const NamedDecl *D, SourceLocation Loc,
+ const NamedDecl *Parent,
+ const DeclContext *DC,
+ SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations = None,
+ const Expr *RefE = nullptr,
+ const Decl *RefD = nullptr);
+
+ bool importedModule(const ImportDecl *ImportD);
+
+ bool indexDecl(const Decl *D);
+
+ void indexTagDecl(const TagDecl *D);
+
+ void indexTypeSourceInfo(TypeSourceInfo *TInfo, const NamedDecl *Parent,
+ const DeclContext *DC = nullptr,
+ bool isBase = false);
+
+ void indexTypeLoc(TypeLoc TL, const NamedDecl *Parent,
+ const DeclContext *DC = nullptr,
+ bool isBase = false);
+
+ void indexNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
+ const NamedDecl *Parent,
+ const DeclContext *DC = nullptr);
+
+ bool indexDeclContext(const DeclContext *DC);
+
+ void indexBody(const Stmt *S, const NamedDecl *Parent,
+ const DeclContext *DC = nullptr);
+
+ bool indexTopLevelDecl(const Decl *D);
+ bool indexDeclGroupRef(DeclGroupRef DG);
+
+private:
+ bool shouldIgnoreIfImplicit(const Decl *D);
+
+ bool handleDeclOccurrence(const Decl *D, SourceLocation Loc,
+ bool IsRef, const Decl *Parent,
+ SymbolRoleSet Roles,
+ ArrayRef<SymbolRelation> Relations,
+ const Expr *RefE,
+ const Decl *RefD,
+ const DeclContext *ContainerDC);
+};
+
+} // end namespace index
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp b/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp
index c57694fc10a8..30f1add249b1 100644
--- a/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp
+++ b/contrib/llvm/tools/clang/lib/Index/USRGeneration.cpp
@@ -90,18 +90,23 @@ public:
void VisitVarDecl(const VarDecl *D);
void VisitNonTypeTemplateParmDecl(const NonTypeTemplateParmDecl *D);
void VisitTemplateTemplateParmDecl(const TemplateTemplateParmDecl *D);
+
void VisitLinkageSpecDecl(const LinkageSpecDecl *D) {
IgnoreResults = true;
}
+
void VisitUsingDirectiveDecl(const UsingDirectiveDecl *D) {
IgnoreResults = true;
}
+
void VisitUsingDecl(const UsingDecl *D) {
IgnoreResults = true;
}
+
void VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D) {
IgnoreResults = true;
}
+
void VisitUnresolvedUsingTypenameDecl(const UnresolvedUsingTypenameDecl *D) {
IgnoreResults = true;
}
@@ -126,14 +131,17 @@ public:
void GenObjCClass(StringRef cls) {
generateUSRForObjCClass(cls, Out);
}
+
/// Generate a USR for an Objective-C class category.
void GenObjCCategory(StringRef cls, StringRef cat) {
generateUSRForObjCCategory(cls, cat, Out);
}
+
/// Generate a USR fragment for an Objective-C property.
- void GenObjCProperty(StringRef prop) {
- generateUSRForObjCProperty(prop, Out);
+ void GenObjCProperty(StringRef prop, bool isClassProp) {
+ generateUSRForObjCProperty(prop, isClassProp, Out);
}
+
/// Generate a USR for an Objective-C protocol.
void GenObjCProtocol(StringRef prot) {
generateUSRForObjCProtocol(prot, Out);
@@ -148,7 +156,6 @@ public:
/// the decl had no name.
bool EmitDeclName(const NamedDecl *D);
};
-
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -203,10 +210,16 @@ void USRGenerator::VisitFunctionDecl(const FunctionDecl *D) {
VisitTemplateParameterList(FunTmpl->getTemplateParameters());
} else
Out << "@F@";
- D->printName(Out);
+
+ PrintingPolicy Policy(Context->getLangOpts());
+ // Forward references can have different template argument names. Suppress the
+ // template argument names in constructors to make their USRs more stable.
+ Policy.SuppressTemplateArgsInCXXConstructors = true;
+ D->getDeclName().print(Out, Policy);
ASTContext &Ctx = *Context;
- if (!Ctx.getLangOpts().CPlusPlus || D->isExternC())
+ if ((!Ctx.getLangOpts().CPlusPlus || D->isExternC()) &&
+ !D->hasAttr<OverloadableAttr>())
return;
if (const TemplateArgumentList *
@@ -220,7 +233,7 @@ void USRGenerator::VisitFunctionDecl(const FunctionDecl *D) {
}
// Mangle in type information for the arguments.
- for (auto PD : D->params()) {
+ for (auto PD : D->parameters()) {
Out << '#';
VisitType(PD->getType());
}
@@ -287,13 +300,11 @@ void USRGenerator::VisitVarDecl(const VarDecl *D) {
void USRGenerator::VisitNonTypeTemplateParmDecl(
const NonTypeTemplateParmDecl *D) {
GenLoc(D, /*IncludeOffset=*/true);
- return;
}
void USRGenerator::VisitTemplateTemplateParmDecl(
const TemplateTemplateParmDecl *D) {
GenLoc(D, /*IncludeOffset=*/true);
- return;
}
void USRGenerator::VisitNamespaceDecl(const NamespaceDecl *D) {
@@ -400,7 +411,7 @@ void USRGenerator::VisitObjCPropertyDecl(const ObjCPropertyDecl *D) {
Visit(ID);
else
Visit(cast<Decl>(D->getDeclContext()));
- GenObjCProperty(D->getName());
+ GenObjCProperty(D->getName(), D->isClassProperty());
}
void USRGenerator::VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D) {
@@ -415,7 +426,8 @@ void USRGenerator::VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D) {
void USRGenerator::VisitTagDecl(const TagDecl *D) {
// Add the location of the tag decl to handle resolution across
// translation units.
- if (ShouldGenerateLocation(D) && GenLoc(D, /*IncludeOffset=*/isLocal(D)))
+ if (!isa<EnumDecl>(D) &&
+ ShouldGenerateLocation(D) && GenLoc(D, /*IncludeOffset=*/isLocal(D)))
return;
D = D->getCanonicalDecl();
@@ -471,8 +483,16 @@ void USRGenerator::VisitTagDecl(const TagDecl *D) {
else {
if (D->isEmbeddedInDeclarator() && !D->isFreeStanding()) {
printLoc(Out, D->getLocation(), Context->getSourceManager(), true);
- } else
+ } else {
Buf[off] = 'a';
+ if (auto *ED = dyn_cast<EnumDecl>(D)) {
+ // Distinguish USRs of anonymous enums by using their first enumerator.
+ auto enum_range = ED->enumerators();
+ if (enum_range.begin() != enum_range.end()) {
+ Out << '@' << **enum_range.begin();
+ }
+ }
+ }
}
}
@@ -500,7 +520,6 @@ void USRGenerator::VisitTypedefDecl(const TypedefDecl *D) {
void USRGenerator::VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *D) {
GenLoc(D, /*IncludeOffset=*/true);
- return;
}
bool USRGenerator::GenLoc(const Decl *D, bool IncludeOffset) {
@@ -599,24 +618,17 @@ void USRGenerator::VisitType(QualType T) {
c = 'd'; break;
case BuiltinType::LongDouble:
c = 'D'; break;
+ case BuiltinType::Float128:
+ c = 'Q'; break;
case BuiltinType::NullPtr:
c = 'n'; break;
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) case BuiltinType::Id:
#include "clang/AST/BuiltinTypes.def"
case BuiltinType::Dependent:
- case BuiltinType::OCLImage1d:
- case BuiltinType::OCLImage1dArray:
- case BuiltinType::OCLImage1dBuffer:
- case BuiltinType::OCLImage2d:
- case BuiltinType::OCLImage2dArray:
- case BuiltinType::OCLImage2dDepth:
- case BuiltinType::OCLImage2dArrayDepth:
- case BuiltinType::OCLImage2dMSAA:
- case BuiltinType::OCLImage2dArrayMSAA:
- case BuiltinType::OCLImage2dMSAADepth:
- case BuiltinType::OCLImage2dArrayMSAADepth:
- case BuiltinType::OCLImage3d:
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
case BuiltinType::OCLQueue:
@@ -654,6 +666,11 @@ void USRGenerator::VisitType(QualType T) {
T = PT->getPointeeType();
continue;
}
+ if (const ObjCObjectPointerType *OPT = T->getAs<ObjCObjectPointerType>()) {
+ Out << '*';
+ T = OPT->getPointeeType();
+ continue;
+ }
if (const RValueReferenceType *RT = T->getAs<RValueReferenceType>()) {
Out << "&&";
T = RT->getPointeeType();
@@ -688,6 +705,18 @@ void USRGenerator::VisitType(QualType T) {
VisitTagDecl(TT->getDecl());
return;
}
+ if (const ObjCInterfaceType *OIT = T->getAs<ObjCInterfaceType>()) {
+ Out << '$';
+ VisitObjCInterfaceDecl(OIT->getDecl());
+ return;
+ }
+ if (const ObjCObjectType *OIT = T->getAs<ObjCObjectType>()) {
+ Out << 'Q';
+ VisitType(OIT->getBaseType());
+ for (auto *Prot : OIT->getProtocols())
+ VisitObjCProtocolDecl(Prot);
+ return;
+ }
if (const TemplateTypeParmType *TTP = T->getAs<TemplateTypeParmType>()) {
Out << 't' << TTP->getDepth() << '.' << TTP->getIndex();
return;
@@ -835,8 +864,9 @@ void clang::index::generateUSRForObjCMethod(StringRef Sel,
OS << (IsInstanceMethod ? "(im)" : "(cm)") << Sel;
}
-void clang::index::generateUSRForObjCProperty(StringRef Prop, raw_ostream &OS) {
- OS << "(py)" << Prop;
+void clang::index::generateUSRForObjCProperty(StringRef Prop, bool isClassProp,
+ raw_ostream &OS) {
+ OS << (isClassProp ? "(cpy)" : "(py)") << Prop;
}
void clang::index::generateUSRForObjCProtocol(StringRef Prot, raw_ostream &OS) {
@@ -875,4 +905,3 @@ bool clang::index::generateUSRForMacro(const MacroDefinitionRecord *MD,
Out << MD->getName()->getName();
return false;
}
-
diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp
index 09d53846d4cf..4cace5b00245 100644
--- a/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/HeaderMap.cpp
@@ -12,48 +12,20 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/HeaderMap.h"
+#include "clang/Lex/HeaderMapTypes.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/FileManager.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <cstdio>
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/Debug.h"
+#include <cstring>
#include <memory>
using namespace clang;
-//===----------------------------------------------------------------------===//
-// Data Structures and Manifest Constants
-//===----------------------------------------------------------------------===//
-
-enum {
- HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
- HMAP_HeaderVersion = 1,
-
- HMAP_EmptyBucketKey = 0
-};
-
-namespace clang {
-struct HMapBucket {
- uint32_t Key; // Offset (into strings) of key.
-
- uint32_t Prefix; // Offset (into strings) of value prefix.
- uint32_t Suffix; // Offset (into strings) of value suffix.
-};
-
-struct HMapHeader {
- uint32_t Magic; // Magic word, also indicates byte order.
- uint16_t Version; // Version number -- currently 1.
- uint16_t Reserved; // Reserved for future use - zero for now.
- uint32_t StringsOffset; // Offset to start of string pool.
- uint32_t NumEntries; // Number of entries in the string table.
- uint32_t NumBuckets; // Number of buckets (always a power of 2).
- uint32_t MaxValueLength; // Length of longest result path (excluding nul).
- // An array of 'NumBuckets' HMapBucket objects follows this header.
- // Strings follow the buckets, at StringsOffset.
-};
-} // end namespace clang.
-
/// HashHMapKey - This is the 'well known' hash function required by the file
/// format, used to look up keys in the hash table. The hash table uses simple
/// linear probing based on this function.
@@ -82,15 +54,25 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) {
if (FileSize <= sizeof(HMapHeader)) return nullptr;
auto FileBuffer = FM.getBufferForFile(FE);
- if (!FileBuffer) return nullptr; // Unreadable file?
- const char *FileStart = (*FileBuffer)->getBufferStart();
+ if (!FileBuffer || !*FileBuffer)
+ return nullptr;
+ bool NeedsByteSwap;
+ if (!checkHeader(**FileBuffer, NeedsByteSwap))
+ return nullptr;
+ return new HeaderMap(std::move(*FileBuffer), NeedsByteSwap);
+}
+
+bool HeaderMapImpl::checkHeader(const llvm::MemoryBuffer &File,
+ bool &NeedsByteSwap) {
+ if (File.getBufferSize() <= sizeof(HMapHeader))
+ return false;
+ const char *FileStart = File.getBufferStart();
// We know the file is at least as big as the header, check it now.
const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
// Sniff it to see if it's a headermap by checking the magic number and
// version.
- bool NeedsByteSwap;
if (Header->Magic == HMAP_HeaderMagicNumber &&
Header->Version == HMAP_HeaderVersion)
NeedsByteSwap = false;
@@ -98,12 +80,24 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) {
Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
NeedsByteSwap = true; // Mixed endianness headermap.
else
- return nullptr; // Not a header map.
-
- if (Header->Reserved != 0) return nullptr;
-
- // Okay, everything looks good, create the header map.
- return new HeaderMap(std::move(*FileBuffer), NeedsByteSwap);
+ return false; // Not a header map.
+
+ if (Header->Reserved != 0)
+ return false;
+
+ // Check the number of buckets. It should be a power of two, and there
+ // should be enough space in the file for all of them.
+ uint32_t NumBuckets = NeedsByteSwap
+ ? llvm::sys::getSwappedBytes(Header->NumBuckets)
+ : Header->NumBuckets;
+ if (!llvm::isPowerOf2_32(NumBuckets))
+ return false;
+ if (File.getBufferSize() <
+ sizeof(HMapHeader) + sizeof(HMapBucket) * NumBuckets)
+ return false;
+
+ // Okay, everything looks good.
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -112,18 +106,18 @@ const HeaderMap *HeaderMap::Create(const FileEntry *FE, FileManager &FM) {
/// getFileName - Return the filename of the headermap.
-const char *HeaderMap::getFileName() const {
+const char *HeaderMapImpl::getFileName() const {
return FileBuffer->getBufferIdentifier();
}
-unsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
+unsigned HeaderMapImpl::getEndianAdjustedWord(unsigned X) const {
if (!NeedsBSwap) return X;
return llvm::ByteSwap_32(X);
}
/// getHeader - Return a reference to the file header, in unbyte-swapped form.
/// This method cannot fail.
-const HMapHeader &HeaderMap::getHeader() const {
+const HMapHeader &HeaderMapImpl::getHeader() const {
// We know the file is at least as big as the header. Return it.
return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
}
@@ -131,42 +125,43 @@ const HMapHeader &HeaderMap::getHeader() const {
/// getBucket - Return the specified hash table bucket from the header map,
/// bswap'ing its fields as appropriate. If the bucket number is not valid,
/// this return a bucket with an empty key (0).
-HMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
+HMapBucket HeaderMapImpl::getBucket(unsigned BucketNo) const {
+ assert(FileBuffer->getBufferSize() >=
+ sizeof(HMapHeader) + sizeof(HMapBucket) * BucketNo &&
+ "Expected bucket to be in range");
+
HMapBucket Result;
Result.Key = HMAP_EmptyBucketKey;
const HMapBucket *BucketArray =
reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
sizeof(HMapHeader));
-
const HMapBucket *BucketPtr = BucketArray+BucketNo;
- if ((const char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) {
- Result.Prefix = 0;
- Result.Suffix = 0;
- return Result; // Invalid buffer, corrupt hmap.
- }
- // Otherwise, the bucket is valid. Load the values, bswapping as needed.
+ // Load the values, bswapping as needed.
Result.Key = getEndianAdjustedWord(BucketPtr->Key);
Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
return Result;
}
-/// getString - Look up the specified string in the string table. If the string
-/// index is not valid, it returns an empty string.
-const char *HeaderMap::getString(unsigned StrTabIdx) const {
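+/// Look up the specified string in the string table. \returns None if the
+/// index is out of bounds or the string is not null-terminated within the
+/// buffer.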
+Optional<StringRef> HeaderMapImpl::getString(unsigned StrTabIdx) const {
// Add the start of the string table to the idx.
StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
// Check for invalid index.
if (StrTabIdx >= FileBuffer->getBufferSize())
- return nullptr;
+ return None;
- // Otherwise, we have a valid pointer into the file. Just return it. We know
- // that the "string" can not overrun the end of the file, because the buffer
- // is nul terminated by virtue of being a MemoryBuffer.
- return FileBuffer->getBufferStart()+StrTabIdx;
+ const char *Data = FileBuffer->getBufferStart() + StrTabIdx;
+ unsigned MaxLen = FileBuffer->getBufferSize() - StrTabIdx;
+ unsigned Len = strnlen(Data, MaxLen);
+
+ // Check whether the buffer is null-terminated.
+ if (Len == MaxLen && Data[Len - 1])
+ return None;
+
+ return StringRef(Data, Len);
}
//===----------------------------------------------------------------------===//
@@ -174,22 +169,28 @@ const char *HeaderMap::getString(unsigned StrTabIdx) const {
//===----------------------------------------------------------------------===//
/// dump - Print the contents of this headermap to stderr.
-void HeaderMap::dump() const {
+LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {
const HMapHeader &Hdr = getHeader();
unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
- fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n",
- getFileName(), NumBuckets,
- getEndianAdjustedWord(Hdr.NumEntries));
+ llvm::dbgs() << "Header Map " << getFileName() << ":\n " << NumBuckets
+ << ", " << getEndianAdjustedWord(Hdr.NumEntries) << "\n";
+
+ auto getStringOrInvalid = [this](unsigned Id) -> StringRef {
+ if (Optional<StringRef> S = getString(Id))
+ return *S;
+ return "<invalid>";
+ };
for (unsigned i = 0; i != NumBuckets; ++i) {
HMapBucket B = getBucket(i);
if (B.Key == HMAP_EmptyBucketKey) continue;
- const char *Key = getString(B.Key);
- const char *Prefix = getString(B.Prefix);
- const char *Suffix = getString(B.Suffix);
- fprintf(stderr, " %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix);
+ StringRef Key = getStringOrInvalid(B.Key);
+ StringRef Prefix = getStringOrInvalid(B.Prefix);
+ StringRef Suffix = getStringOrInvalid(B.Suffix);
+ llvm::dbgs() << " " << i << ". " << Key << " -> '" << Prefix << "' '"
+ << Suffix << "'\n";
}
}
@@ -199,22 +200,20 @@ const FileEntry *HeaderMap::LookupFile(
StringRef Filename, FileManager &FM) const {
SmallString<1024> Path;
- StringRef Dest = lookupFilename(Filename, Path);
+ StringRef Dest = HeaderMapImpl::lookupFilename(Filename, Path);
if (Dest.empty())
return nullptr;
return FM.getFile(Dest);
}
-StringRef HeaderMap::lookupFilename(StringRef Filename,
- SmallVectorImpl<char> &DestPath) const {
+StringRef HeaderMapImpl::lookupFilename(StringRef Filename,
+ SmallVectorImpl<char> &DestPath) const {
const HMapHeader &Hdr = getHeader();
unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
- // If the number of buckets is not a power of two, the headermap is corrupt.
- // Don't probe infinitely.
- if (NumBuckets & (NumBuckets-1))
- return StringRef();
+ // Don't probe infinitely; the bucket count was validated as a power of two
+ // in checkHeader() before this HeaderMap was constructed.
+ assert(llvm::isPowerOf2_32(NumBuckets) && "Expected power of 2");
// Linearly probe the hash table.
for (unsigned Bucket = HashHMapKey(Filename);; ++Bucket) {
@@ -222,16 +221,22 @@ StringRef HeaderMap::lookupFilename(StringRef Filename,
if (B.Key == HMAP_EmptyBucketKey) return StringRef(); // Hash miss.
// See if the key matches. If not, probe on.
- if (!Filename.equals_lower(getString(B.Key)))
+ Optional<StringRef> Key = getString(B.Key);
+ if (LLVM_UNLIKELY(!Key))
+ continue;
+ if (!Filename.equals_lower(*Key))
continue;
// If so, we have a match in the hash table. Construct the destination
// path.
- StringRef Prefix = getString(B.Prefix);
- StringRef Suffix = getString(B.Suffix);
+ Optional<StringRef> Prefix = getString(B.Prefix);
+ Optional<StringRef> Suffix = getString(B.Suffix);
+
DestPath.clear();
- DestPath.append(Prefix.begin(), Prefix.end());
- DestPath.append(Suffix.begin(), Suffix.end());
+ if (LLVM_LIKELY(Prefix && Suffix)) {
+ DestPath.append(Prefix->begin(), Prefix->end());
+ DestPath.append(Suffix->begin(), Suffix->end());
+ }
return StringRef(DestPath.begin(), DestPath.size());
}
}
diff --git a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp
index 2d005dd2e1f0..e5cc30e41c57 100644
--- a/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/HeaderSearch.cpp
@@ -14,7 +14,6 @@
#include "clang/Lex/HeaderSearch.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/IdentifierTable.h"
-#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderMap.h"
#include "clang/Lex/HeaderSearchOptions.h"
@@ -29,6 +28,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
+#include <utility>
#if defined(LLVM_ON_UNIX)
#include <limits.h>
#endif
@@ -56,9 +56,9 @@ HeaderSearch::HeaderSearch(IntrusiveRefCntPtr<HeaderSearchOptions> HSOpts,
SourceManager &SourceMgr, DiagnosticsEngine &Diags,
const LangOptions &LangOpts,
const TargetInfo *Target)
- : HSOpts(HSOpts), Diags(Diags), FileMgr(SourceMgr.getFileManager()),
- FrameworkMap(64), ModMap(SourceMgr, Diags, LangOpts, Target, *this),
- LangOpts(LangOpts) {
+ : HSOpts(std::move(HSOpts)), Diags(Diags),
+ FileMgr(SourceMgr.getFileManager()), FrameworkMap(64),
+ ModMap(SourceMgr, Diags, LangOpts, Target, *this) {
AngledDirIdx = 0;
SystemDirIdx = 0;
NoCurDirSearch = false;
@@ -250,8 +250,9 @@ const char *DirectoryLookup::getName() const {
}
const FileEntry *HeaderSearch::getFileAndSuggestModule(
- StringRef FileName, const DirectoryEntry *Dir, bool IsSystemHeaderDir,
- Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule) {
+ StringRef FileName, SourceLocation IncludeLoc, const DirectoryEntry *Dir,
+ bool IsSystemHeaderDir, Module *RequestingModule,
+ ModuleMap::KnownHeader *SuggestedModule) {
// If we have a module map that might map this header, load it and
// check whether we'll have a suggestion for a module.
const FileEntry *File = getFileMgr().getFile(FileName, /*OpenFile=*/true);
@@ -272,6 +273,7 @@ const FileEntry *HeaderSearch::getFileAndSuggestModule(
const FileEntry *DirectoryLookup::LookupFile(
StringRef &Filename,
HeaderSearch &HS,
+ SourceLocation IncludeLoc,
SmallVectorImpl<char> *SearchPath,
SmallVectorImpl<char> *RelativePath,
Module *RequestingModule,
@@ -297,7 +299,7 @@ const FileEntry *DirectoryLookup::LookupFile(
RelativePath->append(Filename.begin(), Filename.end());
}
- return HS.getFileAndSuggestModule(TmpDir, getDir(),
+ return HS.getFileAndSuggestModule(TmpDir, IncludeLoc, getDir(),
isSystemHeaderDirectory(),
RequestingModule, SuggestedModule);
}
@@ -567,7 +569,7 @@ const FileEntry *HeaderSearch::LookupFile(
ArrayRef<std::pair<const FileEntry *, const DirectoryEntry *>> Includers,
SmallVectorImpl<char> *SearchPath, SmallVectorImpl<char> *RelativePath,
Module *RequestingModule, ModuleMap::KnownHeader *SuggestedModule,
- bool SkipCache) {
+ bool SkipCache, bool BuildSystemModule) {
if (SuggestedModule)
*SuggestedModule = ModuleMap::KnownHeader();
@@ -585,7 +587,7 @@ const FileEntry *HeaderSearch::LookupFile(
RelativePath->append(Filename.begin(), Filename.end());
}
// Otherwise, just return the file.
- return getFileAndSuggestModule(Filename, nullptr,
+ return getFileAndSuggestModule(Filename, IncludeLoc, nullptr,
/*IsSystemHeaderDir*/false,
RequestingModule, SuggestedModule);
}
@@ -615,13 +617,14 @@ const FileEntry *HeaderSearch::LookupFile(
// getFileAndSuggestModule, because it's a reference to an element of
// a container that could be reallocated across this call.
//
- // FIXME: If we have no includer, that means we're processing a #include
+ // If we have no includer, that means we're processing a #include
// from a module build. We should treat this as a system header if we're
// building a [system] module.
bool IncluderIsSystemHeader =
- Includer && getFileInfo(Includer).DirInfo != SrcMgr::C_User;
+ Includer ? getFileInfo(Includer).DirInfo != SrcMgr::C_User :
+ BuildSystemModule;
if (const FileEntry *FE = getFileAndSuggestModule(
- TmpDir, IncluderAndDir.second, IncluderIsSystemHeader,
+ TmpDir, IncludeLoc, IncluderAndDir.second, IncluderIsSystemHeader,
RequestingModule, SuggestedModule)) {
if (!Includer) {
assert(First && "only first includer can have no file");
@@ -712,7 +715,7 @@ const FileEntry *HeaderSearch::LookupFile(
bool InUserSpecifiedSystemFramework = false;
bool HasBeenMapped = false;
const FileEntry *FE = SearchDirs[i].LookupFile(
- Filename, *this, SearchPath, RelativePath, RequestingModule,
+ Filename, *this, IncludeLoc, SearchPath, RelativePath, RequestingModule,
SuggestedModule, InUserSpecifiedSystemFramework, HasBeenMapped,
MappedName);
if (HasBeenMapped) {
@@ -1343,19 +1346,20 @@ void HeaderSearch::collectAllModules(SmallVectorImpl<Module *> &Modules) {
DirNative);
// Search each of the ".framework" directories to load them as modules.
- for (llvm::sys::fs::directory_iterator Dir(DirNative, EC), DirEnd;
+ vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
+ for (vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd;
Dir != DirEnd && !EC; Dir.increment(EC)) {
- if (llvm::sys::path::extension(Dir->path()) != ".framework")
+ if (llvm::sys::path::extension(Dir->getName()) != ".framework")
continue;
const DirectoryEntry *FrameworkDir =
- FileMgr.getDirectory(Dir->path());
+ FileMgr.getDirectory(Dir->getName());
if (!FrameworkDir)
continue;
// Load this framework module.
- loadFrameworkModule(llvm::sys::path::stem(Dir->path()), FrameworkDir,
- IsSystem);
+ loadFrameworkModule(llvm::sys::path::stem(Dir->getName()),
+ FrameworkDir, IsSystem);
}
continue;
}
@@ -1410,13 +1414,66 @@ void HeaderSearch::loadSubdirectoryModuleMaps(DirectoryLookup &SearchDir) {
std::error_code EC;
SmallString<128> DirNative;
llvm::sys::path::native(SearchDir.getDir()->getName(), DirNative);
- for (llvm::sys::fs::directory_iterator Dir(DirNative, EC), DirEnd;
+ vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
+ for (vfs::directory_iterator Dir = FS.dir_begin(DirNative, EC), DirEnd;
Dir != DirEnd && !EC; Dir.increment(EC)) {
- bool IsFramework = llvm::sys::path::extension(Dir->path()) == ".framework";
+ bool IsFramework =
+ llvm::sys::path::extension(Dir->getName()) == ".framework";
if (IsFramework == SearchDir.isFramework())
- loadModuleMapFile(Dir->path(), SearchDir.isSystemHeaderDirectory(),
+ loadModuleMapFile(Dir->getName(), SearchDir.isSystemHeaderDirectory(),
SearchDir.isFramework());
}
SearchDir.setSearchedAllModuleMaps(true);
}
+
+std::string HeaderSearch::suggestPathToFileForDiagnostics(const FileEntry *File,
+ bool *IsSystem) {
+ // FIXME: We assume that the path name currently cached in the FileEntry is
+ // the most appropriate one for this analysis (and that it's spelled the same
+ // way as the corresponding header search path).
+ const char *Name = File->getName();
+
+ unsigned BestPrefixLength = 0;
+ unsigned BestSearchDir;
+
+ for (unsigned I = 0; I != SearchDirs.size(); ++I) {
+ // FIXME: Support this search within frameworks and header maps.
+ if (!SearchDirs[I].isNormalDir())
+ continue;
+
+ const char *Dir = SearchDirs[I].getDir()->getName();
+ for (auto NI = llvm::sys::path::begin(Name),
+ NE = llvm::sys::path::end(Name),
+ DI = llvm::sys::path::begin(Dir),
+ DE = llvm::sys::path::end(Dir);
+ /*termination condition in loop*/; ++NI, ++DI) {
+ // '.' components in Name are ignored.
+ while (NI != NE && *NI == ".")
+ ++NI;
+ if (NI == NE)
+ break;
+
+ // '.' components in Dir are ignored.
+ while (DI != DE && *DI == ".")
+ ++DI;
+ if (DI == DE) {
+ // Dir is a prefix of Name, up to '.' components and choice of path
+ // separators.
+ unsigned PrefixLength = NI - llvm::sys::path::begin(Name);
+ if (PrefixLength > BestPrefixLength) {
+ BestPrefixLength = PrefixLength;
+ BestSearchDir = I;
+ }
+ break;
+ }
+
+ if (*NI != *DI)
+ break;
+ }
+ }
+
+ if (IsSystem)
+ *IsSystem = BestPrefixLength ? BestSearchDir >= SystemDirIdx : false;
+ return Name + BestPrefixLength;
+}
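// A hedged usage sketch of the new helper above; "HS" and "FE" stand for a
// HeaderSearch instance and a FileEntry obtained elsewhere.
bool IsSystem = false;
std::string Suggested = HS.suggestPathToFileForDiagnostics(FE, &IsSystem);
// If FE names "/usr/include/sys/types.h" and "/usr/include" is among the
// system search dirs, Suggested is "sys/types.h" and IsSystem is true;
// with no matching search dir, the full name comes back unchanged.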
diff --git a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
index 27b0feb48270..9c2a0163acea 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Lexer.cpp
@@ -719,7 +719,9 @@ SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
if (CharNo == 0)
return TokStart.getLocWithOffset(PhysOffset);
- ++TokPtr, --CharNo, ++PhysOffset;
+ ++TokPtr;
+ --CharNo;
+ ++PhysOffset;
}
// If we have a character that may be a trigraph or escaped newline, use a
@@ -1000,6 +1002,31 @@ StringRef Lexer::getImmediateMacroName(SourceLocation Loc,
return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
}
+StringRef Lexer::getImmediateMacroNameForDiagnostics(
+ SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) {
+ assert(Loc.isMacroID() && "Only reasonable to call this on macros");
+ // Walk past macro argument expansions.
+ while (SM.isMacroArgExpansion(Loc))
+ Loc = SM.getImmediateExpansionRange(Loc).first;
+
+ // If the macro's spelling has no FileID, then it's actually a token paste
+ // or stringization (or similar) and not a macro at all.
+ if (!SM.getFileEntryForID(SM.getFileID(SM.getSpellingLoc(Loc))))
+ return StringRef();
+
+ // Find the spelling location of the start of the non-argument expansion
+ // range. This is where the macro name was spelled in order to begin
+ // expanding this macro.
+ Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).first);
+
+ // Dig out the buffer where the macro name was spelled and the extents of the
+ // name so that we can render it into the expansion note.
+ std::pair<FileID, unsigned> ExpansionInfo = SM.getDecomposedLoc(Loc);
+ unsigned MacroTokenLength = Lexer::MeasureTokenLength(Loc, SM, LangOpts);
+ StringRef ExpansionBuffer = SM.getBufferData(ExpansionInfo.first);
+ return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
+}
+
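// A hedged usage sketch ("SM", "LangOpts", and a macro location "Loc" are
// assumed from the surrounding diagnostics machinery):
StringRef Name =
    Lexer::getImmediateMacroNameForDiagnostics(Loc, SM, LangOpts);
if (!Name.empty())
  llvm::errs() << "in expansion of macro '" << Name << "'\n";
// An empty result means the spelling came from token pasting or
// stringization rather than a real macro name, per the FileID check above.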
bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
return isIdentifierBody(c, LangOpts.DollarIdents);
}
@@ -1580,14 +1607,15 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
// If we have a hex FP constant, continue.
if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) {
- // Outside C99, we accept hexadecimal floating point numbers as a
+ // Outside C99 and C++17, we accept hexadecimal floating point numbers as a
// not-quite-conforming extension. Only do so if this looks like it's
// actually meant to be a hexfloat, and not if it has a ud-suffix.
bool IsHexFloat = true;
if (!LangOpts.C99) {
if (!isHexaLiteral(BufferPtr, LangOpts))
IsHexFloat = false;
- else if (std::find(BufferPtr, CurPtr, '_') != CurPtr)
+ else if (!getLangOpts().CPlusPlus1z &&
+ std::find(BufferPtr, CurPtr, '_') != CurPtr)
IsHexFloat = false;
}
if (IsHexFloat)
@@ -2582,7 +2610,7 @@ static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd,
ConflictMarkerKind CMK) {
const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>";
size_t TermLen = CMK == CMK_Perforce ? 5 : 7;
- StringRef RestOfBuffer(CurPtr+TermLen, BufferEnd-CurPtr-TermLen);
+ auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
size_t Pos = RestOfBuffer.find(Terminator);
while (Pos != StringRef::npos) {
// Must occur at start of line.
@@ -2608,8 +2636,8 @@ bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
return false;
// Check to see if we have <<<<<<< or >>>>.
- if ((BufferEnd-CurPtr < 8 || StringRef(CurPtr, 7) != "<<<<<<<") &&
- (BufferEnd-CurPtr < 6 || StringRef(CurPtr, 5) != ">>>> "))
+ if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith("<<<<<<<") &&
+ !StringRef(CurPtr, BufferEnd - CurPtr).startswith(">>>> "))
return false;
// If we have a situation where we don't care about conflict markers, ignore
@@ -3480,6 +3508,9 @@ LexNextToken:
if (Char == '=') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::caretequal;
+ } else if (LangOpts.OpenCL && Char == '^') {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::caretcaret;
} else {
Kind = tok::caret;
}
diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
index 5b1c49344e8d..e68b82fb499a 100644
--- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp
@@ -522,8 +522,10 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isLong = false;
isUnsigned = false;
isLongLong = false;
+ isHalf = false;
isFloat = false;
isImaginary = false;
+ isFloat128 = false;
MicrosoftInteger = 0;
hadError = false;
@@ -536,34 +538,10 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
s = SkipDigits(s);
if (s == ThisTokEnd) {
// Done.
- } else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) {
- PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
- diag::err_invalid_digit) << StringRef(s, 1) << 0;
- hadError = true;
- return;
- } else if (*s == '.') {
- checkSeparator(TokLoc, s, CSK_AfterDigits);
- s++;
- saw_period = true;
- checkSeparator(TokLoc, s, CSK_BeforeDigits);
- s = SkipDigits(s);
- }
- if ((*s == 'e' || *s == 'E')) { // exponent
- checkSeparator(TokLoc, s, CSK_AfterDigits);
- const char *Exponent = s;
- s++;
- saw_exponent = true;
- if (*s == '+' || *s == '-') s++; // sign
- checkSeparator(TokLoc, s, CSK_BeforeDigits);
- const char *first_non_digit = SkipDigits(s);
- if (first_non_digit != s) {
- s = first_non_digit;
- } else {
- PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent - ThisTokBegin),
- diag::err_exponent_has_no_digits);
- hadError = true;
+ } else {
+ ParseDecimalOrOctalCommon(TokLoc);
+ if (hadError)
return;
- }
}
}
@@ -579,12 +557,28 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
// we break out of the loop.
for (; s != ThisTokEnd; ++s) {
switch (*s) {
+ case 'h': // FP Suffix for "half".
+ case 'H':
+ // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
+ if (!PP.getLangOpts().Half) break;
+ if (!isFPConstant) break; // Error for integer constant.
+ if (isHalf || isFloat || isLong) break; // HH, FH, LH invalid.
+ isHalf = true;
+ continue; // Success.
case 'f': // FP Suffix for "float"
case 'F':
if (!isFPConstant) break; // Error for integer constant.
- if (isFloat || isLong) break; // FF, LF invalid.
+ if (isHalf || isFloat || isLong || isFloat128)
+ break; // HF, FF, LF, QF invalid.
isFloat = true;
continue; // Success.
+ case 'q': // FP Suffix for "__float128"
+ case 'Q':
+ if (!isFPConstant) break; // Error for integer constant.
+ if (isHalf || isFloat || isLong || isFloat128)
+ break; // HQ, FQ, LQ, QQ invalid.
+ isFloat128 = true;
+ continue; // Success.
case 'u':
case 'U':
if (isFPConstant) break; // Error for floating constant.
@@ -594,7 +588,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
case 'l':
case 'L':
if (isLong || isLongLong) break; // Cannot be repeated.
- if (isFloat) break; // LF invalid.
+ if (isHalf || isFloat || isFloat128) break; // LH, LF, LQ invalid.
// Check for long long. The L's need to be adjacent and the same case.
if (s[1] == s[0]) {
@@ -671,6 +665,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isUnsigned = false;
isLongLong = false;
isFloat = false;
+ isHalf = false;
isImaginary = false;
MicrosoftInteger = 0;
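// Accepted and rejected suffix spellings after the changes above (hedged;
// 'h' requires OpenCL half support, 'q' a target providing __float128):
//   1.0h -> isHalf      1.0f -> isFloat      1.0q -> isFloat128
//   1.0hf, 1.0lq, 1.0qq -> rejected: H, F, L and Q are mutually exclusive
//   1h                  -> rejected: FP suffix on an integer constant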
@@ -693,6 +688,49 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
}
}
+/// ParseDecimalOrOctalCommon - This method is called for decimal or octal
+/// numbers. It issues an error for illegal digits, and handles floating point
+/// parsing. If it detects a floating point number, the radix is set to 10.
+void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
+ assert((radix == 8 || radix == 10) && "Unexpected radix");
+
+ // If we have a hex digit other than 'e' (which denotes a FP exponent) then
+ // the code is using an incorrect base.
+ if (isHexDigit(*s) && *s != 'e' && *s != 'E') {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
+ diag::err_invalid_digit) << StringRef(s, 1) << (radix == 8 ? 1 : 0);
+ hadError = true;
+ return;
+ }
+
+ if (*s == '.') {
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
+ s++;
+ radix = 10;
+ saw_period = true;
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
+ s = SkipDigits(s); // Skip suffix.
+ }
+ if (*s == 'e' || *s == 'E') { // exponent
+ checkSeparator(TokLoc, s, CSK_AfterDigits);
+ const char *Exponent = s;
+ s++;
+ radix = 10;
+ saw_exponent = true;
+ if (*s == '+' || *s == '-') s++; // sign
+ const char *first_non_digit = SkipDigits(s);
+ if (containsDigits(s, first_non_digit)) {
+ checkSeparator(TokLoc, s, CSK_BeforeDigits);
+ s = first_non_digit;
+ } else {
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
+ diag::err_exponent_has_no_digits);
+ hadError = true;
+ return;
+ }
+ }
+}
+
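// Behavior of the factored-out common path above, by example (hedged):
//   0777   -> octal integer, radix stays 8
//   0777.5 -> floating constant; the '.' flips radix to 10
//   0e1    -> floating constant; the exponent flips radix to 10
//   09     -> err_invalid_digit ('9' is a hex digit but not valid octal)
//   1e+    -> err_exponent_has_no_digits (sign with no digits after it)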
/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
/// suffixes as ud-suffixes, because the diagnostic experience is better if we
/// treat it as an invalid suffix.
@@ -752,21 +790,24 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
radix = 16;
DigitsBegin = s;
s = SkipHexDigits(s);
- bool noSignificand = (s == DigitsBegin);
+ bool HasSignificandDigits = containsDigits(DigitsBegin, s);
if (s == ThisTokEnd) {
// Done.
} else if (*s == '.') {
s++;
saw_period = true;
const char *floatDigitsBegin = s;
- checkSeparator(TokLoc, s, CSK_BeforeDigits);
s = SkipHexDigits(s);
- noSignificand &= (floatDigitsBegin == s);
+ if (containsDigits(floatDigitsBegin, s))
+ HasSignificandDigits = true;
+ if (HasSignificandDigits)
+ checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
}
- if (noSignificand) {
+ if (!HasSignificandDigits) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
- diag::err_hexconstant_requires) << 1;
+ diag::err_hex_constant_requires)
+ << PP.getLangOpts().CPlusPlus << 1;
hadError = true;
return;
}
@@ -780,7 +821,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
saw_exponent = true;
if (*s == '+' || *s == '-') s++; // sign
const char *first_non_digit = SkipDigits(s);
- if (first_non_digit == s) {
+ if (!containsDigits(s, first_non_digit)) {
PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
diag::err_exponent_has_no_digits);
hadError = true;
@@ -790,10 +831,15 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
s = first_non_digit;
if (!PP.getLangOpts().HexFloats)
- PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
+ PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus
+ ? diag::ext_hex_literal_invalid
+ : diag::ext_hex_constant_invalid);
+ else if (PP.getLangOpts().CPlusPlus1z)
+ PP.Diag(TokLoc, diag::warn_cxx1z_hex_literal);
} else if (saw_period) {
- PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
- diag::err_hexconstant_requires) << 0;
+ PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
+ diag::err_hex_constant_requires)
+ << PP.getLangOpts().CPlusPlus << 0;
hadError = true;
}
return;
@@ -843,40 +889,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
}
}
- // If we have a hex digit other than 'e' (which denotes a FP exponent) then
- // the code is using an incorrect base.
- if (isHexDigit(*s) && *s != 'e' && *s != 'E') {
- PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
- diag::err_invalid_digit) << StringRef(s, 1) << 1;
- hadError = true;
- return;
- }
-
- if (*s == '.') {
- s++;
- radix = 10;
- saw_period = true;
- checkSeparator(TokLoc, s, CSK_BeforeDigits);
- s = SkipDigits(s); // Skip suffix.
- }
- if (*s == 'e' || *s == 'E') { // exponent
- checkSeparator(TokLoc, s, CSK_AfterDigits);
- const char *Exponent = s;
- s++;
- radix = 10;
- saw_exponent = true;
- if (*s == '+' || *s == '-') s++; // sign
- const char *first_non_digit = SkipDigits(s);
- if (first_non_digit != s) {
- checkSeparator(TokLoc, s, CSK_BeforeDigits);
- s = first_non_digit;
- } else {
- PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
- diag::err_exponent_has_no_digits);
- hadError = true;
- return;
- }
- }
+ ParseDecimalOrOctalCommon(TokLoc);
}
static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
diff --git a/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp b/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp
index 0b4292fbeae5..2ef4387b99ba 100644
--- a/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/MacroInfo.cpp
@@ -126,7 +126,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
return true;
}
-void MacroInfo::dump() const {
+LLVM_DUMP_METHOD void MacroInfo::dump() const {
llvm::raw_ostream &Out = llvm::errs();
// FIXME: Dump locations.
@@ -209,7 +209,7 @@ MacroDirective::findDirectiveAtLoc(SourceLocation L, SourceManager &SM) const {
return DefInfo();
}
-void MacroDirective::dump() const {
+LLVM_DUMP_METHOD void MacroDirective::dump() const {
llvm::raw_ostream &Out = llvm::errs();
switch (getKind()) {
diff --git a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp
index a7524028a229..3e3215dee82a 100644
--- a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp
@@ -89,16 +89,13 @@ ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticsEngine &Diags,
HeaderSearch &HeaderInfo)
: SourceMgr(SourceMgr), Diags(Diags), LangOpts(LangOpts), Target(Target),
HeaderInfo(HeaderInfo), BuiltinIncludeDir(nullptr),
- CompilingModule(nullptr), SourceModule(nullptr), NumCreatedModules(0) {
+ SourceModule(nullptr), NumCreatedModules(0) {
MMapLangOpts.LineComment = true;
}
ModuleMap::~ModuleMap() {
- for (llvm::StringMap<Module *>::iterator I = Modules.begin(),
- IEnd = Modules.end();
- I != IEnd; ++I) {
- delete I->getValue();
- }
+ for (auto &M : Modules)
+ delete M.getValue();
}
void ModuleMap::setTarget(const TargetInfo &Target) {
@@ -154,6 +151,7 @@ static bool isBuiltinHeader(StringRef FileName) {
.Case("limits.h", true)
.Case("stdalign.h", true)
.Case("stdarg.h", true)
+ .Case("stdatomic.h", true)
.Case("stdbool.h", true)
.Case("stddef.h", true)
.Case("stdint.h", true)
@@ -211,29 +209,25 @@ ModuleMap::findHeaderInUmbrellaDirs(const FileEntry *File,
static bool violatesPrivateInclude(Module *RequestingModule,
const FileEntry *IncFileEnt,
- ModuleMap::ModuleHeaderRole Role,
- Module *RequestedModule) {
- bool IsPrivateRole = Role & ModuleMap::PrivateHeader;
+ ModuleMap::KnownHeader Header) {
#ifndef NDEBUG
- if (IsPrivateRole) {
+ if (Header.getRole() & ModuleMap::PrivateHeader) {
// Check for consistency between the module header role
// as obtained from the lookup and as obtained from the module.
// This check is not cheap, so enable it only for debugging.
bool IsPrivate = false;
SmallVectorImpl<Module::Header> *HeaderList[] = {
- &RequestedModule->Headers[Module::HK_Private],
- &RequestedModule->Headers[Module::HK_PrivateTextual]};
+ &Header.getModule()->Headers[Module::HK_Private],
+ &Header.getModule()->Headers[Module::HK_PrivateTextual]};
for (auto *Hs : HeaderList)
IsPrivate |=
std::find_if(Hs->begin(), Hs->end(), [&](const Module::Header &H) {
return H.Entry == IncFileEnt;
}) != Hs->end();
- assert((!IsPrivateRole || IsPrivate) && "inconsistent headers and roles");
+ assert(IsPrivate && "inconsistent headers and roles");
}
#endif
- return IsPrivateRole && (!RequestingModule ||
- RequestedModule->getTopLevelModule() !=
- RequestingModule->getTopLevelModule());
+ return !Header.isAccessibleFrom(RequestingModule);
}
static Module *getTopLevelOrNull(Module *M) {
@@ -241,6 +235,7 @@ static Module *getTopLevelOrNull(Module *M) {
}
void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,
+ bool RequestingModuleIsModuleInterface,
SourceLocation FilenameLoc,
StringRef Filename,
const FileEntry *File) {
@@ -260,8 +255,7 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,
if (Known != Headers.end()) {
for (const KnownHeader &Header : Known->second) {
// Remember private headers for later printing of a diagnostic.
- if (violatesPrivateInclude(RequestingModule, File, Header.getRole(),
- Header.getModule())) {
+ if (violatesPrivateInclude(RequestingModule, File, Header)) {
Private = Header.getModule();
continue;
}
@@ -303,7 +297,7 @@ void ModuleMap::diagnoseHeaderInclusion(Module *RequestingModule,
if (LangOpts.ModulesStrictDeclUse) {
Diags.Report(FilenameLoc, diag::err_undeclared_use_of_module)
<< RequestingModule->getFullModuleName() << Filename;
- } else if (RequestingModule) {
+ } else if (RequestingModule && RequestingModuleIsModuleInterface) {
diag::kind DiagID = RequestingModule->getTopLevelModule()->IsFramework ?
diag::warn_non_modular_include_in_framework_module :
diag::warn_non_modular_include_in_module;
@@ -343,8 +337,8 @@ ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File) {
ModuleMap::KnownHeader Result;
// Iterate over all modules that 'File' is part of to find the best fit.
for (KnownHeader &H : Known->second) {
- // Prefer a header from the current module over all others.
- if (H.getModule()->getTopLevelModule() == CompilingModule)
+ // Prefer a header from the source module over all others.
+ if (H.getModule()->getTopLevelModule() == SourceModule)
return MakeResult(H);
if (!Result || isBetterKnownHeader(H, Result))
Result = H;
@@ -556,16 +550,10 @@ ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
// Create a new module with this name.
Module *Result = new Module(Name, SourceLocation(), Parent,
IsFramework, IsExplicit, NumCreatedModules++);
- if (LangOpts.CurrentModule == Name) {
- SourceModule = Result;
- SourceModuleName = Name;
- }
if (!Parent) {
+ if (LangOpts.CurrentModule == Name)
+ SourceModule = Result;
Modules[Name] = Result;
- if (!LangOpts.CurrentModule.empty() && !CompilingModule &&
- Name == LangOpts.CurrentModule) {
- CompilingModule = Result;
- }
}
return std::make_pair(Result, true);
}
@@ -693,9 +681,10 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
NumCreatedModules++);
InferredModuleAllowedBy[Result] = ModuleMapFile;
Result->IsInferred = true;
- if (LangOpts.CurrentModule == ModuleName) {
- SourceModule = Result;
- SourceModuleName = ModuleName;
+ if (!Parent) {
+ if (LangOpts.CurrentModule == ModuleName)
+ SourceModule = Result;
+ Modules[ModuleName] = Result;
}
Result->IsSystem |= Attrs.IsSystem;
@@ -703,9 +692,6 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
Result->ConfigMacrosExhaustive |= Attrs.IsExhaustive;
Result->Directory = FrameworkDir;
- if (!Parent)
- Modules[ModuleName] = Result;
-
// umbrella header "umbrella-header-name"
//
// The "Headers/" component of the name is implied because this is
@@ -725,13 +711,15 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
= StringRef(FrameworkDir->getName());
llvm::sys::path::append(SubframeworksDirName, "Frameworks");
llvm::sys::path::native(SubframeworksDirName);
- for (llvm::sys::fs::directory_iterator Dir(SubframeworksDirName, EC), DirEnd;
+ vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
+ for (vfs::directory_iterator Dir = FS.dir_begin(SubframeworksDirName, EC),
+ DirEnd;
Dir != DirEnd && !EC; Dir.increment(EC)) {
- if (!StringRef(Dir->path()).endswith(".framework"))
+ if (!StringRef(Dir->getName()).endswith(".framework"))
continue;
- if (const DirectoryEntry *SubframeworkDir
- = FileMgr.getDirectory(Dir->path())) {
+ if (const DirectoryEntry *SubframeworkDir =
+ FileMgr.getDirectory(Dir->getName())) {
// Note: as an egregious but useful hack, we use the real path here and
// check whether it is actually a subdirectory of the parent directory.
// This will not be the case if the 'subframework' is actually a symlink
@@ -774,6 +762,10 @@ void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader,
Mod->Umbrella = UmbrellaHeader;
Mod->UmbrellaAsWritten = NameAsWritten.str();
UmbrellaDirs[UmbrellaHeader->getDir()] = Mod;
+
+ // Notify callbacks that we just added a new header.
+ for (const auto &Cb : Callbacks)
+ Cb->moduleMapAddUmbrellaHeader(&SourceMgr.getFileManager(), UmbrellaHeader);
}
void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir,
@@ -812,13 +804,18 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
HeaderList.push_back(KH);
Mod->Headers[headerRoleToKind(Role)].push_back(std::move(Header));
- bool isCompilingModuleHeader = Mod->getTopLevelModule() == CompilingModule;
+ bool isCompilingModuleHeader =
+ LangOpts.CompilingModule && Mod->getTopLevelModule() == SourceModule;
if (!Imported || isCompilingModuleHeader) {
// When we import HeaderFileInfo, the external source is expected to
// set the isModuleHeader flag itself.
HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
isCompilingModuleHeader);
}
+
+ // Notify callbacks that we just added a new header.
+ for (const auto &Cb : Callbacks)
+ Cb->moduleMapAddHeader(Header.Entry->getName());
}
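// A hedged sketch of a consumer for the two new hooks; the callback
// interface (names, signatures) is inferred from the call sites above and
// from Header.Entry->getName() returning a C string in this revision.
struct IncludeCollector : ModuleMapCallbacks {
  std::vector<std::string> Seen;
  void moduleMapAddHeader(StringRef HeaderPath) override {
    Seen.push_back(HeaderPath.str());
  }
  void moduleMapAddUmbrellaHeader(FileManager *FM,
                                  const FileEntry *Header) override {
    Seen.push_back(Header->getName());
  }
};
// Registration would go through whatever addModuleMapCallbacks-style hook
// accompanies the Callbacks member iterated above (not shown in this diff).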
void ModuleMap::excludeHeader(Module *Mod, Module::Header Header) {
@@ -853,7 +850,7 @@ void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) {
InferredModuleAllowedBy[M] = ModMap;
}
-void ModuleMap::dump() {
+LLVM_DUMP_METHOD void ModuleMap::dump() {
llvm::errs() << "Modules:";
for (llvm::StringMap<Module *>::iterator M = Modules.begin(),
MEnd = Modules.end();
@@ -920,6 +917,9 @@ Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) {
if (Loc.isInvalid())
return nullptr;
+ if (UmbrellaDirs.empty() && Headers.empty())
+ return nullptr;
+
// Use the expansion location to determine which module we're in.
FullSourceLoc ExpansionLoc = Loc.getExpansionLoc();
if (!ExpansionLoc.isFileID())
@@ -1409,7 +1409,9 @@ void ModuleMapParser::parseModuleDecl() {
// Parse the optional attribute list.
Attributes Attrs;
- parseOptionalAttributes(Attrs);
+ if (parseOptionalAttributes(Attrs))
+ return;
+
// Parse the opening brace.
if (!Tok.is(MMToken::LBrace)) {
@@ -1934,11 +1936,13 @@ void ModuleMapParser::parseUmbrellaDirDecl(SourceLocation UmbrellaLoc) {
// uncommonly used Tcl module on Darwin platforms.
std::error_code EC;
SmallVector<Module::Header, 6> Headers;
- for (llvm::sys::fs::recursive_directory_iterator I(Dir->getName(), EC), E;
+ vfs::FileSystem &FS = *SourceMgr.getFileManager().getVirtualFileSystem();
+ for (vfs::recursive_directory_iterator I(FS, Dir->getName(), EC), E;
I != E && !EC; I.increment(EC)) {
- if (const FileEntry *FE = SourceMgr.getFileManager().getFile(I->path())) {
+ if (const FileEntry *FE =
+ SourceMgr.getFileManager().getFile(I->getName())) {
- Module::Header Header = {I->path(), FE};
+ Module::Header Header = {I->getName(), FE};
Headers.push_back(std::move(Header));
}
}
@@ -2074,7 +2078,9 @@ void ModuleMapParser::parseConfigMacros() {
// Parse the optional attributes.
Attributes Attrs;
- parseOptionalAttributes(Attrs);
+ if (parseOptionalAttributes(Attrs))
+ return;
+
if (Attrs.IsExhaustive && !ActiveModule->Parent) {
ActiveModule->ConfigMacrosExhaustive = true;
}
@@ -2222,7 +2228,8 @@ void ModuleMapParser::parseInferredModuleDecl(bool Framework, bool Explicit) {
// Parse optional attributes.
Attributes Attrs;
- parseOptionalAttributes(Attrs);
+ if (parseOptionalAttributes(Attrs))
+ return;
if (ActiveModule) {
// Note that we have an inferred submodule.
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp b/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp
index bd48ae64ab4b..4742aae5c123 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPCaching.cpp
@@ -116,3 +116,29 @@ void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
}
}
}
+
+bool Preprocessor::IsPreviousCachedToken(const Token &Tok) const {
+ // There's currently no cached token...
+ if (!CachedLexPos)
+ return false;
+
+ const Token LastCachedTok = CachedTokens[CachedLexPos - 1];
+ if (LastCachedTok.getKind() != Tok.getKind())
+ return false;
+
+ int RelOffset = 0;
+ if ((!getSourceManager().isInSameSLocAddrSpace(
+ Tok.getLocation(), getLastCachedTokenLocation(), &RelOffset)) ||
+ RelOffset)
+ return false;
+
+ return true;
+}
+
+void Preprocessor::ReplacePreviousCachedToken(ArrayRef<Token> NewToks) {
+ assert(CachedLexPos != 0 && "Expected to have some cached tokens");
+ CachedTokens.insert(CachedTokens.begin() + CachedLexPos - 1, NewToks.begin(),
+ NewToks.end());
+ CachedTokens.erase(CachedTokens.begin() + CachedLexPos - 1 + NewToks.size());
+ CachedLexPos += NewToks.size() - 1;
+}
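// A hedged sketch of the intended use: splice replacement tokens over the
// most recent cached token. "Tok" and makeGreaterToken() are illustrative
// stand-ins, not real Preprocessor API.
if (PP.IsPreviousCachedToken(Tok)) {          // same kind and position?
  Token Halves[2] = {makeGreaterToken(), makeGreaterToken()};
  PP.ReplacePreviousCachedToken(Halves);      // e.g. '>>' -> '>' '>'
}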
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp
index c02a0cb8d302..77f118fd3ccb 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp
@@ -24,9 +24,14 @@
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SaveAndRestore.h"
+
using namespace clang;
//===----------------------------------------------------------------------===//
@@ -136,6 +141,84 @@ static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
return MD_NoWarn;
}
+// Return true if we want to issue a diagnostic by default if we
+// encounter this name in a #include with the wrong case. For now,
+// this includes the standard C and C++ headers, Posix headers,
+// and Boost headers. Improper case for these #includes is a
+// potential portability issue.
+static bool warnByDefaultOnWrongCase(StringRef Include) {
+ // If the first component of the path is "boost", treat this like a standard header
+ // for the purposes of diagnostics.
+ if (::llvm::sys::path::begin(Include)->equals_lower("boost"))
+ return true;
+
+ // "condition_variable" is the longest standard header name at 18 characters.
+ // If the include file name is longer than that, it can't be a standard header.
+ static const size_t MaxStdHeaderNameLen = 18u;
+ if (Include.size() > MaxStdHeaderNameLen)
+ return false;
+
+ // Lowercase and normalize the search string.
+ SmallString<32> LowerInclude{Include};
+ for (char &Ch : LowerInclude) {
+ // In the ASCII range?
+ if (static_cast<unsigned char>(Ch) > 0x7f)
+ return false; // Can't be a standard header
+ // ASCII lowercase:
+ if (Ch >= 'A' && Ch <= 'Z')
+ Ch += 'a' - 'A';
+ // Normalize path separators for comparison purposes.
+ else if (::llvm::sys::path::is_separator(Ch))
+ Ch = '/';
+ }
+
+ // The standard C/C++ and Posix headers
+ return llvm::StringSwitch<bool>(LowerInclude)
+ // C library headers
+ .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
+ .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
+ .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
+ .Cases("stdatomic.h", "stdbool.h", "stddef.h", "stdint.h", "stdio.h", true)
+ .Cases("stdlib.h", "stdnoreturn.h", "string.h", "tgmath.h", "threads.h", true)
+ .Cases("time.h", "uchar.h", "wchar.h", "wctype.h", true)
+
+ // C++ headers for C library facilities
+ .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
+ .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
+ .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
+ .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
+ .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
+ .Case("cwctype", true)
+
+ // C++ library headers
+ .Cases("algorithm", "fstream", "list", "regex", "thread", true)
+ .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
+ .Cases("atomic", "future", "map", "set", "type_traits", true)
+ .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
+ .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
+ .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
+ .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
+ .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
+ .Cases("deque", "istream", "queue", "string", "valarray", true)
+ .Cases("exception", "iterator", "random", "strstream", "vector", true)
+ .Cases("forward_list", "limits", "ratio", "system_error", true)
+
+ // POSIX headers (which aren't also C headers)
+ .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
+ .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
+ .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
+ .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
+ .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
+ .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
+ .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
+ .Cases("sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
+ .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
+ .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
+ .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
+ .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
+ .Default(false);
+}
+
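// Spot checks for the predicate above; each result follows from the lists
// and the case/separator normalization (asserts for illustration only):
assert(warnByDefaultOnWrongCase("boost/any.hpp")); // first component "boost"
assert(warnByDefaultOnWrongCase("SYS/Stat.h"));    // normalizes to "sys/stat.h"
assert(warnByDefaultOnWrongCase("vector"));        // C++ library header
assert(!warnByDefaultOnWrongCase("windows.h"));    // on no list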
bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
bool *ShadowFlag) {
// Missing macro name?
@@ -272,8 +355,6 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) {
}
}
-
-
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
/// decided that the subsequent tokens are in the \#if'd out portion of the
/// file. Lex the rest of the file, until we see an \#endif. If
@@ -310,7 +391,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
setCodeCompletionReached();
continue;
}
-
+
// If this is the end of the buffer, we have an error.
if (Tok.is(tok::eof)) {
// Emit errors for each unterminated conditional on the stack, including
@@ -497,7 +578,6 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
}
void Preprocessor::PTHSkipExcludedConditionalBlock() {
-
while (1) {
assert(CurPTHLexer);
assert(CurPTHLexer->LexingRawMode == false);
@@ -571,28 +651,27 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() {
}
// Otherwise, skip this block and go to the next one.
- continue;
}
}
Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
- ModuleMap &ModMap = HeaderInfo.getModuleMap();
- if (SourceMgr.isInMainFile(Loc)) {
- if (Module *CurMod = getCurrentModule())
- return CurMod; // Compiling a module.
- return HeaderInfo.getModuleMap().SourceModule; // Compiling a source.
- }
- // Try to determine the module of the include directive.
- // FIXME: Look into directly passing the FileEntry from LookupFile instead.
- FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
- if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
- // The include comes from a file.
- return ModMap.findModuleForHeader(EntryOfIncl).getModule();
- } else {
- // The include does not come from a file,
- // so it is probably a module compilation.
- return getCurrentModule();
+ if (!SourceMgr.isInMainFile(Loc)) {
+ // Try to determine the module of the include directive.
+ // FIXME: Look into directly passing the FileEntry from LookupFile instead.
+ FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
+ if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
+ // The include comes from an included file.
+ return HeaderInfo.getModuleMap()
+ .findModuleForHeader(EntryOfIncl)
+ .getModule();
+ }
}
+
+ // This is either in the main file or not in a file at all. It belongs
+ // to the current module, if there is one.
+ return getLangOpts().CurrentModule.empty()
+ ? nullptr
+ : HeaderInfo.lookupModule(getLangOpts().CurrentModule);
}
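// Net decision of the rewrite above, summarized (hedged):
//  - location in an included file -> the module that owns that header;
//  - location in the main file, or in no file at all -> the module named by
//    LangOpts.CurrentModule, now found via HeaderInfo.lookupModule()
//    rather than the ModuleMap's removed CompilingModule bookkeeping.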
Module *Preprocessor::getModuleContainingLocation(SourceLocation Loc) {
@@ -600,6 +679,62 @@ Module *Preprocessor::getModuleContainingLocation(SourceLocation Loc) {
FullSourceLoc(Loc, SourceMgr));
}
+const FileEntry *
+Preprocessor::getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
+ SourceLocation Loc) {
+ // If we have a module import syntax, we shouldn't include a header to
+ // make a particular module visible.
+ if (getLangOpts().ObjC2)
+ return nullptr;
+
+ // Figure out which module we'd want to import.
+ Module *M = getModuleContainingLocation(Loc);
+ if (!M)
+ return nullptr;
+
+ Module *TopM = M->getTopLevelModule();
+ Module *IncM = getModuleForLocation(IncLoc);
+
+ // Walk up through the include stack, looking through textual headers of M
+ // until we hit a non-textual header that we can #include. (We assume textual
+ // headers of a module with non-textual headers aren't meant to be used to
+ // import entities from the module.)
+ auto &SM = getSourceManager();
+ while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {
+ auto ID = SM.getFileID(SM.getExpansionLoc(Loc));
+ auto *FE = SM.getFileEntryForID(ID);
+
+ bool InTextualHeader = false;
+ for (auto Header : HeaderInfo.getModuleMap().findAllModulesForHeader(FE)) {
+ if (!Header.getModule()->isSubModuleOf(TopM))
+ continue;
+
+ if (!(Header.getRole() & ModuleMap::TextualHeader)) {
+ // If this is an accessible, non-textual header of M's top-level module
+ // that transitively includes the given location and makes the
+ // corresponding module visible, this is the thing to #include.
+ if (Header.isAccessibleFrom(IncM))
+ return FE;
+
+ // It's in a private header; we can't #include it.
+ // FIXME: If there's a public header in some module that re-exports it,
+ // then we could suggest including that, but it's not clear that's the
+ // expected way to make this entity visible.
+ continue;
+ }
+
+ InTextualHeader = true;
+ }
+
+ if (!InTextualHeader)
+ break;
+
+ Loc = SM.getIncludeLoc(ID);
+ }
+
+ return nullptr;
+}
+
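// An illustration with a hypothetical module map:
//   module M { header "Public.h"  textual header "Detail.inc" }
// For a location inside Detail.inc reached via Public.h, the walk above
// skips the textual Detail.inc, reaches the non-textual, accessible
// Public.h, and returns it as the header to #include; if only private
// headers enclose the location, nullptr comes back and nothing is
// suggested.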
const FileEntry *Preprocessor::LookupFile(
SourceLocation FilenameLoc,
StringRef Filename,
@@ -611,12 +746,14 @@ const FileEntry *Preprocessor::LookupFile(
SmallVectorImpl<char> *RelativePath,
ModuleMap::KnownHeader *SuggestedModule,
bool SkipCache) {
- Module *RequestingModule = getModuleForLocation(FilenameLoc);
+ Module *RequestingModule = getModuleForLocation(FilenameLoc);
+ bool RequestingModuleIsModuleInterface = !SourceMgr.isInMainFile(FilenameLoc);
// If the header lookup mechanism may be relative to the current inclusion
// stack, record the parent #includes.
SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 16>
Includers;
+ bool BuildSystemModule = false;
if (!FromDir && !FromFile) {
FileID FID = getCurrentFileLexer()->getFileID();
const FileEntry *FileEnt = SourceMgr.getFileEntryForID(FID);
@@ -634,9 +771,10 @@ const FileEntry *Preprocessor::LookupFile(
// come from header declarations in the module map) relative to the module
// map file.
if (!FileEnt) {
- if (FID == SourceMgr.getMainFileID() && MainFileDir)
+ if (FID == SourceMgr.getMainFileID() && MainFileDir) {
Includers.push_back(std::make_pair(nullptr, MainFileDir));
- else if ((FileEnt =
+ BuildSystemModule = getCurrentModule()->IsSystem;
+ } else if ((FileEnt =
SourceMgr.getFileEntryForID(SourceMgr.getMainFileID())))
Includers.push_back(std::make_pair(FileEnt, FileMgr.getDirectory(".")));
} else {
@@ -682,11 +820,13 @@ const FileEntry *Preprocessor::LookupFile(
// Do a standard file entry lookup.
const FileEntry *FE = HeaderInfo.LookupFile(
Filename, FilenameLoc, isAngled, FromDir, CurDir, Includers, SearchPath,
- RelativePath, RequestingModule, SuggestedModule, SkipCache);
+ RelativePath, RequestingModule, SuggestedModule, SkipCache,
+ BuildSystemModule);
if (FE) {
if (SuggestedModule && !LangOpts.AsmPreprocessor)
HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
- RequestingModule, FilenameLoc, Filename, FE);
+ RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
+ Filename, FE);
return FE;
}
@@ -702,7 +842,8 @@ const FileEntry *Preprocessor::LookupFile(
SuggestedModule))) {
if (SuggestedModule && !LangOpts.AsmPreprocessor)
HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
- RequestingModule, FilenameLoc, Filename, FE);
+ RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
+ Filename, FE);
return FE;
}
}
@@ -717,7 +858,8 @@ const FileEntry *Preprocessor::LookupFile(
RequestingModule, SuggestedModule))) {
if (SuggestedModule && !LangOpts.AsmPreprocessor)
HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
- RequestingModule, FilenameLoc, Filename, FE);
+ RequestingModule, RequestingModuleIsModuleInterface,
+ FilenameLoc, Filename, FE);
return FE;
}
}
@@ -728,7 +870,6 @@ const FileEntry *Preprocessor::LookupFile(
return nullptr;
}
-
//===----------------------------------------------------------------------===//
// Preprocessor Directive Handling.
//===----------------------------------------------------------------------===//
@@ -740,9 +881,11 @@ public:
if (pp->MacroExpansionInDirectivesOverride)
pp->DisableMacroExpansion = false;
}
+
~ResetMacroExpansionHelper() {
PP->DisableMacroExpansion = save;
}
+
private:
Preprocessor *PP;
bool save;
@@ -849,7 +992,7 @@ void Preprocessor::HandleDirective(Token &Result) {
return HandleIncludeDirective(SavedHash.getLocation(), Result);
case tok::pp___include_macros:
// Handle -imacros.
- return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
+ return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
// C99 6.10.3 - Macro Replacement.
case tok::pp_define:
@@ -888,12 +1031,12 @@ void Preprocessor::HandleDirective(Token &Result) {
case tok::pp_unassert:
//isExtension = true; // FIXME: implement #unassert
break;
-
+
case tok::pp___public_macro:
if (getLangOpts().Modules)
return HandleMacroPublicDirective(Result);
break;
-
+
case tok::pp___private_macro:
if (getLangOpts().Modules)
return HandleMacroPrivateDirective(Result);
@@ -907,20 +1050,20 @@ void Preprocessor::HandleDirective(Token &Result) {
// various pseudo-ops. Just return the # token and push back the following
// token to be lexed next time.
if (getLangOpts().AsmPreprocessor) {
- Token *Toks = new Token[2];
+ auto Toks = llvm::make_unique<Token[]>(2);
// Return the # and the token after it.
Toks[0] = SavedHash;
Toks[1] = Result;
-
+
// If the second token is a hashhash token, then we need to translate it to
// unknown so the token lexer doesn't try to perform token pasting.
if (Result.is(tok::hashhash))
Toks[1].setKind(tok::unknown);
-
+
// Enter this token stream so that we re-lex the tokens. Make sure to
// enable macro expansion, in case the token after the # is an identifier
// that is expanded.
- EnterTokenStream(Toks, 2, false, true);
+ EnterTokenStream(std::move(Toks), 2, false);
return;
}
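// The ownership-passing pattern now used above, in isolation; the overload
// shape (unique_ptr<Token[]>, count, DisableMacroExpansion) is inferred
// from these call sites.
auto Toks = llvm::make_unique<Token[]>(1);
Toks[0].startToken();
Toks[0].setKind(tok::eod);
PP.EnterTokenStream(std::move(Toks), 1, /*DisableMacroExpansion=*/true);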
@@ -953,7 +1096,7 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val,
unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
if (Invalid)
return true;
-
+
// Verify that we have a simple digit-sequence, and compute the value. This
// is always a simple digit string computed in decimal, so we do this manually
// here.
@@ -1004,7 +1147,7 @@ void Preprocessor::HandleLineDirective(Token &Tok) {
unsigned LineNo;
if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
return;
-
+
if (LineNo == 0)
Diag(DigitTok, diag::ext_pp_line_zero);
@@ -1087,7 +1230,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
if (PLoc.isInvalid())
return true;
-
+
// If there is no include loc (main file) or if the include loc is in a
// different physical file, then we aren't in a "1" line marker flag region.
SourceLocation IncLoc = PLoc.getIncludeLoc();
@@ -1210,7 +1353,6 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
}
}
-
/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
///
void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
@@ -1229,7 +1371,7 @@ void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
// Find the first non-whitespace character, so that we can make the
// diagnostic more succinct.
- StringRef Msg = StringRef(Message).ltrim(" ");
+ StringRef Msg = StringRef(Message).ltrim(' ');
if (isWarning)
Diag(Tok, diag::pp_hash_warning) << Msg;
@@ -1276,7 +1418,7 @@ void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
Token MacroNameTok;
ReadMacroName(MacroNameTok, MU_Undef);
-
+
// Error reading macro name? If so, diagnostic already issued.
if (MacroNameTok.is(tok::eod))
return;
@@ -1287,13 +1429,13 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
// Okay, we finally have a valid identifier to undef.
MacroDirective *MD = getLocalMacroDirective(II);
-
+
// If the macro is not defined, this is an error.
if (!MD) {
Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
return;
}
-
+
// Note that this macro has now been exported.
appendMacroDirective(II, AllocateVisibilityMacroDirective(
MacroNameTok.getLocation(), /*IsPublic=*/true));
@@ -1303,24 +1445,24 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
void Preprocessor::HandleMacroPrivateDirective(Token &Tok) {
Token MacroNameTok;
ReadMacroName(MacroNameTok, MU_Undef);
-
+
// Error reading macro name? If so, diagnostic already issued.
if (MacroNameTok.is(tok::eod))
return;
-
+
// Check to see if this is the last token on the #__private_macro line.
CheckEndOfDirective("__private_macro");
-
+
IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
// Okay, we finally have a valid identifier to undef.
MacroDirective *MD = getLocalMacroDirective(II);
-
+
// If the macro is not defined, this is an error.
if (!MD) {
Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
return;
}
-
+
// Note that this macro has now been marked private.
appendMacroDirective(II, AllocateVisibilityMacroDirective(
MacroNameTok.getLocation(), /*IsPublic=*/false));
@@ -1395,7 +1537,7 @@ bool Preprocessor::ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
Lex(CurTok);
while (CurTok.isNot(tok::eod)) {
End = CurTok.getLocation();
-
+
// FIXME: Provide code completion for #includes.
if (CurTok.is(tok::code_completion)) {
setCodeCompletionReached();
@@ -1442,13 +1584,13 @@ static void EnterAnnotationToken(Preprocessor &PP,
tok::TokenKind Kind, void *AnnotationVal) {
// FIXME: Produce this as the current token directly, rather than
// allocating a new token for it.
- Token *Tok = new Token[1];
+ auto Tok = llvm::make_unique<Token[]>(1);
Tok[0].startToken();
Tok[0].setKind(Kind);
Tok[0].setLocation(Begin);
Tok[0].setAnnotationEndLoc(End);
Tok[0].setAnnotationValue(AnnotationVal);
- PP.EnterTokenStream(Tok, 1, true, true);
+ PP.EnterTokenStream(std::move(Tok), 1, true);
}
/// \brief Produce a diagnostic informing the user that a #include or similar
@@ -1466,24 +1608,24 @@ static void diagnoseAutoModuleImport(
PathString += Path[I].first->getName();
}
int IncludeKind = 0;
-
+
switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
case tok::pp_include:
IncludeKind = 0;
break;
-
+
case tok::pp_import:
IncludeKind = 1;
- break;
-
+ break;
+
case tok::pp_include_next:
IncludeKind = 2;
break;
-
+
case tok::pp___include_macros:
IncludeKind = 3;
break;
-
+
default:
llvm_unreachable("unknown include directive kind");
}
@@ -1496,17 +1638,49 @@ static void diagnoseAutoModuleImport(
("@import " + PathString + ";").str());
}
+// Given a vector of path components and a string containing the real
+// path to the file, build a properly-cased replacement in the vector,
+// and return true if the replacement should be suggested.
+static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
+ StringRef RealPathName) {
+ auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
+ auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
+ int Cnt = 0;
+ bool SuggestReplacement = false;
+ // Below is a best-effort to handle ".." in paths. It is admittedly
+ // not 100% correct in the presence of symlinks.
+ for (auto &Component : llvm::reverse(Components)) {
+ if ("." == Component) {
+ } else if (".." == Component) {
+ ++Cnt;
+ } else if (Cnt) {
+ --Cnt;
+ } else if (RealPathComponentIter != RealPathComponentEnd) {
+ if (Component != *RealPathComponentIter) {
+ // If these path components differ by more than just case, then we
+ // may be looking at symlinked paths. Bail on this diagnostic to avoid
+ // noisy false positives.
+ SuggestReplacement = RealPathComponentIter->equals_lower(Component);
+ if (!SuggestReplacement)
+ break;
+ Component = *RealPathComponentIter;
+ }
+ ++RealPathComponentIter;
+ }
+ }
+ return SuggestReplacement;
+}
+
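// A hedged worked example (paths hypothetical): the directive spells
// "Foo/Bar.h" but the on-disk path ends in ".../foo/Bar.h".
SmallVector<StringRef, 4> Components = {"Foo", "Bar.h"};
bool Suggest = trySimplifyPath(Components, "/proj/include/foo/Bar.h");
// Suggest == true and Components is now {"foo", "Bar.h"}. A component that
// differs beyond case (a likely symlink) makes the loop bail and return
// false instead, per the comment above.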
/// HandleIncludeDirective - The "\#include" tokens have just been read, read
/// the file to be included from the lexer, then include it! This is a common
/// routine with functionality shared between \#include, \#include_next and
/// \#import. LookupFrom is set when this is a \#include_next directive, it
/// specifies the file to start searching from.
-void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
+void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Token &IncludeTok,
const DirectoryLookup *LookupFrom,
const FileEntry *LookupFromFile,
bool isImport) {
-
Token FilenameTok;
CurPPLexer->LexIncludeFilename(FilenameTok);
@@ -1515,7 +1689,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
StringRef Filename;
SourceLocation End;
SourceLocation CharEnd; // the end of this directive, in characters
-
+
switch (FilenameTok.getKind()) {
case tok::eod:
// If the token kind is EOD, the error has already been diagnosed.
@@ -1586,8 +1760,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
}
if (HeaderInfo.HasIncludeAliasMap()) {
- // Map the filename with the brackets still attached. If the name doesn't
- // map to anything, fall back on the filename we've already gotten the
+ // Map the filename with the brackets still attached. If the name doesn't
+ // map to anything, fall back on the filename we've already gotten the
// spelling for.
StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
if (!NewName.empty())
@@ -1624,7 +1798,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// Add the recovery path to the list of search paths.
DirectoryLookup DL(DE, SrcMgr::C_User, false);
HeaderInfo.AddSearchPath(DL, isAngled);
-
+
// Try the lookup again, skipping the cache.
File = LookupFile(
FilenameLoc,
@@ -1636,7 +1810,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
}
if (!SuppressIncludeNotFoundError) {
- // If the file could not be located and it was included via angle
+ // If the file could not be located and it was included via angle
// brackets, we can attempt a lookup as though it were a quoted path to
// provide the user with a possible fixit.
if (isAngled) {
@@ -1649,8 +1823,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
&SuggestedModule);
if (File) {
SourceRange Range(FilenameTok.getLocation(), CharEnd);
- Diag(FilenameTok, diag::err_pp_file_not_found_not_fatal) <<
- Filename <<
+ Diag(FilenameTok, diag::err_pp_file_not_found_not_fatal) <<
+ Filename <<
FixItHint::CreateReplacement(Range, "\"" + Filename.str() + "\"");
}
}
@@ -1672,13 +1846,15 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// are processing this module textually (because we're building the module).
if (File && SuggestedModule && getLangOpts().Modules &&
SuggestedModule.getModule()->getTopLevelModuleName() !=
- getLangOpts().CurrentModule &&
- SuggestedModule.getModule()->getTopLevelModuleName() !=
- getLangOpts().ImplementationOfModule) {
-
+ getLangOpts().CurrentModule) {
// If this include corresponds to a module but that module is
// unavailable, diagnose the situation and bail out.
- if (!SuggestedModule.getModule()->isAvailable()) {
+ // FIXME: Remove this; loadModule does the same check (but produces
+ // slightly worse diagnostics).
+ if (!SuggestedModule.getModule()->isAvailable() &&
+ !SuggestedModule.getModule()
+ ->getTopLevelModule()
+ ->HasIncompatibleModuleFile) {
clang::Module::Requirement Requirement;
clang::Module::UnresolvedHeaderDirective MissingHeader;
Module *M = SuggestedModule.getModule();
@@ -1711,7 +1887,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// We only do this in Objective-C, where we have a module-import syntax.
if (getLangOpts().ObjC2)
diagnoseAutoModuleImport(*this, HashLoc, IncludeTok, Path, CharEnd);
-
+
// Load the module to import its macros. We'll make the declarations
// visible when the parser gets here.
// FIXME: Pass SuggestedModule in here rather than converting it to a path
@@ -1759,7 +1935,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (!File)
return;
-
+
// The #included file will be considered to be a system header if either it is
// in a system include directory, or if the #includer is a system include
// header.
@@ -1770,6 +1946,39 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// FIXME: If we have a suggested module, and we've already visited this file,
// don't bother entering it again. We know it has no further effect.
+ // Issue a diagnostic if the name of the file on disk has a different case
+ // than the one we're about to open.
+ const bool CheckIncludePathPortability =
+ File && !File->tryGetRealPathName().empty();
+
+ if (CheckIncludePathPortability) {
+ StringRef Name = LangOpts.MSVCCompat ? NormalizedPath.str() : Filename;
+ StringRef RealPathName = File->tryGetRealPathName();
+ SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
+ llvm::sys::path::end(Name));
+
+ if (trySimplifyPath(Components, RealPathName)) {
+ SmallString<128> Path;
+ Path.reserve(Name.size()+2);
+ Path.push_back(isAngled ? '<' : '"');
+ for (auto Component : Components) {
+ Path.append(Component);
+ // Append the separator the user used, or the close quote
+ Path.push_back(
+ Path.size() <= Filename.size() ? Filename[Path.size()-1] :
+ (isAngled ? '>' : '"'));
+ }
+ auto Replacement = Path.str().str();
+ // For user files and known standard headers, by default we issue a diagnostic.
+ // For other system headers, we don't. They can be controlled separately.
+ auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ?
+ diag::pp_nonportable_path : diag::pp_nonportable_system_path;
+ SourceRange Range(FilenameTok.getLocation(), CharEnd);
+ Diag(FilenameTok, DiagId) << Replacement <<
+ FixItHint::CreateReplacement(Range, Replacement);
+ }
+ }
+
// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
if (ShouldEnter &&
@@ -1863,7 +2072,7 @@ void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
// so we can continue processing from there.
Diag(Tok, diag::err_pp_import_directive_ms );
- // Read tokens until we get to the end of the directive. Note that the
+ // Read tokens until we get to the end of the directive. Note that the
// directive can be split over multiple lines using the backslash character.
DiscardUntilEndOfDirective();
}
@@ -1930,7 +2139,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) {
return true;
case tok::ellipsis: // #define X(... -> C99 varargs
if (!LangOpts.C99)
- Diag(Tok, LangOpts.CPlusPlus11 ?
+ Diag(Tok, LangOpts.CPlusPlus11 ?
diag::warn_cxx98_compat_variadic_macro :
diag::ext_variadic_macro);
@@ -2147,7 +2356,6 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
// Get the next token of the macro.
LexUnexpandedToken(Tok);
}
-
} else {
// Otherwise, read the body of a function-like macro. While we are at it,
// check C99 6.10.3.2p1: ensure that # operators are followed by macro
@@ -2155,7 +2363,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
while (Tok.isNot(tok::eod)) {
LastTok = Tok;
- if (Tok.isNot(tok::hash) && Tok.isNot(tok::hashhash)) {
+ if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
MI->AddTokenToBody(Tok);
// Get the next token of the macro.
@@ -2176,11 +2384,10 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
}
if (Tok.is(tok::hashhash)) {
-
// If we see token pasting, check if it looks like the gcc comma
// pasting extension. We'll use this information to suppress
// diagnostics later on.
-
+
// Get the next token of the macro.
LexUnexpandedToken(Tok);
@@ -2215,7 +2422,8 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
MI->AddTokenToBody(LastTok);
continue;
} else {
- Diag(Tok, diag::err_pp_stringize_not_parameter);
+ Diag(Tok, diag::err_pp_stringize_not_parameter)
+ << LastTok.is(tok::hashat);
// Disable __VA_ARGS__ again.
Ident__VA_ARGS__->setIsPoisoned(true);
@@ -2292,7 +2500,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok,
if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
- // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
+ // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
// C++ [cpp.predefined]p4, but allow it as an extension.
if (OtherMI->isBuiltinMacro())
Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
@@ -2365,7 +2573,6 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) {
AllocateUndefMacroDirective(MacroNameTok.getLocation()));
}
-
//===----------------------------------------------------------------------===//
// Preprocessor Conditional Directive Handling.
//===----------------------------------------------------------------------===//
@@ -2554,7 +2761,7 @@ void Preprocessor::HandleElifDirective(Token &ElifToken) {
// If this is a #elif with a #else before it, report the error.
if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
-
+
if (Callbacks)
Callbacks->Elif(ElifToken.getLocation(),
SourceRange(ConditionalBegin, ConditionalEnd),
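A minimal sketch of the include-path portability check added above, with a header name of my own choosing: on a case-insensitive file system, spelling an include differently from the on-disk name now draws a warning, and the fix-it is rebuilt from the user's own separators and quote style.

    /* sketch.c -- assumes a real header named "Config.h" beside it */
    #include "config.h"   /* case differs from on-disk "Config.h"; the new
                             check compares each path component against
                             tryGetRealPathName() and suggests
                             #include "Config.h" as a replacement */
    int main(void) { return 0; }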
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp b/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp
index c40598c06756..94075ece35ca 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPExpressions.cpp
@@ -33,12 +33,18 @@ namespace {
/// conditional and the source range covered by it.
class PPValue {
SourceRange Range;
+ IdentifierInfo *II;
public:
llvm::APSInt Val;
// Default ctor - Construct an 'invalid' PPValue.
PPValue(unsigned BitWidth) : Val(BitWidth) {}
+ // If this value was produced by directly evaluating an identifier, produce
+ // that identifier.
+ IdentifierInfo *getIdentifier() const { return II; }
+ void setIdentifier(IdentifierInfo *II) { this->II = II; }
+
unsigned getBitWidth() const { return Val.getBitWidth(); }
bool isUnsigned() const { return Val.isUnsigned(); }
@@ -140,6 +146,51 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.LexNonComment(PeekTok);
}
+ // [cpp.cond]p4:
+ // Prior to evaluation, macro invocations in the list of preprocessing
+ // tokens that will become the controlling constant expression are replaced
+ // (except for those macro names modified by the 'defined' unary operator),
+ // just as in normal text. If the token 'defined' is generated as a result
+ // of this replacement process or use of the 'defined' unary operator does
+ // not match one of the two specified forms prior to macro replacement, the
+ // behavior is undefined.
+ // This isn't an idle threat; consider this program:
+ // #define FOO
+ // #define BAR defined(FOO)
+ // #if BAR
+ // ...
+ // #else
+ // ...
+ // #endif
+ // clang and gcc will pick the #if branch while Visual Studio will take the
+ // #else branch. Emit a warning about this undefined behavior.
+ if (beginLoc.isMacroID()) {
+ bool IsFunctionTypeMacro =
+ PP.getSourceManager()
+ .getSLocEntry(PP.getSourceManager().getFileID(beginLoc))
+ .getExpansion()
+ .isFunctionMacroExpansion();
+ // For object-type macros, it's easy to replace
+ // #define FOO defined(BAR)
+ // with
+ // #if defined(BAR)
+ // #define FOO 1
+ // #else
+ // #define FOO 0
+ // #endif
+ // and doing so makes sense since compilers handle this differently in
+ // practice (see example further up). But for function-type macros,
+ // there is no good way to write
+ // # define FOO(x) (defined(M_ ## x) && M_ ## x)
+ // in a different way, and compilers seem to agree on how to behave here.
+ // So warn by default on object-type macros, but only warn in -pedantic
+ // mode on function-type macros.
+ if (IsFunctionTypeMacro)
+ PP.Diag(beginLoc, diag::warn_defined_in_function_type_macro);
+ else
+ PP.Diag(beginLoc, diag::warn_defined_in_object_type_macro);
+ }
+
// Invoke the 'defined' callback.
if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
Callbacks->Defined(macroToken, Macro,
@@ -164,6 +215,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
bool ValueLive, Preprocessor &PP) {
DT.State = DefinedTracker::Unknown;
+ Result.setIdentifier(nullptr);
+
if (PeekTok.is(tok::code_completion)) {
if (PP.getCodeCompletionHandler())
PP.getCodeCompletionHandler()->CodeCompletePreprocessorExpression();
@@ -177,8 +230,8 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) {
// Handle "defined X" and "defined(X)".
if (II->isStr("defined"))
- return(EvaluateDefined(Result, PeekTok, DT, ValueLive, PP));
-
+ return EvaluateDefined(Result, PeekTok, DT, ValueLive, PP);
+
// If this identifier isn't 'defined' or one of the special
// preprocessor keywords and it wasn't macro expanded, it turns
// into a simple 0, unless it is the C++ keyword "true", in which case it
@@ -189,6 +242,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.Diag(PeekTok, diag::warn_pp_undef_identifier) << II;
Result.Val = II->getTokenID() == tok::kw_true;
Result.Val.setIsUnsigned(false); // "0" is signed intmax_t 0.
+ Result.setIdentifier(II);
Result.setRange(PeekTok.getLocation());
PP.LexNonComment(PeekTok);
return false;
@@ -347,6 +401,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
DT.State = DefinedTracker::Unknown;
}
Result.setRange(Start, PeekTok.getLocation());
+ Result.setIdentifier(nullptr);
PP.LexNonComment(PeekTok); // Eat the ).
return false;
}
@@ -356,6 +411,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.LexNonComment(PeekTok);
if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
Result.setBegin(Start);
+ Result.setIdentifier(nullptr);
return false;
}
case tok::minus: {
@@ -363,6 +419,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.LexNonComment(PeekTok);
if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
Result.setBegin(Loc);
+ Result.setIdentifier(nullptr);
// C99 6.5.3.3p3: The sign of the result matches the sign of the operand.
Result.Val = -Result.Val;
@@ -383,6 +440,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
PP.LexNonComment(PeekTok);
if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true;
Result.setBegin(Start);
+ Result.setIdentifier(nullptr);
// C99 6.5.3.3p4: The sign of the result matches the sign of the operand.
Result.Val = ~Result.Val;
@@ -398,6 +456,7 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
Result.Val = !Result.Val;
// C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed.
Result.Val.setIsUnsigned(false);
+ Result.setIdentifier(nullptr);
if (DT.State == DefinedTracker::DefinedMacro)
DT.State = DefinedTracker::NotDefinedMacro;
@@ -446,6 +505,15 @@ static unsigned getPrecedence(tok::TokenKind Kind) {
}
}
+static void diagnoseUnexpectedOperator(Preprocessor &PP, PPValue &LHS,
+ Token &Tok) {
+ if (Tok.is(tok::l_paren) && LHS.getIdentifier())
+ PP.Diag(LHS.getRange().getBegin(), diag::err_pp_expr_bad_token_lparen)
+ << LHS.getIdentifier();
+ else
+ PP.Diag(Tok.getLocation(), diag::err_pp_expr_bad_token_binop)
+ << LHS.getRange();
+}
/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is
/// PeekTok, and whose precedence is PeekPrec. This returns the result in LHS.
@@ -459,8 +527,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
unsigned PeekPrec = getPrecedence(PeekTok.getKind());
// If this token isn't valid, report the error.
if (PeekPrec == ~0U) {
- PP.Diag(PeekTok.getLocation(), diag::err_pp_expr_bad_token_binop)
- << LHS.getRange();
+ diagnoseUnexpectedOperator(PP, LHS, PeekTok);
return true;
}
@@ -503,8 +570,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
// If this token isn't valid, report the error.
if (PeekPrec == ~0U) {
- PP.Diag(PeekTok.getLocation(), diag::err_pp_expr_bad_token_binop)
- << RHS.getRange();
+ diagnoseUnexpectedOperator(PP, RHS, PeekTok);
return true;
}
@@ -605,8 +671,10 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
case tok::greatergreater: {
// Determine whether overflow is about to happen.
unsigned ShAmt = static_cast<unsigned>(RHS.Val.getLimitedValue());
- if (ShAmt >= LHS.getBitWidth())
- Overflow = true, ShAmt = LHS.getBitWidth()-1;
+ if (ShAmt >= LHS.getBitWidth()) {
+ Overflow = true;
+ ShAmt = LHS.getBitWidth()-1;
+ }
Res = LHS.Val >> ShAmt;
break;
}
@@ -722,6 +790,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
// Put the result back into 'LHS' for our next iteration.
LHS.Val = Res;
LHS.setEnd(RHS.getRange().getEnd());
+ RHS.setIdentifier(nullptr);
}
}
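Two fragments of my own that exercise the diagnostics added above; both are expected to produce diagnostics, not to compile cleanly. The first triggers the new warning for 'defined' produced by object-type macro expansion; the second shows the sharper error from diagnoseUnexpectedOperator, which now blames the identifier in front of the '(' instead of the '(' itself.

    #define FOO
    #define HAVE_FOO defined(FOO)  /* 'defined' appears only after expansion */
    #if HAVE_FOO                   /* new warning: compilers disagree on this */
    #endif

    #if BAR(2)  /* BAR is not a macro: the error now points at 'BAR', using
                   the identifier recorded via PPValue::setIdentifier() */
    #endif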
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp b/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
index 2f09841c5b5d..e2eceafd983b 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPLexerChange.cpp
@@ -622,8 +622,8 @@ void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc) {
if (!getLangOpts().ModulesLocalVisibility) {
// Just track that we entered this submodule.
- BuildingSubmoduleStack.push_back(
- BuildingSubmoduleInfo(M, ImportLoc, CurSubmoduleState));
+ BuildingSubmoduleStack.push_back(BuildingSubmoduleInfo(
+ M, ImportLoc, CurSubmoduleState, PendingModuleMacroNames.size()));
return;
}
@@ -664,8 +664,8 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc) {
}
// Track that we entered this module.
- BuildingSubmoduleStack.push_back(
- BuildingSubmoduleInfo(M, ImportLoc, CurSubmoduleState));
+ BuildingSubmoduleStack.push_back(BuildingSubmoduleInfo(
+ M, ImportLoc, CurSubmoduleState, PendingModuleMacroNames.size()));
// Switch to this submodule as the current submodule.
CurSubmoduleState = &State;
@@ -675,42 +675,79 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc) {
makeModuleVisible(M, ImportLoc);
}
+bool Preprocessor::needModuleMacros() const {
+ // If we're not within a submodule, we never need to create ModuleMacros.
+ if (BuildingSubmoduleStack.empty())
+ return false;
+ // If we are tracking module macro visibility even for textually-included
+ // headers, we need ModuleMacros.
+ if (getLangOpts().ModulesLocalVisibility)
+ return true;
+ // Otherwise, we only need module macros if we're actually compiling a module
+ // interface.
+ return getLangOpts().CompilingModule;
+}
+
void Preprocessor::LeaveSubmodule() {
auto &Info = BuildingSubmoduleStack.back();
Module *LeavingMod = Info.M;
SourceLocation ImportLoc = Info.ImportLoc;
+ if (!needModuleMacros() ||
+ (!getLangOpts().ModulesLocalVisibility &&
+ LeavingMod->getTopLevelModuleName() != getLangOpts().CurrentModule)) {
+ // If we don't need module macros, or this is not a module for which we
+ // are tracking macro visibility, don't build any, and preserve the list
+ // of pending names for the surrounding submodule.
+ BuildingSubmoduleStack.pop_back();
+ makeModuleVisible(LeavingMod, ImportLoc);
+ return;
+ }
+
// Create ModuleMacros for any macros defined in this submodule.
- for (auto &Macro : CurSubmoduleState->Macros) {
- auto *II = const_cast<IdentifierInfo*>(Macro.first);
+ llvm::SmallPtrSet<const IdentifierInfo*, 8> VisitedMacros;
+ for (unsigned I = Info.OuterPendingModuleMacroNames;
+ I != PendingModuleMacroNames.size(); ++I) {
+ auto *II = const_cast<IdentifierInfo*>(PendingModuleMacroNames[I]);
+ if (!VisitedMacros.insert(II).second)
+ continue;
+
+ auto MacroIt = CurSubmoduleState->Macros.find(II);
+ if (MacroIt == CurSubmoduleState->Macros.end())
+ continue;
+ auto &Macro = MacroIt->second;
// Find the starting point for the MacroDirective chain in this submodule.
MacroDirective *OldMD = nullptr;
- if (getLangOpts().ModulesLocalVisibility) {
+ auto *OldState = Info.OuterSubmoduleState;
+ if (getLangOpts().ModulesLocalVisibility)
+ OldState = &NullSubmoduleState;
+ if (OldState && OldState != CurSubmoduleState) {
// FIXME: It'd be better to start at the state from when we most recently
// entered this submodule, but it doesn't really matter.
- auto &PredefMacros = NullSubmoduleState.Macros;
- auto PredefMacroIt = PredefMacros.find(Macro.first);
- if (PredefMacroIt == PredefMacros.end())
+ auto &OldMacros = OldState->Macros;
+ auto OldMacroIt = OldMacros.find(II);
+ if (OldMacroIt == OldMacros.end())
OldMD = nullptr;
else
- OldMD = PredefMacroIt->second.getLatest();
+ OldMD = OldMacroIt->second.getLatest();
}
// This module may have exported a new macro. If so, create a ModuleMacro
// representing that fact.
bool ExplicitlyPublic = false;
- for (auto *MD = Macro.second.getLatest(); MD != OldMD;
- MD = MD->getPrevious()) {
+ for (auto *MD = Macro.getLatest(); MD != OldMD; MD = MD->getPrevious()) {
assert(MD && "broken macro directive chain");
- // Stop on macros defined in other submodules we #included along the way.
- // There's no point doing this if we're tracking local submodule
- // visibility, since there can be no such directives in our list.
+ // Stop on macros defined in other submodules of this module that we
+ // #included along the way. There's no point doing this if we're
+ // tracking local submodule visibility, since there can be no such
+ // directives in our list.
if (!getLangOpts().ModulesLocalVisibility) {
Module *Mod = getModuleContainingLocation(MD->getLocation());
- if (Mod != LeavingMod)
+ if (Mod != LeavingMod &&
+ Mod->getTopLevelModule() == LeavingMod->getTopLevelModule())
break;
}
@@ -732,13 +769,14 @@ void Preprocessor::LeaveSubmodule() {
bool IsNew;
// Don't bother creating a module macro if it would represent a #undef
// that doesn't override anything.
- if (Def || !Macro.second.getOverriddenMacros().empty())
+ if (Def || !Macro.getOverriddenMacros().empty())
addModuleMacro(LeavingMod, II, Def,
- Macro.second.getOverriddenMacros(), IsNew);
+ Macro.getOverriddenMacros(), IsNew);
break;
}
}
}
+ PendingModuleMacroNames.resize(Info.OuterPendingModuleMacroNames);
// FIXME: Before we leave this submodule, we should parse all the other
// headers within it. Otherwise, we're left with an inconsistent state
diff --git a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp
index 18348df0a39e..2ade6df9456a 100644
--- a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp
@@ -52,6 +52,13 @@ void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
StoredMD.setLatest(MD);
StoredMD.overrideActiveModuleMacros(*this, II);
+ if (needModuleMacros()) {
+ // Track that we created a new macro directive, so we know we should
+ // consider building a ModuleMacro for it when we get to the end of
+ // the module.
+ PendingModuleMacroNames.push_back(II);
+ }
+
// Set up the identifier as having associated macro history.
II->setHasMacroDefinition(true);
if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
@@ -323,18 +330,11 @@ void Preprocessor::RegisterBuiltinMacros() {
Ident__is_identifier = RegisterBuiltinMacro(*this, "__is_identifier");
// Modules.
- if (LangOpts.Modules) {
- Ident__building_module = RegisterBuiltinMacro(*this, "__building_module");
-
- // __MODULE__
- if (!LangOpts.CurrentModule.empty())
- Ident__MODULE__ = RegisterBuiltinMacro(*this, "__MODULE__");
- else
- Ident__MODULE__ = nullptr;
- } else {
- Ident__building_module = nullptr;
+ Ident__building_module = RegisterBuiltinMacro(*this, "__building_module");
+ if (!LangOpts.CurrentModule.empty())
+ Ident__MODULE__ = RegisterBuiltinMacro(*this, "__MODULE__");
+ else
Ident__MODULE__ = nullptr;
- }
}
/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
@@ -723,6 +723,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// heap allocations in the common case.
SmallVector<Token, 64> ArgTokens;
bool ContainsCodeCompletionTok = false;
+ bool FoundElidedComma = false;
SourceLocation TooManyArgsLoc;
@@ -754,17 +755,20 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// Do not lose the EOF/EOD. Return it to the client.
MacroName = Tok;
return nullptr;
- } else {
- // Do not lose the EOF/EOD.
- Token *Toks = new Token[1];
- Toks[0] = Tok;
- EnterTokenStream(Toks, 1, true, true);
- break;
}
+ // Do not lose the EOF/EOD.
+ auto Toks = llvm::make_unique<Token[]>(1);
+ Toks[0] = Tok;
+ EnterTokenStream(std::move(Toks), 1, true);
+ break;
} else if (Tok.is(tok::r_paren)) {
// If we found the ) token, the macro arg list is done.
if (NumParens-- == 0) {
MacroEnd = Tok.getLocation();
+ if (!ArgTokens.empty() &&
+ ArgTokens.back().commaAfterElided()) {
+ FoundElidedComma = true;
+ }
break;
}
} else if (Tok.is(tok::l_paren)) {
@@ -909,7 +913,7 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
// then we have an empty "()" argument empty list. This is fine, even if
// the macro expects one argument (the argument is just empty).
isVarargsElided = MI->isVariadic();
- } else if (MI->isVariadic() &&
+ } else if ((FoundElidedComma || MI->isVariadic()) &&
(NumActuals+1 == MinArgsExpected || // A(x, ...) -> A(X)
(NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A()
// Varargs where the named vararg parameter is missing: OK as extension.
@@ -1042,9 +1046,8 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
/// HasFeature - Return true if we recognize and implement the feature
/// specified by the identifier as a standard language feature.
-static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
+static bool HasFeature(const Preprocessor &PP, StringRef Feature) {
const LangOptions &LangOpts = PP.getLangOpts();
- StringRef Feature = II->getName();
// Normalize the feature name, __foo__ becomes foo.
if (Feature.startswith("__") && Feature.endswith("__") && Feature.size() >= 4)
@@ -1062,10 +1065,14 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("attribute_availability_with_version_underscores", true)
.Case("attribute_availability_tvos", true)
.Case("attribute_availability_watchos", true)
+ .Case("attribute_availability_with_strict", true)
+ .Case("attribute_availability_with_replacement", true)
+ .Case("attribute_availability_in_templates", true)
.Case("attribute_cf_returns_not_retained", true)
.Case("attribute_cf_returns_retained", true)
.Case("attribute_cf_returns_on_parameters", true)
.Case("attribute_deprecated_with_message", true)
+ .Case("attribute_deprecated_with_replacement", true)
.Case("attribute_ext_vector_type", true)
.Case("attribute_ns_returns_not_retained", true)
.Case("attribute_ns_returns_retained", true)
@@ -1086,6 +1093,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory))
.Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread))
.Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow))
+ .Case("efficiency_sanitizer",
+ LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency))
// Objective-C features
.Case("objc_arr", LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
.Case("objc_arc", LangOpts.ObjCAutoRefCount)
@@ -1114,6 +1123,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("objc_bridge_id_on_typedefs", true)
.Case("objc_generics", LangOpts.ObjC2)
.Case("objc_generics_variance", LangOpts.ObjC2)
+ .Case("objc_class_property", LangOpts.ObjC2)
// C11 features
.Case("c_alignas", LangOpts.C11)
.Case("c_alignof", LangOpts.C11)
@@ -1177,6 +1187,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
// FIXME: Should this be __has_feature or __has_extension?
//.Case("raw_invocation_type", LangOpts.CPlusPlus)
// Type traits
+ // N.B. Additional type traits should not be added to the following list.
+ // Instead, they should be detected by has_extension.
.Case("has_nothrow_assign", LangOpts.CPlusPlus)
.Case("has_nothrow_copy", LangOpts.CPlusPlus)
.Case("has_nothrow_constructor", LangOpts.CPlusPlus)
@@ -1197,7 +1209,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
.Case("is_standard_layout", LangOpts.CPlusPlus)
.Case("is_pod", LangOpts.CPlusPlus)
.Case("is_polymorphic", LangOpts.CPlusPlus)
- .Case("is_sealed", LangOpts.MicrosoftExt)
+ .Case("is_sealed", LangOpts.CPlusPlus && LangOpts.MicrosoftExt)
.Case("is_trivial", LangOpts.CPlusPlus)
.Case("is_trivially_assignable", LangOpts.CPlusPlus)
.Case("is_trivially_constructible", LangOpts.CPlusPlus)
@@ -1213,8 +1225,8 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
/// HasExtension - Return true if we recognize and implement the feature
/// specified by the identifier, either as an extension or a standard language
/// feature.
-static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {
- if (HasFeature(PP, II))
+static bool HasExtension(const Preprocessor &PP, StringRef Extension) {
+ if (HasFeature(PP, Extension))
return true;
// If the use of an extension results in an error diagnostic, extensions are
@@ -1224,7 +1236,6 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {
return false;
const LangOptions &LangOpts = PP.getLangOpts();
- StringRef Extension = II->getName();
// Normalize the extension name, __foo__ becomes foo.
if (Extension.startswith("__") && Extension.endswith("__") &&
@@ -1408,47 +1419,120 @@ static bool EvaluateHasIncludeNext(Token &Tok,
return EvaluateHasIncludeCommon(Tok, II, PP, Lookup, LookupFromFile);
}
-/// \brief Process __building_module(identifier) expression.
-/// \returns true if we are building the named module, false otherwise.
-static bool EvaluateBuildingModule(Token &Tok,
- IdentifierInfo *II, Preprocessor &PP) {
- // Get '('.
- PP.LexNonComment(Tok);
-
- // Ensure we have a '('.
+/// \brief Process single-argument builtin feature-like macros that return
+/// integer values.
+static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
+ Token &Tok, IdentifierInfo *II,
+ Preprocessor &PP,
+ llvm::function_ref<
+ int(Token &Tok,
+ bool &HasLexedNextTok)> Op) {
+ // Parse the initial '('.
+ PP.LexUnexpandedToken(Tok);
if (Tok.isNot(tok::l_paren)) {
PP.Diag(Tok.getLocation(), diag::err_pp_expected_after) << II
<< tok::l_paren;
- return false;
+
+ // Provide a dummy '0' value on output stream to elide further errors.
+ if (!Tok.isOneOf(tok::eof, tok::eod)) {
+ OS << 0;
+ Tok.setKind(tok::numeric_constant);
+ }
+ return;
}
- // Save '(' location for possible missing ')' message.
+ unsigned ParenDepth = 1;
SourceLocation LParenLoc = Tok.getLocation();
+ llvm::Optional<int> Result;
+
+ Token ResultTok;
+ bool SuppressDiagnostic = false;
+ while (true) {
+ // Parse next token.
+ PP.LexUnexpandedToken(Tok);
+
+already_lexed:
+ switch (Tok.getKind()) {
+ case tok::eof:
+ case tok::eod:
+ // Don't provide even a dummy value if the eod or eof marker is
+ // reached. Simply provide a diagnostic.
+ PP.Diag(Tok.getLocation(), diag::err_unterm_macro_invoc);
+ return;
- // Get the module name.
- PP.LexNonComment(Tok);
+ case tok::comma:
+ if (!SuppressDiagnostic) {
+ PP.Diag(Tok.getLocation(), diag::err_too_many_args_in_macro_invoc);
+ SuppressDiagnostic = true;
+ }
+ continue;
- // Ensure that we have an identifier.
- if (Tok.isNot(tok::identifier)) {
- PP.Diag(Tok.getLocation(), diag::err_expected_id_building_module);
- return false;
- }
+ case tok::l_paren:
+ ++ParenDepth;
+ if (Result.hasValue())
+ break;
+ if (!SuppressDiagnostic) {
+ PP.Diag(Tok.getLocation(), diag::err_pp_nested_paren) << II;
+ SuppressDiagnostic = true;
+ }
+ continue;
- bool Result
- = Tok.getIdentifierInfo()->getName() == PP.getLangOpts().CurrentModule;
+ case tok::r_paren:
+ if (--ParenDepth > 0)
+ continue;
+
+ // The last ')' has been reached; return the value if one was found,
+ // otherwise emit a diagnostic and return a dummy value.
+ if (Result.hasValue())
+ OS << Result.getValue();
+ else {
+ OS << 0;
+ if (!SuppressDiagnostic)
+ PP.Diag(Tok.getLocation(), diag::err_too_few_args_in_macro_invoc);
+ }
+ Tok.setKind(tok::numeric_constant);
+ return;
- // Get ')'.
- PP.LexNonComment(Tok);
+ default: {
+ // Parse the macro argument, if one has not been found so far.
+ if (Result.hasValue())
+ break;
- // Ensure we have a trailing ).
- if (Tok.isNot(tok::r_paren)) {
- PP.Diag(Tok.getLocation(), diag::err_pp_expected_after) << II
- << tok::r_paren;
- PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
- return false;
+ bool HasLexedNextToken = false;
+ Result = Op(Tok, HasLexedNextToken);
+ ResultTok = Tok;
+ if (HasLexedNextToken)
+ goto already_lexed;
+ continue;
+ }
+ }
+
+ // Diagnose missing ')'.
+ if (!SuppressDiagnostic) {
+ if (auto Diag = PP.Diag(Tok.getLocation(), diag::err_pp_expected_after)) {
+ if (IdentifierInfo *LastII = ResultTok.getIdentifierInfo())
+ Diag << LastII;
+ else
+ Diag << ResultTok.getKind();
+ Diag << tok::r_paren << ResultTok.getLocation();
+ }
+ PP.Diag(LParenLoc, diag::note_matching) << tok::l_paren;
+ SuppressDiagnostic = true;
+ }
}
+}
- return Result;
+/// \brief Helper function to return the IdentifierInfo structure of a Token
+/// or generate a diagnostic if none is available.
+static IdentifierInfo *ExpectFeatureIdentifierInfo(Token &Tok,
+ Preprocessor &PP,
+ signed DiagID) {
+ IdentifierInfo *II;
+ if (!Tok.isAnnotation() && (II = Tok.getIdentifierInfo()))
+ return II;
+
+ PP.Diag(Tok.getLocation(), DiagID);
+ return nullptr;
}
/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
@@ -1584,84 +1668,82 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
// __COUNTER__ expands to a simple numeric value.
OS << CounterValue++;
Tok.setKind(tok::numeric_constant);
- } else if (II == Ident__has_feature ||
- II == Ident__has_extension ||
- II == Ident__has_builtin ||
- II == Ident__is_identifier ||
- II == Ident__has_attribute ||
- II == Ident__has_declspec ||
- II == Ident__has_cpp_attribute) {
- // The argument to these builtins should be a parenthesized identifier.
- SourceLocation StartLoc = Tok.getLocation();
-
- bool IsValid = false;
- IdentifierInfo *FeatureII = nullptr;
- IdentifierInfo *ScopeII = nullptr;
-
- // Read the '('.
- LexUnexpandedToken(Tok);
- if (Tok.is(tok::l_paren)) {
- // Read the identifier
- LexUnexpandedToken(Tok);
- if ((FeatureII = Tok.getIdentifierInfo())) {
- // If we're checking __has_cpp_attribute, it is possible to receive a
- // scope token. Read the "::", if it's available.
+ } else if (II == Ident__has_feature) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II && HasFeature(*this, II->getName());
+ });
+ } else if (II == Ident__has_extension) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II && HasExtension(*this, II->getName());
+ });
+ } else if (II == Ident__has_builtin) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ if (!II)
+ return false;
+ else if (II->getBuiltinID() != 0)
+ return true;
+ else {
+ const LangOptions &LangOpts = getLangOpts();
+ return llvm::StringSwitch<bool>(II->getName())
+ .Case("__make_integer_seq", LangOpts.CPlusPlus)
+ .Case("__type_pack_element", LangOpts.CPlusPlus)
+ .Default(false);
+ }
+ });
+ } else if (II == Ident__is_identifier) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [](Token &Tok, bool &HasLexedNextToken) -> int {
+ return Tok.is(tok::identifier);
+ });
+ } else if (II == Ident__has_attribute) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II ? hasAttribute(AttrSyntax::GNU, nullptr, II,
+ getTargetInfo(), getLangOpts()) : 0;
+ });
+ } else if (II == Ident__has_declspec) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ return II ? hasAttribute(AttrSyntax::Declspec, nullptr, II,
+ getTargetInfo(), getLangOpts()) : 0;
+ });
+ } else if (II == Ident__has_cpp_attribute) {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *ScopeII = nullptr;
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
+ if (!II)
+ return false;
+
+ // It is possible to receive a scope token. Read the "::", if it is
+ // available, and the subsequent identifier.
LexUnexpandedToken(Tok);
- bool IsScopeValid = true;
- if (II == Ident__has_cpp_attribute && Tok.is(tok::coloncolon)) {
+ if (Tok.isNot(tok::coloncolon))
+ HasLexedNextToken = true;
+ else {
+ ScopeII = II;
LexUnexpandedToken(Tok);
- // The first thing we read was not the feature, it was the scope.
- ScopeII = FeatureII;
- if ((FeatureII = Tok.getIdentifierInfo()))
- LexUnexpandedToken(Tok);
- else
- IsScopeValid = false;
+ II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_feature_check_malformed);
}
- // Read the closing paren.
- if (IsScopeValid && Tok.is(tok::r_paren))
- IsValid = true;
- }
- // Eat tokens until ')'.
- while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eod) &&
- Tok.isNot(tok::eof))
- LexUnexpandedToken(Tok);
- }
- int Value = 0;
- if (!IsValid)
- Diag(StartLoc, diag::err_feature_check_malformed);
- else if (II == Ident__is_identifier)
- Value = FeatureII->getTokenID() == tok::identifier;
- else if (II == Ident__has_builtin) {
- // Check for a builtin is trivial.
- if (FeatureII->getBuiltinID() != 0) {
- Value = true;
- } else {
- StringRef Feature = FeatureII->getName();
- Value = llvm::StringSwitch<bool>(Feature)
- .Case("__make_integer_seq", getLangOpts().CPlusPlus)
- .Default(false);
- }
- } else if (II == Ident__has_attribute)
- Value = hasAttribute(AttrSyntax::GNU, nullptr, FeatureII,
- getTargetInfo(), getLangOpts());
- else if (II == Ident__has_cpp_attribute)
- Value = hasAttribute(AttrSyntax::CXX, ScopeII, FeatureII,
- getTargetInfo(), getLangOpts());
- else if (II == Ident__has_declspec)
- Value = hasAttribute(AttrSyntax::Declspec, nullptr, FeatureII,
- getTargetInfo(), getLangOpts());
- else if (II == Ident__has_extension)
- Value = HasExtension(*this, FeatureII);
- else {
- assert(II == Ident__has_feature && "Must be feature check");
- Value = HasFeature(*this, FeatureII);
- }
-
- if (!IsValid)
- return;
- OS << Value;
- Tok.setKind(tok::numeric_constant);
+ return II ? hasAttribute(AttrSyntax::CXX, ScopeII, II,
+ getTargetInfo(), getLangOpts()) : 0;
+ });
} else if (II == Ident__has_include ||
II == Ident__has_include_next) {
// The argument to these two builtins should be a parenthesized
@@ -1679,64 +1761,44 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
Tok.setKind(tok::numeric_constant);
} else if (II == Ident__has_warning) {
// The argument should be a parenthesized string literal.
- // The argument to these builtins should be a parenthesized identifier.
- SourceLocation StartLoc = Tok.getLocation();
- bool IsValid = false;
- bool Value = false;
- // Read the '('.
- LexUnexpandedToken(Tok);
- do {
- if (Tok.isNot(tok::l_paren)) {
- Diag(StartLoc, diag::err_warning_check_malformed);
- break;
- }
-
- LexUnexpandedToken(Tok);
- std::string WarningName;
- SourceLocation StrStartLoc = Tok.getLocation();
- if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
- /*MacroExpansion=*/false)) {
- // Eat tokens until ')'.
- while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::eod) &&
- Tok.isNot(tok::eof))
- LexUnexpandedToken(Tok);
- break;
- }
-
- // Is the end a ')'?
- if (!(IsValid = Tok.is(tok::r_paren))) {
- Diag(StartLoc, diag::err_warning_check_malformed);
- break;
- }
-
- // FIXME: Should we accept "-R..." flags here, or should that be handled
- // by a separate __has_remark?
- if (WarningName.size() < 3 || WarningName[0] != '-' ||
- WarningName[1] != 'W') {
- Diag(StrStartLoc, diag::warn_has_warning_invalid_option);
- break;
- }
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ std::string WarningName;
+ SourceLocation StrStartLoc = Tok.getLocation();
+
+ HasLexedNextToken = Tok.is(tok::string_literal);
+ if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
+ /*MacroExpansion=*/false))
+ return false;
+
+ // FIXME: Should we accept "-R..." flags here, or should that be
+ // handled by a separate __has_remark?
+ if (WarningName.size() < 3 || WarningName[0] != '-' ||
+ WarningName[1] != 'W') {
+ Diag(StrStartLoc, diag::warn_has_warning_invalid_option);
+ return false;
+ }
- // Finally, check if the warning flags maps to a diagnostic group.
- // We construct a SmallVector here to talk to getDiagnosticIDs().
- // Although we don't use the result, this isn't a hot path, and not
- // worth special casing.
- SmallVector<diag::kind, 10> Diags;
- Value = !getDiagnostics().getDiagnosticIDs()->
- getDiagnosticsInGroup(diag::Flavor::WarningOrError,
- WarningName.substr(2), Diags);
- } while (false);
-
- if (!IsValid)
- return;
- OS << (int)Value;
- Tok.setKind(tok::numeric_constant);
+ // Finally, check if the warning flag maps to a diagnostic group.
+ // We construct a SmallVector here to talk to getDiagnosticIDs().
+ // Although we don't use the result, this isn't a hot path, and not
+ // worth special casing.
+ SmallVector<diag::kind, 10> Diags;
+ return !getDiagnostics().getDiagnosticIDs()->
+ getDiagnosticsInGroup(diag::Flavor::WarningOrError,
+ WarningName.substr(2), Diags);
+ });
} else if (II == Ident__building_module) {
// The argument to this builtin should be an identifier. The
// builtin evaluates to 1 when that identifier names the module we are
// currently building.
- OS << (int)EvaluateBuildingModule(Tok, II, *this);
- Tok.setKind(tok::numeric_constant);
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
+ diag::err_expected_id_building_module);
+ return getLangOpts().CompilingModule && II &&
+ (II->getName() == getLangOpts().CurrentModule);
+ });
} else if (II == Ident__MODULE__) {
// The current module as an identifier.
OS << getLangOpts().CurrentModule;
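A usage sketch, with a made-up module name, of the builtins just funneled through EvaluateFeatureLikeBuiltinMacro; they now share one argument parser, so a missing ')', a stray comma, or nested parentheses gets uniform recovery and a dummy 0 rather than a silently dropped expansion.

    #if __has_feature(objc_arc) || __has_extension(c_alignas)
    #endif
    #if __has_cpp_attribute(clang::fallthrough)  /* scoped form; the '::' is
                                                    lexed inside the lambda */
    #endif
    #if __building_module(MyModule)  /* MyModule is hypothetical; this is 1
                                        only while compiling that module */
    #endif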
diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp
index afb41a240776..3bdd31b26ff8 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp
@@ -354,7 +354,9 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) {
/// HandlePragmaOnce - Handle \#pragma once. OnceTok is the 'once'.
///
void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
- if (isInPrimaryFile()) {
+ // Don't honor the 'once' when handling the primary source file, unless
+ // this is a prefix to a TU, which indicates we're generating a PCH file.
+ if (isInPrimaryFile() && TUKind != TU_Prefix) {
Diag(OnceTok, diag::pp_pragma_once_in_main_file);
return;
}
@@ -938,13 +940,13 @@ struct PragmaDebugHandler : public PragmaHandler {
}
SourceLocation NameLoc = Tok.getLocation();
- Token *Toks = PP.getPreprocessorAllocator().Allocate<Token>(1);
- Toks->startToken();
- Toks->setKind(tok::annot_pragma_captured);
- Toks->setLocation(NameLoc);
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(1), 1);
+ Toks[0].startToken();
+ Toks[0].setKind(tok::annot_pragma_captured);
+ Toks[0].setLocation(NameLoc);
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
// Disable MSVC warning about runtime stack overflow.
@@ -1024,10 +1026,19 @@ public:
return;
}
- if (PP.getDiagnostics().setSeverityForGroup(
- WarningName[1] == 'W' ? diag::Flavor::WarningOrError
- : diag::Flavor::Remark,
- WarningName.substr(2), SV, DiagLoc))
+ diag::Flavor Flavor = WarningName[1] == 'W' ? diag::Flavor::WarningOrError
+ : diag::Flavor::Remark;
+ StringRef Group = StringRef(WarningName).substr(2);
+ bool unknownDiag = false;
+ if (Group == "everything") {
+ // Special handling for pragma clang diagnostic ... "-Weverything".
+ // There is no formal group named "everything", so there has to be a
+ // special case for it.
+ PP.getDiagnostics().setSeverityForAll(Flavor, SV, DiagLoc);
+ } else
+ unknownDiag = PP.getDiagnostics().setSeverityForGroup(Flavor, Group, SV,
+ DiagLoc);
+ if (unknownDiag)
PP.Diag(StringLoc, diag::warn_pragma_diagnostic_unknown_warning)
<< WarningName;
else if (Callbacks)
@@ -1481,6 +1492,13 @@ void Preprocessor::RegisterBuiltinPragmas() {
AddPragmaHandler(new PragmaRegionHandler("region"));
AddPragmaHandler(new PragmaRegionHandler("endregion"));
}
+
+ // Pragmas added by plugins
+ for (PragmaHandlerRegistry::iterator it = PragmaHandlerRegistry::begin(),
+ ie = PragmaHandlerRegistry::end();
+ it != ie; ++it) {
+ AddPragmaHandler(it->instantiate().release());
+ }
}
/// Ignore all pragmas, useful for modes such as -Eonly which would otherwise
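Two sketches for the pragma changes above. The first line now works because of the explicit "everything" carve-out (there is no real diagnostic group behind that name). The second fragment shows the plugin-side registration that the new PragmaHandlerRegistry loop discovers; MyPragmaHandler and "my_pragma" are stand-in names of my own.

    #pragma clang diagnostic ignored "-Weverything"  /* setSeverityForAll */

    // Plugin side (C++), picked up by RegisterBuiltinPragmas:
    #include "clang/Lex/Pragma.h"
    #include "clang/Lex/Preprocessor.h"

    class MyPragmaHandler : public clang::PragmaHandler {
    public:
      MyPragmaHandler() : PragmaHandler("my_pragma") {}
      void HandlePragma(clang::Preprocessor &PP,
                        clang::PragmaIntroducerKind Introducer,
                        clang::Token &FirstToken) override {
        // Consume and act on the pragma's tokens here.
      }
    };
    static clang::PragmaHandlerRegistry::Add<MyPragmaHandler>
        X("my_pragma", "example pragma handler from a plugin");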
diff --git a/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp b/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp
index 142d9ce09049..78179dd7988d 100644
--- a/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/Preprocessor.cpp
@@ -51,8 +51,11 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <utility>
using namespace clang;
+template class llvm::Registry<clang::PragmaHandler>;
+
//===----------------------------------------------------------------------===//
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
@@ -62,7 +65,7 @@ Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
ModuleLoader &TheModuleLoader,
IdentifierInfoLookup *IILookup, bool OwnsHeaders,
TranslationUnitKind TUKind)
- : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
+ : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
@@ -477,7 +480,7 @@ void Preprocessor::CreateString(StringRef Str, Token &Tok,
}
Module *Preprocessor::getCurrentModule() {
- if (getLangOpts().CurrentModule.empty())
+ if (!getLangOpts().CompilingModule)
return nullptr;
return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
diff --git a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp
index c42966928e52..994bae632aec 100644
--- a/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp
+++ b/contrib/llvm/tools/clang/lib/Lex/TokenLexer.cpp
@@ -18,8 +18,8 @@
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
-using namespace clang;
+using namespace clang;
/// Create a TokenLexer for the specified macro with the specified actual
/// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
@@ -76,8 +76,6 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
Macro->DisableMacro();
}
-
-
/// Create a TokenLexer for the specified token stream. This does not
/// take ownership of the specified token vector.
void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
@@ -107,7 +105,6 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
}
}
-
void TokenLexer::destroy() {
// If this was a function-like macro that actually uses its arguments, delete
// the expanded tokens.
@@ -154,12 +151,17 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
// Remove the comma.
ResultToks.pop_back();
- // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
- // then removal of the comma should produce a placemarker token (in C99
- // terms) which we model by popping off the previous ##, giving us a plain
- // "X" when __VA_ARGS__ is empty.
- if (!ResultToks.empty() && ResultToks.back().is(tok::hashhash))
- ResultToks.pop_back();
+ if (!ResultToks.empty()) {
+ // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
+ // then removal of the comma should produce a placemarker token (in C99
+ // terms) which we model by popping off the previous ##, giving us a plain
+ // "X" when __VA_ARGS__ is empty.
+ if (ResultToks.back().is(tok::hashhash))
+ ResultToks.pop_back();
+
+ // Remember that this comma was elided.
+ ResultToks.back().setFlag(Token::CommaAfterElided);
+ }
// Never add a space, even if the comma, ##, or arg had a space.
NextTokGetsSpace = false;
@@ -169,7 +171,6 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.
void TokenLexer::ExpandFunctionArguments() {
-
SmallVector<Token, 128> ResultToks;
// Loop through 'Tokens', expanding them into ResultToks. Keep
@@ -305,6 +306,7 @@ void TokenLexer::ExpandFunctionArguments() {
// identifier.
ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
NextTokGetsSpace);
+ ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
NextTokGetsSpace = false;
}
continue;
@@ -388,8 +390,6 @@ void TokenLexer::ExpandFunctionArguments() {
MaybeRemoveCommaBeforeVaArgs(ResultToks,
/*HasPasteOperator=*/true,
Macro, ArgNo, PP);
-
- continue;
}
// If anything changed, install this as the new Tokens list.
@@ -794,6 +794,10 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM,
// "characters" away.
if (RelOffs < 0 || RelOffs > 50)
break;
+
+ if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
+ break; // Token from a different macro.
+
CurLoc = NextLoc;
}
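A sketch of the comma-elision bookkeeping above, using the gcc ', ##__VA_ARGS__' extension: with empty __VA_ARGS__ the comma (and a preceding '##') is dropped, and the token before it is now flagged CommaAfterElided so that ReadFunctionLikeMacroArgs in PPMacroExpansion.cpp can tell an elided trailing argument from a genuinely missing one.

    #include <stdio.h>
    #define LOG(fmt, ...) printf(fmt, ##__VA_ARGS__)
    int main(void) {
      LOG("plain\n");    /* comma elided: printf("plain\n"); the 'fmt' token
                            in the expansion carries CommaAfterElided */
      LOG("%d\n", 42);   /* comma kept: printf("%d\n", 42) */
      return 0;
    }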
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseAST.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseAST.cpp
index ccf947984945..1fb57a08c433 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseAST.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseAST.cpp
@@ -19,7 +19,6 @@
#include "clang/Parse/ParseDiagnostic.h"
#include "clang/Parse/Parser.h"
#include "clang/Sema/CodeCompleteConsumer.h"
-#include "clang/Sema/ExternalSemaSource.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaConsumer.h"
#include "llvm/Support/CrashRecoveryContext.h"
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
index e536644d5bf6..39fcc8270419 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -52,7 +52,8 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
}
}
- HandleMemberFunctionDeclDelays(D, FnD);
+ if (FnD)
+ HandleMemberFunctionDeclDelays(D, FnD);
D.complete(FnD);
@@ -100,6 +101,12 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
return FnD;
}
+ if (SkipFunctionBodies && (!FnD || Actions.canSkipFunctionBody(FnD)) &&
+ trySkippingFunctionBody()) {
+ Actions.ActOnSkippedFunctionBody(FnD);
+ return FnD;
+ }
+
// In delayed template parsing mode, if we are within a class template
// or if we are about to parse function member template then consume
// the tokens and store them for parsing at the end of the translation unit.
@@ -325,7 +332,7 @@ void Parser::ParseLexedMethodDeclaration(LateParsedMethodDeclaration &LM) {
// Parse the default argument from its saved token stream.
Toks->push_back(Tok); // So that the current token doesn't get lost
- PP.EnterTokenStream(&Toks->front(), Toks->size(), true, false);
+ PP.EnterTokenStream(*Toks, true);
// Consume the previously-pushed token.
ConsumeAnyToken();
@@ -380,7 +387,7 @@ void Parser::ParseLexedMethodDeclaration(LateParsedMethodDeclaration &LM) {
assert (!OldParam->hasUnparsedDefaultArg());
if (OldParam->hasUninstantiatedDefaultArg())
Param->setUninstantiatedDefaultArg(
- Param->getUninstantiatedDefaultArg());
+ OldParam->getUninstantiatedDefaultArg());
else
Param->setDefaultArg(OldParam->getInit());
}
@@ -399,7 +406,7 @@ void Parser::ParseLexedMethodDeclaration(LateParsedMethodDeclaration &LM) {
// Parse the default argument from its saved token stream.
Toks->push_back(Tok); // So that the current token doesn't get lost
- PP.EnterTokenStream(&Toks->front(), Toks->size(), true, false);
+ PP.EnterTokenStream(*Toks, true);
// Consume the previously-pushed token.
ConsumeAnyToken();
@@ -504,7 +511,7 @@ void Parser::ParseLexedMethodDef(LexedMethod &LM) {
// Append the current token at the end of the new token stream so that it
// doesn't get lost.
LM.Toks.push_back(Tok);
- PP.EnterTokenStream(LM.Toks.data(), LM.Toks.size(), true, false);
+ PP.EnterTokenStream(LM.Toks, true);
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
@@ -563,8 +570,10 @@ void Parser::ParseLexedMethodDef(LexedMethod &LM) {
if (Tok.is(tok::eof) && Tok.getEofData() == LM.D)
ConsumeAnyToken();
- if (CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(LM.D))
- Actions.ActOnFinishInlineMethodDef(MD);
+ if (auto *FD = dyn_cast_or_null<FunctionDecl>(LM.D))
+ if (isa<CXXMethodDecl>(FD) ||
+ FD->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend))
+ Actions.ActOnFinishInlineFunctionDef(FD);
}
/// ParseLexedMemberInitializers - We finished parsing the member specification
@@ -617,7 +626,7 @@ void Parser::ParseLexedMemberInitializer(LateParsedMemberInitializer &MI) {
// Append the current token at the end of the new token stream so that it
// doesn't get lost.
MI.Toks.push_back(Tok);
- PP.EnterTokenStream(MI.Toks.data(), MI.Toks.size(), true, false);
+ PP.EnterTokenStream(MI.Toks, true);
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
@@ -971,10 +980,10 @@ public:
// Put back the original tokens.
Self.SkipUntil(EndKind, StopAtSemi | StopBeforeMatch);
if (Toks.size()) {
- Token *Buffer = new Token[Toks.size()];
- std::copy(Toks.begin() + 1, Toks.end(), Buffer);
+ auto Buffer = llvm::make_unique<Token[]>(Toks.size());
+ std::copy(Toks.begin() + 1, Toks.end(), Buffer.get());
Buffer[Toks.size() - 1] = Self.Tok;
- Self.PP.EnterTokenStream(Buffer, Toks.size(), true, /*Owned*/true);
+ Self.PP.EnterTokenStream(std::move(Buffer), Toks.size(), true);
Self.Tok = Toks.front();
}
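An illustrative class of my own touching both fixes above: under -fdelayed-template-parsing the default-argument copy now reads OldParam's uninstantiated default argument (previously it copied Param's own, still-empty one onto itself), and the inline ordinary-friend definition now reaches ActOnFinishInlineFunctionDef like a member function would.

    template <typename T> struct S {
      void m(int x = T::value) {}                 // late-parsed default arg
      friend int g(S, int a = 1) { return a; }    // ordinary-friend inline def
    };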
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp
index c64b97d01b9a..45e1c3e465ce 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseDecl.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ScopedPrinter.h"
using namespace clang;
@@ -609,7 +610,6 @@ void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) {
case tok::kw___ptr64:
case tok::kw___w64:
case tok::kw___ptr32:
- case tok::kw___unaligned:
case tok::kw___sptr:
case tok::kw___uptr: {
IdentifierInfo *AttrName = Tok.getIdentifierInfo();
@@ -670,7 +670,7 @@ void Parser::ParseBorlandTypeAttributes(ParsedAttributes &attrs) {
}
}
-void Parser::ParseOpenCLAttributes(ParsedAttributes &attrs) {
+void Parser::ParseOpenCLKernelAttributes(ParsedAttributes &attrs) {
// Treat these like attributes
while (Tok.is(tok::kw___kernel)) {
IdentifierInfo *AttrName = Tok.getIdentifierInfo();
@@ -720,7 +720,7 @@ static bool VersionNumberSeparator(const char Separator) {
/// simple-integer ',' simple-integer
/// simple-integer ',' simple-integer ',' simple-integer
VersionTuple Parser::ParseVersionTuple(SourceRange &Range) {
- Range = Tok.getLocation();
+ Range = SourceRange(Tok.getLocation(), Tok.getEndLoc());
if (!Tok.is(tok::numeric_constant)) {
Diag(Tok, diag::err_expected_version);
@@ -833,11 +833,15 @@ VersionTuple Parser::ParseVersionTuple(SourceRange &Range) {
/// \brief Parse the contents of the "availability" attribute.
///
/// availability-attribute:
-/// 'availability' '(' platform ',' version-arg-list, opt-message')'
+/// 'availability' '(' platform ',' opt-strict version-arg-list,
+/// opt-replacement, opt-message')'
///
/// platform:
/// identifier
///
+/// opt-strict:
+/// 'strict' ','
+///
/// version-arg-list:
/// version-arg
/// version-arg ',' version-arg-list
@@ -847,6 +851,8 @@ VersionTuple Parser::ParseVersionTuple(SourceRange &Range) {
/// 'deprecated' '=' version
/// 'obsoleted' = version
/// 'unavailable'
+/// opt-replacement:
+/// 'replacement' '=' <string>
/// opt-message:
/// 'message' '=' <string>
void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
@@ -858,7 +864,7 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
AttributeList::Syntax Syntax) {
enum { Introduced, Deprecated, Obsoleted, Unknown };
AvailabilityChange Changes[Unknown];
- ExprResult MessageExpr;
+ ExprResult MessageExpr, ReplacementExpr;
// Opening '('.
BalancedDelimiterTracker T(*this, tok::l_paren);
@@ -867,13 +873,20 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
return;
}
- // Parse the platform name,
+ // Parse the platform name.
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_availability_expected_platform);
SkipUntil(tok::r_paren, StopAtSemi);
return;
}
IdentifierLoc *Platform = ParseIdentifierLoc();
+ // Canonicalize platform name from "macosx" to "macos".
+ if (Platform->Ident && Platform->Ident->getName() == "macosx")
+ Platform->Ident = PP.getIdentifierInfo("macos");
+ // Canonicalize platform name from "macosx_app_extension" to
+ // "macos_app_extension".
+ if (Platform->Ident && Platform->Ident->getName() == "macosx_app_extension")
+ Platform->Ident = PP.getIdentifierInfo("macos_app_extension");
// Parse the ',' following the platform name.
if (ExpectAndConsume(tok::comma)) {
@@ -889,10 +902,13 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
Ident_obsoleted = PP.getIdentifierInfo("obsoleted");
Ident_unavailable = PP.getIdentifierInfo("unavailable");
Ident_message = PP.getIdentifierInfo("message");
+ Ident_strict = PP.getIdentifierInfo("strict");
+ Ident_replacement = PP.getIdentifierInfo("replacement");
}
- // Parse the set of introductions/deprecations/removals.
- SourceLocation UnavailableLoc;
+ // Parse the optional "strict", the optional "replacement" and the set of
+ // introductions/deprecations/removals.
+ SourceLocation UnavailableLoc, StrictLoc;
do {
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_availability_expected_change);
@@ -902,6 +918,15 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
IdentifierInfo *Keyword = Tok.getIdentifierInfo();
SourceLocation KeywordLoc = ConsumeToken();
+ if (Keyword == Ident_strict) {
+ if (StrictLoc.isValid()) {
+ Diag(KeywordLoc, diag::err_availability_redundant)
+ << Keyword << SourceRange(StrictLoc);
+ }
+ StrictLoc = KeywordLoc;
+ continue;
+ }
+
if (Keyword == Ident_unavailable) {
if (UnavailableLoc.isValid()) {
Diag(KeywordLoc, diag::err_availability_redundant)
@@ -917,14 +942,17 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
return;
}
ConsumeToken();
- if (Keyword == Ident_message) {
+ if (Keyword == Ident_message || Keyword == Ident_replacement) {
if (Tok.isNot(tok::string_literal)) {
Diag(Tok, diag::err_expected_string_literal)
<< /*Source='availability attribute'*/2;
SkipUntil(tok::r_paren, StopAtSemi);
return;
}
- MessageExpr = ParseStringLiteralExpression();
+ if (Keyword == Ident_message)
+ MessageExpr = ParseStringLiteralExpression();
+ else
+ ReplacementExpr = ParseStringLiteralExpression();
// Also reject wide string literals.
if (StringLiteral *MessageStringLiteral =
cast_or_null<StringLiteral>(MessageExpr.get())) {
@@ -936,7 +964,10 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
return;
}
}
- break;
+ if (Keyword == Ident_message)
+ break;
+ else
+ continue;
}
// Special handling of 'NA' only when applied to introduced or
@@ -1023,7 +1054,7 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability,
Changes[Deprecated],
Changes[Obsoleted],
UnavailableLoc, MessageExpr.get(),
- Syntax);
+ Syntax, StrictLoc, ReplacementExpr.get());
}
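A declaration of my own exercising the grammar extension parsed above: 'strict' and 'replacement' may appear among the other comma-separated items, 'replacement' feeds the new ReplacementExpr, and 'macos' is accepted directly now that 'macosx' is canonicalized to it.

    void old_api(void)
        __attribute__((availability(macos, strict, introduced=10.10,
                                    deprecated=10.12, replacement="new_api",
                                    message="use new_api() instead")));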
/// \brief Parse the contents of the "objc_bridge_related" attribute.
@@ -1187,7 +1218,7 @@ void Parser::ParseLexedAttribute(LateParsedAttribute &LA,
// Append the current token at the end of the new token stream so that it
// doesn't get lost.
LA.Toks.push_back(Tok);
- PP.EnterTokenStream(LA.Toks.data(), LA.Toks.size(), true, false);
+ PP.EnterTokenStream(LA.Toks, true);
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
@@ -1402,8 +1433,8 @@ void Parser::handleDeclspecAlignBeforeClassKey(ParsedAttributesWithRange &Attrs,
while (AL) {
AttributeList *Next = AL->getNext();
- // We only consider attributes using the appropriate '__declspec' spelling,
- // this behavior doesn't extend to any other spellings.
+ // We only consider attributes using the appropriate '__declspec' spelling.
+ // This behavior doesn't extend to any other spellings.
if (AL->getKind() == AttributeList::AT_Aligned &&
AL->isDeclspecAttribute()) {
// Stitch the attribute into the tag's attribute list.
@@ -1522,7 +1553,7 @@ Parser::ParseSimpleDeclaration(unsigned Context,
// may get this far before the problem becomes obvious.
if (DS.hasTagDefinition() &&
DiagnoseMissingSemiAfterTagDefinition(DS, AS_none, DSContext))
- return DeclGroupPtrTy();
+ return nullptr;
// C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
// declaration-specifiers init-declarator-list[opt] ';'
@@ -1530,9 +1561,14 @@ Parser::ParseSimpleDeclaration(unsigned Context,
ProhibitAttributes(Attrs);
DeclEnd = Tok.getLocation();
if (RequireSemi) ConsumeToken();
+ RecordDecl *AnonRecord = nullptr;
Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
- DS);
+ DS, AnonRecord);
DS.complete(TheDecl);
+ if (AnonRecord) {
+ Decl* decls[] = {AnonRecord, TheDecl};
+ return Actions.BuildDeclaratorGroup(decls, /*TypeMayContainAuto=*/false);
+ }
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
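The new AnonRecord out-parameter covers a free-standing declaration-specifier that declares an anonymous record, where both the record and the names it injects must travel in a single declaration group. An illustrative C++ fragment:

    void f() {
      union { int i; float fl; };  // anonymous union: injects i and fl into f's scope
      i = 1;
      fl = 2.0f;
    }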
@@ -1701,7 +1737,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
// Bail out if the first declarator didn't seem well-formed.
if (!D.hasName() && !D.mayOmitIdentifier()) {
SkipMalformedDecl();
- return DeclGroupPtrTy();
+ return nullptr;
}
// Save late-parsed attributes for now; they need to be parsed in the
@@ -1766,19 +1802,19 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
} else {
Diag(Tok, diag::err_expected_fn_body);
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
} else {
if (Tok.is(tok::l_brace)) {
Diag(Tok, diag::err_function_definition_not_allowed);
SkipMalformedDecl();
- return DeclGroupPtrTy();
+ return nullptr;
}
}
}
if (ParseAsmAttributesAfterDeclarator(D))
- return DeclGroupPtrTy();
+ return nullptr;
// C++0x [stmt.iter]p1: Check if we have a for-range-declarator. If so, we
// must parse and analyze the for-range-initializer before the declaration is
@@ -1975,7 +2011,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(
TemplateParameterLists FakedParamLists;
FakedParamLists.push_back(Actions.ActOnTemplateParameterList(
0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, None,
- LAngleLoc));
+ LAngleLoc, nullptr));
ThisDecl =
Actions.ActOnTemplateDeclarator(getCurScope(), FakedParamLists, D);
@@ -2039,7 +2075,8 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(
if (Init.isInvalid()) {
SmallVector<tok::TokenKind, 2> StopTokens;
StopTokens.push_back(tok::comma);
- if (D.getContext() == Declarator::ForContext)
+ if (D.getContext() == Declarator::ForContext ||
+ D.getContext() == Declarator::InitStmtContext)
StopTokens.push_back(tok::r_paren);
SkipUntil(StopTokens, StopAtSemi | StopBeforeMatch);
Actions.ActOnInitializerError(ThisDecl);
@@ -2250,6 +2287,24 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS,
return false;
}
+ if (getLangOpts().CPlusPlus && (!SS || SS->isEmpty()) &&
+ getLangOpts().MSVCCompat) {
+ // Lookup of an unqualified type name has failed in MSVC compatibility mode.
+ // Give Sema a chance to recover if we are in a template with dependent base
+ // classes.
+ if (ParsedType T = Actions.ActOnMSVCUnknownTypeName(
+ *Tok.getIdentifierInfo(), Tok.getLocation(),
+ DSC == DSC_template_type_arg)) {
+ const char *PrevSpec;
+ unsigned DiagID;
+ DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec, DiagID, T,
+ Actions.getASTContext().getPrintingPolicy());
+ DS.SetRangeEnd(Tok.getLocation());
+ ConsumeToken();
+ return false;
+ }
+ }
+
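A hedged sketch of the recovery this enables (invented names; assumes -fms-compatibility): an unqualified name that can only resolve in a dependent base is provisionally treated as a type.

    template <typename T> struct Base { typedef int type; };
    template <typename T> struct Derived : Base<T> {
      type member;  // MSVC mode defers lookup to instantiation;
                    // standard C++ requires 'typename Base<T>::type'
    };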
// Otherwise, if we don't consume this token, we are going to emit an
// error anyway. Try to recover from various common problems. Check
// to see if this was a reference to a tag name without a tag specified.
@@ -2632,7 +2687,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
bool AttrsLastTime = false;
ParsedAttributesWithRange attrs(AttrFactory);
// We use Sema's policy to get bool macros right.
- const PrintingPolicy &Policy = Actions.getPrintingPolicy();
+ PrintingPolicy Policy = Actions.getPrintingPolicy();
while (1) {
bool isInvalid = false;
bool isStorageClass = false;
@@ -2835,12 +2890,11 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
<< Next.getIdentifierInfo() << 1 /* type */;
}
- ParsedType TypeRep = Actions.getTypeName(*Next.getIdentifierInfo(),
- Next.getLocation(),
- getCurScope(), &SS,
- false, false, ParsedType(),
- /*IsCtorOrDtorName=*/false,
- /*NonTrivialSourceInfo=*/true);
+ ParsedType TypeRep =
+ Actions.getTypeName(*Next.getIdentifierInfo(), Next.getLocation(),
+ getCurScope(), &SS, false, false, nullptr,
+ /*IsCtorOrDtorName=*/false,
+ /*NonTrivialSourceInfo=*/true);
// If the referenced identifier is not a type, then this declspec is
// erroneous: We already checked that it has no type specifier, and
@@ -2958,16 +3012,6 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
Actions.getTypeName(*Tok.getIdentifierInfo(),
Tok.getLocation(), getCurScope());
- // MSVC: If we weren't able to parse a default template argument, and it's
- // just a simple identifier, create a DependentNameType. This will allow
- // us to defer the name lookup to template instantiation time, as long we
- // forge a NestedNameSpecifier for the current context.
- if (!TypeRep && DSContext == DSC_template_type_arg &&
- getLangOpts().MSVCCompat && getCurScope()->isTemplateParamScope()) {
- TypeRep = Actions.ActOnDelayedDefaultTemplateArg(
- *Tok.getIdentifierInfo(), Tok.getLocation());
- }
-
// If this is not a typedef name, don't parse it as part of the declspec,
// it must be an implicit int or an error.
if (!TypeRep) {
@@ -3059,6 +3103,11 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
break;
}
+ case tok::kw___unaligned:
+ isInvalid = DS.SetTypeQual(DeclSpec::TQ_unaligned, Loc, PrevSpec, DiagID,
+ getLangOpts());
+ break;
+
case tok::kw___sptr:
case tok::kw___uptr:
case tok::kw___ptr64:
@@ -3069,7 +3118,6 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
case tok::kw___fastcall:
case tok::kw___thiscall:
case tok::kw___vectorcall:
- case tok::kw___unaligned:
ParseMicrosoftTypeAttributes(DS.getAttributes());
continue;
@@ -3080,7 +3128,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
// OpenCL single token adornments.
case tok::kw___kernel:
- ParseOpenCLAttributes(DS.getAttributes());
+ ParseOpenCLKernelAttributes(DS.getAttributes());
continue;
// Nullability type specifiers.
@@ -3278,6 +3326,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
isInvalid = DS.SetTypeSpecType(DeclSpec::TST_double, Loc, PrevSpec,
DiagID, Policy);
break;
+ case tok::kw___float128:
+ isInvalid = DS.SetTypeSpecType(DeclSpec::TST_float128, Loc, PrevSpec,
+ DiagID, Policy);
+ break;
case tok::kw_wchar_t:
isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec,
DiagID, Policy);
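A minimal sketch of the new type-specifier in user code (a GCC/Clang extension whose availability depends on the target):

    __float128 wide = 3.14;                      // 128-bit floating type
    long double narrowed = (long double)wide;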
@@ -3335,6 +3387,12 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
}
isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy);
break;
+#define GENERIC_IMAGE_TYPE(ImgType, Id) \
+ case tok::kw_##ImgType##_t: \
+ isInvalid = DS.SetTypeSpecType(DeclSpec::TST_##ImgType##_t, Loc, PrevSpec, \
+ DiagID, Policy); \
+ break;
+#include "clang/Basic/OpenCLImageTypes.def"
case tok::kw___unknown_anytype:
isInvalid = DS.SetTypeSpecType(TST_unknown_anytype, Loc,
PrevSpec, DiagID, Policy);
@@ -3403,6 +3461,22 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
ParseDecltypeSpecifier(DS);
continue;
+ case tok::annot_pragma_pack:
+ HandlePragmaPack();
+ continue;
+
+ case tok::annot_pragma_ms_pragma:
+ HandlePragmaMSPragma();
+ continue;
+
+ case tok::annot_pragma_ms_vtordisp:
+ HandlePragmaMSVtorDisp();
+ continue;
+
+ case tok::annot_pragma_ms_pointers_to_members:
+ HandlePragmaMSPointersToMembers();
+ continue;
+
case tok::kw___underlying_type:
ParseUnderlyingTypeSpecifier(DS);
continue;
@@ -3473,9 +3547,13 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
if (DiagID == diag::ext_duplicate_declspec)
Diag(Tok, DiagID)
<< PrevSpec << FixItHint::CreateRemoval(Tok.getLocation());
- else if (DiagID == diag::err_opencl_unknown_type_specifier)
- Diag(Tok, DiagID) << PrevSpec << isStorageClass;
- else
+ else if (DiagID == diag::err_opencl_unknown_type_specifier) {
+ const int OpenCLVer = getLangOpts().OpenCLVersion;
+ std::string VerSpec = llvm::to_string(OpenCLVer / 100) +
+ std::string (".") +
+ llvm::to_string((OpenCLVer % 100) / 10);
+ Diag(Tok, DiagID) << VerSpec << PrevSpec << isStorageClass;
+ } else
Diag(Tok, DiagID) << PrevSpec;
}
@@ -3521,8 +3599,10 @@ void Parser::ParseStructDeclaration(
// If there are no declarators, this is a free-standing declaration
// specifier. Let the actions module cope with it.
if (Tok.is(tok::semi)) {
+ RecordDecl *AnonRecord = nullptr;
Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
- DS);
+ DS, AnonRecord);
+ assert(!AnonRecord && "Did not expect anonymous struct or union here");
DS.complete(TheDecl);
return;
}
@@ -3625,12 +3705,12 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc,
if (Tok.is(tok::annot_pragma_openmp)) {
// Result can be ignored, because it must always be empty.
- auto Res = ParseOpenMPDeclarativeDirective();
- assert(!Res);
- // Silence possible warnings.
- (void)Res;
+ AccessSpecifier AS = AS_none;
+ ParsedAttributesWithRange Attrs(AttrFactory);
+ (void)ParseOpenMPDeclarativeDirectiveWithExtDecl(AS, Attrs);
continue;
}
+
if (!Tok.is(tok::at)) {
auto CFieldCallback = [&](ParsingFieldDeclarator &FD) {
// Install the declarator into the current TagDecl.
@@ -3693,8 +3773,7 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc,
T.getOpenLocation(), T.getCloseLocation(),
attrs.getList());
StructScope.Exit();
- Actions.ActOnTagFinishDefinition(getCurScope(), TagDecl,
- T.getCloseLocation());
+ Actions.ActOnTagFinishDefinition(getCurScope(), TagDecl, T.getRange());
}
/// ParseEnumSpecifier
@@ -3788,7 +3867,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
ColonProtectionRAIIObject X(*this, AllowFixedUnderlyingType);
CXXScopeSpec Spec;
- if (ParseOptionalCXXScopeSpecifier(Spec, ParsedType(),
+ if (ParseOptionalCXXScopeSpecifier(Spec, nullptr,
/*EnteringContext=*/true))
return;
@@ -4189,7 +4268,7 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) {
ParsedAttributes attrs(AttrFactory);
MaybeParseGNUAttributes(attrs);
- Actions.ActOnEnumBody(StartLoc, T.getOpenLocation(), T.getCloseLocation(),
+ Actions.ActOnEnumBody(StartLoc, T.getRange(),
EnumDecl, EnumConstantDecls,
getCurScope(),
attrs.getList());
@@ -4203,8 +4282,7 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) {
}
EnumScope.Exit();
- Actions.ActOnTagFinishDefinition(getCurScope(), EnumDecl,
- T.getCloseLocation());
+ Actions.ActOnTagFinishDefinition(getCurScope(), EnumDecl, T.getRange());
// The next token must be valid after an enum definition. If not, a ';'
// was probably forgotten.
@@ -4219,27 +4297,6 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) {
}
}
-/// isTypeSpecifierQualifier - Return true if the current token could be the
-/// start of a type-qualifier-list.
-bool Parser::isTypeQualifier() const {
- switch (Tok.getKind()) {
- default: return false;
- // type-qualifier
- case tok::kw_const:
- case tok::kw_volatile:
- case tok::kw_restrict:
- case tok::kw___private:
- case tok::kw___local:
- case tok::kw___global:
- case tok::kw___constant:
- case tok::kw___generic:
- case tok::kw___read_only:
- case tok::kw___read_write:
- case tok::kw___write_only:
- return true;
- }
-}
-
/// isKnownToBeTypeSpecifier - Return true if we know that the specified token
/// is definitely a type-specifier. Return false if it isn't part of a type
/// specifier or if we're not sure.
@@ -4264,12 +4321,15 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_bool:
case tok::kw__Bool:
case tok::kw__Decimal32:
case tok::kw__Decimal64:
case tok::kw__Decimal128:
case tok::kw___vector:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
// struct-or-union-specifier (C99) or class-specifier (C++)
case tok::kw_class:
@@ -4336,12 +4396,15 @@ bool Parser::isTypeSpecifierQualifier() {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_bool:
case tok::kw__Bool:
case tok::kw__Decimal32:
case tok::kw__Decimal64:
case tok::kw__Decimal128:
case tok::kw___vector:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
// struct-or-union-specifier (C99) or class-specifier (C++)
case tok::kw_class:
@@ -4488,6 +4551,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_bool:
case tok::kw__Bool:
case tok::kw__Decimal32:
@@ -4578,6 +4642,8 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
case tok::kw___read_only:
case tok::kw___read_write:
case tok::kw___write_only:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
return true;
}
@@ -4588,7 +4654,7 @@ bool Parser::isConstructorDeclarator(bool IsUnqualified) {
// Parse the C++ scope specifier.
CXXScopeSpec SS;
- if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(),
+ if (ParseOptionalCXXScopeSpecifier(SS, nullptr,
/*EnteringContext=*/true)) {
TPA.Revert();
return false;
@@ -4770,6 +4836,10 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, unsigned AttrReqs,
ParseOpenCLQualifiers(DS.getAttributes());
break;
+ case tok::kw___unaligned:
+ isInvalid = DS.SetTypeQual(DeclSpec::TQ_unaligned, Loc, PrevSpec, DiagID,
+ getLangOpts());
+ break;
case tok::kw___uptr:
// GNU libc headers in C mode use '__uptr' as an identifier which conflicts
// with the MS modifier keyword.
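With this hunk, '__unaligned' is parsed as a real type qualifier rather than a generic Microsoft type attribute. Illustrative use (assumes -fms-extensions):

    int load(__unaligned int *p) { return *p; }  // qualifier applies to the pointee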
@@ -4787,7 +4857,6 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, unsigned AttrReqs,
case tok::kw___fastcall:
case tok::kw___thiscall:
case tok::kw___vectorcall:
- case tok::kw___unaligned:
if (AttrReqs & AR_DeclspecAttributesParsed) {
ParseMicrosoftTypeAttributes(DS.getAttributes());
continue;
@@ -4925,14 +4994,14 @@ void Parser::ParseDeclaratorInternal(Declarator &D,
// Member pointers get special handling, since there's no place for the
// scope spec in the generic path below.
if (getLangOpts().CPlusPlus &&
- (Tok.is(tok::coloncolon) ||
+ (Tok.is(tok::coloncolon) || Tok.is(tok::kw_decltype) ||
(Tok.is(tok::identifier) &&
(NextToken().is(tok::coloncolon) || NextToken().is(tok::less))) ||
Tok.is(tok::annot_cxxscope))) {
bool EnteringContext = D.getContext() == Declarator::FileContext ||
D.getContext() == Declarator::MemberContext;
CXXScopeSpec SS;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), EnteringContext);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext);
if (SS.isNotEmpty()) {
if (Tok.isNot(tok::star)) {
@@ -4969,7 +5038,8 @@ void Parser::ParseDeclaratorInternal(Declarator &D,
tok::TokenKind Kind = Tok.getKind();
if (D.getDeclSpec().isTypeSpecPipe() && !isPipeDeclerator(D)) {
- DeclSpec &DS = D.getMutableDeclSpec();
+ DeclSpec DS(AttrFactory);
+ ParseTypeQualifierListOpt(DS);
D.AddTypeInfo(
DeclaratorChunk::getPipe(DS.getTypeQualifiers(), DS.getPipeLoc()),
@@ -5009,7 +5079,8 @@ void Parser::ParseDeclaratorInternal(Declarator &D,
DS.getConstSpecLoc(),
DS.getVolatileSpecLoc(),
DS.getRestrictSpecLoc(),
- DS.getAtomicSpecLoc()),
+ DS.getAtomicSpecLoc(),
+ DS.getUnalignedSpecLoc()),
DS.getAttributes(),
SourceLocation());
else
@@ -5147,7 +5218,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
if (D.getCXXScopeSpec().isEmpty()) {
bool EnteringContext = D.getContext() == Declarator::FileContext ||
D.getContext() == Declarator::MemberContext;
- ParseOptionalCXXScopeSpecifier(D.getCXXScopeSpec(), ParsedType(),
+ ParseOptionalCXXScopeSpecifier(D.getCXXScopeSpec(), nullptr,
EnteringContext);
}
@@ -5208,11 +5279,8 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
bool HadScope = D.getCXXScopeSpec().isValid();
if (ParseUnqualifiedId(D.getCXXScopeSpec(),
/*EnteringContext=*/true,
- /*AllowDestructorName=*/true,
- AllowConstructorName,
- ParsedType(),
- TemplateKWLoc,
- D.getName()) ||
+ /*AllowDestructorName=*/true, AllowConstructorName,
+ nullptr, TemplateKWLoc, D.getName()) ||
// Once we're past the identifier, if the scope was bad, mark the
// whole declarator bad.
D.getCXXScopeSpec().isInvalid()) {
@@ -6013,6 +6081,9 @@ void Parser::ParseBracketDeclarator(Declarator &D) {
T.getCloseLocation()),
attrs, T.getCloseLocation());
return;
+ } else if (Tok.getKind() == tok::code_completion) {
+ Actions.CodeCompleteBracketDeclarator(getCurScope());
+ return cutOffParsing();
}
// If valid, this location is the position where we read the 'static' keyword.
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp
index 3f22ad4ddaba..6436e3dfc763 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseDeclCXX.cpp
@@ -65,7 +65,7 @@ Parser::DeclGroupPtrTy Parser::ParseNamespace(unsigned Context,
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteNamespaceDecl(getCurScope());
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
SourceLocation IdentLoc;
@@ -109,7 +109,7 @@ Parser::DeclGroupPtrTy Parser::ParseNamespace(unsigned Context,
Diag(Tok, diag::err_expected) << tok::identifier;
// Skip to end of the definition and eat the ';'.
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
if (attrLoc.isValid())
Diag(attrLoc, diag::err_unexpected_namespace_attributes_alias);
@@ -126,7 +126,7 @@ Parser::DeclGroupPtrTy Parser::ParseNamespace(unsigned Context,
Diag(Tok, diag::err_expected) << tok::l_brace;
else
Diag(Tok, diag::err_expected_either) << tok::identifier << tok::l_brace;
- return DeclGroupPtrTy();
+ return nullptr;
}
if (getCurScope()->isClassScope() || getCurScope()->isTemplateParamScope() ||
@@ -134,7 +134,7 @@ Parser::DeclGroupPtrTy Parser::ParseNamespace(unsigned Context,
getCurScope()->getFnParent()) {
Diag(T.getOpenLocation(), diag::err_namespace_nonnamespace_scope);
SkipUntil(tok::r_brace);
- return DeclGroupPtrTy();
+ return nullptr;
}
if (ExtraIdent.empty()) {
@@ -267,7 +267,7 @@ Decl *Parser::ParseNamespaceAlias(SourceLocation NamespaceLoc,
CXXScopeSpec SS;
// Parse (optional) nested-name-specifier.
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
if (SS.isInvalid() || Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_expected_namespace_name);
@@ -442,7 +442,7 @@ Decl *Parser::ParseUsingDirective(unsigned Context,
CXXScopeSpec SS;
// Parse (optional) nested-name-specifier.
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
IdentifierInfo *NamespcName = nullptr;
SourceLocation IdentLoc = SourceLocation();
@@ -517,7 +517,7 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context,
// Parse nested-name-specifier.
IdentifierInfo *LastII = nullptr;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false,
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false,
/*MayBePseudoDtor=*/nullptr,
/*IsTypename=*/false,
/*LastII=*/&LastII);
@@ -554,7 +554,7 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context,
/*AllowDestructorName=*/true,
/*AllowConstructorName=*/!(Tok.is(tok::identifier) &&
NextToken().is(tok::equal)),
- ParsedType(), TemplateKWLoc, Name)) {
+ nullptr, TemplateKWLoc, Name)) {
SkipUntil(tok::semi);
return nullptr;
}
@@ -944,7 +944,7 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
// Parse optional nested-name-specifier
CXXScopeSpec SS;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
BaseLoc = Tok.getLocation();
@@ -1006,8 +1006,8 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
if (!Template) {
TemplateArgList TemplateArgs;
SourceLocation LAngleLoc, RAngleLoc;
- ParseTemplateIdAfterTemplateName(TemplateTy(), IdLoc, SS,
- true, LAngleLoc, TemplateArgs, RAngleLoc);
+ ParseTemplateIdAfterTemplateName(nullptr, IdLoc, SS, true, LAngleLoc,
+ TemplateArgs, RAngleLoc);
return true;
}
@@ -1037,11 +1037,10 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
// We have an identifier; check whether it is actually a type.
IdentifierInfo *CorrectedII = nullptr;
- ParsedType Type = Actions.getTypeName(*Id, IdLoc, getCurScope(), &SS, true,
- false, ParsedType(),
- /*IsCtorOrDtorName=*/false,
- /*NonTrivialTypeSourceInfo=*/true,
- &CorrectedII);
+ ParsedType Type =
+ Actions.getTypeName(*Id, IdLoc, getCurScope(), &SS, true, false, nullptr,
+ /*IsCtorOrDtorName=*/false,
+ /*NonTrivialTypeSourceInfo=*/true, &CorrectedII);
if (!Type) {
Diag(IdLoc, diag::err_expected_class_name);
return true;
@@ -1101,9 +1100,25 @@ bool Parser::isValidAfterTypeSpecifier(bool CouldBeBitfield) {
// FIXME: we should emit semantic diagnostic when declaration
// attribute is in type attribute position.
case tok::kw___attribute: // struct foo __attribute__((used)) x;
+ case tok::annot_pragma_pack: // struct foo {...} _Pragma(pack(pop));
+ // struct foo {...} _Pragma(section(...));
+ case tok::annot_pragma_ms_pragma:
+ // struct foo {...} _Pragma(vtordisp(pop));
+ case tok::annot_pragma_ms_vtordisp:
+ // struct foo {...} _Pragma(pointers_to_members(...));
+ case tok::annot_pragma_ms_pointers_to_members:
return true;
case tok::colon:
return CouldBeBitfield; // enum E { ... } : 2;
+ // Microsoft compatibility
+ case tok::kw___cdecl: // struct foo {...} __cdecl x;
+ case tok::kw___fastcall: // struct foo {...} __fastcall x;
+ case tok::kw___stdcall: // struct foo {...} __stdcall x;
+ case tok::kw___thiscall: // struct foo {...} __thiscall x;
+ case tok::kw___vectorcall: // struct foo {...} __vectorcall x;
+ // We will diagnose these calling-convention specifiers on non-function
+ // declarations later, so claim they are valid after a type specifier.
+ return getLangOpts().MicrosoftExt;
// Type qualifiers
case tok::kw_const: // struct foo {...} const x;
case tok::kw_volatile: // struct foo {...} volatile x;
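A minimal sketch of the Microsoft-compatibility form now tolerated here; the parser accepts it and leaves the complaint about a calling convention on a non-function declaration to Sema:

    struct S { int x; } __cdecl s;  // parses under -fms-extensions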
@@ -1261,6 +1276,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
Tok.isOneOf(tok::kw___is_abstract,
tok::kw___is_arithmetic,
tok::kw___is_array,
+ tok::kw___is_assignable,
tok::kw___is_base_of,
tok::kw___is_class,
tok::kw___is_complete_type,
@@ -1352,7 +1368,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
CXXScopeSpec Spec;
bool HasValidSpec = true;
- if (ParseOptionalCXXScopeSpecifier(Spec, ParsedType(), EnteringContext)) {
+ if (ParseOptionalCXXScopeSpecifier(Spec, nullptr, EnteringContext)) {
DS.SetTypeSpecError();
HasValidSpec = false;
}
@@ -1381,9 +1397,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
// a class (or template thereof).
TemplateArgList TemplateArgs;
SourceLocation LAngleLoc, RAngleLoc;
- if (ParseTemplateIdAfterTemplateName(TemplateTy(), NameLoc, SS,
- true, LAngleLoc,
- TemplateArgs, RAngleLoc)) {
+ if (ParseTemplateIdAfterTemplateName(
+ nullptr, NameLoc, SS, true, LAngleLoc, TemplateArgs, RAngleLoc)) {
// We couldn't parse the template argument list at all, so don't
// try to give any location information for the list.
LAngleLoc = RAngleLoc = SourceLocation();
@@ -1396,7 +1411,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
// Strip off the last template parameter list if it was empty, since
// we've removed its template argument list.
if (TemplateParams && TemplateInfo.LastParameterListWasEmpty) {
- if (TemplateParams && TemplateParams->size() > 1) {
+ if (TemplateParams->size() > 1) {
TemplateParams->pop_back();
} else {
TemplateParams = nullptr;
@@ -1663,7 +1678,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
// template specialization.
FakedParamLists.push_back(Actions.ActOnTemplateParameterList(
0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, None,
- LAngleLoc));
+ LAngleLoc, nullptr));
TemplateParams = &FakedParamLists;
}
}
@@ -2253,7 +2268,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
ConsumeToken();
SkipUntil(tok::r_brace, StopAtSemi);
- return DeclGroupPtrTy();
+ return nullptr;
}
// Turn on colon protection early, while parsing declspec, although there is
@@ -2282,28 +2297,28 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
if (isAccessDecl) {
// Collect the scope specifier token we annotated earlier.
CXXScopeSpec SS;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(),
+ ParseOptionalCXXScopeSpecifier(SS, nullptr,
/*EnteringContext=*/false);
if (SS.isInvalid()) {
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
// Try to parse an unqualified-id.
SourceLocation TemplateKWLoc;
UnqualifiedId Name;
- if (ParseUnqualifiedId(SS, false, true, true, ParsedType(),
- TemplateKWLoc, Name)) {
+ if (ParseUnqualifiedId(SS, false, true, true, nullptr, TemplateKWLoc,
+ Name)) {
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
// TODO: recover from mistakenly-qualified operator declarations.
if (ExpectAndConsume(tok::semi, diag::err_expected_after,
"access declaration")) {
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
return DeclGroupPtrTy::make(DeclGroupRef(Actions.ActOnUsingDeclaration(
@@ -2361,7 +2376,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
if (Tok.is(tok::kw_namespace)) {
Diag(UsingLoc, diag::err_using_namespace_in_class);
SkipUntil(tok::semi, StopBeforeMatch);
- return DeclGroupPtrTy();
+ return nullptr;
}
SourceLocation DeclEnd;
// Otherwise, it must be a using-declaration or an alias-declaration.
@@ -2391,7 +2406,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
TemplateInfo.Kind == ParsedTemplateInfo::NonTemplate &&
DiagnoseMissingSemiAfterTagDefinition(DS, AS, DSC_class,
&CommonLateParsedAttrs))
- return DeclGroupPtrTy();
+ return nullptr;
MultiTemplateParamsArg TemplateParams(
TemplateInfo.TemplateParams? TemplateInfo.TemplateParams->data()
@@ -2402,10 +2417,15 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
if (DS.isFriendSpecified())
ProhibitAttributes(FnAttrs);
- Decl *TheDecl =
- Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS, DS, TemplateParams);
+ RecordDecl *AnonRecord = nullptr;
+ Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(
+ getCurScope(), AS, DS, TemplateParams, false, AnonRecord);
DS.complete(TheDecl);
- return DeclGroupPtrTy::make(DeclGroupRef(TheDecl));
+ if (AnonRecord) {
+ Decl* decls[] = {AnonRecord, TheDecl};
+ return Actions.BuildDeclaratorGroup(decls, /*TypeMayContainAuto=*/false);
+ }
+ return Actions.ConvertDeclToDeclGroup(TheDecl);
}
ParsingDeclarator DeclaratorInfo(*this, DS, Declarator::MemberContext);
@@ -2446,7 +2466,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
if (ParseCXXMemberDeclaratorBeforeInitializer(
DeclaratorInfo, VS, BitfieldSize, LateParsedAttrs)) {
TryConsumeToken(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
// Check for a member function definition.
@@ -2495,7 +2515,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
// Consume the optional ';'
TryConsumeToken(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef) {
@@ -2698,7 +2718,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch);
// If we stopped at a ';', eat it.
TryConsumeToken(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
return Actions.FinalizeDeclaratorGroup(getCurScope(), DS, DeclsInGroup);
@@ -2825,49 +2845,49 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclarationWithPragmas(
if (getLangOpts().MicrosoftExt &&
Tok.isOneOf(tok::kw___if_exists, tok::kw___if_not_exists)) {
ParseMicrosoftIfExistsClassDeclaration(TagType, AS);
- return DeclGroupPtrTy();
+ return nullptr;
}
// Check for extraneous top-level semicolon.
if (Tok.is(tok::semi)) {
ConsumeExtraSemi(InsideStruct, TagType);
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_vis)) {
HandlePragmaVisibility();
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_pack)) {
HandlePragmaPack();
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_align)) {
HandlePragmaAlign();
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_ms_pointers_to_members)) {
HandlePragmaMSPointersToMembers();
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_ms_pragma)) {
HandlePragmaMSPragma();
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_ms_vtordisp)) {
HandlePragmaMSVtorDisp();
- return DeclGroupPtrTy();
+ return nullptr;
}
// If we see a namespace here, a close brace was missing somewhere.
if (Tok.is(tok::kw_namespace)) {
DiagnoseUnexpectedNamespace(cast<NamedDecl>(TagDecl));
- return DeclGroupPtrTy();
+ return nullptr;
}
AccessSpecifier NewAS = getAccessSpecifierIfPresent();
@@ -2903,11 +2923,12 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclarationWithPragmas(
AccessAttrs.clear();
}
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::annot_pragma_openmp))
- return ParseOpenMPDeclarativeDirective();
+ return ParseOpenMPDeclarativeDirectiveWithExtDecl(AS, AccessAttrs, TagType,
+ TagDecl);
// Parse all the comma separated declarators.
return ParseCXXClassMemberDeclaration(AS, AccessAttrs.getList());
@@ -3122,8 +3143,7 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc,
}
if (TagDecl)
- Actions.ActOnTagFinishDefinition(getCurScope(), TagDecl,
- T.getCloseLocation());
+ Actions.ActOnTagFinishDefinition(getCurScope(), TagDecl, T.getRange());
// Leave the class scope.
ParsingDef.Pop();
@@ -3189,28 +3209,30 @@ void Parser::ParseConstructorInitializer(Decl *ConstructorDecl) {
Actions.CodeCompleteConstructorInitializer(ConstructorDecl,
MemInitializers);
return cutOffParsing();
- } else {
- MemInitResult MemInit = ParseMemInitializer(ConstructorDecl);
- if (!MemInit.isInvalid())
- MemInitializers.push_back(MemInit.get());
- else
- AnyErrors = true;
}
-
+
+ MemInitResult MemInit = ParseMemInitializer(ConstructorDecl);
+ if (!MemInit.isInvalid())
+ MemInitializers.push_back(MemInit.get());
+ else
+ AnyErrors = true;
+
if (Tok.is(tok::comma))
ConsumeToken();
else if (Tok.is(tok::l_brace))
break;
- // If the next token looks like a base or member initializer, assume that
- // we're just missing a comma.
- else if (Tok.isOneOf(tok::identifier, tok::coloncolon)) {
+ // If the previous initializer was valid and the next token looks like a
+ // base or member initializer, assume that we're just missing a comma.
+ else if (!MemInit.isInvalid() &&
+ Tok.isOneOf(tok::identifier, tok::coloncolon)) {
SourceLocation Loc = PP.getLocForEndOfToken(PrevTokLocation);
Diag(Loc, diag::err_ctor_init_missing_comma)
<< FixItHint::CreateInsertion(Loc, ", ");
} else {
// Skip over garbage, until we get to '{'. Don't eat the '{'.
- Diag(Tok.getLocation(), diag::err_expected_either) << tok::l_brace
- << tok::comma;
+ if (!MemInit.isInvalid())
+ Diag(Tok.getLocation(), diag::err_expected_either) << tok::l_brace
+ << tok::comma;
SkipUntil(tok::l_brace, StopAtSemi | StopBeforeMatch);
break;
}
@@ -3235,7 +3257,7 @@ void Parser::ParseConstructorInitializer(Decl *ConstructorDecl) {
MemInitResult Parser::ParseMemInitializer(Decl *ConstructorDecl) {
// parse '::'[opt] nested-name-specifier[opt]
CXXScopeSpec SS;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
ParsedType TemplateTypeTy;
if (Tok.is(tok::annot_template_id)) {
TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok);
@@ -3397,10 +3419,11 @@ Parser::tryParseExceptionSpecification(bool Delayed,
NoexceptExpr = ParseConstantExpression();
T.consumeClose();
// The argument must be contextually convertible to bool. We use
- // ActOnBooleanCondition for this purpose.
+ // CheckBooleanCondition for this purpose.
+ // FIXME: Add a proper Sema entry point for this.
if (!NoexceptExpr.isInvalid()) {
- NoexceptExpr = Actions.ActOnBooleanCondition(getCurScope(), KeywordLoc,
- NoexceptExpr.get());
+ NoexceptExpr =
+ Actions.CheckBooleanCondition(KeywordLoc, NoexceptExpr.get());
NoexceptRange = SourceRange(KeywordLoc, T.getCloseLocation());
} else {
NoexceptType = EST_None;
@@ -3630,7 +3653,10 @@ static bool IsBuiltInOrStandardCXX11Attribute(IdentifierInfo *AttrName,
case AttributeList::AT_FallThrough:
case AttributeList::AT_CXX11NoReturn:
return true;
-
+ case AttributeList::AT_WarnUnusedResult:
+ return !ScopeName && AttrName->getName().equals("nodiscard");
+ case AttributeList::AT_Unused:
+ return !ScopeName && AttrName->getName().equals("maybe_unused");
default:
return false;
}
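Both C++1z spellings now count as standard attributes. Usage sketch:

    [[nodiscard]] int compute();
    void consume([[maybe_unused]] int debug_only) {}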
@@ -3689,6 +3715,7 @@ bool Parser::ParseCXX11AttributeArgs(IdentifierInfo *AttrName,
// The attribute was allowed to have arguments, but none were provided
// even though the attribute parsed successfully. This is an error.
Diag(LParenLoc, diag::err_attribute_requires_arguments) << AttrName;
+ Attr->setInvalid(true);
} else if (!Attr->getMaxArgs()) {
// The attribute parsed successfully, but was not allowed to have any
// arguments. It doesn't matter whether any were provided -- the
@@ -3696,6 +3723,7 @@ bool Parser::ParseCXX11AttributeArgs(IdentifierInfo *AttrName,
Diag(LParenLoc, diag::err_cxx11_attribute_forbids_arguments)
<< AttrName
<< FixItHint::CreateRemoval(SourceRange(LParenLoc, *EndLoc));
+ Attr->setInvalid(true);
}
}
}
@@ -3742,6 +3770,23 @@ void Parser::ParseCXX11AttributeSpecifier(ParsedAttributes &attrs,
ConsumeBracket();
ConsumeBracket();
+ SourceLocation CommonScopeLoc;
+ IdentifierInfo *CommonScopeName = nullptr;
+ if (Tok.is(tok::kw_using)) {
+ Diag(Tok.getLocation(), getLangOpts().CPlusPlus1z
+ ? diag::warn_cxx14_compat_using_attribute_ns
+ : diag::ext_using_attribute_ns);
+ ConsumeToken();
+
+ CommonScopeName = TryParseCXX11AttributeIdentifier(CommonScopeLoc);
+ if (!CommonScopeName) {
+ Diag(Tok.getLocation(), diag::err_expected) << tok::identifier;
+ SkipUntil(tok::r_square, tok::colon, StopBeforeMatch);
+ }
+ if (!TryConsumeToken(tok::colon) && CommonScopeName)
+ Diag(Tok.getLocation(), diag::err_expected) << tok::colon;
+ }
+
llvm::SmallDenseMap<IdentifierInfo*, SourceLocation, 4> SeenAttrs;
while (Tok.isNot(tok::r_square)) {
@@ -3770,6 +3815,16 @@ void Parser::ParseCXX11AttributeSpecifier(ParsedAttributes &attrs,
}
}
+ if (CommonScopeName) {
+ if (ScopeName) {
+ Diag(ScopeLoc, diag::err_using_attribute_ns_conflict)
+ << SourceRange(CommonScopeLoc);
+ } else {
+ ScopeName = CommonScopeName;
+ ScopeLoc = CommonScopeLoc;
+ }
+ }
+
bool StandardAttr = IsBuiltInOrStandardCXX11Attribute(AttrName, ScopeName);
bool AttrParsed = false;
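A sketch of the attribute-using-prefix syntax this block parses ('CC', 'opt', and 'debug' are placeholder vendor names); the common scope distributes over every attribute in the specifier:

    [[using CC: opt(1), debug]] void f();
    // behaves like: [[CC::opt(1), CC::debug]] void f();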
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp
index 1fd98c140e0e..3e87a73aafe8 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseExpr.cpp
@@ -263,6 +263,9 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
Token OpToken = Tok;
ConsumeToken();
+ if (OpToken.is(tok::caretcaret)) {
+ return ExprError(Diag(Tok, diag::err_opencl_logical_exclusive_or));
+ }
// Bail out when encountering a comma followed by a token which can't
// possibly be the start of an expression. For instance:
// int f() { return 1, }
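The '^^' check gives OpenCL's reserved logical-exclusive-or operator a targeted diagnostic instead of a generic expected-expression error:

    // OpenCL source, sketch: int r = a ^^ b;  // error: '^^' is reserved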
@@ -428,6 +431,7 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
}
}
+ ExprResult OrigLHS = LHS;
if (!LHS.isInvalid()) {
// Combine the LHS and RHS into the LHS (e.g. build AST).
if (TernaryMiddle.isInvalid()) {
@@ -442,13 +446,23 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
LHS = Actions.ActOnBinOp(getCurScope(), OpToken.getLocation(),
OpToken.getKind(), LHS.get(), RHS.get());
- } else
+
+ } else {
LHS = Actions.ActOnConditionalOp(OpToken.getLocation(), ColonLoc,
LHS.get(), TernaryMiddle.get(),
RHS.get());
- } else
- // Ensure potential typos in the RHS aren't left undiagnosed.
+ }
+ // In this case, ActOnBinOp or ActOnConditionalOp performed the
+ // CorrectDelayedTyposInExpr check.
+ if (!getLangOpts().CPlusPlus)
+ continue;
+ }
+ // Ensure potential typos aren't left undiagnosed.
+ if (LHS.isInvalid()) {
+ Actions.CorrectDelayedTyposInExpr(OrigLHS);
+ Actions.CorrectDelayedTyposInExpr(TernaryMiddle);
Actions.CorrectDelayedTyposInExpr(RHS);
+ }
}
}
@@ -513,7 +527,7 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback {
/// \p isAddressOfOperand exists because an id-expression that is the operand
/// of address-of gets special treatment due to member pointers. NotCastExpr
/// is set to true if the token is not the start of a cast-expression, and no
-/// diagnostic is emitted in this case.
+/// diagnostic is emitted in this case and no tokens are consumed.
///
/// \verbatim
/// cast-expression: [C99 6.5.4]
@@ -787,6 +801,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
REVERTIBLE_TYPE_TRAIT(__is_abstract);
REVERTIBLE_TYPE_TRAIT(__is_arithmetic);
REVERTIBLE_TYPE_TRAIT(__is_array);
+ REVERTIBLE_TYPE_TRAIT(__is_assignable);
REVERTIBLE_TYPE_TRAIT(__is_base_of);
REVERTIBLE_TYPE_TRAIT(__is_class);
REVERTIBLE_TYPE_TRAIT(__is_complete_type);
@@ -895,7 +910,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
((Tok.is(tok::identifier) &&
(NextToken().is(tok::colon) || NextToken().is(tok::r_square))) ||
Tok.is(tok::code_completion))) {
- Res = ParseObjCMessageExpressionBody(SourceLocation(), ILoc, ParsedType(),
+ Res = ParseObjCMessageExpressionBody(SourceLocation(), ILoc, nullptr,
nullptr);
break;
}
@@ -995,6 +1010,8 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
case tok::kw__Generic: // primary-expression: generic-selection [C11 6.5.1]
Res = ParseGenericSelectionExpression();
break;
+ case tok::kw___builtin_available:
+ return ParseAvailabilityCheckExpr(Tok.getLocation());
case tok::kw___builtin_va_arg:
case tok::kw___builtin_offsetof:
case tok::kw___builtin_choose_expr:
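'__builtin_available' thereby becomes a primary expression. A hedged usage sketch (meaningful when targeting Apple platforms):

    if (__builtin_available(macOS 10.12, iOS 10, *)) {
      // may call APIs introduced in macOS 10.12 / iOS 10
    } else {
      // fallback path
    }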
@@ -1010,15 +1027,24 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
// unary-expression:
// ++ cast-expression
// -- cast-expression
- SourceLocation SavedLoc = ConsumeToken();
+ Token SavedTok = Tok;
+ ConsumeToken();
// One special case is implicitly handled here: if the preceding tokens are
// an ambiguous cast expression, such as "(T())++", then we recurse to
// determine whether the '++' is prefix or postfix.
Res = ParseCastExpression(!getLangOpts().CPlusPlus,
/*isAddressOfOperand*/false, NotCastExpr,
NotTypeCast);
+ if (NotCastExpr) {
+ // If we return with NotCastExpr = true, we must not consume any tokens,
+ // so put the token back where we found it.
+ assert(Res.isInvalid());
+ UnconsumeToken(SavedTok);
+ return ExprError();
+ }
if (!Res.isInvalid())
- Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get());
+ Res = Actions.ActOnUnaryOp(getCurScope(), SavedTok.getLocation(),
+ SavedKind, Res.get());
return Res;
}
case tok::amp: { // unary-expression: '&' cast-expression
@@ -1148,10 +1174,14 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_void:
case tok::kw_typename:
case tok::kw_typeof:
- case tok::kw___vector: {
+ case tok::kw___vector:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
+ {
if (!getLangOpts().CPlusPlus) {
Diag(Tok, diag::err_expected_expression);
return ExprError();
@@ -1204,7 +1234,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
// type, translate it into a type and continue parsing as a
// cast expression.
CXXScopeSpec SS;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(),
+ ParseOptionalCXXScopeSpecifier(SS, nullptr,
/*EnteringContext=*/false);
AnnotateTemplateIdTokenAsType();
return ParseCastExpression(isUnaryExpression, isAddressOfOperand,
@@ -1395,7 +1425,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
if (getLangOpts().ObjC1 && !InMessageExpression &&
(NextToken().is(tok::colon) || NextToken().is(tok::r_square))) {
LHS = ParseObjCMessageExpressionBody(SourceLocation(), SourceLocation(),
- ParsedType(), LHS.get());
+ nullptr, LHS.get());
break;
}
@@ -1416,8 +1446,10 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
// Reject array indices starting with a lambda-expression. '[[' is
// reserved for attributes.
- if (CheckProhibitedCXX11Attribute())
+ if (CheckProhibitedCXX11Attribute()) {
+ (void)Actions.CorrectDelayedTyposInExpr(LHS);
return ExprError();
+ }
BalancedDelimiterTracker T(*this, tok::l_square);
T.consumeOpen();
@@ -1445,6 +1477,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
SourceLocation RLoc = Tok.getLocation();
+ ExprResult OrigLHS = LHS;
if (!LHS.isInvalid() && !Idx.isInvalid() && !Length.isInvalid() &&
Tok.is(tok::r_square)) {
if (ColonLoc.isValid()) {
@@ -1455,7 +1488,10 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
Idx.get(), RLoc);
}
} else {
- (void)Actions.CorrectDelayedTyposInExpr(LHS);
+ LHS = ExprError();
+ }
+ if (LHS.isInvalid()) {
+ (void)Actions.CorrectDelayedTyposInExpr(OrigLHS);
(void)Actions.CorrectDelayedTyposInExpr(Idx);
(void)Actions.CorrectDelayedTyposInExpr(Length);
LHS = ExprError();
@@ -1606,7 +1642,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
/*EnteringContext=*/false,
&MayBePseudoDestructor);
if (SS.isNotEmpty())
- ObjectType = ParsedType();
+ ObjectType = nullptr;
}
if (Tok.is(tok::code_completion)) {
@@ -2160,7 +2196,7 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr,
ExprResult Result(true);
bool isAmbiguousTypeId;
- CastTy = ParsedType();
+ CastTy = nullptr;
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteOrdinaryName(getCurScope(),
@@ -2506,7 +2542,7 @@ ExprResult Parser::ParseGenericSelectionExpression() {
return ExprError();
}
DefaultLoc = ConsumeToken();
- Ty = ParsedType();
+ Ty = nullptr;
} else {
ColonProtectionRAIIObject X(*this);
TypeResult TR = ParseTypeName();
@@ -2836,3 +2872,117 @@ ExprResult Parser::ParseObjCBoolLiteral() {
tok::TokenKind Kind = Tok.getKind();
return Actions.ActOnObjCBoolLiteral(ConsumeToken(), Kind);
}
+
+/// Validate availability spec list, emitting diagnostics if necessary. Returns
+/// true if invalid.
+static bool CheckAvailabilitySpecList(Parser &P,
+ ArrayRef<AvailabilitySpec> AvailSpecs) {
+ llvm::SmallSet<StringRef, 4> Platforms;
+ bool HasOtherPlatformSpec = false;
+ bool Valid = true;
+ for (const auto &Spec : AvailSpecs) {
+ if (Spec.isOtherPlatformSpec()) {
+ if (HasOtherPlatformSpec) {
+ P.Diag(Spec.getBeginLoc(), diag::err_availability_query_repeated_star);
+ Valid = false;
+ }
+
+ HasOtherPlatformSpec = true;
+ continue;
+ }
+
+ bool Inserted = Platforms.insert(Spec.getPlatform()).second;
+ if (!Inserted) {
+ // Rule out multiple version specs referring to the same platform.
+ // For example, we emit an error for:
+ // @available(macos 10.10, macos 10.11, *)
+ StringRef Platform = Spec.getPlatform();
+ P.Diag(Spec.getBeginLoc(), diag::err_availability_query_repeated_platform)
+ << Spec.getEndLoc() << Platform;
+ Valid = false;
+ }
+ }
+
+ if (!HasOtherPlatformSpec) {
+ SourceLocation InsertWildcardLoc = AvailSpecs.back().getEndLoc();
+ P.Diag(InsertWildcardLoc, diag::err_availability_query_wildcard_required)
+ << FixItHint::CreateInsertion(InsertWildcardLoc, ", *");
+ return true;
+ }
+
+ return !Valid;
+}
+
+/// Parse availability query specification.
+///
+/// availability-spec:
+/// '*'
+/// identifier version-tuple
+Optional<AvailabilitySpec> Parser::ParseAvailabilitySpec() {
+ if (Tok.is(tok::star)) {
+ return AvailabilitySpec(ConsumeToken());
+ } else {
+ // Parse the platform name.
+ if (Tok.isNot(tok::identifier)) {
+ Diag(Tok, diag::err_avail_query_expected_platform_name);
+ return None;
+ }
+
+ IdentifierLoc *PlatformIdentifier = ParseIdentifierLoc();
+ SourceRange VersionRange;
+ VersionTuple Version = ParseVersionTuple(VersionRange);
+
+ if (Version.empty())
+ return None;
+
+ StringRef Platform = PlatformIdentifier->Ident->getName();
+
+ if (AvailabilityAttr::getPrettyPlatformName(Platform).empty()) {
+ Diag(PlatformIdentifier->Loc,
+ diag::err_avail_query_unrecognized_platform_name)
+ << Platform;
+ return None;
+ }
+
+ return AvailabilitySpec(Version, Platform, PlatformIdentifier->Loc,
+ VersionRange.getEnd());
+ }
+}
+
+ExprResult Parser::ParseAvailabilityCheckExpr(SourceLocation BeginLoc) {
+ assert(Tok.is(tok::kw___builtin_available) ||
+ Tok.isObjCAtKeyword(tok::objc_available));
+
+ // Eat the available or __builtin_available.
+ ConsumeToken();
+
+ BalancedDelimiterTracker Parens(*this, tok::l_paren);
+ if (Parens.expectAndConsume())
+ return ExprError();
+
+ SmallVector<AvailabilitySpec, 4> AvailSpecs;
+ bool HasError = false;
+ while (true) {
+ Optional<AvailabilitySpec> Spec = ParseAvailabilitySpec();
+ if (!Spec)
+ HasError = true;
+ else
+ AvailSpecs.push_back(*Spec);
+
+ if (!TryConsumeToken(tok::comma))
+ break;
+ }
+
+ if (HasError) {
+ SkipUntil(tok::r_paren, StopAtSemi);
+ return ExprError();
+ }
+
+ CheckAvailabilitySpecList(*this, AvailSpecs);
+
+ if (Parens.consumeClose())
+ return ExprError();
+
+ return Actions.ActOnObjCAvailabilityCheckExpr(AvailSpecs, BeginLoc,
+ Parens.getCloseLocation());
+}
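Taken together, these checks reject malformed spec lists; a sketch of inputs and the resulting diagnostics (paraphrased, not verbatim compiler output):

    if (__builtin_available(macOS 10.12)) {}                  // error + fixit: append ", *"
    if (__builtin_available(macOS 10.10, macOS 10.11, *)) {}  // error: repeated platform
    if (__builtin_available(*, *)) {}                         // error: repeated '*'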
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp
index f8938ba3495b..85c1301fc967 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp
@@ -283,8 +283,8 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS,
//
// To implement this, we clear out the object type as soon as we've
// seen a leading '::' or part of a nested-name-specifier.
- ObjectType = ParsedType();
-
+ ObjectType = nullptr;
+
if (Tok.is(tok::code_completion)) {
// Code completion for a nested-name-specifier, where the code
// completion token follows the '::'.
@@ -597,7 +597,7 @@ ExprResult Parser::tryParseCXXIdExpression(CXXScopeSpec &SS, bool isAddressOfOpe
/*EnteringContext=*/false,
/*AllowDestructorName=*/false,
/*AllowConstructorName=*/false,
- /*ObjectType=*/ParsedType(), TemplateKWLoc, Name))
+ /*ObjectType=*/nullptr, TemplateKWLoc, Name))
return ExprError();
// This is only the direct operand of an & operator if it is not
@@ -659,7 +659,7 @@ ExprResult Parser::ParseCXXIdExpression(bool isAddressOfOperand) {
// '::' unqualified-id
//
CXXScopeSpec SS;
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
Token Replacement;
ExprResult Result =
@@ -739,8 +739,11 @@ ExprResult Parser::TryParseLambdaExpression() {
&& Tok.is(tok::l_square)
&& "Not at the start of a possible lambda expression.");
- const Token Next = NextToken(), After = GetLookAheadToken(2);
+ const Token Next = NextToken();
+ if (Next.is(tok::eof)) // Nothing else to look up here...
+ return ExprEmpty();
+ const Token After = GetLookAheadToken(2);
// If lookahead indicates this is a lambda...
if (Next.is(tok::r_square) || // []
Next.is(tok::equal) || // [=
@@ -846,8 +849,16 @@ Optional<unsigned> Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
IdentifierInfo *Id = nullptr;
SourceLocation EllipsisLoc;
ExprResult Init;
-
- if (Tok.is(tok::kw_this)) {
+
+ if (Tok.is(tok::star)) {
+ Loc = ConsumeToken();
+ if (Tok.is(tok::kw_this)) {
+ ConsumeToken();
+ Kind = LCK_StarThis;
+ } else {
+ return DiagResult(diag::err_expected_star_this_capture);
+ }
+ } else if (Tok.is(tok::kw_this)) {
Kind = LCK_This;
Loc = ConsumeToken();
} else {
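The new branch parses C++1z's by-value '*this' capture. Illustration:

    struct Counter {
      int n = 0;
      auto snapshot() {
        return [*this] { return n; };  // closure owns a copy of the object
      }
    };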
@@ -1658,46 +1669,58 @@ Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) {
/// [GNU] type-specifier-seq declarator simple-asm-expr[opt] attributes[opt]
/// '=' assignment-expression
///
-/// \param ExprOut if the condition was parsed as an expression, the parsed
-/// expression.
+/// In C++1z, a condition may in some contexts be preceded by an
+/// optional init-statement. This function will parse that too.
///
-/// \param DeclOut if the condition was parsed as a declaration, the parsed
-/// declaration.
+/// \param InitStmt If non-null, an init-statement is permitted, and if present
+/// will be parsed and stored here.
///
/// \param Loc The location of the start of the statement that requires this
/// condition, e.g., the "for" in a for loop.
///
-/// \param ConvertToBoolean Whether the condition expression should be
-/// converted to a boolean value.
-///
-/// \returns true if there was a parsing, false otherwise.
-bool Parser::ParseCXXCondition(ExprResult &ExprOut,
- Decl *&DeclOut,
- SourceLocation Loc,
- bool ConvertToBoolean) {
+/// \returns The parsed condition.
+Sema::ConditionResult Parser::ParseCXXCondition(StmtResult *InitStmt,
+ SourceLocation Loc,
+ Sema::ConditionKind CK) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Condition);
cutOffParsing();
- return true;
+ return Sema::ConditionError();
}
ParsedAttributesWithRange attrs(AttrFactory);
MaybeParseCXX11Attributes(attrs);
- if (!isCXXConditionDeclaration()) {
+ // Determine what kind of thing we have.
+ switch (isCXXConditionDeclarationOrInitStatement(InitStmt)) {
+ case ConditionOrInitStatement::Expression: {
ProhibitAttributes(attrs);
// Parse the expression.
- ExprOut = ParseExpression(); // expression
- DeclOut = nullptr;
- if (ExprOut.isInvalid())
- return true;
+ ExprResult Expr = ParseExpression(); // expression
+ if (Expr.isInvalid())
+ return Sema::ConditionError();
+
+ if (InitStmt && Tok.is(tok::semi)) {
+ *InitStmt = Actions.ActOnExprStmt(Expr.get());
+ ConsumeToken();
+ return ParseCXXCondition(nullptr, Loc, CK);
+ }
- // If required, convert to a boolean value.
- if (ConvertToBoolean)
- ExprOut
- = Actions.ActOnBooleanCondition(getCurScope(), Loc, ExprOut.get());
- return ExprOut.isInvalid();
+ return Actions.ActOnCondition(getCurScope(), Loc, Expr.get(), CK);
+ }
+
+ case ConditionOrInitStatement::InitStmtDecl: {
+ SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
+ DeclGroupPtrTy DG = ParseSimpleDeclaration(
+ Declarator::InitStmtContext, DeclEnd, attrs, /*RequireSemi=*/true);
+ *InitStmt = Actions.ActOnDeclStmt(DG, DeclStart, DeclEnd);
+ return ParseCXXCondition(nullptr, Loc, CK);
+ }
+
+ case ConditionOrInitStatement::ConditionDecl:
+ case ConditionOrInitStatement::Error:
+ break;
}
// type-specifier-seq
@@ -1715,7 +1738,7 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut,
ExprResult AsmLabel(ParseSimpleAsm(&Loc));
if (AsmLabel.isInvalid()) {
SkipUntil(tok::semi, StopAtSemi);
- return true;
+ return Sema::ConditionError();
}
DeclaratorInfo.setAsmLabel(AsmLabel.get());
DeclaratorInfo.SetRangeEnd(Loc);
@@ -1727,8 +1750,9 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut,
// Type-check the declaration itself.
DeclResult Dcl = Actions.ActOnCXXConditionDeclaration(getCurScope(),
DeclaratorInfo);
- DeclOut = Dcl.get();
- ExprOut = ExprError();
+ if (Dcl.isInvalid())
+ return Sema::ConditionError();
+ Decl *DeclOut = Dcl.get();
// '=' assignment-expression
// If a '==' or '+=' is found, suggest a fixit to '='.
@@ -1748,12 +1772,11 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut,
SourceLocation LParen = ConsumeParen(), RParen = LParen;
if (SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch))
RParen = ConsumeParen();
- Diag(DeclOut ? DeclOut->getLocation() : LParen,
+ Diag(DeclOut->getLocation(),
diag::err_expected_init_in_condition_lparen)
<< SourceRange(LParen, RParen);
} else {
- Diag(DeclOut ? DeclOut->getLocation() : Tok.getLocation(),
- diag::err_expected_init_in_condition);
+ Diag(DeclOut->getLocation(), diag::err_expected_init_in_condition);
}
if (!InitExpr.isInvalid())
@@ -1762,12 +1785,8 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut,
else
Actions.ActOnInitializerError(DeclOut);
- // FIXME: Build a reference to this declaration? Convert it to bool?
- // (This is currently handled by Sema).
-
Actions.FinalizeDeclaration(DeclOut);
-
- return false;
+ return Actions.ActOnConditionVariable(DeclOut, Loc, CK);
}
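ParseCXXCondition now also consumes an optional C++1z init-statement, then recurses for the condition proper. A sketch of what becomes parseable:

    #include <map>
    #include <string>
    int lookup(const std::map<std::string, int> &m) {
      if (auto it = m.find("key"); it != m.end())  // init-statement; condition
        return it->second;
      return 0;
    }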
/// ParseCXXSimpleTypeSpecifier - [C++ 7.1.5.2] Simple type specifiers.
@@ -1863,6 +1882,9 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) {
case tok::kw_double:
DS.SetTypeSpecType(DeclSpec::TST_double, Loc, PrevSpec, DiagID, Policy);
break;
+ case tok::kw___float128:
+ DS.SetTypeSpecType(DeclSpec::TST_float128, Loc, PrevSpec, DiagID, Policy);
+ break;
case tok::kw_wchar_t:
DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID, Policy);
break;
@@ -2413,9 +2435,8 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
if (AllowConstructorName &&
Actions.isCurrentClassName(*Id, getCurScope(), &SS)) {
// We have parsed a constructor name.
- ParsedType Ty = Actions.getTypeName(*Id, IdLoc, getCurScope(),
- &SS, false, false,
- ParsedType(),
+ ParsedType Ty = Actions.getTypeName(*Id, IdLoc, getCurScope(), &SS, false,
+ false, nullptr,
/*IsCtorOrDtorName=*/true,
/*NonTrivialTypeSourceInfo=*/true);
Result.setConstructorName(Ty, IdLoc, IdLoc);
@@ -2451,13 +2472,11 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
<< TemplateId->Name
<< FixItHint::CreateRemoval(
SourceRange(TemplateId->LAngleLoc, TemplateId->RAngleLoc));
- ParsedType Ty = Actions.getTypeName(*TemplateId->Name,
- TemplateId->TemplateNameLoc,
- getCurScope(),
- &SS, false, false,
- ParsedType(),
- /*IsCtorOrDtorName=*/true,
- /*NontrivialTypeSourceInfo=*/true);
+ ParsedType Ty =
+ Actions.getTypeName(*TemplateId->Name, TemplateId->TemplateNameLoc,
+ getCurScope(), &SS, false, false, nullptr,
+ /*IsCtorOrDtorName=*/true,
+ /*NontrivialTypeSourceInfo=*/true);
Result.setConstructorName(Ty, TemplateId->TemplateNameLoc,
TemplateId->RAngleLoc);
ConsumeToken();
@@ -2541,7 +2560,7 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
if (ParseOptionalCXXScopeSpecifier(SS, ObjectType, EnteringContext))
return true;
if (SS.isNotEmpty())
- ObjectType = ParsedType();
+ ObjectType = nullptr;
if (Tok.isNot(tok::identifier) || NextToken().is(tok::coloncolon) ||
!SS.isSet()) {
Diag(TildeLoc, diag::err_destructor_tilde_scope);
@@ -2563,7 +2582,7 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
SourceLocation ClassNameLoc = ConsumeToken();
if (TemplateSpecified || Tok.is(tok::less)) {
- Result.setDestructorName(TildeLoc, ParsedType(), ClassNameLoc);
+ Result.setDestructorName(TildeLoc, nullptr, ClassNameLoc);
return ParseUnqualifiedIdTemplateId(SS, TemplateKWLoc,
ClassName, ClassNameLoc,
EnteringContext, ObjectType,
@@ -3029,7 +3048,7 @@ Parser::ParseCXXAmbiguousParenExpression(ParenParseOption &ExprType,
assert(isTypeIdInParens() && "Not a type-id!");
ExprResult Result(true);
- CastTy = ParsedType();
+ CastTy = nullptr;
// We need to disambiguate a very ugly part of the C++ syntax:
//
@@ -3084,12 +3103,19 @@ Parser::ParseCXXAmbiguousParenExpression(ParenParseOption &ExprType,
ParseAs = NotCastExpr ? SimpleExpr : CastExpr;
}
+ // Create a fake EOF to mark end of Toks buffer.
+ Token AttrEnd;
+ AttrEnd.startToken();
+ AttrEnd.setKind(tok::eof);
+ AttrEnd.setLocation(Tok.getLocation());
+ AttrEnd.setEofData(Toks.data());
+ Toks.push_back(AttrEnd);
+
// The current token should go after the cached tokens.
Toks.push_back(Tok);
// Re-enter the stored parenthesized tokens into the token stream, so we may
// parse them now.
- PP.EnterTokenStream(Toks.data(), Toks.size(),
- true/*DisableMacroExpansion*/, false/*OwnsTokens*/);
+ PP.EnterTokenStream(Toks, true /*DisableMacroExpansion*/);
// Drop the current token and bring the first cached one. It's the same token
// as when we entered this function.
ConsumeAnyToken();
@@ -3108,6 +3134,10 @@ Parser::ParseCXXAmbiguousParenExpression(ParenParseOption &ExprType,
Tracker.consumeClose();
ColonProt.restore();
+ // Consume EOF marker for Toks buffer.
+ assert(Tok.is(tok::eof) && Tok.getEofData() == AttrEnd.getEofData());
+ ConsumeAnyToken();
+
if (ParseAs == CompoundLiteral) {
ExprType = CompoundLiteral;
if (DeclaratorInfo.isInvalidType())
@@ -3144,10 +3174,16 @@ Parser::ParseCXXAmbiguousParenExpression(ParenParseOption &ExprType,
// Match the ')'.
if (Result.isInvalid()) {
- SkipUntil(tok::r_paren, StopAtSemi);
+ while (Tok.isNot(tok::eof))
+ ConsumeAnyToken();
+ assert(Tok.getEofData() == AttrEnd.getEofData());
+ ConsumeAnyToken();
return ExprError();
}
Tracker.consumeClose();
+ // Consume EOF marker for Toks buffer.
+ assert(Tok.is(tok::eof) && Tok.getEofData() == AttrEnd.getEofData());
+ ConsumeAnyToken();
return Result;
}
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseInit.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseInit.cpp
index 4896ff0d235a..2cdb9d3a22a6 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseInit.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseInit.cpp
@@ -216,10 +216,8 @@ ExprResult Parser::ParseInitializerWithPotentialDesignator() {
NextToken().isNot(tok::period) &&
getCurScope()->isInObjcMethodScope()) {
CheckArrayDesignatorSyntax(*this, StartLoc, Desig);
- return ParseAssignmentExprWithObjCMessageExprStart(StartLoc,
- ConsumeToken(),
- ParsedType(),
- nullptr);
+ return ParseAssignmentExprWithObjCMessageExprStart(
+ StartLoc, ConsumeToken(), nullptr, nullptr);
}
// Parse the receiver, which is either a type or an expression.
@@ -257,10 +255,8 @@ ExprResult Parser::ParseInitializerWithPotentialDesignator() {
NextToken().is(tok::period), ReceiverType)) {
case Sema::ObjCSuperMessage:
CheckArrayDesignatorSyntax(*this, StartLoc, Desig);
- return ParseAssignmentExprWithObjCMessageExprStart(StartLoc,
- ConsumeToken(),
- ParsedType(),
- nullptr);
+ return ParseAssignmentExprWithObjCMessageExprStart(
+ StartLoc, ConsumeToken(), nullptr, nullptr);
case Sema::ObjCClassMessage:
CheckArrayDesignatorSyntax(*this, StartLoc, Desig);
@@ -320,10 +316,8 @@ ExprResult Parser::ParseInitializerWithPotentialDesignator() {
if (getLangOpts().ObjC1 && Tok.isNot(tok::ellipsis) &&
Tok.isNot(tok::r_square)) {
CheckArrayDesignatorSyntax(*this, Tok.getLocation(), Desig);
- return ParseAssignmentExprWithObjCMessageExprStart(StartLoc,
- SourceLocation(),
- ParsedType(),
- Idx.get());
+ return ParseAssignmentExprWithObjCMessageExprStart(
+ StartLoc, SourceLocation(), nullptr, Idx.get());
}
// If this is a normal array designator, remember it.
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp
index e72a1f62f942..67abe5839bfe 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseObjc.cpp
@@ -21,6 +21,7 @@
#include "clang/Sema/Scope.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+
using namespace clang;
/// Skips attributes after an Objective-C @ directive. Emits a diagnostic.
@@ -50,7 +51,7 @@ Parser::DeclGroupPtrTy Parser::ParseObjCAtDirectives() {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteObjCAtDirective(getCurScope());
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
Decl *SingleDecl = nullptr;
@@ -99,16 +100,20 @@ class Parser::ObjCTypeParamListScope {
Sema &Actions;
Scope *S;
ObjCTypeParamList *Params;
+
public:
ObjCTypeParamListScope(Sema &Actions, Scope *S)
: Actions(Actions), S(S), Params(nullptr) {}
+
~ObjCTypeParamListScope() {
leave();
}
+
void enter(ObjCTypeParamList *P) {
assert(!Params);
Params = P;
}
+
void leave() {
if (Params)
Actions.popObjCTypeParamList(S, Params);
@@ -334,16 +339,11 @@ Decl *Parser::ParseObjCAtInterfaceDeclaration(SourceLocation AtLoc,
// Type arguments for the superclass or protocol conformances.
if (Tok.is(tok::less)) {
- parseObjCTypeArgsOrProtocolQualifiers(ParsedType(),
- typeArgsLAngleLoc,
- typeArgs,
- typeArgsRAngleLoc,
- LAngleLoc,
- protocols,
- protocolLocs,
- EndProtoLoc,
- /*consumeLastToken=*/true,
- /*warnOnIncompleteProtocols=*/true);
+ parseObjCTypeArgsOrProtocolQualifiers(
+ nullptr, typeArgsLAngleLoc, typeArgs, typeArgsRAngleLoc, LAngleLoc,
+ protocols, protocolLocs, EndProtoLoc,
+ /*consumeLastToken=*/true,
+ /*warnOnIncompleteProtocols=*/true);
}
}
@@ -459,14 +459,8 @@ ObjCTypeParamList *Parser::parseObjCTypeParamListOrProtocolRefs(
unsigned index = 0;
for (const auto &pair : protocolIdents) {
DeclResult typeParam = Actions.actOnObjCTypeParam(
- getCurScope(),
- ObjCTypeParamVariance::Invariant,
- SourceLocation(),
- index++,
- pair.first,
- pair.second,
- SourceLocation(),
- ParsedType());
+ getCurScope(), ObjCTypeParamVariance::Invariant, SourceLocation(),
+ index++, pair.first, pair.second, SourceLocation(), nullptr);
if (typeParam.isUsable())
typeParams.push_back(typeParam.get());
}
@@ -542,16 +536,9 @@ ObjCTypeParamList *Parser::parseObjCTypeParamListOrProtocolRefs(
}
// Create the type parameter.
- DeclResult typeParam = Actions.actOnObjCTypeParam(getCurScope(),
- variance,
- varianceLoc,
- typeParams.size(),
- paramName,
- paramLoc,
- colonLoc,
- boundType.isUsable()
- ? boundType.get()
- : ParsedType());
+ DeclResult typeParam = Actions.actOnObjCTypeParam(
+ getCurScope(), variance, varianceLoc, typeParams.size(), paramName,
+ paramLoc, colonLoc, boundType.isUsable() ? boundType.get() : nullptr);
if (typeParam.isUsable())
typeParams.push_back(typeParam.get());
} while (TryConsumeToken(tok::comma));
@@ -865,6 +852,7 @@ static void diagnoseRedundantPropertyNullability(Parser &P,
/// nullable
/// null_unspecified
/// null_resettable
+/// class
///
void Parser::ParseObjCPropertyAttribute(ObjCDeclSpec &DS) {
assert(Tok.getKind() == tok::l_paren);
@@ -980,6 +968,8 @@ void Parser::ParseObjCPropertyAttribute(ObjCDeclSpec &DS) {
// Also set the null_resettable bit.
DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_null_resettable);
+ } else if (II->isStr("class")) {
+ DS.setPropertyAttributes(ObjCDeclSpec::DQ_PR_class);
} else {
Diag(AttrName, diag::err_objc_expected_property_attr) << II;
SkipUntil(tok::r_paren, StopAtSemi);
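The new DQ_PR_class branch accepts 'class' in a property attribute list, alongside the nullability attributes in the grammar comment above. A hedged Objective-C example of source this makes parseable (Foo and name are placeholder names):

    @interface Foo : NSObject
    // 'class' marks a class property rather than an instance property.
    @property (class, nonatomic, copy) NSString *name;
    @end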
@@ -1287,7 +1277,6 @@ ParsedType Parser::ParseObjCTypeName(ObjCDeclSpec &DS,
if (context == Declarator::ObjCResultContext)
dsContext = DSC_objc_method_result;
ParseSpecifierQualifierList(declSpec, AS_none, dsContext);
- declSpec.SetRangeEnd(Tok.getLocation());
Declarator declarator(declSpec, context);
ParseDeclarator(declarator);
@@ -1361,8 +1350,8 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc,
ParsingDeclRAIIObject PD(*this, ParsingDeclRAIIObject::NoParent);
if (Tok.is(tok::code_completion)) {
- Actions.CodeCompleteObjCMethodDecl(getCurScope(), mType == tok::minus,
- /*ReturnType=*/ ParsedType());
+ Actions.CodeCompleteObjCMethodDecl(getCurScope(), mType == tok::minus,
+ /*ReturnType=*/nullptr);
cutOffParsing();
return nullptr;
}
@@ -1432,7 +1421,7 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc,
if (ExpectAndConsume(tok::colon))
break;
- ArgInfo.Type = ParsedType();
+ ArgInfo.Type = nullptr;
if (Tok.is(tok::l_paren)) // Parse the argument type if present.
ArgInfo.Type = ParseObjCTypeName(ArgInfo.DeclSpec,
Declarator::ObjCParameterContext,
@@ -1703,11 +1692,18 @@ void Parser::parseObjCTypeArgsOrProtocolQualifiers(
return;
}
- // We syntactically matched a type argument, so commit to parsing
- // type arguments.
+  // We parsed an identifier list but hit a token that is not a plain
+  // identifier. Before committing to type arguments we (a) check that the
+  // identifiers already parsed name legitimate types (not protocols or
+  // unknown names) and (b) parse the remaining arguments, which must all be
+  // type arguments.
// Convert the identifiers into type arguments.
bool invalid = false;
+ IdentifierInfo *foundProtocolId = nullptr, *foundValidTypeId = nullptr;
+ SourceLocation foundProtocolSrcLoc, foundValidTypeSrcLoc;
+ SmallVector<IdentifierInfo *, 2> unknownTypeArgs;
+ SmallVector<SourceLocation, 2> unknownTypeArgsLoc;
+
for (unsigned i = 0, n = identifiers.size(); i != n; ++i) {
ParsedType typeArg
= Actions.getTypeName(*identifiers[i], identifierLocs[i], getCurScope());
@@ -1721,17 +1717,32 @@ void Parser::parseObjCTypeArgsOrProtocolQualifiers(
// Form a declarator to turn this into a type.
Declarator D(DS, Declarator::TypeNameContext);
TypeResult fullTypeArg = Actions.ActOnTypeName(getCurScope(), D);
- if (fullTypeArg.isUsable())
+ if (fullTypeArg.isUsable()) {
typeArgs.push_back(fullTypeArg.get());
- else
+ if (!foundValidTypeId) {
+ foundValidTypeId = identifiers[i];
+ foundValidTypeSrcLoc = identifierLocs[i];
+ }
+ } else {
invalid = true;
+ unknownTypeArgs.push_back(identifiers[i]);
+ unknownTypeArgsLoc.push_back(identifierLocs[i]);
+ }
} else {
invalid = true;
+ if (!Actions.LookupProtocol(identifiers[i], identifierLocs[i])) {
+ unknownTypeArgs.push_back(identifiers[i]);
+ unknownTypeArgsLoc.push_back(identifierLocs[i]);
+ } else if (!foundProtocolId) {
+ foundProtocolId = identifiers[i];
+ foundProtocolSrcLoc = identifierLocs[i];
+ }
}
}
// Continue parsing type-names.
do {
+ Token CurTypeTok = Tok;
TypeResult typeArg = ParseTypeName();
// Consume the '...' for a pack expansion.
@@ -1743,11 +1754,28 @@ void Parser::parseObjCTypeArgsOrProtocolQualifiers(
if (typeArg.isUsable()) {
typeArgs.push_back(typeArg.get());
+ if (!foundValidTypeId) {
+ foundValidTypeId = CurTypeTok.getIdentifierInfo();
+ foundValidTypeSrcLoc = CurTypeTok.getLocation();
+ }
} else {
invalid = true;
}
} while (TryConsumeToken(tok::comma));
+ // Diagnose the mix between type args and protocols.
+ if (foundProtocolId && foundValidTypeId)
+ Actions.DiagnoseTypeArgsAndProtocols(foundProtocolId, foundProtocolSrcLoc,
+ foundValidTypeId,
+ foundValidTypeSrcLoc);
+
+ // Diagnose unknown arg types.
+ ParsedType T;
+ if (unknownTypeArgs.size())
+ for (unsigned i = 0, e = unknownTypeArgsLoc.size(); i < e; ++i)
+ Actions.DiagnoseUnknownTypeName(unknownTypeArgs[i], unknownTypeArgsLoc[i],
+ getCurScope(), nullptr, T);
+
// Parse the closing '>'.
SourceLocation rAngleLoc;
(void)ParseGreaterThanInTemplateList(rAngleLoc, consumeLastToken,
@@ -1976,7 +2004,6 @@ void Parser::ParseObjCClassInstanceVariables(Decl *interfaceDecl,
}
HelperActionsForIvarDeclarations(interfaceDecl, atLoc,
T, AllIvarDecls, false);
- return;
}
/// objc-protocol-declaration:
@@ -2005,14 +2032,14 @@ Parser::ParseObjCAtProtocolDeclaration(SourceLocation AtLoc,
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteObjCProtocolDecl(getCurScope());
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
MaybeSkipAttributes(tok::objc_protocol);
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_expected) << tok::identifier; // missing protocol name.
- return DeclGroupPtrTy();
+ return nullptr;
}
// Save the protocol name, then consume it.
IdentifierInfo *protocolName = Tok.getIdentifierInfo();
@@ -2036,7 +2063,7 @@ Parser::ParseObjCAtProtocolDeclaration(SourceLocation AtLoc,
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_expected) << tok::identifier;
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
ProtocolRefs.push_back(IdentifierLocPair(Tok.getIdentifierInfo(),
Tok.getLocation()));
@@ -2047,7 +2074,7 @@ Parser::ParseObjCAtProtocolDeclaration(SourceLocation AtLoc,
}
// Consume the ';'.
if (ExpectAndConsume(tok::semi, diag::err_expected_after, "@protocol"))
- return DeclGroupPtrTy();
+ return nullptr;
return Actions.ActOnForwardProtocolDeclaration(AtLoc, ProtocolRefs,
attrs.getList());
@@ -2062,7 +2089,7 @@ Parser::ParseObjCAtProtocolDeclaration(SourceLocation AtLoc,
ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, false, true,
LAngleLoc, EndProtoLoc,
/*consumeLastToken=*/true))
- return DeclGroupPtrTy();
+ return nullptr;
Decl *ProtoType =
Actions.ActOnStartProtocolInterface(AtLoc, protocolName, nameLoc,
@@ -2096,7 +2123,7 @@ Parser::ParseObjCAtImplementationDeclaration(SourceLocation AtLoc) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteObjCImplementationDecl(getCurScope());
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
MaybeSkipAttributes(tok::objc_implementation);
@@ -2104,7 +2131,7 @@ Parser::ParseObjCAtImplementationDeclaration(SourceLocation AtLoc) {
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_expected)
<< tok::identifier; // missing class or category name.
- return DeclGroupPtrTy();
+ return nullptr;
}
// We have a class or category name - consume it.
IdentifierInfo *nameId = Tok.getIdentifierInfo();
@@ -2137,7 +2164,7 @@ Parser::ParseObjCAtImplementationDeclaration(SourceLocation AtLoc) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteObjCImplementationCategory(getCurScope(), nameId, nameLoc);
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.is(tok::identifier)) {
@@ -2146,12 +2173,12 @@ Parser::ParseObjCAtImplementationDeclaration(SourceLocation AtLoc) {
} else {
Diag(Tok, diag::err_expected)
<< tok::identifier; // missing category name.
- return DeclGroupPtrTy();
+ return nullptr;
}
if (Tok.isNot(tok::r_paren)) {
Diag(Tok, diag::err_expected) << tok::r_paren;
SkipUntil(tok::r_paren); // don't stop at ';'
- return DeclGroupPtrTy();
+ return nullptr;
}
rparenLoc = ConsumeParen();
if (Tok.is(tok::less)) { // we have illegal '<' try to recover
@@ -2178,7 +2205,7 @@ Parser::ParseObjCAtImplementationDeclaration(SourceLocation AtLoc) {
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_expected)
<< tok::identifier; // missing super class name.
- return DeclGroupPtrTy();
+ return nullptr;
}
superClassId = Tok.getIdentifierInfo();
superClassLoc = ConsumeToken(); // Consume super class name
@@ -2232,7 +2259,7 @@ Parser::ParseObjCAtEndDeclaration(SourceRange atEnd) {
else
// missing @implementation
Diag(atEnd.getBegin(), diag::err_expected_objc_container);
- return DeclGroupPtrTy();
+ return nullptr;
}
Parser::ObjCImplParsingDataRAII::~ObjCImplParsingDataRAII() {
@@ -2345,8 +2372,10 @@ Decl *Parser::ParseObjCPropertySynthesize(SourceLocation atLoc) {
propertyIvar = Tok.getIdentifierInfo();
propertyIvarLoc = ConsumeToken(); // consume ivar-name
}
- Actions.ActOnPropertyImplDecl(getCurScope(), atLoc, propertyLoc, true,
- propertyId, propertyIvar, propertyIvarLoc);
+ Actions.ActOnPropertyImplDecl(
+ getCurScope(), atLoc, propertyLoc, true,
+ propertyId, propertyIvar, propertyIvarLoc,
+ ObjCPropertyQueryKind::OBJC_PR_query_unknown);
if (Tok.isNot(tok::comma))
break;
ConsumeToken(); // consume ','
@@ -2366,6 +2395,31 @@ Decl *Parser::ParseObjCPropertyDynamic(SourceLocation atLoc) {
assert(Tok.isObjCAtKeyword(tok::objc_dynamic) &&
"ParseObjCPropertyDynamic(): Expected '@dynamic'");
ConsumeToken(); // consume dynamic
+
+ bool isClassProperty = false;
+ if (Tok.is(tok::l_paren)) {
+ ConsumeParen();
+ const IdentifierInfo *II = Tok.getIdentifierInfo();
+
+ if (!II) {
+ Diag(Tok, diag::err_objc_expected_property_attr) << II;
+ SkipUntil(tok::r_paren, StopAtSemi);
+ } else {
+ SourceLocation AttrName = ConsumeToken(); // consume attribute name
+ if (II->isStr("class")) {
+ isClassProperty = true;
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_expected) << tok::r_paren;
+ SkipUntil(tok::r_paren, StopAtSemi);
+ } else
+ ConsumeParen();
+ } else {
+ Diag(AttrName, diag::err_objc_expected_property_attr) << II;
+ SkipUntil(tok::r_paren, StopAtSemi);
+ }
+ }
+ }
+
while (true) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteObjCPropertyDefinition(getCurScope());
@@ -2381,8 +2435,11 @@ Decl *Parser::ParseObjCPropertyDynamic(SourceLocation atLoc) {
IdentifierInfo *propertyId = Tok.getIdentifierInfo();
SourceLocation propertyLoc = ConsumeToken(); // consume property name
- Actions.ActOnPropertyImplDecl(getCurScope(), atLoc, propertyLoc, false,
- propertyId, nullptr, SourceLocation());
+ Actions.ActOnPropertyImplDecl(
+ getCurScope(), atLoc, propertyLoc, false,
+ propertyId, nullptr, SourceLocation(),
+ isClassProperty ? ObjCPropertyQueryKind::OBJC_PR_query_class :
+ ObjCPropertyQueryKind::OBJC_PR_query_unknown);
if (Tok.isNot(tok::comma))
break;
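On the implementation side, the new l_paren handling above lets '@dynamic (class)' pass OBJC_PR_query_class through to ActOnPropertyImplDecl, while a bare '@dynamic' still reports OBJC_PR_query_unknown. A sketch of the accepted spelling (Foo and name are placeholders):

    @implementation Foo
    @dynamic (class) name;
    @end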
@@ -2599,6 +2656,12 @@ Parser::ParseObjCAutoreleasePoolStmt(SourceLocation atLoc) {
/// StashAwayMethodOrFunctionBodyTokens - Consume the tokens and store them
/// for later parsing.
void Parser::StashAwayMethodOrFunctionBodyTokens(Decl *MDecl) {
+ if (SkipFunctionBodies && (!MDecl || Actions.canSkipFunctionBody(MDecl)) &&
+ trySkippingFunctionBody()) {
+ Actions.ActOnSkippedFunctionBody(MDecl);
+ return;
+ }
+
LexedMethod* LM = new LexedMethod(this, MDecl);
CurParsedObjCImpl->LateParsedObjCMethods.push_back(LM);
CachedTokens &Toks = LM->Toks;
@@ -2795,6 +2858,8 @@ ExprResult Parser::ParseObjCAtExpression(SourceLocation AtLoc) {
return ParsePostfixExpressionSuffix(ParseObjCProtocolExpression(AtLoc));
case tok::objc_selector:
return ParsePostfixExpressionSuffix(ParseObjCSelectorExpression(AtLoc));
+ case tok::objc_available:
+ return ParseAvailabilityCheckExpr(AtLoc);
default: {
const char *str = nullptr;
if (GetLookAheadToken(1).is(tok::l_brace)) {
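The new objc_available case routes '@available' to ParseAvailabilityCheckExpr. A sketch of the expression form involved (the platform names and versions are purely illustrative):

    if (@available(macOS 10.12, iOS 10, *)) {
      // guarded use of newer API
    }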
@@ -2923,7 +2988,6 @@ bool Parser::isStartOfObjCClassMessageMissingOpenBracket() {
InMessageExpression)
return false;
-
ParsedType Type;
if (Tok.is(tok::annot_typename))
@@ -2977,8 +3041,8 @@ ExprResult Parser::ParseObjCMessageExpression() {
// get in Objective-C.
if (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_super &&
NextToken().isNot(tok::period) && getCurScope()->isInObjcMethodScope())
- return ParseObjCMessageExpressionBody(LBracLoc, ConsumeToken(),
- ParsedType(), nullptr);
+ return ParseObjCMessageExpressionBody(LBracLoc, ConsumeToken(), nullptr,
+ nullptr);
// Parse the receiver, which is either a type or an expression.
bool IsExpr;
@@ -2989,9 +3053,8 @@ ExprResult Parser::ParseObjCMessageExpression() {
}
if (IsExpr)
- return ParseObjCMessageExpressionBody(LBracLoc, SourceLocation(),
- ParsedType(),
- static_cast<Expr*>(TypeOrExpr));
+ return ParseObjCMessageExpressionBody(LBracLoc, SourceLocation(), nullptr,
+ static_cast<Expr *>(TypeOrExpr));
return ParseObjCMessageExpressionBody(LBracLoc, SourceLocation(),
ParsedType::getFromOpaquePtr(TypeOrExpr),
@@ -3007,8 +3070,8 @@ ExprResult Parser::ParseObjCMessageExpression() {
NextToken().is(tok::period),
ReceiverType)) {
case Sema::ObjCSuperMessage:
- return ParseObjCMessageExpressionBody(LBracLoc, ConsumeToken(),
- ParsedType(), nullptr);
+ return ParseObjCMessageExpressionBody(LBracLoc, ConsumeToken(), nullptr,
+ nullptr);
case Sema::ObjCClassMessage:
if (!ReceiverType) {
@@ -3049,8 +3112,8 @@ ExprResult Parser::ParseObjCMessageExpression() {
return Res;
}
- return ParseObjCMessageExpressionBody(LBracLoc, SourceLocation(),
- ParsedType(), Res.get());
+ return ParseObjCMessageExpressionBody(LBracLoc, SourceLocation(), nullptr,
+ Res.get());
}
/// \brief Parse the remainder of an Objective-C message following the
@@ -3553,7 +3616,7 @@ ExprResult Parser::ParseObjCSelectorExpression(SourceLocation AtLoc) {
T.getOpenLocation(),
T.getCloseLocation(),
!HasOptionalParen);
- }
+}
void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
// MCDecl might be null due to error in method or c-function prototype, etc.
@@ -3571,8 +3634,8 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
// Append the current token at the end of the new token stream so that it
// doesn't get lost.
LM.Toks.push_back(Tok);
- PP.EnterTokenStream(LM.Toks.data(), LM.Toks.size(), true, false);
-
+ PP.EnterTokenStream(LM.Toks, true);
+
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
@@ -3595,6 +3658,8 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
else {
if (Tok.is(tok::colon))
ParseConstructorInitializer(MCDecl);
+ else
+ Actions.ActOnDefaultCtorInitializers(MCDecl);
ParseFunctionStatementBody(MCDecl, BodyScope);
}
@@ -3609,6 +3674,4 @@ void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
while (Tok.getLocation() != OrigLoc && Tok.isNot(tok::eof))
ConsumeAnyToken();
}
-
- return;
}
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp
index a08db5490fa9..df7d9bc0d8c8 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp
@@ -26,78 +26,537 @@ using namespace clang;
// OpenMP declarative directives.
//===----------------------------------------------------------------------===//
+namespace {
+enum OpenMPDirectiveKindEx {
+ OMPD_cancellation = OMPD_unknown + 1,
+ OMPD_data,
+ OMPD_declare,
+ OMPD_end,
+ OMPD_end_declare,
+ OMPD_enter,
+ OMPD_exit,
+ OMPD_point,
+ OMPD_reduction,
+ OMPD_target_enter,
+ OMPD_target_exit,
+ OMPD_update,
+ OMPD_distribute_parallel
+};
+
+class ThreadprivateListParserHelper final {
+ SmallVector<Expr *, 4> Identifiers;
+ Parser *P;
+
+public:
+ ThreadprivateListParserHelper(Parser *P) : P(P) {}
+ void operator()(CXXScopeSpec &SS, DeclarationNameInfo NameInfo) {
+ ExprResult Res =
+ P->getActions().ActOnOpenMPIdExpression(P->getCurScope(), SS, NameInfo);
+ if (Res.isUsable())
+ Identifiers.push_back(Res.get());
+ }
+ llvm::ArrayRef<Expr *> getIdentifiers() const { return Identifiers; }
+};
+} // namespace
+
+// Map a token string to an extended OMP token kind, i.e. a value drawn from
+// OpenMPDirectiveKind extended with OpenMPDirectiveKindEx.
+static unsigned getOpenMPDirectiveKindEx(StringRef S) {
+ auto DKind = getOpenMPDirectiveKind(S);
+ if (DKind != OMPD_unknown)
+ return DKind;
+
+ return llvm::StringSwitch<unsigned>(S)
+ .Case("cancellation", OMPD_cancellation)
+ .Case("data", OMPD_data)
+ .Case("declare", OMPD_declare)
+ .Case("end", OMPD_end)
+ .Case("enter", OMPD_enter)
+ .Case("exit", OMPD_exit)
+ .Case("point", OMPD_point)
+ .Case("reduction", OMPD_reduction)
+ .Case("update", OMPD_update)
+ .Default(OMPD_unknown);
+}
+
static OpenMPDirectiveKind ParseOpenMPDirectiveKind(Parser &P) {
// Array of foldings: F[i][0] F[i][1] ===> F[i][2].
// E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd
// TODO: add other combined directives in topological order.
- const OpenMPDirectiveKind F[][3] = {
- {OMPD_unknown /*cancellation*/, OMPD_unknown /*point*/,
- OMPD_cancellation_point},
- {OMPD_target, OMPD_unknown /*data*/, OMPD_target_data},
- {OMPD_for, OMPD_simd, OMPD_for_simd},
- {OMPD_parallel, OMPD_for, OMPD_parallel_for},
- {OMPD_parallel_for, OMPD_simd, OMPD_parallel_for_simd},
- {OMPD_parallel, OMPD_sections, OMPD_parallel_sections},
- {OMPD_taskloop, OMPD_simd, OMPD_taskloop_simd}};
+ static const unsigned F[][3] = {
+ { OMPD_cancellation, OMPD_point, OMPD_cancellation_point },
+ { OMPD_declare, OMPD_reduction, OMPD_declare_reduction },
+ { OMPD_declare, OMPD_simd, OMPD_declare_simd },
+ { OMPD_declare, OMPD_target, OMPD_declare_target },
+ { OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel },
+ { OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for },
+ { OMPD_distribute_parallel_for, OMPD_simd,
+ OMPD_distribute_parallel_for_simd },
+ { OMPD_distribute, OMPD_simd, OMPD_distribute_simd },
+ { OMPD_end, OMPD_declare, OMPD_end_declare },
+ { OMPD_end_declare, OMPD_target, OMPD_end_declare_target },
+ { OMPD_target, OMPD_data, OMPD_target_data },
+ { OMPD_target, OMPD_enter, OMPD_target_enter },
+ { OMPD_target, OMPD_exit, OMPD_target_exit },
+ { OMPD_target, OMPD_update, OMPD_target_update },
+ { OMPD_target_enter, OMPD_data, OMPD_target_enter_data },
+ { OMPD_target_exit, OMPD_data, OMPD_target_exit_data },
+ { OMPD_for, OMPD_simd, OMPD_for_simd },
+ { OMPD_parallel, OMPD_for, OMPD_parallel_for },
+ { OMPD_parallel_for, OMPD_simd, OMPD_parallel_for_simd },
+ { OMPD_parallel, OMPD_sections, OMPD_parallel_sections },
+ { OMPD_taskloop, OMPD_simd, OMPD_taskloop_simd },
+ { OMPD_target, OMPD_parallel, OMPD_target_parallel },
+ { OMPD_target_parallel, OMPD_for, OMPD_target_parallel_for },
+ { OMPD_target_parallel_for, OMPD_simd, OMPD_target_parallel_for_simd }
+ };
+ enum { CancellationPoint = 0, DeclareReduction = 1, TargetData = 2 };
auto Tok = P.getCurToken();
- auto DKind =
+ unsigned DKind =
Tok.isAnnotation()
- ? OMPD_unknown
- : getOpenMPDirectiveKind(P.getPreprocessor().getSpelling(Tok));
+ ? static_cast<unsigned>(OMPD_unknown)
+ : getOpenMPDirectiveKindEx(P.getPreprocessor().getSpelling(Tok));
+ if (DKind == OMPD_unknown)
+ return OMPD_unknown;
- bool TokenMatched = false;
for (unsigned i = 0; i < llvm::array_lengthof(F); ++i) {
- if (!Tok.isAnnotation() && DKind == OMPD_unknown) {
- TokenMatched =
- (i == 0) &&
- !P.getPreprocessor().getSpelling(Tok).compare("cancellation");
+ if (DKind != F[i][0])
+ continue;
+
+ Tok = P.getPreprocessor().LookAhead(0);
+ unsigned SDKind =
+ Tok.isAnnotation()
+ ? static_cast<unsigned>(OMPD_unknown)
+ : getOpenMPDirectiveKindEx(P.getPreprocessor().getSpelling(Tok));
+ if (SDKind == OMPD_unknown)
+ continue;
+
+ if (SDKind == F[i][1]) {
+ P.ConsumeToken();
+ DKind = F[i][2];
+ }
+ }
+ return DKind < OMPD_unknown ? static_cast<OpenMPDirectiveKind>(DKind)
+ : OMPD_unknown;
+}
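The table F folds directive names one adjacent token pair at a time, so multi-word directives are recognized left to right across several iterations. Two illustrative pragmas (a is a placeholder variable):

    #pragma omp target enter data map(to: a)
    // target + enter -> OMPD_target_enter, then
    // target_enter + data -> OMPD_target_enter_data

    #pragma omp distribute parallel for simd
    // distribute + parallel -> OMPD_distribute_parallel, + for ->
    // OMPD_distribute_parallel_for, + simd -> OMPD_distribute_parallel_for_simd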
+
+static DeclarationName parseOpenMPReductionId(Parser &P) {
+ Token Tok = P.getCurToken();
+ Sema &Actions = P.getActions();
+ OverloadedOperatorKind OOK = OO_None;
+ // Allow to use 'operator' keyword for C++ operators
+ bool WithOperator = false;
+ if (Tok.is(tok::kw_operator)) {
+ P.ConsumeToken();
+ Tok = P.getCurToken();
+ WithOperator = true;
+ }
+ switch (Tok.getKind()) {
+ case tok::plus: // '+'
+ OOK = OO_Plus;
+ break;
+ case tok::minus: // '-'
+ OOK = OO_Minus;
+ break;
+ case tok::star: // '*'
+ OOK = OO_Star;
+ break;
+ case tok::amp: // '&'
+ OOK = OO_Amp;
+ break;
+ case tok::pipe: // '|'
+ OOK = OO_Pipe;
+ break;
+ case tok::caret: // '^'
+ OOK = OO_Caret;
+ break;
+ case tok::ampamp: // '&&'
+ OOK = OO_AmpAmp;
+ break;
+ case tok::pipepipe: // '||'
+ OOK = OO_PipePipe;
+ break;
+ case tok::identifier: // identifier
+ if (!WithOperator)
+ break;
+ default:
+ P.Diag(Tok.getLocation(), diag::err_omp_expected_reduction_identifier);
+ P.SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
+ Parser::StopBeforeMatch);
+ return DeclarationName();
+ }
+ P.ConsumeToken();
+ auto &DeclNames = Actions.getASTContext().DeclarationNames;
+ return OOK == OO_None ? DeclNames.getIdentifier(Tok.getIdentifierInfo())
+ : DeclNames.getCXXOperatorName(OOK);
+}
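The switch above admits three shapes of reduction identifier; for illustration (S, merge and combine are placeholder names; omp_out/omp_in are the standard OpenMP combiner variables):

    #pragma omp declare reduction(+ : int : omp_out += omp_in)
    #pragma omp declare reduction(operator+ : S : omp_out = omp_out + omp_in)
    #pragma omp declare reduction(merge : S : omp_out = combine(omp_out, omp_in))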
+
+/// \brief Parse 'omp declare reduction' construct.
+///
+/// declare-reduction-directive:
+/// annot_pragma_openmp 'declare' 'reduction'
+/// '(' <reduction_id> ':' <type> {',' <type>} ':' <expression> ')'
+/// ['initializer' '(' ('omp_priv' '=' <expression>)|<function_call> ')']
+/// annot_pragma_openmp_end
+/// <reduction_id> is either a base language identifier or one of the following
+/// operators: '+', '-', '*', '&', '|', '^', '&&' and '||'.
+///
+Parser::DeclGroupPtrTy
+Parser::ParseOpenMPDeclareReductionDirective(AccessSpecifier AS) {
+ // Parse '('.
+ BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+ if (T.expectAndConsume(diag::err_expected_lparen_after,
+ getOpenMPDirectiveName(OMPD_declare_reduction))) {
+ SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
+ return DeclGroupPtrTy();
+ }
+
+ DeclarationName Name = parseOpenMPReductionId(*this);
+ if (Name.isEmpty() && Tok.is(tok::annot_pragma_openmp_end))
+ return DeclGroupPtrTy();
+
+ // Consume ':'.
+ bool IsCorrect = !ExpectAndConsume(tok::colon);
+
+ if (!IsCorrect && Tok.is(tok::annot_pragma_openmp_end))
+ return DeclGroupPtrTy();
+
+ IsCorrect = IsCorrect && !Name.isEmpty();
+
+ if (Tok.is(tok::colon) || Tok.is(tok::annot_pragma_openmp_end)) {
+ Diag(Tok.getLocation(), diag::err_expected_type);
+ IsCorrect = false;
+ }
+
+ if (!IsCorrect && Tok.is(tok::annot_pragma_openmp_end))
+ return DeclGroupPtrTy();
+
+ SmallVector<std::pair<QualType, SourceLocation>, 8> ReductionTypes;
+ // Parse list of types until ':' token.
+ do {
+ ColonProtectionRAIIObject ColonRAII(*this);
+ SourceRange Range;
+ TypeResult TR = ParseTypeName(&Range, Declarator::PrototypeContext, AS);
+ if (TR.isUsable()) {
+ auto ReductionType =
+ Actions.ActOnOpenMPDeclareReductionType(Range.getBegin(), TR);
+ if (!ReductionType.isNull()) {
+ ReductionTypes.push_back(
+ std::make_pair(ReductionType, Range.getBegin()));
+ }
} else {
- TokenMatched = DKind == F[i][0] && DKind != OMPD_unknown;
+ SkipUntil(tok::comma, tok::colon, tok::annot_pragma_openmp_end,
+ StopBeforeMatch);
}
- if (TokenMatched) {
- Tok = P.getPreprocessor().LookAhead(0);
- auto TokenIsAnnotation = Tok.isAnnotation();
- auto SDKind =
- TokenIsAnnotation
- ? OMPD_unknown
- : getOpenMPDirectiveKind(P.getPreprocessor().getSpelling(Tok));
-
- if (!TokenIsAnnotation && SDKind == OMPD_unknown) {
- TokenMatched =
- ((i == 0) &&
- !P.getPreprocessor().getSpelling(Tok).compare("point")) ||
- ((i == 1) && !P.getPreprocessor().getSpelling(Tok).compare("data"));
- } else {
- TokenMatched = SDKind == F[i][1] && SDKind != OMPD_unknown;
+ if (Tok.is(tok::colon) || Tok.is(tok::annot_pragma_openmp_end))
+ break;
+
+ // Consume ','.
+ if (ExpectAndConsume(tok::comma)) {
+ IsCorrect = false;
+ if (Tok.is(tok::annot_pragma_openmp_end)) {
+ Diag(Tok.getLocation(), diag::err_expected_type);
+ return DeclGroupPtrTy();
}
+ }
+ } while (Tok.isNot(tok::annot_pragma_openmp_end));
- if (TokenMatched) {
- P.ConsumeToken();
- DKind = F[i][2];
+ if (ReductionTypes.empty()) {
+ SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
+ return DeclGroupPtrTy();
+ }
+
+ if (!IsCorrect && Tok.is(tok::annot_pragma_openmp_end))
+ return DeclGroupPtrTy();
+
+ // Consume ':'.
+ if (ExpectAndConsume(tok::colon))
+ IsCorrect = false;
+
+ if (Tok.is(tok::annot_pragma_openmp_end)) {
+ Diag(Tok.getLocation(), diag::err_expected_expression);
+ return DeclGroupPtrTy();
+ }
+
+ DeclGroupPtrTy DRD = Actions.ActOnOpenMPDeclareReductionDirectiveStart(
+ getCurScope(), Actions.getCurLexicalContext(), Name, ReductionTypes, AS);
+
+  // For each valid reduction type, parse the <combiner> expression and then,
+  // if present, the initializer.
+ unsigned I = 0, E = ReductionTypes.size();
+ for (auto *D : DRD.get()) {
+ TentativeParsingAction TPA(*this);
+ ParseScope OMPDRScope(this, Scope::FnScope | Scope::DeclScope |
+ Scope::OpenMPDirectiveScope);
+ // Parse <combiner> expression.
+ Actions.ActOnOpenMPDeclareReductionCombinerStart(getCurScope(), D);
+ ExprResult CombinerResult =
+ Actions.ActOnFinishFullExpr(ParseAssignmentExpression().get(),
+ D->getLocation(), /*DiscardedValue=*/true);
+ Actions.ActOnOpenMPDeclareReductionCombinerEnd(D, CombinerResult.get());
+
+ if (CombinerResult.isInvalid() && Tok.isNot(tok::r_paren) &&
+ Tok.isNot(tok::annot_pragma_openmp_end)) {
+ TPA.Commit();
+ IsCorrect = false;
+ break;
+ }
+ IsCorrect = !T.consumeClose() && IsCorrect && CombinerResult.isUsable();
+ ExprResult InitializerResult;
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ // Parse <initializer> expression.
+ if (Tok.is(tok::identifier) &&
+ Tok.getIdentifierInfo()->isStr("initializer"))
+ ConsumeToken();
+ else {
+ Diag(Tok.getLocation(), diag::err_expected) << "'initializer'";
+ TPA.Commit();
+ IsCorrect = false;
+ break;
+ }
+ // Parse '('.
+ BalancedDelimiterTracker T(*this, tok::l_paren,
+ tok::annot_pragma_openmp_end);
+ IsCorrect =
+ !T.expectAndConsume(diag::err_expected_lparen_after, "initializer") &&
+ IsCorrect;
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ ParseScope OMPDRScope(this, Scope::FnScope | Scope::DeclScope |
+ Scope::OpenMPDirectiveScope);
+ // Parse expression.
+ Actions.ActOnOpenMPDeclareReductionInitializerStart(getCurScope(), D);
+ InitializerResult = Actions.ActOnFinishFullExpr(
+ ParseAssignmentExpression().get(), D->getLocation(),
+ /*DiscardedValue=*/true);
+ Actions.ActOnOpenMPDeclareReductionInitializerEnd(
+ D, InitializerResult.get());
+ if (InitializerResult.isInvalid() && Tok.isNot(tok::r_paren) &&
+ Tok.isNot(tok::annot_pragma_openmp_end)) {
+ TPA.Commit();
+ IsCorrect = false;
+ break;
+ }
+ IsCorrect =
+ !T.consumeClose() && IsCorrect && !InitializerResult.isInvalid();
}
}
+
+ ++I;
+    // Revert the tentative parse unless this is the last type; for the last
+    // type, commit it, since parsing is complete.
+ if (I != E)
+ TPA.Revert();
+ else
+ TPA.Commit();
}
- return DKind;
+ return Actions.ActOnOpenMPDeclareReductionDirectiveEnd(getCurScope(), DRD,
+ IsCorrect);
+}
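Putting the grammar together, a directive the function above accepts might read as follows (minadd is a placeholder; omp_priv, omp_out and omp_in are the identifiers named in the grammar comment):

    #pragma omp declare reduction(minadd : int, float : omp_out += omp_in) \
        initializer(omp_priv = 0)

Note the tentative-parse loop: the combiner and initializer are re-parsed once per reduction type, committing only on the last one.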
+
+namespace {
+/// RAII object that recreates the function context for correct parsing of the
+/// clauses of a 'declare simd' construct.
+/// OpenMP, 2.8.2 declare simd Construct
+/// The expressions appearing in the clauses of this directive are evaluated in
+/// the scope of the arguments of the function declaration or definition.
+class FNContextRAII final {
+ Parser &P;
+ Sema::CXXThisScopeRAII *ThisScope;
+ Parser::ParseScope *TempScope;
+ Parser::ParseScope *FnScope;
+ bool HasTemplateScope = false;
+ bool HasFunScope = false;
+ FNContextRAII() = delete;
+ FNContextRAII(const FNContextRAII &) = delete;
+ FNContextRAII &operator=(const FNContextRAII &) = delete;
+
+public:
+ FNContextRAII(Parser &P, Parser::DeclGroupPtrTy Ptr) : P(P) {
+ Decl *D = *Ptr.get().begin();
+ NamedDecl *ND = dyn_cast<NamedDecl>(D);
+ RecordDecl *RD = dyn_cast_or_null<RecordDecl>(D->getDeclContext());
+ Sema &Actions = P.getActions();
+
+ // Allow 'this' within late-parsed attributes.
+ ThisScope = new Sema::CXXThisScopeRAII(Actions, RD, /*TypeQuals=*/0,
+ ND && ND->isCXXInstanceMember());
+
+ // If the Decl is templatized, add template parameters to scope.
+ HasTemplateScope = D->isTemplateDecl();
+ TempScope =
+ new Parser::ParseScope(&P, Scope::TemplateParamScope, HasTemplateScope);
+ if (HasTemplateScope)
+ Actions.ActOnReenterTemplateScope(Actions.getCurScope(), D);
+
+    // If the Decl is a function (or function template), add its parameters
+    // to the scope.
+ HasFunScope = D->isFunctionOrFunctionTemplate();
+ FnScope = new Parser::ParseScope(&P, Scope::FnScope | Scope::DeclScope,
+ HasFunScope);
+ if (HasFunScope)
+ Actions.ActOnReenterFunctionContext(Actions.getCurScope(), D);
+ }
+ ~FNContextRAII() {
+ if (HasFunScope) {
+ P.getActions().ActOnExitFunctionContext();
+ FnScope->Exit(); // Pop scope, and remove Decls from IdResolver
+ }
+ if (HasTemplateScope)
+ TempScope->Exit();
+ delete FnScope;
+ delete TempScope;
+ delete ThisScope;
+ }
+};
+} // namespace
+
+/// Parses clauses for 'declare simd' directive.
+/// clause:
+/// 'inbranch' | 'notinbranch'
+/// 'simdlen' '(' <expr> ')'
+/// { 'uniform' '(' <argument_list> ')' }
+/// { 'aligned '(' <argument_list> [ ':' <alignment> ] ')' }
+/// { 'linear '(' <argument_list> [ ':' <step> ] ')' }
+static bool parseDeclareSimdClauses(
+ Parser &P, OMPDeclareSimdDeclAttr::BranchStateTy &BS, ExprResult &SimdLen,
+ SmallVectorImpl<Expr *> &Uniforms, SmallVectorImpl<Expr *> &Aligneds,
+ SmallVectorImpl<Expr *> &Alignments, SmallVectorImpl<Expr *> &Linears,
+ SmallVectorImpl<unsigned> &LinModifiers, SmallVectorImpl<Expr *> &Steps) {
+ SourceRange BSRange;
+ const Token &Tok = P.getCurToken();
+ bool IsError = false;
+ while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ if (Tok.isNot(tok::identifier))
+ break;
+ OMPDeclareSimdDeclAttr::BranchStateTy Out;
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ StringRef ClauseName = II->getName();
+    // Parse 'inbranch|notinbranch' clauses.
+ if (OMPDeclareSimdDeclAttr::ConvertStrToBranchStateTy(ClauseName, Out)) {
+ if (BS != OMPDeclareSimdDeclAttr::BS_Undefined && BS != Out) {
+ P.Diag(Tok, diag::err_omp_declare_simd_inbranch_notinbranch)
+ << ClauseName
+ << OMPDeclareSimdDeclAttr::ConvertBranchStateTyToStr(BS) << BSRange;
+ IsError = true;
+ }
+ BS = Out;
+ BSRange = SourceRange(Tok.getLocation(), Tok.getEndLoc());
+ P.ConsumeToken();
+ } else if (ClauseName.equals("simdlen")) {
+ if (SimdLen.isUsable()) {
+ P.Diag(Tok, diag::err_omp_more_one_clause)
+ << getOpenMPDirectiveName(OMPD_declare_simd) << ClauseName << 0;
+ IsError = true;
+ }
+ P.ConsumeToken();
+ SourceLocation RLoc;
+ SimdLen = P.ParseOpenMPParensExpr(ClauseName, RLoc);
+ if (SimdLen.isInvalid())
+ IsError = true;
+ } else {
+ OpenMPClauseKind CKind = getOpenMPClauseKind(ClauseName);
+ if (CKind == OMPC_uniform || CKind == OMPC_aligned ||
+ CKind == OMPC_linear) {
+ Parser::OpenMPVarListDataTy Data;
+ auto *Vars = &Uniforms;
+ if (CKind == OMPC_aligned)
+ Vars = &Aligneds;
+ else if (CKind == OMPC_linear)
+ Vars = &Linears;
+
+ P.ConsumeToken();
+ if (P.ParseOpenMPVarList(OMPD_declare_simd,
+ getOpenMPClauseKind(ClauseName), *Vars, Data))
+ IsError = true;
+ if (CKind == OMPC_aligned)
+ Alignments.append(Aligneds.size() - Alignments.size(), Data.TailExpr);
+ else if (CKind == OMPC_linear) {
+ if (P.getActions().CheckOpenMPLinearModifier(Data.LinKind,
+ Data.DepLinMapLoc))
+ Data.LinKind = OMPC_LINEAR_val;
+ LinModifiers.append(Linears.size() - LinModifiers.size(),
+ Data.LinKind);
+ Steps.append(Linears.size() - Steps.size(), Data.TailExpr);
+ }
+ } else
+ // TODO: add parsing of other clauses.
+ break;
+ }
+ // Skip ',' if any.
+ if (Tok.is(tok::comma))
+ P.ConsumeToken();
+ }
+ return IsError;
+}
+
+/// Parse clauses for '#pragma omp declare simd'.
+Parser::DeclGroupPtrTy
+Parser::ParseOMPDeclareSimdClauses(Parser::DeclGroupPtrTy Ptr,
+ CachedTokens &Toks, SourceLocation Loc) {
+ PP.EnterToken(Tok);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
+ // Consume the previously pushed token.
+ ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
+
+ FNContextRAII FnContext(*this, Ptr);
+ OMPDeclareSimdDeclAttr::BranchStateTy BS =
+ OMPDeclareSimdDeclAttr::BS_Undefined;
+ ExprResult Simdlen;
+ SmallVector<Expr *, 4> Uniforms;
+ SmallVector<Expr *, 4> Aligneds;
+ SmallVector<Expr *, 4> Alignments;
+ SmallVector<Expr *, 4> Linears;
+ SmallVector<unsigned, 4> LinModifiers;
+ SmallVector<Expr *, 4> Steps;
+ bool IsError =
+ parseDeclareSimdClauses(*this, BS, Simdlen, Uniforms, Aligneds,
+ Alignments, Linears, LinModifiers, Steps);
+ // Need to check for extra tokens.
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ Diag(Tok, diag::warn_omp_extra_tokens_at_eol)
+ << getOpenMPDirectiveName(OMPD_declare_simd);
+ while (Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ }
+ // Skip the last annot_pragma_openmp_end.
+ SourceLocation EndLoc = ConsumeToken();
+ if (!IsError) {
+ return Actions.ActOnOpenMPDeclareSimdDirective(
+ Ptr, BS, Simdlen.get(), Uniforms, Aligneds, Alignments, Linears,
+ LinModifiers, Steps, SourceRange(Loc, EndLoc));
+ }
+ return Ptr;
}
/// \brief Parsing of declarative OpenMP directives.
///
/// threadprivate-directive:
/// annot_pragma_openmp 'threadprivate' simple-variable-list
+/// annot_pragma_openmp_end
+///
+/// declare-reduction-directive:
+/// annot_pragma_openmp 'declare' 'reduction' [...]
+/// annot_pragma_openmp_end
+///
+/// declare-simd-directive:
+/// annot_pragma_openmp 'declare simd' {<clause> [,]}
+/// annot_pragma_openmp_end
+/// <function declaration/definition>
///
-Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
+Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
+ AccessSpecifier &AS, ParsedAttributesWithRange &Attrs,
+ DeclSpec::TST TagType, Decl *Tag) {
assert(Tok.is(tok::annot_pragma_openmp) && "Not an OpenMP directive!");
ParenBraceBracketBalancer BalancerRAIIObj(*this);
SourceLocation Loc = ConsumeToken();
- SmallVector<Expr *, 5> Identifiers;
auto DKind = ParseOpenMPDirectiveKind(*this);
switch (DKind) {
- case OMPD_threadprivate:
+ case OMPD_threadprivate: {
ConsumeToken();
- if (!ParseOpenMPSimpleVarList(OMPD_threadprivate, Identifiers, true)) {
+ ThreadprivateListParserHelper Helper(this);
+ if (!ParseOpenMPSimpleVarList(OMPD_threadprivate, Helper, true)) {
// The last seen token is annot_pragma_openmp_end - need to check for
// extra tokens.
if (Tok.isNot(tok::annot_pragma_openmp_end)) {
@@ -107,9 +566,140 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
}
// Skip the last annot_pragma_openmp_end.
ConsumeToken();
- return Actions.ActOnOpenMPThreadprivateDirective(Loc, Identifiers);
+ return Actions.ActOnOpenMPThreadprivateDirective(Loc,
+ Helper.getIdentifiers());
+ }
+ break;
+ }
+ case OMPD_declare_reduction:
+ ConsumeToken();
+ if (auto Res = ParseOpenMPDeclareReductionDirective(AS)) {
+ // The last seen token is annot_pragma_openmp_end - need to check for
+ // extra tokens.
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ Diag(Tok, diag::warn_omp_extra_tokens_at_eol)
+ << getOpenMPDirectiveName(OMPD_declare_reduction);
+ while (Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ }
+ // Skip the last annot_pragma_openmp_end.
+ ConsumeToken();
+ return Res;
}
break;
+ case OMPD_declare_simd: {
+ // The syntax is:
+ // { #pragma omp declare simd }
+ // <function-declaration-or-definition>
+ //
+ ConsumeToken();
+ CachedTokens Toks;
+    while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ Toks.push_back(Tok);
+ ConsumeAnyToken();
+ }
+ Toks.push_back(Tok);
+ ConsumeAnyToken();
+
+ DeclGroupPtrTy Ptr;
+ if (Tok.is(tok::annot_pragma_openmp))
+ Ptr = ParseOpenMPDeclarativeDirectiveWithExtDecl(AS, Attrs, TagType, Tag);
+ else if (Tok.isNot(tok::r_brace) && !isEofOrEom()) {
+ // Here we expect to see some function declaration.
+ if (AS == AS_none) {
+ assert(TagType == DeclSpec::TST_unspecified);
+ MaybeParseCXX11Attributes(Attrs);
+ MaybeParseMicrosoftAttributes(Attrs);
+ ParsingDeclSpec PDS(*this);
+ Ptr = ParseExternalDeclaration(Attrs, &PDS);
+ } else {
+ Ptr =
+ ParseCXXClassMemberDeclarationWithPragmas(AS, Attrs, TagType, Tag);
+ }
+ }
+ if (!Ptr) {
+ Diag(Loc, diag::err_omp_decl_in_declare_simd);
+ return DeclGroupPtrTy();
+ }
+ return ParseOMPDeclareSimdClauses(Ptr, Toks, Loc);
+ }
+ case OMPD_declare_target: {
+ SourceLocation DTLoc = ConsumeAnyToken();
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ // OpenMP 4.5 syntax with list of entities.
+ llvm::SmallSetVector<const NamedDecl*, 16> SameDirectiveDecls;
+ while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ OMPDeclareTargetDeclAttr::MapTypeTy MT =
+ OMPDeclareTargetDeclAttr::MT_To;
+ if (Tok.is(tok::identifier)) {
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+ StringRef ClauseName = II->getName();
+ // Parse 'to|link' clauses.
+ if (!OMPDeclareTargetDeclAttr::ConvertStrToMapTypeTy(ClauseName,
+ MT)) {
+ Diag(Tok, diag::err_omp_declare_target_unexpected_clause)
+ << ClauseName;
+ break;
+ }
+ ConsumeToken();
+ }
+ auto Callback = [this, MT, &SameDirectiveDecls](
+ CXXScopeSpec &SS, DeclarationNameInfo NameInfo) {
+ Actions.ActOnOpenMPDeclareTargetName(getCurScope(), SS, NameInfo, MT,
+ SameDirectiveDecls);
+ };
+ if (ParseOpenMPSimpleVarList(OMPD_declare_target, Callback, true))
+ break;
+
+ // Consume optional ','.
+ if (Tok.is(tok::comma))
+ ConsumeToken();
+ }
+ SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
+ ConsumeAnyToken();
+ return DeclGroupPtrTy();
+ }
+
+ // Skip the last annot_pragma_openmp_end.
+ ConsumeAnyToken();
+
+ if (!Actions.ActOnStartOpenMPDeclareTargetDirective(DTLoc))
+ return DeclGroupPtrTy();
+
+ DKind = ParseOpenMPDirectiveKind(*this);
+ while (DKind != OMPD_end_declare_target && DKind != OMPD_declare_target &&
+ Tok.isNot(tok::eof) && Tok.isNot(tok::r_brace)) {
+ ParsedAttributesWithRange attrs(AttrFactory);
+ MaybeParseCXX11Attributes(attrs);
+ MaybeParseMicrosoftAttributes(attrs);
+ ParseExternalDeclaration(attrs);
+ if (Tok.isAnnotation() && Tok.is(tok::annot_pragma_openmp)) {
+ TentativeParsingAction TPA(*this);
+ ConsumeToken();
+ DKind = ParseOpenMPDirectiveKind(*this);
+ if (DKind != OMPD_end_declare_target)
+ TPA.Revert();
+ else
+ TPA.Commit();
+ }
+ }
+
+ if (DKind == OMPD_end_declare_target) {
+ ConsumeAnyToken();
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ Diag(Tok, diag::warn_omp_extra_tokens_at_eol)
+ << getOpenMPDirectiveName(OMPD_end_declare_target);
+ SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
+ }
+ // Skip the last annot_pragma_openmp_end.
+ ConsumeAnyToken();
+ } else {
+ Diag(Tok, diag::err_expected_end_declare_target);
+ Diag(DTLoc, diag::note_matching) << "'#pragma omp declare target'";
+ }
+ Actions.ActOnFinishOpenMPDeclareTargetDirective();
+ return DeclGroupPtrTy();
+ }
case OMPD_unknown:
Diag(Tok, diag::err_omp_unknown_directive);
break;
@@ -138,15 +728,27 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
case OMPD_cancellation_point:
case OMPD_cancel:
case OMPD_target_data:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_distribute:
+ case OMPD_end_declare_target:
+ case OMPD_target_update:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd:
Diag(Tok, diag::err_omp_unexpected_directive)
<< getOpenMPDirectiveName(DKind);
break;
}
- SkipUntil(tok::annot_pragma_openmp_end);
- return DeclGroupPtrTy();
+ while (Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ ConsumeAnyToken();
+ return nullptr;
}
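The OMPD_declare_target case above accepts both syntaxes (fn, gvar and device_fn are placeholders):

    #pragma omp declare target to(fn) link(gvar)   // OpenMP 4.5 list form

    #pragma omp declare target                     // delimited region form
    int device_fn(int x);
    #pragma omp end declare target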
/// \brief Parsing of declarative or executable OpenMP directives.
@@ -155,21 +757,30 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
/// annot_pragma_openmp 'threadprivate' simple-variable-list
/// annot_pragma_openmp_end
///
+/// declare-reduction-directive:
+/// annot_pragma_openmp 'declare' 'reduction' '(' <reduction_id> ':'
+/// <type> {',' <type>} ':' <expression> ')' ['initializer' '('
+/// ('omp_priv' '=' <expression>|<function_call>) ')']
+/// annot_pragma_openmp_end
+///
/// executable-directive:
/// annot_pragma_openmp 'parallel' | 'simd' | 'for' | 'sections' |
/// 'section' | 'single' | 'master' | 'critical' [ '(' <name> ')' ] |
/// 'parallel for' | 'parallel sections' | 'task' | 'taskyield' |
/// 'barrier' | 'taskwait' | 'flush' | 'ordered' | 'atomic' |
/// 'for simd' | 'parallel for simd' | 'target' | 'target data' |
-/// 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' {clause} |
-/// 'distribute'
+/// 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' |
+/// 'distribute' | 'target enter data' | 'target exit data' |
+/// 'target parallel' | 'target parallel for' |
+/// 'target update' | 'distribute parallel for' |
+///         'distribute parallel for simd' | 'distribute simd' |
+/// 'target parallel for simd' {clause}
/// annot_pragma_openmp_end
///
StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
AllowedContsructsKind Allowed) {
assert(Tok.is(tok::annot_pragma_openmp) && "Not an OpenMP directive!");
ParenBraceBracketBalancer BalancerRAIIObj(*this);
- SmallVector<Expr *, 5> Identifiers;
SmallVector<OMPClause *, 5> Clauses;
SmallVector<llvm::PointerIntPair<OMPClause *, 1, bool>, OMPC_unknown + 1>
FirstClauses(OMPC_unknown + 1);
@@ -185,13 +796,14 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
bool FlushHasClause = false;
switch (DKind) {
- case OMPD_threadprivate:
+ case OMPD_threadprivate: {
if (Allowed != ACK_Any) {
Diag(Tok, diag::err_omp_immediate_directive)
<< getOpenMPDirectiveName(DKind) << 0;
}
ConsumeToken();
- if (!ParseOpenMPSimpleVarList(OMPD_threadprivate, Identifiers, false)) {
+ ThreadprivateListParserHelper Helper(this);
+ if (!ParseOpenMPSimpleVarList(OMPD_threadprivate, Helper, false)) {
// The last seen token is annot_pragma_openmp_end - need to check for
// extra tokens.
if (Tok.isNot(tok::annot_pragma_openmp_end)) {
@@ -199,12 +811,29 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
<< getOpenMPDirectiveName(OMPD_threadprivate);
SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
}
- DeclGroupPtrTy Res =
- Actions.ActOnOpenMPThreadprivateDirective(Loc, Identifiers);
+ DeclGroupPtrTy Res = Actions.ActOnOpenMPThreadprivateDirective(
+ Loc, Helper.getIdentifiers());
Directive = Actions.ActOnDeclStmt(Res, Loc, Tok.getLocation());
}
SkipUntil(tok::annot_pragma_openmp_end);
break;
+ }
+ case OMPD_declare_reduction:
+ ConsumeToken();
+ if (auto Res = ParseOpenMPDeclareReductionDirective(/*AS=*/AS_none)) {
+ // The last seen token is annot_pragma_openmp_end - need to check for
+ // extra tokens.
+ if (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ Diag(Tok, diag::warn_omp_extra_tokens_at_eol)
+ << getOpenMPDirectiveName(OMPD_declare_reduction);
+ while (Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ }
+ ConsumeAnyToken();
+ Directive = Actions.ActOnDeclStmt(Res, Loc, Tok.getLocation());
+ } else
+ SkipUntil(tok::annot_pragma_openmp_end);
+ break;
case OMPD_flush:
if (PP.LookAhead(0).is(tok::l_paren)) {
FlushHasClause = true;
@@ -217,6 +846,9 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
case OMPD_taskwait:
case OMPD_cancellation_point:
case OMPD_cancel:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_update:
if (Allowed == ACK_StatementsOpenMPNonStandalone) {
Diag(Tok, diag::err_omp_immediate_directive)
<< getOpenMPDirectiveName(DKind) << 0;
@@ -242,9 +874,15 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
case OMPD_teams:
case OMPD_taskgroup:
case OMPD_target_data:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
case OMPD_taskloop:
case OMPD_taskloop_simd:
- case OMPD_distribute: {
+ case OMPD_distribute:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd: {
ConsumeToken();
// Parse directive name of the 'critical' directive if any.
if (DKind == OMPD_critical) {
@@ -331,6 +969,13 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
OMPDirectiveScope.Exit();
break;
}
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ Diag(Tok, diag::err_omp_unexpected_directive)
+ << getOpenMPDirectiveName(DKind);
+ SkipUntil(tok::annot_pragma_openmp_end);
+ break;
case OMPD_unknown:
Diag(Tok, diag::err_omp_unknown_directive);
SkipUntil(tok::annot_pragma_openmp_end);
@@ -339,16 +984,15 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
return Directive;
}
-/// \brief Parses list of simple variables for '#pragma omp threadprivate'
-/// directive.
-///
-/// simple-variable-list:
-/// '(' id-expression {, id-expression} ')'
-///
-bool Parser::ParseOpenMPSimpleVarList(OpenMPDirectiveKind Kind,
- SmallVectorImpl<Expr *> &VarList,
- bool AllowScopeSpecifier) {
- VarList.clear();
+// Parses a simple variable list:
+// simple-variable-list:
+// '(' id-expression {, id-expression} ')'
+//
+bool Parser::ParseOpenMPSimpleVarList(
+ OpenMPDirectiveKind Kind,
+ const llvm::function_ref<void(CXXScopeSpec &, DeclarationNameInfo)> &
+ Callback,
+ bool AllowScopeSpecifier) {
// Parse '('.
BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
if (T.expectAndConsume(diag::err_expected_lparen_after,
@@ -367,11 +1011,11 @@ bool Parser::ParseOpenMPSimpleVarList(OpenMPDirectiveKind Kind,
NoIdentIsFound = false;
if (AllowScopeSpecifier && getLangOpts().CPlusPlus &&
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), false)) {
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, false)) {
IsCorrect = false;
SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
StopBeforeMatch);
- } else if (ParseUnqualifiedId(SS, false, false, false, ParsedType(),
+ } else if (ParseUnqualifiedId(SS, false, false, false, nullptr,
TemplateKWLoc, Name)) {
IsCorrect = false;
SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
@@ -385,11 +1029,7 @@ bool Parser::ParseOpenMPSimpleVarList(OpenMPDirectiveKind Kind,
<< tok::identifier
<< SourceRange(PrevTok.getLocation(), PrevTokLocation);
} else {
- DeclarationNameInfo NameInfo = Actions.GetNameFromUnqualifiedId(Name);
- ExprResult Res =
- Actions.ActOnOpenMPIdExpression(getCurScope(), SS, NameInfo);
- if (Res.isUsable())
- VarList.push_back(Res.get());
+ Callback(SS, Actions.GetNameFromUnqualifiedId(Name));
}
// Consume ','.
if (Tok.is(tok::comma)) {
@@ -405,7 +1045,7 @@ bool Parser::ParseOpenMPSimpleVarList(OpenMPDirectiveKind Kind,
// Parse ')'.
IsCorrect = !T.consumeClose() && IsCorrect;
- return !IsCorrect && VarList.empty();
+ return !IsCorrect;
}
/// \brief Parsing of OpenMP clauses.
@@ -420,7 +1060,8 @@ bool Parser::ParseOpenMPSimpleVarList(OpenMPDirectiveKind Kind,
/// update-clause | capture-clause | seq_cst-clause | device-clause |
/// simdlen-clause | threads-clause | simd-clause | num_teams-clause |
/// thread_limit-clause | priority-clause | grainsize-clause |
-/// nogroup-clause | num_tasks-clause | hint-clause
+/// nogroup-clause | num_tasks-clause | hint-clause | to-clause |
+/// from-clause | is_device_ptr-clause
///
OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
OpenMPClauseKind CKind, bool FirstClause) {
@@ -494,8 +1135,12 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
Clause = ParseOpenMPSimpleClause(CKind);
break;
case OMPC_schedule:
+ case OMPC_dist_schedule:
+ case OMPC_defaultmap:
// OpenMP [2.7.1, Restrictions, p. 3]
// Only one schedule clause can appear on a loop directive.
+ // OpenMP [2.10.4, Restrictions, p. 106]
+ // At most one defaultmap clause can appear on the directive.
if (!FirstClause) {
Diag(Tok, diag::err_omp_more_one_clause)
<< getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0;
@@ -540,6 +1185,10 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_flush:
case OMPC_depend:
case OMPC_map:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
Clause = ParseOpenMPVarListClause(DKind, CKind);
break;
case OMPC_unknown:
@@ -548,6 +1197,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch);
break;
case OMPC_threadprivate:
+ case OMPC_uniform:
Diag(Tok, diag::err_omp_unexpected_clause) << getOpenMPClauseName(CKind)
<< getOpenMPDirectiveName(DKind);
SkipUntil(tok::comma, tok::annot_pragma_openmp_end, StopBeforeMatch);
@@ -556,6 +1206,28 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
return ErrorFound ? nullptr : Clause;
}
+/// Parses a simple expression wrapped in parentheses, for the
+/// single-expression clauses of OpenMP constructs.
+/// \param RLoc Returned location of right paren.
+ExprResult Parser::ParseOpenMPParensExpr(StringRef ClauseName,
+ SourceLocation &RLoc) {
+ BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+ if (T.expectAndConsume(diag::err_expected_lparen_after, ClauseName.data()))
+ return ExprError();
+
+ SourceLocation ELoc = Tok.getLocation();
+ ExprResult LHS(ParseCastExpression(
+ /*isUnaryExpression=*/false, /*isAddressOfOperand=*/false, NotTypeCast));
+ ExprResult Val(ParseRHSOfBinaryExpression(LHS, prec::Conditional));
+ Val = Actions.ActOnFinishFullExpr(Val.get(), ELoc);
+
+ // Parse ')'.
+ T.consumeClose();
+
+ RLoc = T.getCloseLocation();
+ return Val;
+}
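ParseOpenMPParensExpr is now the shared helper behind every single-expression clause; for instance, both of the following funnel through it (the constants are arbitrary):

    #pragma omp parallel num_threads(4)
    #pragma omp teams thread_limit(64)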
+
/// \brief Parsing of OpenMP clauses with single expressions like 'final',
/// 'collapse', 'safelen', 'num_threads', 'simdlen', 'num_teams',
/// 'thread_limit', 'priority', 'grainsize', 'num_tasks' or 'hint'.
@@ -589,25 +1261,15 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
///
OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind) {
SourceLocation Loc = ConsumeToken();
+ SourceLocation LLoc = Tok.getLocation();
+ SourceLocation RLoc;
- BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
- if (T.expectAndConsume(diag::err_expected_lparen_after,
- getOpenMPClauseName(Kind)))
- return nullptr;
-
- SourceLocation ELoc = Tok.getLocation();
- ExprResult LHS(ParseCastExpression(false, false, NotTypeCast));
- ExprResult Val(ParseRHSOfBinaryExpression(LHS, prec::Conditional));
- Val = Actions.ActOnFinishFullExpr(Val.get(), ELoc);
-
- // Parse ')'.
- T.consumeClose();
+ ExprResult Val = ParseOpenMPParensExpr(getOpenMPClauseName(Kind), RLoc);
if (Val.isInvalid())
return nullptr;
- return Actions.ActOnOpenMPSingleExprClause(
- Kind, Val.get(), Loc, T.getOpenLocation(), T.getCloseLocation());
+ return Actions.ActOnOpenMPSingleExprClause(Kind, Val.get(), Loc, LLoc, RLoc);
}
/// \brief Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
@@ -685,6 +1347,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPClauseKind Kind) {
/// if-clause:
/// 'if' '(' [ directive-name-modifier ':' ] expression ')'
///
+/// defaultmap:
+/// 'defaultmap' '(' modifier ':' kind ')'
+///
OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind) {
SourceLocation Loc = ConsumeToken();
SourceLocation DelimLoc;
@@ -744,6 +1409,35 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind) {
Arg[ScheduleKind] == OMPC_SCHEDULE_guided) &&
Tok.is(tok::comma))
DelimLoc = ConsumeAnyToken();
+ } else if (Kind == OMPC_dist_schedule) {
+ Arg.push_back(getOpenMPSimpleClauseType(
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok)));
+ KLoc.push_back(Tok.getLocation());
+ if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) &&
+ Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ if (Arg.back() == OMPC_DIST_SCHEDULE_static && Tok.is(tok::comma))
+ DelimLoc = ConsumeAnyToken();
+ } else if (Kind == OMPC_defaultmap) {
+ // Get a defaultmap modifier
+ Arg.push_back(getOpenMPSimpleClauseType(
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok)));
+ KLoc.push_back(Tok.getLocation());
+ if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) &&
+ Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ // Parse ':'
+ if (Tok.is(tok::colon))
+ ConsumeAnyToken();
+ else if (Arg.back() != OMPC_DEFAULTMAP_MODIFIER_unknown)
+ Diag(Tok, diag::warn_pragma_expected_colon) << "defaultmap modifier";
+ // Get a defaultmap kind
+ Arg.push_back(getOpenMPSimpleClauseType(
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok)));
+ KLoc.push_back(Tok.getLocation());
+ if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) &&
+ Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
} else {
assert(Kind == OMPC_if);
KLoc.push_back(Tok.getLocation());
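The two new branches parse an optional chunk expression for dist_schedule and a modifier ':' kind pair for defaultmap; sketches of the accepted spellings (16 is an arbitrary chunk size):

    #pragma omp distribute dist_schedule(static, 16)
    #pragma omp target defaultmap(tofrom : scalar)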
@@ -758,8 +1452,9 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind) {
}
}
- bool NeedAnExpression =
- (Kind == OMPC_schedule && DelimLoc.isValid()) || Kind == OMPC_if;
+ bool NeedAnExpression = (Kind == OMPC_schedule && DelimLoc.isValid()) ||
+ (Kind == OMPC_dist_schedule && DelimLoc.isValid()) ||
+ Kind == OMPC_if;
if (NeedAnExpression) {
SourceLocation ELoc = Tok.getLocation();
ExprResult LHS(ParseCastExpression(false, false, NotTypeCast));
@@ -820,65 +1515,24 @@ static bool ParseReductionId(Parser &P, CXXScopeSpec &ReductionIdScopeSpec,
}
return P.ParseUnqualifiedId(ReductionIdScopeSpec, /*EnteringContext*/ false,
/*AllowDestructorName*/ false,
- /*AllowConstructorName*/ false, ParsedType(),
+ /*AllowConstructorName*/ false, nullptr,
TemplateKWLoc, ReductionId);
}
-/// \brief Parsing of OpenMP clause 'private', 'firstprivate', 'lastprivate',
-/// 'shared', 'copyin', 'copyprivate', 'flush' or 'reduction'.
-///
-/// private-clause:
-/// 'private' '(' list ')'
-/// firstprivate-clause:
-/// 'firstprivate' '(' list ')'
-/// lastprivate-clause:
-/// 'lastprivate' '(' list ')'
-/// shared-clause:
-/// 'shared' '(' list ')'
-/// linear-clause:
-/// 'linear' '(' linear-list [ ':' linear-step ] ')'
-/// aligned-clause:
-/// 'aligned' '(' list [ ':' alignment ] ')'
-/// reduction-clause:
-/// 'reduction' '(' reduction-identifier ':' list ')'
-/// copyprivate-clause:
-/// 'copyprivate' '(' list ')'
-/// flush-clause:
-/// 'flush' '(' list ')'
-/// depend-clause:
-/// 'depend' '(' in | out | inout : list | source ')'
-/// map-clause:
-/// 'map' '(' [ [ always , ]
-/// to | from | tofrom | alloc | release | delete ':' ] list ')';
-///
-/// For 'linear' clause linear-list may have the following forms:
-/// list
-/// modifier(list)
-/// where modifier is 'val' (C) or 'ref', 'val' or 'uval'(C++).
-OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
- OpenMPClauseKind Kind) {
- SourceLocation Loc = Tok.getLocation();
- SourceLocation LOpen = ConsumeToken();
- SourceLocation ColonLoc = SourceLocation();
- // Optional scope specifier and unqualified id for reduction identifier.
- CXXScopeSpec ReductionIdScopeSpec;
- UnqualifiedId ReductionId;
+/// Parses clauses that take a list of variables.
+bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
+ OpenMPClauseKind Kind,
+ SmallVectorImpl<Expr *> &Vars,
+ OpenMPVarListDataTy &Data) {
+ UnqualifiedId UnqualifiedReductionId;
bool InvalidReductionId = false;
- OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
- // OpenMP 4.1 [2.15.3.7, linear Clause]
- // If no modifier is specified it is assumed to be val.
- OpenMPLinearClauseKind LinearModifier = OMPC_LINEAR_val;
- OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
- OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
bool MapTypeModifierSpecified = false;
- bool UnexpectedId = false;
- SourceLocation DepLinMapLoc;
// Parse '('.
BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
if (T.expectAndConsume(diag::err_expected_lparen_after,
getOpenMPClauseName(Kind)))
- return nullptr;
+ return true;
bool NeedRParenForLinear = false;
BalancedDelimiterTracker LinearT(*this, tok::l_paren,
@@ -886,46 +1540,46 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
// Handle reduction-identifier for reduction clause.
if (Kind == OMPC_reduction) {
ColonProtectionRAIIObject ColonRAII(*this);
- if (getLangOpts().CPlusPlus) {
- ParseOptionalCXXScopeSpecifier(ReductionIdScopeSpec, ParsedType(), false);
- }
- InvalidReductionId =
- ParseReductionId(*this, ReductionIdScopeSpec, ReductionId);
+ if (getLangOpts().CPlusPlus)
+ ParseOptionalCXXScopeSpecifier(Data.ReductionIdScopeSpec,
+ /*ObjectType=*/nullptr,
+ /*EnteringContext=*/false);
+ InvalidReductionId = ParseReductionId(*this, Data.ReductionIdScopeSpec,
+ UnqualifiedReductionId);
if (InvalidReductionId) {
SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
StopBeforeMatch);
}
- if (Tok.is(tok::colon)) {
- ColonLoc = ConsumeToken();
- } else {
+ if (Tok.is(tok::colon))
+ Data.ColonLoc = ConsumeToken();
+ else
Diag(Tok, diag::warn_pragma_expected_colon) << "reduction identifier";
- }
+ if (!InvalidReductionId)
+ Data.ReductionId =
+ Actions.GetNameFromUnqualifiedId(UnqualifiedReductionId);
} else if (Kind == OMPC_depend) {
// Handle dependency type for depend clause.
ColonProtectionRAIIObject ColonRAII(*this);
- DepKind = static_cast<OpenMPDependClauseKind>(getOpenMPSimpleClauseType(
- Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
- DepLinMapLoc = Tok.getLocation();
+ Data.DepKind =
+ static_cast<OpenMPDependClauseKind>(getOpenMPSimpleClauseType(
+ Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
+ Data.DepLinMapLoc = Tok.getLocation();
- if (DepKind == OMPC_DEPEND_unknown) {
+ if (Data.DepKind == OMPC_DEPEND_unknown) {
SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
StopBeforeMatch);
} else {
ConsumeToken();
// Special processing for depend(source) clause.
- if (DKind == OMPD_ordered && DepKind == OMPC_DEPEND_source) {
+ if (DKind == OMPD_ordered && Data.DepKind == OMPC_DEPEND_source) {
// Parse ')'.
T.consumeClose();
- return Actions.ActOnOpenMPVarListClause(
- Kind, llvm::None, /*TailExpr=*/nullptr, Loc, LOpen,
- /*ColonLoc=*/SourceLocation(), Tok.getLocation(),
- ReductionIdScopeSpec, DeclarationNameInfo(), DepKind,
- LinearModifier, MapTypeModifier, MapType, DepLinMapLoc);
+ return false;
}
}
- if (Tok.is(tok::colon)) {
- ColonLoc = ConsumeToken();
- } else {
+ if (Tok.is(tok::colon))
+ Data.ColonLoc = ConsumeToken();
+ else {
Diag(Tok, DKind == OMPD_ordered ? diag::warn_pragma_expected_colon_r_paren
: diag::warn_pragma_expected_colon)
<< "dependency type";
@@ -933,9 +1587,9 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
} else if (Kind == OMPC_linear) {
// Try to parse modifier if any.
if (Tok.is(tok::identifier) && PP.LookAhead(0).is(tok::l_paren)) {
- LinearModifier = static_cast<OpenMPLinearClauseKind>(
+ Data.LinKind = static_cast<OpenMPLinearClauseKind>(
getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok)));
- DepLinMapLoc = ConsumeToken();
+ Data.DepLinMapLoc = ConsumeToken();
LinearT.consumeOpen();
NeedRParenForLinear = true;
}
@@ -943,71 +1597,78 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
// Handle map type for map clause.
ColonProtectionRAIIObject ColonRAII(*this);
- // the first identifier may be a list item, a map-type or
- // a map-type-modifier
- MapType = static_cast<OpenMPMapClauseKind>(getOpenMPSimpleClauseType(
- Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
- DepLinMapLoc = Tok.getLocation();
+    /// The map clause modifier token can be either an identifier or the C++
+ /// delete keyword.
+ auto &&IsMapClauseModifierToken = [](const Token &Tok) -> bool {
+ return Tok.isOneOf(tok::identifier, tok::kw_delete);
+ };
+
+ // The first identifier may be a list item, a map-type or a
+    // map-type-modifier. The map modifier can also be 'delete', which has the
+    // same spelling as the C++ 'delete' keyword.
+ Data.MapType =
+ IsMapClauseModifierToken(Tok)
+ ? static_cast<OpenMPMapClauseKind>(
+ getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok)))
+ : OMPC_MAP_unknown;
+ Data.DepLinMapLoc = Tok.getLocation();
bool ColonExpected = false;
- if (Tok.is(tok::identifier)) {
+ if (IsMapClauseModifierToken(Tok)) {
if (PP.LookAhead(0).is(tok::colon)) {
- MapType = static_cast<OpenMPMapClauseKind>(getOpenMPSimpleClauseType(
- Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
- if (MapType == OMPC_MAP_unknown) {
+ if (Data.MapType == OMPC_MAP_unknown)
Diag(Tok, diag::err_omp_unknown_map_type);
- } else if (MapType == OMPC_MAP_always) {
+ else if (Data.MapType == OMPC_MAP_always)
Diag(Tok, diag::err_omp_map_type_missing);
- }
ConsumeToken();
} else if (PP.LookAhead(0).is(tok::comma)) {
- if (PP.LookAhead(1).is(tok::identifier) &&
+ if (IsMapClauseModifierToken(PP.LookAhead(1)) &&
PP.LookAhead(2).is(tok::colon)) {
- MapTypeModifier =
- static_cast<OpenMPMapClauseKind>(getOpenMPSimpleClauseType(
- Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
- if (MapTypeModifier != OMPC_MAP_always) {
+ Data.MapTypeModifier = Data.MapType;
+ if (Data.MapTypeModifier != OMPC_MAP_always) {
Diag(Tok, diag::err_omp_unknown_map_type_modifier);
- MapTypeModifier = OMPC_MAP_unknown;
- } else {
+ Data.MapTypeModifier = OMPC_MAP_unknown;
+ } else
MapTypeModifierSpecified = true;
- }
ConsumeToken();
ConsumeToken();
- MapType = static_cast<OpenMPMapClauseKind>(getOpenMPSimpleClauseType(
- Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
- if (MapType == OMPC_MAP_unknown || MapType == OMPC_MAP_always) {
+ Data.MapType =
+ IsMapClauseModifierToken(Tok)
+ ? static_cast<OpenMPMapClauseKind>(
+ getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok)))
+ : OMPC_MAP_unknown;
+ if (Data.MapType == OMPC_MAP_unknown ||
+ Data.MapType == OMPC_MAP_always)
Diag(Tok, diag::err_omp_unknown_map_type);
- }
ConsumeToken();
} else {
- MapType = OMPC_MAP_tofrom;
+ Data.MapType = OMPC_MAP_tofrom;
+ Data.IsMapTypeImplicit = true;
}
} else {
- MapType = OMPC_MAP_tofrom;
+ Data.MapType = OMPC_MAP_tofrom;
+ Data.IsMapTypeImplicit = true;
}
} else {
- UnexpectedId = true;
+ Data.MapType = OMPC_MAP_tofrom;
+ Data.IsMapTypeImplicit = true;
}
- if (Tok.is(tok::colon)) {
- ColonLoc = ConsumeToken();
- } else if (ColonExpected) {
+ if (Tok.is(tok::colon))
+ Data.ColonLoc = ConsumeToken();
+ else if (ColonExpected)
Diag(Tok, diag::warn_pragma_expected_colon) << "map type";
- }
}
- SmallVector<Expr *, 5> Vars;
bool IsComma =
- ((Kind != OMPC_reduction) && (Kind != OMPC_depend) &&
- (Kind != OMPC_map)) ||
- ((Kind == OMPC_reduction) && !InvalidReductionId) ||
- ((Kind == OMPC_map) && (UnexpectedId || MapType != OMPC_MAP_unknown) &&
+ (Kind != OMPC_reduction && Kind != OMPC_depend && Kind != OMPC_map) ||
+ (Kind == OMPC_reduction && !InvalidReductionId) ||
+ (Kind == OMPC_map && Data.MapType != OMPC_MAP_unknown &&
(!MapTypeModifierSpecified ||
- (MapTypeModifierSpecified && MapTypeModifier == OMPC_MAP_always))) ||
- ((Kind == OMPC_depend) && DepKind != OMPC_DEPEND_unknown);
+ Data.MapTypeModifier == OMPC_MAP_always)) ||
+ (Kind == OMPC_depend && Data.DepKind != OMPC_DEPEND_unknown);
const bool MayHaveTail = (Kind == OMPC_linear || Kind == OMPC_aligned);
while (IsComma || (Tok.isNot(tok::r_paren) && Tok.isNot(tok::colon) &&
Tok.isNot(tok::annot_pragma_openmp_end))) {
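
The map-type logic above reduces to three cases; an illustrative sketch (names are placeholders, and the 'delete' pairing with 'target exit data' is an assumed OpenMP 4.5 usage):

    #pragma omp target map(a)                     // no map-type: implicit 'tofrom'
    #pragma omp target map(always, to : b)        // modifier ',' map-type ':' list
    #pragma omp target exit data map(delete : c)  // 'delete' lexes as a keyword,
                                                  // hence IsMapClauseModifierToken
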
@@ -1015,9 +1676,9 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
// Parse variable
ExprResult VarExpr =
Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression());
- if (VarExpr.isUsable()) {
+ if (VarExpr.isUsable())
Vars.push_back(VarExpr.get());
- } else {
+ else {
SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
StopBeforeMatch);
}
@@ -1039,15 +1700,14 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
LinearT.consumeClose();
// Parse ':' linear-step (or ':' alignment).
- Expr *TailExpr = nullptr;
const bool MustHaveTail = MayHaveTail && Tok.is(tok::colon);
if (MustHaveTail) {
- ColonLoc = Tok.getLocation();
+ Data.ColonLoc = Tok.getLocation();
SourceLocation ELoc = ConsumeToken();
ExprResult Tail = ParseAssignmentExpression();
Tail = Actions.ActOnFinishFullExpr(Tail.get(), ELoc);
if (Tail.isUsable())
- TailExpr = Tail.get();
+ Data.TailExpr = Tail.get();
else
SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
StopBeforeMatch);
@@ -1055,18 +1715,67 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
// Parse ')'.
T.consumeClose();
- if ((Kind == OMPC_depend && DepKind != OMPC_DEPEND_unknown && Vars.empty()) ||
- (Kind != OMPC_depend && Vars.empty()) || (MustHaveTail && !TailExpr) ||
- (Kind == OMPC_map && MapType == OMPC_MAP_unknown) ||
- InvalidReductionId) {
+ if ((Kind == OMPC_depend && Data.DepKind != OMPC_DEPEND_unknown &&
+ Vars.empty()) ||
+ (Kind != OMPC_depend && Kind != OMPC_map && Vars.empty()) ||
+ (MustHaveTail && !Data.TailExpr) || InvalidReductionId)
+ return true;
+ return false;
+}
+
+/// \brief Parsing of OpenMP clause 'private', 'firstprivate', 'lastprivate',
+/// 'shared', 'copyin', 'copyprivate', 'flush' or 'reduction'.
+///
+/// private-clause:
+/// 'private' '(' list ')'
+/// firstprivate-clause:
+/// 'firstprivate' '(' list ')'
+/// lastprivate-clause:
+/// 'lastprivate' '(' list ')'
+/// shared-clause:
+/// 'shared' '(' list ')'
+/// linear-clause:
+/// 'linear' '(' linear-list [ ':' linear-step ] ')'
+/// aligned-clause:
+/// 'aligned' '(' list [ ':' alignment ] ')'
+/// reduction-clause:
+/// 'reduction' '(' reduction-identifier ':' list ')'
+/// copyprivate-clause:
+/// 'copyprivate' '(' list ')'
+/// flush-clause:
+/// 'flush' '(' list ')'
+/// depend-clause:
+/// 'depend' '(' in | out | inout : list | source ')'
+/// map-clause:
+/// 'map' '(' [ [ always , ]
+/// to | from | tofrom | alloc | release | delete ':' ] list ')';
+/// to-clause:
+/// 'to' '(' list ')'
+/// from-clause:
+/// 'from' '(' list ')'
+/// use_device_ptr-clause:
+/// 'use_device_ptr' '(' list ')'
+/// is_device_ptr-clause:
+/// 'is_device_ptr' '(' list ')'
+///
+/// For 'linear' clause linear-list may have the following forms:
+/// list
+/// modifier(list)
+/// where modifier is 'val' (C) or 'ref', 'val' or 'uval' (C++).
+OMPClause *Parser::ParseOpenMPVarListClause(OpenMPDirectiveKind DKind,
+ OpenMPClauseKind Kind) {
+ SourceLocation Loc = Tok.getLocation();
+ SourceLocation LOpen = ConsumeToken();
+ SmallVector<Expr *, 4> Vars;
+ OpenMPVarListDataTy Data;
+
+ if (ParseOpenMPVarList(DKind, Kind, Vars, Data))
return nullptr;
- }
return Actions.ActOnOpenMPVarListClause(
- Kind, Vars, TailExpr, Loc, LOpen, ColonLoc, Tok.getLocation(),
- ReductionIdScopeSpec,
- ReductionId.isValid() ? Actions.GetNameFromUnqualifiedId(ReductionId)
- : DeclarationNameInfo(),
- DepKind, LinearModifier, MapTypeModifier, MapType, DepLinMapLoc);
+ Kind, Vars, Data.TailExpr, Loc, LOpen, Data.ColonLoc, Tok.getLocation(),
+ Data.ReductionIdScopeSpec, Data.ReductionId, Data.DepKind, Data.LinKind,
+ Data.MapTypeModifier, Data.MapType, Data.IsMapTypeImplicit,
+ Data.DepLinMapLoc);
}
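
The newly documented to/from/use_device_ptr/is_device_ptr clauses are plain list clauses; a hedged usage sketch (directive pairings chosen for illustration):

    #pragma omp target update to(x) from(y)
    #pragma omp target data map(to : a) use_device_ptr(p)
    #pragma omp target is_device_ptr(q)
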
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp
index bc70942851e2..bff5d1170fe0 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParsePragma.cpp
@@ -13,6 +13,7 @@
#include "RAIIObjectsForParser.h"
#include "clang/AST/ASTContext.h"
+#include "clang/Basic/PragmaKinds.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Parse/ParseDiagnostic.h"
@@ -336,11 +337,9 @@ void Parser::HandlePragmaVisibility() {
namespace {
struct PragmaPackInfo {
- Sema::PragmaPackKind Kind;
- IdentifierInfo *Name;
+ Sema::PragmaMsStackAction Action;
+ StringRef SlotLabel;
Token Alignment;
- SourceLocation LParenLoc;
- SourceLocation RParenLoc;
};
} // end anonymous namespace
@@ -355,15 +354,14 @@ void Parser::HandlePragmaPack() {
if (Alignment.isInvalid())
return;
}
- Actions.ActOnPragmaPack(Info->Kind, Info->Name, Alignment.get(), PragmaLoc,
- Info->LParenLoc, Info->RParenLoc);
+ Actions.ActOnPragmaPack(PragmaLoc, Info->Action, Info->SlotLabel,
+ Alignment.get());
}
void Parser::HandlePragmaMSStruct() {
assert(Tok.is(tok::annot_pragma_msstruct));
- Sema::PragmaMSStructKind Kind =
- static_cast<Sema::PragmaMSStructKind>(
- reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
+ PragmaMSStructKind Kind = static_cast<PragmaMSStructKind>(
+ reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
Actions.ActOnPragmaMSStruct(Kind);
ConsumeToken(); // The annotation token.
}
@@ -470,14 +468,24 @@ void Parser::HandlePragmaOpenCLExtension() {
ConsumeToken(); // The annotation token.
OpenCLOptions &f = Actions.getOpenCLOptions();
+ auto CLVer = getLangOpts().OpenCLVersion;
+ auto &Supp = getTargetInfo().getSupportedOpenCLOpts();
// OpenCL 1.1 9.1: "The all variant sets the behavior for all extensions,
// overriding all previously issued extension directives, but only if the
// behavior is set to disable."
if (state == 0 && ename->isStr("all")) {
-#define OPENCLEXT(nm) f.nm = 0;
+#define OPENCLEXT(nm) \
+ if (Supp.is_##nm##_supported_extension(CLVer)) \
+ f.nm = 0;
#include "clang/Basic/OpenCLExtensions.def"
}
-#define OPENCLEXT(nm) else if (ename->isStr(#nm)) { f.nm = state; }
+#define OPENCLEXT(nm) else if (ename->isStr(#nm)) \
+ if (Supp.is_##nm##_supported_extension(CLVer)) \
+ f.nm = state; \
+ else if (Supp.is_##nm##_supported_core(CLVer)) \
+ PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << ename; \
+ else \
+ PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << ename;
#include "clang/Basic/OpenCLExtensions.def"
else {
PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << ename;
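
A sketch of the pragma this handler now validates against the target; cl_khr_fp64 stands in for any extension listed in OpenCLExtensions.def:

    #pragma OPENCL EXTENSION cl_khr_fp64 : enable  // warns if core or unsupported
    #pragma OPENCL EXTENSION all : disable         // resets supported extensions
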
@@ -497,18 +505,19 @@ void Parser::HandlePragmaMSPointersToMembers() {
void Parser::HandlePragmaMSVtorDisp() {
assert(Tok.is(tok::annot_pragma_ms_vtordisp));
uintptr_t Value = reinterpret_cast<uintptr_t>(Tok.getAnnotationValue());
- Sema::PragmaVtorDispKind Kind =
- static_cast<Sema::PragmaVtorDispKind>((Value >> 16) & 0xFFFF);
+ Sema::PragmaMsStackAction Action =
+ static_cast<Sema::PragmaMsStackAction>((Value >> 16) & 0xFFFF);
MSVtorDispAttr::Mode Mode = MSVtorDispAttr::Mode(Value & 0xFFFF);
SourceLocation PragmaLoc = ConsumeToken(); // The annotation token.
- Actions.ActOnPragmaMSVtorDisp(Kind, PragmaLoc, Mode);
+ Actions.ActOnPragmaMSVtorDisp(Action, PragmaLoc, Mode);
}
void Parser::HandlePragmaMSPragma() {
assert(Tok.is(tok::annot_pragma_ms_pragma));
// Grab the tokens out of the annotation and enter them into the stream.
- auto TheTokens = (std::pair<Token*, size_t> *)Tok.getAnnotationValue();
- PP.EnterTokenStream(TheTokens->first, TheTokens->second, true, true);
+ auto TheTokens =
+ (std::pair<std::unique_ptr<Token[]>, size_t> *)Tok.getAnnotationValue();
+ PP.EnterTokenStream(std::move(TheTokens->first), TheTokens->second, true);
SourceLocation PragmaLocation = ConsumeToken(); // The annotation token.
assert(Tok.isAnyIdentifier());
StringRef PragmaName = Tok.getIdentifierInfo()->getName();
@@ -798,14 +807,13 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
Hint.OptionLoc = IdentifierLoc::create(
Actions.Context, Info->Option.getLocation(), OptionInfo);
- const Token *Toks = Info->Toks.data();
- size_t TokSize = Info->Toks.size();
+ llvm::ArrayRef<Token> Toks = Info->Toks;
// Return a valid hint if pragma unroll or nounroll were specified
// without an argument.
bool PragmaUnroll = PragmaNameInfo->getName() == "unroll";
bool PragmaNoUnroll = PragmaNameInfo->getName() == "nounroll";
- if (TokSize == 0 && (PragmaUnroll || PragmaNoUnroll)) {
+ if (Toks.empty() && (PragmaUnroll || PragmaNoUnroll)) {
ConsumeToken(); // The annotation token.
Hint.Range = Info->PragmaName.getLocation();
return true;
@@ -813,26 +821,30 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
// The constant expression is always followed by an eof token, which
// increases the token count by 1.
- assert(TokSize > 0 &&
+ assert(!Toks.empty() &&
"PragmaLoopHintInfo::Toks must contain at least one token.");
// If no option is specified the argument is assumed to be a constant expr.
bool OptionUnroll = false;
+ bool OptionDistribute = false;
bool StateOption = false;
if (OptionInfo) { // Pragma Unroll does not specify an option.
OptionUnroll = OptionInfo->isStr("unroll");
+ OptionDistribute = OptionInfo->isStr("distribute");
StateOption = llvm::StringSwitch<bool>(OptionInfo->getName())
.Case("vectorize", true)
.Case("interleave", true)
- .Case("unroll", true)
- .Default(false);
+ .Default(false) ||
+ OptionUnroll || OptionDistribute;
}
+ bool AssumeSafetyArg = !OptionUnroll && !OptionDistribute;
// Verify loop hint has an argument.
if (Toks[0].is(tok::eof)) {
ConsumeToken(); // The annotation token.
Diag(Toks[0].getLocation(), diag::err_pragma_loop_missing_argument)
- << /*StateArgument=*/StateOption << /*FullKeyword=*/OptionUnroll;
+ << /*StateArgument=*/StateOption << /*FullKeyword=*/OptionUnroll
+ << /*AssumeSafetyKeyword=*/AssumeSafetyArg;
return false;
}
@@ -841,22 +853,26 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
ConsumeToken(); // The annotation token.
SourceLocation StateLoc = Toks[0].getLocation();
IdentifierInfo *StateInfo = Toks[0].getIdentifierInfo();
- if (!StateInfo ||
- (!StateInfo->isStr("enable") && !StateInfo->isStr("disable") &&
- ((OptionUnroll && !StateInfo->isStr("full")) ||
- (!OptionUnroll && !StateInfo->isStr("assume_safety"))))) {
+
+ bool Valid = StateInfo &&
+ llvm::StringSwitch<bool>(StateInfo->getName())
+ .Cases("enable", "disable", true)
+ .Case("full", OptionUnroll)
+ .Case("assume_safety", AssumeSafetyArg)
+ .Default(false);
+ if (!Valid) {
Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword)
- << /*FullKeyword=*/OptionUnroll;
+ << /*FullKeyword=*/OptionUnroll
+ << /*AssumeSafetyKeyword=*/AssumeSafetyArg;
return false;
}
- if (TokSize > 2)
+ if (Toks.size() > 2)
Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
<< PragmaLoopHintString(Info->PragmaName, Info->Option);
Hint.StateLoc = IdentifierLoc::create(Actions.Context, StateLoc, StateInfo);
} else {
// Enter constant expression including eof terminator into token stream.
- PP.EnterTokenStream(Toks, TokSize, /*DisableMacroExpansion=*/false,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/false);
ConsumeToken(); // The annotation token.
ExprResult R = ParseConstantExpression();
@@ -881,7 +897,7 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
}
Hint.Range = SourceRange(Info->PragmaName.getLocation(),
- Info->Toks[TokSize - 1].getLocation());
+ Info->Toks.back().getLocation());
return true;
}
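
The keyword matrix above corresponds to hints like these (illustrative; 'distribute' is the option this patch adds):

    #pragma clang loop vectorize(enable) interleave(disable)
    #pragma clang loop unroll(full)              // 'full' only valid for unroll
    #pragma clang loop vectorize(assume_safety)  // not valid for unroll/distribute
    #pragma clang loop distribute(enable)
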
@@ -934,15 +950,14 @@ void PragmaGCCVisibilityHandler::HandlePragma(Preprocessor &PP,
return;
}
- Token *Toks = new Token[1];
+ auto Toks = llvm::make_unique<Token[]>(1);
Toks[0].startToken();
Toks[0].setKind(tok::annot_pragma_vis);
Toks[0].setLocation(VisLoc);
Toks[0].setAnnotationEndLoc(EndLoc);
Toks[0].setAnnotationValue(
const_cast<void*>(static_cast<const void*>(VisType)));
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/true);
+ PP.EnterTokenStream(std::move(Toks), 1, /*DisableMacroExpansion=*/true);
}
// #pragma pack(...) comes in the following delicious flavors:
@@ -961,11 +976,10 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
return;
}
- Sema::PragmaPackKind Kind = Sema::PPK_Default;
- IdentifierInfo *Name = nullptr;
+ Sema::PragmaMsStackAction Action = Sema::PSK_Reset;
+ StringRef SlotLabel;
Token Alignment;
Alignment.startToken();
- SourceLocation LParenLoc = Tok.getLocation();
PP.Lex(Tok);
if (Tok.is(tok::numeric_constant)) {
Alignment = Tok;
@@ -975,18 +989,18 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
// In MSVC/gcc, #pragma pack(4) sets the alignment without affecting
// the push/pop stack.
// In Apple gcc, #pragma pack(4) is equivalent to #pragma pack(push, 4)
- if (PP.getLangOpts().ApplePragmaPack)
- Kind = Sema::PPK_Push;
+ Action =
+ PP.getLangOpts().ApplePragmaPack ? Sema::PSK_Push_Set : Sema::PSK_Set;
} else if (Tok.is(tok::identifier)) {
const IdentifierInfo *II = Tok.getIdentifierInfo();
if (II->isStr("show")) {
- Kind = Sema::PPK_Show;
+ Action = Sema::PSK_Show;
PP.Lex(Tok);
} else {
if (II->isStr("push")) {
- Kind = Sema::PPK_Push;
+ Action = Sema::PSK_Push;
} else if (II->isStr("pop")) {
- Kind = Sema::PPK_Pop;
+ Action = Sema::PSK_Pop;
} else {
PP.Diag(Tok.getLocation(), diag::warn_pragma_invalid_action) << "pack";
return;
@@ -997,11 +1011,12 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
PP.Lex(Tok);
if (Tok.is(tok::numeric_constant)) {
+ Action = (Sema::PragmaMsStackAction)(Action | Sema::PSK_Set);
Alignment = Tok;
PP.Lex(Tok);
} else if (Tok.is(tok::identifier)) {
- Name = Tok.getIdentifierInfo();
+ SlotLabel = Tok.getIdentifierInfo()->getName();
PP.Lex(Tok);
if (Tok.is(tok::comma)) {
@@ -1012,6 +1027,7 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
return;
}
+ Action = (Sema::PragmaMsStackAction)(Action | Sema::PSK_Set);
Alignment = Tok;
PP.Lex(Tok);
@@ -1026,7 +1042,7 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
// In MSVC/gcc, #pragma pack() resets the alignment without affecting
// the push/pop stack.
// In Apple gcc #pragma pack() is equivalent to #pragma pack(pop).
- Kind = Sema::PPK_Pop;
+ Action = Sema::PSK_Pop;
}
if (Tok.isNot(tok::r_paren)) {
@@ -1041,27 +1057,20 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
return;
}
- PragmaPackInfo *Info =
- (PragmaPackInfo*) PP.getPreprocessorAllocator().Allocate(
- sizeof(PragmaPackInfo), llvm::alignOf<PragmaPackInfo>());
- new (Info) PragmaPackInfo();
- Info->Kind = Kind;
- Info->Name = Name;
+ PragmaPackInfo *Info =
+ PP.getPreprocessorAllocator().Allocate<PragmaPackInfo>(1);
+ Info->Action = Action;
+ Info->SlotLabel = SlotLabel;
Info->Alignment = Alignment;
- Info->LParenLoc = LParenLoc;
- Info->RParenLoc = RParenLoc;
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 1, llvm::alignOf<Token>());
- new (Toks) Token();
+ MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(1),
+ 1);
Toks[0].startToken();
Toks[0].setKind(tok::annot_pragma_pack);
Toks[0].setLocation(PackLoc);
Toks[0].setAnnotationEndLoc(RParenLoc);
Toks[0].setAnnotationValue(static_cast<void*>(Info));
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
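
A sketch of how the pack forms map onto the new stack actions ('slot' is a made-up label):

    #pragma pack(8)              // PSK_Set (PSK_Push_Set under -fapple-pragma-pack)
    #pragma pack(push, 4)        // PSK_Push | PSK_Set
    #pragma pack(push, slot, 2)  // labeled slot: SlotLabel = "slot"
    #pragma pack(pop)            // PSK_Pop
    #pragma pack(show)           // PSK_Show
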
// #pragma ms_struct on
@@ -1069,8 +1078,8 @@ void PragmaPackHandler::HandlePragma(Preprocessor &PP,
void PragmaMSStructHandler::HandlePragma(Preprocessor &PP,
PragmaIntroducerKind Introducer,
Token &MSStructTok) {
- Sema::PragmaMSStructKind Kind = Sema::PMSST_OFF;
-
+ PragmaMSStructKind Kind = PMSST_OFF;
+
Token Tok;
PP.Lex(Tok);
if (Tok.isNot(tok::identifier)) {
@@ -1080,7 +1089,7 @@ void PragmaMSStructHandler::HandlePragma(Preprocessor &PP,
SourceLocation EndLoc = Tok.getLocation();
const IdentifierInfo *II = Tok.getIdentifierInfo();
if (II->isStr("on")) {
- Kind = Sema::PMSST_ON;
+ Kind = PMSST_ON;
PP.Lex(Tok);
}
else if (II->isStr("off") || II->isStr("reset"))
@@ -1096,18 +1105,15 @@ void PragmaMSStructHandler::HandlePragma(Preprocessor &PP,
return;
}
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 1, llvm::alignOf<Token>());
- new (Toks) Token();
+ MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(1),
+ 1);
Toks[0].startToken();
Toks[0].setKind(tok::annot_pragma_msstruct);
Toks[0].setLocation(MSStructTok.getLocation());
Toks[0].setAnnotationEndLoc(EndLoc);
Toks[0].setAnnotationValue(reinterpret_cast<void*>(
static_cast<uintptr_t>(Kind)));
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
// #pragma 'align' '=' {'native','natural','mac68k','power','reset'}
@@ -1167,18 +1173,15 @@ static void ParseAlignPragma(Preprocessor &PP, Token &FirstTok,
return;
}
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 1, llvm::alignOf<Token>());
- new (Toks) Token();
+ MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(1),
+ 1);
Toks[0].startToken();
Toks[0].setKind(tok::annot_pragma_align);
Toks[0].setLocation(FirstTok.getLocation());
Toks[0].setAnnotationEndLoc(EndLoc);
Toks[0].setAnnotationValue(reinterpret_cast<void*>(
static_cast<uintptr_t>(Kind)));
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
void PragmaAlignHandler::HandlePragma(Preprocessor &PP,
@@ -1260,9 +1263,9 @@ void PragmaUnusedHandler::HandlePragma(Preprocessor &PP,
// This allows us to cache a "#pragma unused" that occurs inside an inline
// C++ member function.
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 2 * Identifiers.size(), llvm::alignOf<Token>());
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(2 * Identifiers.size()),
+ 2 * Identifiers.size());
for (unsigned i=0; i != Identifiers.size(); i++) {
Token &pragmaUnusedTok = Toks[2*i], &idTok = Toks[2*i+1];
pragmaUnusedTok.startToken();
@@ -1270,8 +1273,7 @@ void PragmaUnusedHandler::HandlePragma(Preprocessor &PP,
pragmaUnusedTok.setLocation(UnusedLoc);
idTok = Identifiers[i];
}
- PP.EnterTokenStream(Toks, 2*Identifiers.size(),
- /*DisableMacroExpansion=*/true, /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
// #pragma weak identifier
@@ -1311,9 +1313,8 @@ void PragmaWeakHandler::HandlePragma(Preprocessor &PP,
}
if (HasAlias) {
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 3, llvm::alignOf<Token>());
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(3), 3);
Token &pragmaUnusedTok = Toks[0];
pragmaUnusedTok.startToken();
pragmaUnusedTok.setKind(tok::annot_pragma_weakalias);
@@ -1321,20 +1322,17 @@ void PragmaWeakHandler::HandlePragma(Preprocessor &PP,
pragmaUnusedTok.setAnnotationEndLoc(AliasName.getLocation());
Toks[1] = WeakName;
Toks[2] = AliasName;
- PP.EnterTokenStream(Toks, 3,
- /*DisableMacroExpansion=*/true, /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
} else {
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 2, llvm::alignOf<Token>());
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(2), 2);
Token &pragmaUnusedTok = Toks[0];
pragmaUnusedTok.startToken();
pragmaUnusedTok.setKind(tok::annot_pragma_weak);
pragmaUnusedTok.setLocation(WeakLoc);
pragmaUnusedTok.setAnnotationEndLoc(WeakLoc);
Toks[1] = WeakName;
- PP.EnterTokenStream(Toks, 2,
- /*DisableMacroExpansion=*/true, /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
}
@@ -1370,9 +1368,8 @@ void PragmaRedefineExtnameHandler::HandlePragma(Preprocessor &PP,
return;
}
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 3, llvm::alignOf<Token>());
+ MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(3),
+ 3);
Token &pragmaRedefTok = Toks[0];
pragmaRedefTok.startToken();
pragmaRedefTok.setKind(tok::annot_pragma_redefine_extname);
@@ -1380,8 +1377,7 @@ void PragmaRedefineExtnameHandler::HandlePragma(Preprocessor &PP,
pragmaRedefTok.setAnnotationEndLoc(AliasName.getLocation());
Toks[1] = RedefName;
Toks[2] = AliasName;
- PP.EnterTokenStream(Toks, 3,
- /*DisableMacroExpansion=*/true, /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
@@ -1393,18 +1389,15 @@ PragmaFPContractHandler::HandlePragma(Preprocessor &PP,
if (PP.LexOnOffSwitch(OOS))
return;
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 1, llvm::alignOf<Token>());
- new (Toks) Token();
+ MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(1),
+ 1);
Toks[0].startToken();
Toks[0].setKind(tok::annot_pragma_fp_contract);
Toks[0].setLocation(Tok.getLocation());
Toks[0].setAnnotationEndLoc(Tok.getLocation());
Toks[0].setAnnotationValue(reinterpret_cast<void*>(
static_cast<uintptr_t>(OOS)));
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
}
void
@@ -1452,17 +1445,14 @@ PragmaOpenCLExtensionHandler::HandlePragma(Preprocessor &PP,
}
OpenCLExtData data(ename, state);
- Token *Toks =
- (Token*) PP.getPreprocessorAllocator().Allocate(
- sizeof(Token) * 1, llvm::alignOf<Token>());
- new (Toks) Token();
+ MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(1),
+ 1);
Toks[0].startToken();
Toks[0].setKind(tok::annot_pragma_opencl_extension);
Toks[0].setLocation(NameLoc);
Toks[0].setAnnotationValue(data.getOpaqueValue());
Toks[0].setAnnotationEndLoc(StateLoc);
- PP.EnterTokenStream(Toks, 1, /*DisableMacroExpansion=*/true,
- /*OwnsTokens=*/false);
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true);
if (PP.getPPCallbacks())
PP.getPPCallbacks()->PragmaOpenCLExtension(NameLoc, ename,
@@ -1506,10 +1496,10 @@ PragmaOpenMPHandler::HandlePragma(Preprocessor &PP,
Tok.setLocation(EodLoc);
Pragma.push_back(Tok);
- Token *Toks = new Token[Pragma.size()];
- std::copy(Pragma.begin(), Pragma.end(), Toks);
- PP.EnterTokenStream(Toks, Pragma.size(),
- /*DisableMacroExpansion=*/false, /*OwnsTokens=*/true);
+ auto Toks = llvm::make_unique<Token[]>(Pragma.size());
+ std::copy(Pragma.begin(), Pragma.end(), Toks.get());
+ PP.EnterTokenStream(std::move(Toks), Pragma.size(),
+ /*DisableMacroExpansion=*/false);
}
/// \brief Handle '#pragma pointers_to_members'
@@ -1629,7 +1619,7 @@ void PragmaMSVtorDisp::HandlePragma(Preprocessor &PP,
}
PP.Lex(Tok);
- Sema::PragmaVtorDispKind Kind = Sema::PVDK_Set;
+ Sema::PragmaMsStackAction Action = Sema::PSK_Set;
const IdentifierInfo *II = Tok.getIdentifierInfo();
if (II) {
if (II->isStr("push")) {
@@ -1640,24 +1630,24 @@ void PragmaMSVtorDisp::HandlePragma(Preprocessor &PP,
return;
}
PP.Lex(Tok);
- Kind = Sema::PVDK_Push;
+ Action = Sema::PSK_Push_Set;
// not push, could be on/off
} else if (II->isStr("pop")) {
// #pragma vtordisp(pop)
PP.Lex(Tok);
- Kind = Sema::PVDK_Pop;
+ Action = Sema::PSK_Pop;
}
// not push or pop, could be on/off
} else {
if (Tok.is(tok::r_paren)) {
// #pragma vtordisp()
- Kind = Sema::PVDK_Reset;
+ Action = Sema::PSK_Reset;
}
}
uint64_t Value = 0;
- if (Kind == Sema::PVDK_Push || Kind == Sema::PVDK_Set) {
+ if (Action & Sema::PSK_Push || Action & Sema::PSK_Set) {
const IdentifierInfo *II = Tok.getIdentifierInfo();
if (II && II->isStr("off")) {
PP.Lex(Tok);
@@ -1699,7 +1689,7 @@ void PragmaMSVtorDisp::HandlePragma(Preprocessor &PP,
AnnotTok.setLocation(VtorDispLoc);
AnnotTok.setAnnotationEndLoc(EndLoc);
AnnotTok.setAnnotationValue(reinterpret_cast<void *>(
- static_cast<uintptr_t>((Kind << 16) | (Value & 0xFFFF))));
+ static_cast<uintptr_t>((Action << 16) | (Value & 0xFFFF))));
PP.EnterToken(AnnotTok);
}
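
The encoding above packs the action into the high 16 bits and the mode into the low 16; a sketch of the source forms and the actions they select:

    #pragma vtordisp(push, 2)  // PSK_Push_Set, Value = 2
    #pragma vtordisp(pop)      // PSK_Pop
    #pragma vtordisp()         // PSK_Reset
    #pragma vtordisp(off)      // PSK_Set, Value = 0
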
@@ -1725,10 +1715,11 @@ void PragmaMSPragma::HandlePragma(Preprocessor &PP,
TokenVector.push_back(EoF);
// We must allocate this array with new because EnterTokenStream is going to
// delete it later.
- Token *TokenArray = new Token[TokenVector.size()];
- std::copy(TokenVector.begin(), TokenVector.end(), TokenArray);
+ auto TokenArray = llvm::make_unique<Token[]>(TokenVector.size());
+ std::copy(TokenVector.begin(), TokenVector.end(), TokenArray.get());
auto Value = new (PP.getPreprocessorAllocator())
- std::pair<Token*, size_t>(std::make_pair(TokenArray, TokenVector.size()));
+ std::pair<std::unique_ptr<Token[]>, size_t>(std::move(TokenArray),
+ TokenVector.size());
AnnotTok.setAnnotationValue(Value);
PP.EnterToken(AnnotTok);
}
@@ -1746,10 +1737,10 @@ void PragmaMSPragma::HandlePragma(Preprocessor &PP,
void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP,
PragmaIntroducerKind Introducer,
Token &Tok) {
- SourceLocation CommentLoc = Tok.getLocation();
+ SourceLocation DetectMismatchLoc = Tok.getLocation();
PP.Lex(Tok);
if (Tok.isNot(tok::l_paren)) {
- PP.Diag(CommentLoc, diag::err_expected) << tok::l_paren;
+ PP.Diag(DetectMismatchLoc, diag::err_expected) << tok::l_paren;
return;
}
@@ -1784,10 +1775,10 @@ void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP,
// If the pragma is lexically sound, notify any interested PPCallbacks.
if (PP.getPPCallbacks())
- PP.getPPCallbacks()->PragmaDetectMismatch(CommentLoc, NameString,
+ PP.getPPCallbacks()->PragmaDetectMismatch(DetectMismatchLoc, NameString,
ValueString);
- Actions.ActOnPragmaDetectMismatch(NameString, ValueString);
+ Actions.ActOnPragmaDetectMismatch(DetectMismatchLoc, NameString, ValueString);
}
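
For reference, the pragma being lexed here; the name and value strings are arbitrary examples:

    #pragma detect_mismatch("myLib_version", "9")
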
/// \brief Handle the microsoft \#pragma comment extension.
@@ -1818,22 +1809,22 @@ void PragmaCommentHandler::HandlePragma(Preprocessor &PP,
// Verify that this is one of the 5 whitelisted options.
IdentifierInfo *II = Tok.getIdentifierInfo();
- Sema::PragmaMSCommentKind Kind =
- llvm::StringSwitch<Sema::PragmaMSCommentKind>(II->getName())
- .Case("linker", Sema::PCK_Linker)
- .Case("lib", Sema::PCK_Lib)
- .Case("compiler", Sema::PCK_Compiler)
- .Case("exestr", Sema::PCK_ExeStr)
- .Case("user", Sema::PCK_User)
- .Default(Sema::PCK_Unknown);
- if (Kind == Sema::PCK_Unknown) {
+ PragmaMSCommentKind Kind =
+ llvm::StringSwitch<PragmaMSCommentKind>(II->getName())
+ .Case("linker", PCK_Linker)
+ .Case("lib", PCK_Lib)
+ .Case("compiler", PCK_Compiler)
+ .Case("exestr", PCK_ExeStr)
+ .Case("user", PCK_User)
+ .Default(PCK_Unknown);
+ if (Kind == PCK_Unknown) {
PP.Diag(Tok.getLocation(), diag::err_pragma_comment_unknown_kind);
return;
}
// On PS4, issue a warning about any pragma comments other than
// #pragma comment lib.
- if (PP.getTargetInfo().getTriple().isPS4() && Kind != Sema::PCK_Lib) {
+ if (PP.getTargetInfo().getTriple().isPS4() && Kind != PCK_Lib) {
PP.Diag(Tok.getLocation(), diag::warn_pragma_comment_ignored)
<< II->getName();
return;
@@ -1869,7 +1860,7 @@ void PragmaCommentHandler::HandlePragma(Preprocessor &PP,
if (PP.getPPCallbacks())
PP.getPPCallbacks()->PragmaComment(CommentLoc, II, ArgumentString);
- Actions.ActOnPragmaMSComment(Kind, ArgumentString);
+ Actions.ActOnPragmaMSComment(CommentLoc, Kind, ArgumentString);
}
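
An illustrative form of the pragma; per the check above, on PS4 only the 'lib' kind is accepted and the rest are warned about and ignored:

    #pragma comment(lib, "kernel32.lib")
    #pragma comment(linker, "/DEFAULTLIB:secur32.lib")  // ignored on PS4
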
// #pragma clang optimize off
@@ -2020,6 +2011,7 @@ void PragmaLoopHintHandler::HandlePragma(Preprocessor &PP,
.Case("vectorize", true)
.Case("interleave", true)
.Case("unroll", true)
+ .Case("distribute", true)
.Case("vectorize_width", true)
.Case("interleave_count", true)
.Case("unroll_count", true)
@@ -2059,12 +2051,11 @@ void PragmaLoopHintHandler::HandlePragma(Preprocessor &PP,
return;
}
- Token *TokenArray = new Token[TokenList.size()];
- std::copy(TokenList.begin(), TokenList.end(), TokenArray);
+ auto TokenArray = llvm::make_unique<Token[]>(TokenList.size());
+ std::copy(TokenList.begin(), TokenList.end(), TokenArray.get());
- PP.EnterTokenStream(TokenArray, TokenList.size(),
- /*DisableMacroExpansion=*/false,
- /*OwnsTokens=*/true);
+ PP.EnterTokenStream(std::move(TokenArray), TokenList.size(),
+ /*DisableMacroExpansion=*/false);
}
/// \brief Handle the loop unroll optimization pragmas.
@@ -2127,12 +2118,12 @@ void PragmaUnrollHintHandler::HandlePragma(Preprocessor &PP,
}
// Generate the hint token.
- Token *TokenArray = new Token[1];
+ auto TokenArray = llvm::make_unique<Token[]>(1);
TokenArray[0].startToken();
TokenArray[0].setKind(tok::annot_pragma_loop_hint);
TokenArray[0].setLocation(PragmaName.getLocation());
TokenArray[0].setAnnotationEndLoc(PragmaName.getLocation());
TokenArray[0].setAnnotationValue(static_cast<void *>(Info));
- PP.EnterTokenStream(TokenArray, 1, /*DisableMacroExpansion=*/false,
- /*OwnsTokens=*/true);
+ PP.EnterTokenStream(std::move(TokenArray), 1,
+ /*DisableMacroExpansion=*/false);
}
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp
index edf0dda7df8c..fa8eb12044be 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseStmt.cpp
@@ -107,6 +107,8 @@ Parser::ParseStatementOrDeclaration(StmtVector &Stmts,
ParsedAttributesWithRange Attrs(AttrFactory);
MaybeParseCXX11Attributes(Attrs, nullptr, /*MightBeObjCMessageSend*/ true);
+ if (!MaybeParseOpenCLUnrollHintAttribute(Attrs))
+ return StmtError();
StmtResult Res = ParseStatementOrDeclarationAfterAttributes(
Stmts, Allowed, TrailingElseLoc, Attrs);
@@ -1041,7 +1043,8 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) {
/// ParseParenExprOrCondition:
/// [C ] '(' expression ')'
-/// [C++] '(' condition ')' [not allowed if OnlyAllowCondition=true]
+/// [C++] '(' condition ')'
+/// [C++1z] '(' init-statement[opt] condition ')'
///
/// This function parses and performs error recovery on the specified condition
/// or expression (depending on whether we're in C++ or C mode). This function
@@ -1050,29 +1053,29 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) {
/// should try to recover harder. It returns false if the condition is
/// successfully parsed. Note that a successful parse can still have semantic
/// errors in the condition.
-bool Parser::ParseParenExprOrCondition(ExprResult &ExprResult,
- Decl *&DeclResult,
+bool Parser::ParseParenExprOrCondition(StmtResult *InitStmt,
+ Sema::ConditionResult &Cond,
SourceLocation Loc,
- bool ConvertToBoolean) {
+ Sema::ConditionKind CK) {
BalancedDelimiterTracker T(*this, tok::l_paren);
T.consumeOpen();
if (getLangOpts().CPlusPlus)
- ParseCXXCondition(ExprResult, DeclResult, Loc, ConvertToBoolean);
+ Cond = ParseCXXCondition(InitStmt, Loc, CK);
else {
- ExprResult = ParseExpression();
- DeclResult = nullptr;
+ ExprResult CondExpr = ParseExpression();
// If required, convert to a boolean value.
- if (!ExprResult.isInvalid() && ConvertToBoolean)
- ExprResult
- = Actions.ActOnBooleanCondition(getCurScope(), Loc, ExprResult.get());
+ if (CondExpr.isInvalid())
+ Cond = Sema::ConditionError();
+ else
+ Cond = Actions.ActOnCondition(getCurScope(), Loc, CondExpr.get(), CK);
}
// If the parser was confused by the condition and we don't have a ')', try to
// recover by skipping ahead to a semi and bailing out. If condexp is
// semantically invalid but we have well formed code, keep going.
- if (ExprResult.isInvalid() && !DeclResult && Tok.isNot(tok::r_paren)) {
+ if (Cond.isInvalid() && Tok.isNot(tok::r_paren)) {
SkipUntil(tok::semi);
// Skipping may have stopped if it found the containing ')'. If so, we can
// continue parsing the if statement.
@@ -1107,6 +1110,14 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
assert(Tok.is(tok::kw_if) && "Not an if stmt!");
SourceLocation IfLoc = ConsumeToken(); // eat the 'if'.
+ bool IsConstexpr = false;
+ if (Tok.is(tok::kw_constexpr)) {
+ Diag(Tok, getLangOpts().CPlusPlus1z ? diag::warn_cxx14_compat_constexpr_if
+ : diag::ext_constexpr_if);
+ IsConstexpr = true;
+ ConsumeToken();
+ }
+
if (Tok.isNot(tok::l_paren)) {
Diag(Tok, diag::err_expected_lparen_after) << "if";
SkipUntil(tok::semi);
@@ -1130,12 +1141,16 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
ParseScope IfScope(this, Scope::DeclScope | Scope::ControlScope, C99orCXX);
// Parse the condition.
- ExprResult CondExp;
- Decl *CondVar = nullptr;
- if (ParseParenExprOrCondition(CondExp, CondVar, IfLoc, true))
+ StmtResult InitStmt;
+ Sema::ConditionResult Cond;
+ if (ParseParenExprOrCondition(&InitStmt, Cond, IfLoc,
+ IsConstexpr ? Sema::ConditionKind::ConstexprIf
+ : Sema::ConditionKind::Boolean))
return StmtError();
- FullExprArg FullCondExp(Actions.MakeFullExpr(CondExp.get(), IfLoc));
+ llvm::Optional<bool> ConstexprCondition;
+ if (IsConstexpr)
+ ConstexprCondition = Cond.getKnownValue();
// C99 6.8.4p3 - In C99, the body of the if statement is a scope, even if
// there is no compound stmt. C90 does not have this clause. We only do this
@@ -1161,7 +1176,13 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
SourceLocation ThenStmtLoc = Tok.getLocation();
SourceLocation InnerStatementTrailingElseLoc;
- StmtResult ThenStmt(ParseStatement(&InnerStatementTrailingElseLoc));
+ StmtResult ThenStmt;
+ {
+ EnterExpressionEvaluationContext PotentiallyDiscarded(
+ Actions, Sema::DiscardedStatement, nullptr, false,
+ /*ShouldEnter=*/ConstexprCondition && !*ConstexprCondition);
+ ThenStmt = ParseStatement(&InnerStatementTrailingElseLoc);
+ }
// Pop the 'if' scope if needed.
InnerScope.Exit();
@@ -1187,8 +1208,12 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
// The substatement in a selection-statement (each substatement, in the else
// form of the if statement) implicitly defines a local scope.
//
- ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace));
+ ParseScope InnerScope(this, Scope::DeclScope, C99orCXX,
+ Tok.is(tok::l_brace));
+ EnterExpressionEvaluationContext PotentiallyDiscarded(
+ Actions, Sema::DiscardedStatement, nullptr, false,
+ /*ShouldEnter=*/ConstexprCondition && *ConstexprCondition);
ElseStmt = ParseStatement();
// Pop the 'else' scope if needed.
@@ -1219,8 +1244,8 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
if (ElseStmt.isInvalid())
ElseStmt = Actions.ActOnNullStmt(ElseStmtLoc);
- return Actions.ActOnIfStmt(IfLoc, FullCondExp, CondVar, ThenStmt.get(),
- ElseLoc, ElseStmt.get());
+ return Actions.ActOnIfStmt(IfLoc, IsConstexpr, InitStmt.get(), Cond,
+ ThenStmt.get(), ElseLoc, ElseStmt.get());
}
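
A hedged sketch of the two C++1z forms this rewrite handles (use4, lookup, and consume are placeholder functions); the discarded-statement contexts entered above keep the not-taken branch of a constexpr if from being fully analyzed:

    template <typename T> int f(T t) {
      if constexpr (sizeof(T) == 4)  // value read via Cond.getKnownValue()
        return use4(t);              // discarded unless the condition holds
      else
        return 0;
    }

    if (auto p = lookup(k); p != nullptr)  // C++1z init-statement form
      consume(*p);
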
/// ParseSwitchStatement
@@ -1257,13 +1282,14 @@ StmtResult Parser::ParseSwitchStatement(SourceLocation *TrailingElseLoc) {
ParseScope SwitchScope(this, ScopeFlags);
// Parse the condition.
- ExprResult Cond;
- Decl *CondVar = nullptr;
- if (ParseParenExprOrCondition(Cond, CondVar, SwitchLoc, false))
+ StmtResult InitStmt;
+ Sema::ConditionResult Cond;
+ if (ParseParenExprOrCondition(&InitStmt, Cond, SwitchLoc,
+ Sema::ConditionKind::Switch))
return StmtError();
- StmtResult Switch
- = Actions.ActOnStartOfSwitchStmt(SwitchLoc, Cond.get(), CondVar);
+ StmtResult Switch =
+ Actions.ActOnStartOfSwitchStmt(SwitchLoc, InitStmt.get(), Cond);
if (Switch.isInvalid()) {
// Skip the switch body.
@@ -1345,13 +1371,11 @@ StmtResult Parser::ParseWhileStatement(SourceLocation *TrailingElseLoc) {
ParseScope WhileScope(this, ScopeFlags);
// Parse the condition.
- ExprResult Cond;
- Decl *CondVar = nullptr;
- if (ParseParenExprOrCondition(Cond, CondVar, WhileLoc, true))
+ Sema::ConditionResult Cond;
+ if (ParseParenExprOrCondition(nullptr, Cond, WhileLoc,
+ Sema::ConditionKind::Boolean))
return StmtError();
- FullExprArg FullCond(Actions.MakeFullExpr(Cond.get(), WhileLoc));
-
// C99 6.8.5p5 - In C99, the body of the while statement is a scope, even if
// there is no compound stmt. C90 does not have this clause. We only do this
// if the body isn't a compound statement to avoid push/pop in common cases.
@@ -1372,10 +1396,10 @@ StmtResult Parser::ParseWhileStatement(SourceLocation *TrailingElseLoc) {
InnerScope.Exit();
WhileScope.Exit();
- if ((Cond.isInvalid() && !CondVar) || Body.isInvalid())
+ if (Cond.isInvalid() || Body.isInvalid())
return StmtError();
- return Actions.ActOnWhileStmt(WhileLoc, FullCond, CondVar, Body.get());
+ return Actions.ActOnWhileStmt(WhileLoc, Cond, Body.get());
}
/// ParseDoStatement
@@ -1533,12 +1557,10 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
bool ForEach = false, ForRange = false;
StmtResult FirstPart;
- bool SecondPartIsInvalid = false;
- FullExprArg SecondPart(Actions);
+ Sema::ConditionResult SecondPart;
ExprResult Collection;
ForRangeInit ForRangeInit;
FullExprArg ThirdPart(Actions);
- Decl *SecondVar = nullptr;
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteOrdinaryName(getCurScope(),
@@ -1632,7 +1654,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
ConsumeToken(); // consume 'in'
if (Tok.is(tok::code_completion)) {
- Actions.CodeCompleteObjCForCollection(getCurScope(), DeclGroupPtrTy());
+ Actions.CodeCompleteObjCForCollection(getCurScope(), nullptr);
cutOffParsing();
return StmtError();
}
@@ -1643,7 +1665,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
Diag(Tok, diag::err_for_range_expected_decl)
<< FirstPart.get()->getSourceRange();
SkipUntil(tok::r_paren, StopBeforeMatch);
- SecondPartIsInvalid = true;
+ SecondPart = Sema::ConditionError();
} else {
if (!Value.isInvalid()) {
Diag(Tok, diag::err_expected_semi_for);
@@ -1658,29 +1680,29 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
// Parse the second part of the for specifier.
getCurScope()->AddFlags(Scope::BreakScope | Scope::ContinueScope);
- if (!ForEach && !ForRange) {
- assert(!SecondPart.get() && "Shouldn't have a second expression yet.");
+ if (!ForEach && !ForRange && !SecondPart.isInvalid()) {
// Parse the second part of the for specifier.
if (Tok.is(tok::semi)) { // for (...;;
// no second part.
} else if (Tok.is(tok::r_paren)) {
// missing both semicolons.
} else {
- ExprResult Second;
if (getLangOpts().CPlusPlus)
- ParseCXXCondition(Second, SecondVar, ForLoc, true);
+ SecondPart =
+ ParseCXXCondition(nullptr, ForLoc, Sema::ConditionKind::Boolean);
else {
- Second = ParseExpression();
- if (!Second.isInvalid())
- Second = Actions.ActOnBooleanCondition(getCurScope(), ForLoc,
- Second.get());
+ ExprResult SecondExpr = ParseExpression();
+ if (SecondExpr.isInvalid())
+ SecondPart = Sema::ConditionError();
+ else
+ SecondPart =
+ Actions.ActOnCondition(getCurScope(), ForLoc, SecondExpr.get(),
+ Sema::ConditionKind::Boolean);
}
- SecondPartIsInvalid = Second.isInvalid();
- SecondPart = Actions.MakeFullExpr(Second.get(), ForLoc);
}
if (Tok.isNot(tok::semi)) {
- if (!SecondPartIsInvalid || SecondVar)
+ if (!SecondPart.isInvalid())
Diag(Tok, diag::err_expected_semi_for);
else
// Skip until semicolon or rparen, don't consume it.
@@ -1716,9 +1738,11 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
StmtResult ForEachStmt;
if (ForRange) {
+ ExprResult CorrectedRange =
+ Actions.CorrectDelayedTyposInExpr(ForRangeInit.RangeExpr.get());
ForRangeStmt = Actions.ActOnCXXForRangeStmt(
getCurScope(), ForLoc, CoawaitLoc, FirstPart.get(),
- ForRangeInit.ColonLoc, ForRangeInit.RangeExpr.get(),
+ ForRangeInit.ColonLoc, CorrectedRange.get(),
T.getCloseLocation(), Sema::BFRK_Build);
// Similarly, we need to do the semantic analysis for a for-range
@@ -1777,8 +1801,8 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
return Actions.FinishCXXForRangeStmt(ForRangeStmt.get(), Body.get());
return Actions.ActOnForStmt(ForLoc, T.getOpenLocation(), FirstPart.get(),
- SecondPart, SecondVar, ThirdPart,
- T.getCloseLocation(), Body.get());
+ SecondPart, ThirdPart, T.getCloseLocation(),
+ Body.get());
}
/// ParseGotoStatement
@@ -1912,19 +1936,14 @@ Decl *Parser::ParseFunctionStatementBody(Decl *Decl, ParseScope &BodyScope) {
assert(Tok.is(tok::l_brace));
SourceLocation LBraceLoc = Tok.getLocation();
- if (SkipFunctionBodies && (!Decl || Actions.canSkipFunctionBody(Decl)) &&
- trySkippingFunctionBody()) {
- BodyScope.Exit();
- return Actions.ActOnSkippedFunctionBody(Decl);
- }
-
PrettyDeclStackTraceEntry CrashInfo(Actions, Decl, LBraceLoc,
"parsing function body");
// Save and reset current vtordisp stack if we have entered a C++ method body.
bool IsCXXMethod =
getLangOpts().CPlusPlus && Decl && isa<CXXMethodDecl>(Decl);
- Sema::VtorDispStackRAII SavedVtorDispStack(Actions, IsCXXMethod);
+ Sema::PragmaStackSentinelRAII
+ PragmaStackSentinel(Actions, "InternalPragmaState", IsCXXMethod);
// Do not enter a scope for the brace, as the arguments are in the same scope
// (the function body) as the body itself. Instead, just read the statement
@@ -1959,16 +1978,11 @@ Decl *Parser::ParseFunctionTryBlock(Decl *Decl, ParseScope &BodyScope) {
else
Actions.ActOnDefaultCtorInitializers(Decl);
- if (SkipFunctionBodies && Actions.canSkipFunctionBody(Decl) &&
- trySkippingFunctionBody()) {
- BodyScope.Exit();
- return Actions.ActOnSkippedFunctionBody(Decl);
- }
-
// Save and reset current vtordisp stack if we have entered a C++ method body.
bool IsCXXMethod =
getLangOpts().CPlusPlus && Decl && isa<CXXMethodDecl>(Decl);
- Sema::VtorDispStackRAII SavedVtorDispStack(Actions, IsCXXMethod);
+ Sema::PragmaStackSentinelRAII
+ PragmaStackSentinel(Actions, "InternalPragmaState", IsCXXMethod);
SourceLocation LBraceLoc = Tok.getLocation();
StmtResult FnBody(ParseCXXTryBlockCommon(TryLoc, /*FnTry*/true));
@@ -1984,27 +1998,43 @@ Decl *Parser::ParseFunctionTryBlock(Decl *Decl, ParseScope &BodyScope) {
}
bool Parser::trySkippingFunctionBody() {
- assert(Tok.is(tok::l_brace));
assert(SkipFunctionBodies &&
"Should only be called when SkipFunctionBodies is enabled");
-
if (!PP.isCodeCompletionEnabled()) {
- ConsumeBrace();
- SkipUntil(tok::r_brace);
+ SkipFunctionBody();
return true;
}
// We're in code-completion mode. Skip parsing for all function bodies unless
// the body contains the code-completion point.
TentativeParsingAction PA(*this);
- ConsumeBrace();
- if (SkipUntil(tok::r_brace, StopAtCodeCompletion)) {
+ bool IsTryCatch = Tok.is(tok::kw_try);
+ CachedTokens Toks;
+ bool ErrorInPrologue = ConsumeAndStoreFunctionPrologue(Toks);
+ if (llvm::any_of(Toks, [](const Token &Tok) {
+ return Tok.is(tok::code_completion);
+ })) {
+ PA.Revert();
+ return false;
+ }
+ if (ErrorInPrologue) {
PA.Commit();
+ SkipMalformedDecl();
return true;
}
-
- PA.Revert();
- return false;
+ if (!SkipUntil(tok::r_brace, StopAtCodeCompletion)) {
+ PA.Revert();
+ return false;
+ }
+ while (IsTryCatch && Tok.is(tok::kw_catch)) {
+ if (!SkipUntil(tok::l_brace, StopAtCodeCompletion) ||
+ !SkipUntil(tok::r_brace, StopAtCodeCompletion)) {
+ PA.Revert();
+ return false;
+ }
+ }
+ PA.Commit();
+ return true;
}
/// ParseCXXTryBlock - Parse a C++ try-block.
@@ -2206,3 +2236,19 @@ void Parser::ParseMicrosoftIfExistsStatement(StmtVector &Stmts) {
}
Braces.consumeClose();
}
+
+bool Parser::ParseOpenCLUnrollHintAttribute(ParsedAttributes &Attrs) {
+ MaybeParseGNUAttributes(Attrs);
+
+ if (Attrs.empty())
+ return true;
+
+ if (Attrs.getList()->getKind() != AttributeList::AT_OpenCLUnrollHint)
+ return true;
+
+ if (!(Tok.is(tok::kw_for) || Tok.is(tok::kw_while) || Tok.is(tok::kw_do))) {
+ Diag(Tok, diag::err_opencl_unroll_hint_on_non_loop);
+ return false;
+ }
+ return true;
+}
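
A sketch of the attribute this guard enforces; dst, src, and N are placeholders, and only a for/while/do statement may follow:

    __attribute__((opencl_unroll_hint(4)))
    for (int i = 0; i < N; ++i)  // accepted: a loop follows the attribute
      dst[i] = src[i];

    __attribute__((opencl_unroll_hint(4)))
    x = 0;                       // rejected: err_opencl_unroll_hint_on_non_loop
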
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp
index 142b473755de..1f63dc257b86 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseStmtAsm.cpp
@@ -17,16 +17,17 @@
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
@@ -199,9 +200,7 @@ ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks,
// Also copy the current token over.
LineToks.push_back(Tok);
- PP.EnterTokenStream(LineToks.begin(), LineToks.size(),
- /*disable macros*/ true,
- /*owns tokens*/ false);
+ PP.EnterTokenStream(LineToks, /*DisableMacroExpansions*/ true);
// Clear the current token and advance to the first token in LineToks.
ConsumeAnyToken();
@@ -209,7 +208,7 @@ ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks,
// Parse an optional scope-specifier if we're in C++.
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus) {
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false);
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, /*EnteringContext=*/false);
}
// Require an identifier here.
@@ -221,12 +220,11 @@ ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks,
Result = ParseCXXThis();
Invalid = false;
} else {
- Invalid =
- ParseUnqualifiedId(SS,
- /*EnteringContext=*/false,
- /*AllowDestructorName=*/false,
- /*AllowConstructorName=*/false,
- /*ObjectType=*/ParsedType(), TemplateKWLoc, Id);
+ Invalid = ParseUnqualifiedId(SS,
+ /*EnteringContext=*/false,
+ /*AllowDestructorName=*/false,
+ /*AllowConstructorName=*/false,
+ /*ObjectType=*/nullptr, TemplateKWLoc, Id);
// Perform the lookup.
Result = Actions.LookupInlineAsmIdentifier(SS, TemplateKWLoc, Id, Info,
IsUnevaluatedContext);
@@ -337,6 +335,33 @@ static bool buildMSAsmString(Preprocessor &PP, SourceLocation AsmLoc,
return false;
}
+/// isTypeQualifier - Return true if the given token could be the
+/// start of a type-qualifier-list.
+static bool isTypeQualifier(const Token &Tok) {
+ switch (Tok.getKind()) {
+ default: return false;
+ // type-qualifier
+ case tok::kw_const:
+ case tok::kw_volatile:
+ case tok::kw_restrict:
+ case tok::kw___private:
+ case tok::kw___local:
+ case tok::kw___global:
+ case tok::kw___constant:
+ case tok::kw___generic:
+ case tok::kw___read_only:
+ case tok::kw___read_write:
+ case tok::kw___write_only:
+ return true;
+ }
+}
+
+// Determine if this is a GCC-style asm statement.
+static bool isGCCAsmStatement(const Token &TokAfterAsm) {
+ return TokAfterAsm.is(tok::l_paren) || TokAfterAsm.is(tok::kw_goto) ||
+ isTypeQualifier(TokAfterAsm);
+}
+
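+// Illustrative statements the classification above separates ('done' is a
+// placeholder label; the MS form requires -fms-extensions/-fasm-blocks):
+//
+//   asm volatile ("nop");               // GCC-style: qualifier or '(' follows
+//   asm goto ("jmp %l0" : : : : done);  // GCC-style: 'goto' follows; currently
+//                                       // rejected with a diagnostic (below)
+//   __asm { mov eax, 0 }                // MS-style block
+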
/// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled,
/// this routine is called to collect the tokens for an MS asm statement.
///
@@ -392,6 +417,7 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
if (!InAsmComment && Tok.is(tok::l_brace)) {
// Consume the opening brace.
SkippedStartOfLine = Tok.isAtStartOfLine();
+ AsmToks.push_back(Tok);
EndLoc = ConsumeBrace();
BraceNesting++;
LBraceLocs.push_back(EndLoc);
@@ -416,15 +442,19 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
if (ExpLoc.first != FID ||
SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) {
// If this is a single-line __asm, we're done, except if the next
- // line begins with an __asm too, in which case we finish a comment
+ // line is MS-style asm too, in which case we finish a comment
// if needed and then keep processing the next line as a single
// line __asm.
bool isAsm = Tok.is(tok::kw_asm);
- if (SingleLineMode && !isAsm)
+ if (SingleLineMode && (!isAsm || isGCCAsmStatement(NextToken())))
break;
// We're no longer in a comment.
InAsmComment = false;
if (isAsm) {
+      // If this is a new __asm {} block, we want to process it separately
+      // from the single-line __asm statements.
+ if (PP.LookAhead(0).is(tok::l_brace))
+ break;
LineNo = SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second);
SkippedStartOfLine = Tok.isAtStartOfLine();
}
@@ -440,6 +470,11 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
BraceCount == (savedBraceCount + BraceNesting)) {
// Consume the closing brace.
SkippedStartOfLine = Tok.isAtStartOfLine();
+      // Don't add the outermost closing brace of the whole asm block to
+      // AsmToks.
+ if (SingleLineMode || BraceNesting > 1) {
+ Tok.clearFlag(Token::LeadingSpace);
+ AsmToks.push_back(Tok);
+ }
EndLoc = ConsumeBrace();
BraceNesting--;
// Finish if all of the opened braces in the inline asm section were
@@ -523,18 +558,22 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString))
return StmtError();
+ TargetOptions TO = Actions.Context.getTargetInfo().getTargetOpts();
+ std::string FeaturesStr =
+ llvm::join(TO.Features.begin(), TO.Features.end(), ",");
+
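A self-contained sketch of the llvm::join call used above (feature names are hypothetical):

#include "llvm/ADT/StringExtras.h"
#include <string>
#include <vector>

// Joins the feature list with ',' separators: returns "+sse2,+cx16".
std::string joinedFeatures() {
  std::vector<std::string> Features = {"+sse2", "+cx16"};
  return llvm::join(Features.begin(), Features.end(), ",");
}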
std::unique_ptr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT));
std::unique_ptr<llvm::MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT));
// Get the instruction descriptor.
std::unique_ptr<llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo());
std::unique_ptr<llvm::MCObjectFileInfo> MOFI(new llvm::MCObjectFileInfo());
std::unique_ptr<llvm::MCSubtargetInfo> STI(
- TheTarget->createMCSubtargetInfo(TT, "", ""));
+ TheTarget->createMCSubtargetInfo(TT, TO.CPU, FeaturesStr));
llvm::SourceMgr TempSrcMgr;
llvm::MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &TempSrcMgr);
- MOFI->InitMCObjectFileInfo(TheTriple, llvm::Reloc::Default,
- llvm::CodeModel::Default, Ctx);
+ MOFI->InitMCObjectFileInfo(TheTriple, /*PIC*/ false, llvm::CodeModel::Default,
+ Ctx);
std::unique_ptr<llvm::MemoryBuffer> Buffer =
llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>");
@@ -631,8 +670,7 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
assert(Tok.is(tok::kw_asm) && "Not an asm stmt");
SourceLocation AsmLoc = ConsumeToken();
- if (getLangOpts().AsmBlocks && Tok.isNot(tok::l_paren) &&
- !isTypeQualifier()) {
+ if (getLangOpts().AsmBlocks && !isGCCAsmStatement(Tok)) {
msAsm = true;
return ParseMicrosoftAsmStatement(AsmLoc);
}
@@ -652,6 +690,14 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
// Remember if this was a volatile asm.
bool isVolatile = DS.getTypeQualifiers() & DeclSpec::TQ_volatile;
+
+ // TODO: support "asm goto" constructs (PR#9295).
+ if (Tok.is(tok::kw_goto)) {
+ Diag(Tok, diag::err_asm_goto_not_supported_yet);
+ SkipUntil(tok::r_paren, StopAtSemi);
+ return StmtError();
+ }
+
if (Tok.isNot(tok::l_paren)) {
Diag(Tok, diag::err_expected_lparen_after) << "asm";
SkipUntil(tok::r_paren, StopAtSemi);
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp
index a4dcdb1e2a09..6cf7b6d3dc55 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp
@@ -122,20 +122,15 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
return nullptr;
}
- ParamLists.push_back(
- Actions.ActOnTemplateParameterList(CurTemplateDepthTracker.getDepth(),
- ExportLoc,
- TemplateLoc, LAngleLoc,
- TemplateParams, RAngleLoc));
-
+ ExprResult OptionalRequiresClauseConstraintER;
if (!TemplateParams.empty()) {
isSpecialization = false;
++CurTemplateDepthTracker;
if (TryConsumeToken(tok::kw_requires)) {
- ExprResult ER =
+ OptionalRequiresClauseConstraintER =
Actions.CorrectDelayedTyposInExpr(ParseConstraintExpression());
- if (!ER.isUsable()) {
+ if (!OptionalRequiresClauseConstraintER.isUsable()) {
// Skip until the semi-colon or a '}'.
SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch);
TryConsumeToken(tok::semi);
@@ -145,8 +140,15 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
} else {
LastParamListWasEmpty = true;
}
+
+ ParamLists.push_back(Actions.ActOnTemplateParameterList(
+ CurTemplateDepthTracker.getDepth(), ExportLoc, TemplateLoc, LAngleLoc,
+ TemplateParams, RAngleLoc, OptionalRequiresClauseConstraintER.get()));
} while (Tok.isOneOf(tok::kw_export, tok::kw_template));
+ unsigned NewFlags = getCurScope()->getFlags() & ~Scope::TemplateParamScope;
+ ParseScopeFlags TemplateScopeFlags(this, NewFlags, isSpecialization);
+
// Parse the actual template declaration.
return ParseSingleDeclarationAfterTemplate(Context,
ParsedTemplateInfo(&ParamLists,
@@ -209,11 +211,15 @@ Parser::ParseSingleDeclarationAfterTemplate(
if (Tok.is(tok::semi)) {
ProhibitAttributes(prefixAttrs);
DeclEnd = ConsumeToken();
+ RecordDecl *AnonRecord = nullptr;
Decl *Decl = Actions.ParsedFreeStandingDeclSpec(
getCurScope(), AS, DS,
TemplateInfo.TemplateParams ? *TemplateInfo.TemplateParams
: MultiTemplateParamsArg(),
- TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation);
+ TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation,
+ AnonRecord);
+ assert(!AnonRecord &&
+ "Anonymous unions/structs should not be valid with template");
DS.complete(Decl);
return Decl;
}
@@ -280,7 +286,7 @@ Parser::ParseSingleDeclarationAfterTemplate(
TemplateParameterLists FakedParamLists;
FakedParamLists.push_back(Actions.ActOnTemplateParameterList(
0, SourceLocation(), TemplateInfo.TemplateLoc, LAngleLoc, None,
- LAngleLoc));
+ LAngleLoc, nullptr));
return ParseFunctionDefinition(
DeclaratorInfo, ParsedTemplateInfo(&FakedParamLists,
@@ -631,7 +637,7 @@ Parser::ParseTemplateTemplateParameter(unsigned Depth, unsigned Position) {
Actions.ActOnTemplateParameterList(Depth, SourceLocation(),
TemplateLoc, LAngleLoc,
TemplateParams,
- RAngleLoc);
+ RAngleLoc, nullptr);
// Grab a default argument (if available).
// Per C++0x [basic.scope.pdecl]p9, we parse the default argument before
@@ -827,6 +833,7 @@ bool Parser::ParseGreaterThanInTemplateList(SourceLocation &RAngleLoc,
}
// Strip the initial '>' from the token.
+ Token PrevTok = Tok;
if (RemainingToken == tok::equal && Next.is(tok::equal) &&
areTokensAdjacent(Tok, Next)) {
// Join two adjacent '=' tokens into one, for cases like:
@@ -843,6 +850,21 @@ bool Parser::ParseGreaterThanInTemplateList(SourceLocation &RAngleLoc,
PP.getSourceManager(),
getLangOpts()));
+  // The advance from '>>' to '>' in an Objective-C template argument list needs
+ // to be properly reflected in the token cache to allow correct interaction
+ // between annotation and backtracking.
+ if (ObjCGenericList && PrevTok.getKind() == tok::greatergreater &&
+ RemainingToken == tok::greater && PP.IsPreviousCachedToken(PrevTok)) {
+ PrevTok.setKind(RemainingToken);
+ PrevTok.setLength(1);
+ // Break tok::greatergreater into two tok::greater but only add the second
+ // one in case the client asks to consume the last token.
+ if (ConsumeLastToken)
+ PP.ReplacePreviousCachedToken({PrevTok, Tok});
+ else
+ PP.ReplacePreviousCachedToken({PrevTok});
+ }
+
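A minimal illustration of the token split this bookkeeping mirrors, shown with a C++ declaration for brevity (the patch extends the same cache rewriting to the analogous Objective-C generic argument list, e.g. NSArray<NSArray<NSString *> *> *):

#include <vector>
std::vector<std::vector<int>> v;  // the closing '>>' is split into two '>'
                                  // tokens; the cached token is now
                                  // rewritten to match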
if (!ConsumeLastToken) {
// Since we're not supposed to consume the '>' token, we need to push
// this token and revert the current token back to the '>'.
@@ -1061,7 +1083,7 @@ void Parser::AnnotateTemplateIdTokenAsType() {
TemplateId->RAngleLoc);
// Create the new "type" annotation token.
Tok.setKind(tok::annot_typename);
- setTypeAnnotation(Tok, Type.isInvalid() ? ParsedType() : Type.get());
+ setTypeAnnotation(Tok, Type.isInvalid() ? nullptr : Type.get());
if (TemplateId->SS.isNotEmpty()) // it was a C++ qualified type name.
Tok.setLocation(TemplateId->SS.getBeginLoc());
// End location stays the same
@@ -1094,9 +1116,9 @@ ParsedTemplateArgument Parser::ParseTemplateTemplateArgument() {
// followed by a token that terminates a template argument, such as ',',
// '>', or (in some cases) '>>'.
CXXScopeSpec SS; // nested-name-specifier, if present
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(),
+ ParseOptionalCXXScopeSpecifier(SS, nullptr,
/*EnteringContext=*/false);
-
+
ParsedTemplateArgument Result;
SourceLocation EllipsisLoc;
if (SS.isSet() && Tok.is(tok::kw_template)) {
@@ -1117,11 +1139,10 @@ ParsedTemplateArgument Parser::ParseTemplateTemplateArgument() {
// template argument.
TemplateTy Template;
if (isEndOfTemplateArgument(Tok) &&
- Actions.ActOnDependentTemplateName(getCurScope(),
- SS, TemplateKWLoc, Name,
- /*ObjectType=*/ ParsedType(),
- /*EnteringContext=*/false,
- Template))
+ Actions.ActOnDependentTemplateName(
+ getCurScope(), SS, TemplateKWLoc, Name,
+ /*ObjectType=*/nullptr,
+ /*EnteringContext=*/false, Template))
Result = ParsedTemplateArgument(SS, Template, Name.StartLocation);
}
} else if (Tok.is(tok::identifier)) {
@@ -1135,13 +1156,11 @@ ParsedTemplateArgument Parser::ParseTemplateTemplateArgument() {
if (isEndOfTemplateArgument(Tok)) {
bool MemberOfUnknownSpecialization;
- TemplateNameKind TNK = Actions.isTemplateName(getCurScope(), SS,
- /*hasTemplateKeyword=*/false,
- Name,
- /*ObjectType=*/ ParsedType(),
- /*EnteringContext=*/false,
- Template,
- MemberOfUnknownSpecialization);
+ TemplateNameKind TNK = Actions.isTemplateName(
+ getCurScope(), SS,
+ /*hasTemplateKeyword=*/false, Name,
+ /*ObjectType=*/nullptr,
+ /*EnteringContext=*/false, Template, MemberOfUnknownSpecialization);
if (TNK == TNK_Dependent_template_name || TNK == TNK_Type_template) {
// We have an id-expression that refers to a class template or
// (C++0x) alias template.
@@ -1352,7 +1371,7 @@ void Parser::ParseLateTemplatedFuncDef(LateParsedTemplate &LPT) {
// Append the current token at the end of the new token stream so that it
// doesn't get lost.
LPT.Toks.push_back(Tok);
- PP.EnterTokenStream(LPT.Toks.data(), LPT.Toks.size(), true, false);
+ PP.EnterTokenStream(LPT.Toks, true);
// Consume the previously pushed token.
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp
index 6fbcfd9bd217..7703c33b8780 100644
--- a/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/ParseTentative.cpp
@@ -125,10 +125,11 @@ bool Parser::isCXXSimpleDeclaration(bool AllowForRangeDecl) {
// Ok, we have a simple-type-specifier/typename-specifier followed by a '(',
// or an identifier which doesn't resolve as anything. We need tentative
// parsing...
-
- TentativeParsingAction PA(*this);
- TPR = TryParseSimpleDeclaration(AllowForRangeDecl);
- PA.Revert();
+
+ {
+ RevertingTentativeParsingAction PA(*this);
+ TPR = TryParseSimpleDeclaration(AllowForRangeDecl);
+ }
// In case of an error, let the declaration parsing code handle it.
if (TPR == TPResult::Error)
@@ -329,10 +330,70 @@ Parser::TPResult Parser::TryParseInitDeclaratorList() {
return TPResult::Ambiguous;
}
-/// isCXXConditionDeclaration - Disambiguates between a declaration or an
-/// expression for a condition of a if/switch/while/for statement.
-/// If during the disambiguation process a parsing error is encountered,
-/// the function returns true to let the declaration parsing code handle it.
+struct Parser::ConditionDeclarationOrInitStatementState {
+ Parser &P;
+ bool CanBeExpression = true;
+ bool CanBeCondition = true;
+ bool CanBeInitStatement;
+
+ ConditionDeclarationOrInitStatementState(Parser &P, bool CanBeInitStatement)
+ : P(P), CanBeInitStatement(CanBeInitStatement) {}
+
+ void markNotExpression() {
+ CanBeExpression = false;
+
+ if (CanBeCondition && CanBeInitStatement) {
+ // FIXME: Unify the parsing codepaths for condition variables and
+ // simple-declarations so that we don't need to eagerly figure out which
+ // kind we have here. (Just parse init-declarators until we reach a
+ // semicolon or right paren.)
+ RevertingTentativeParsingAction PA(P);
+ P.SkipUntil(tok::r_paren, tok::semi, StopBeforeMatch);
+ if (P.Tok.isNot(tok::r_paren))
+ CanBeCondition = false;
+ if (P.Tok.isNot(tok::semi))
+ CanBeInitStatement = false;
+ }
+ }
+
+ bool markNotCondition() {
+ CanBeCondition = false;
+ return !CanBeInitStatement || !CanBeExpression;
+ }
+
+ bool update(TPResult IsDecl) {
+ switch (IsDecl) {
+ case TPResult::True:
+ markNotExpression();
+ return true;
+ case TPResult::False:
+ CanBeCondition = CanBeInitStatement = false;
+ return true;
+ case TPResult::Ambiguous:
+ return false;
+ case TPResult::Error:
+ CanBeExpression = CanBeCondition = CanBeInitStatement = false;
+ return true;
+ }
+ llvm_unreachable("unknown tentative parse result");
+ }
+
+ ConditionOrInitStatement result() const {
+ assert(CanBeExpression + CanBeCondition + CanBeInitStatement < 2 &&
+ "result called but not yet resolved");
+ if (CanBeExpression)
+ return ConditionOrInitStatement::Expression;
+ if (CanBeCondition)
+ return ConditionOrInitStatement::ConditionDecl;
+ if (CanBeInitStatement)
+ return ConditionOrInitStatement::InitStmtDecl;
+ return ConditionOrInitStatement::Error;
+ }
+};
+
+/// \brief Disambiguates between a declaration in a condition, a
+/// simple-declaration in an init-statement, and an expression for
+/// a condition of an if/switch statement.
///
/// condition:
/// expression
@@ -341,47 +402,64 @@ Parser::TPResult Parser::TryParseInitDeclaratorList() {
/// [C++11] type-specifier-seq declarator braced-init-list
/// [GNU] type-specifier-seq declarator simple-asm-expr[opt] attributes[opt]
/// '=' assignment-expression
+/// simple-declaration:
+/// decl-specifier-seq init-declarator-list[opt] ';'
///
-bool Parser::isCXXConditionDeclaration() {
- TPResult TPR = isCXXDeclarationSpecifier();
- if (TPR != TPResult::Ambiguous)
- return TPR != TPResult::False; // Returns true for TPResult::True or
- // TPResult::Error.
-
- // FIXME: Add statistics about the number of ambiguous statements encountered
- // and how they were resolved (number of declarations+number of expressions).
+/// Note that, unlike isCXXSimpleDeclaration, we must disambiguate all the way
+/// to the ';' to distinguish cases like 'int(x))' (an expression) from
+/// 'int(x);' (a simple-declaration in an init-statement).
+Parser::ConditionOrInitStatement
+Parser::isCXXConditionDeclarationOrInitStatement(bool CanBeInitStatement) {
+ ConditionDeclarationOrInitStatementState State(*this, CanBeInitStatement);
- // Ok, we have a simple-type-specifier/typename-specifier followed by a '('.
- // We need tentative parsing...
+ if (State.update(isCXXDeclarationSpecifier()))
+ return State.result();
- TentativeParsingAction PA(*this);
+ // It might be a declaration; we need tentative parsing.
+ RevertingTentativeParsingAction PA(*this);
- // type-specifier-seq
- TryConsumeDeclarationSpecifier();
+ // FIXME: A tag definition unambiguously tells us this is an init-statement.
+ if (State.update(TryConsumeDeclarationSpecifier()))
+ return State.result();
assert(Tok.is(tok::l_paren) && "Expected '('");
- // declarator
- TPR = TryParseDeclarator(false/*mayBeAbstract*/);
+ while (true) {
+ // Consume a declarator.
+ if (State.update(TryParseDeclarator(false/*mayBeAbstract*/)))
+ return State.result();
+
+ // Attributes, asm label, or an initializer imply this is not an expression.
+ // FIXME: Disambiguate properly after an = instead of assuming that it's a
+ // valid declaration.
+ if (Tok.isOneOf(tok::equal, tok::kw_asm, tok::kw___attribute) ||
+ (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace))) {
+ State.markNotExpression();
+ return State.result();
+ }
- // In case of an error, let the declaration parsing code handle it.
- if (TPR == TPResult::Error)
- TPR = TPResult::True;
+ // At this point, it can't be a condition any more, because a condition
+ // must have a brace-or-equal-initializer.
+ if (State.markNotCondition())
+ return State.result();
- if (TPR == TPResult::Ambiguous) {
- // '='
- // [GNU] simple-asm-expr[opt] attributes[opt]
- if (Tok.isOneOf(tok::equal, tok::kw_asm, tok::kw___attribute))
- TPR = TPResult::True;
- else if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace))
- TPR = TPResult::True;
- else
- TPR = TPResult::False;
- }
+ // A parenthesized initializer could be part of an expression or a
+ // simple-declaration.
+ if (Tok.is(tok::l_paren)) {
+ ConsumeParen();
+ SkipUntil(tok::r_paren, StopAtSemi);
+ }
- PA.Revert();
+ if (!TryConsumeToken(tok::comma))
+ break;
+ }
- assert(TPR == TPResult::True || TPR == TPResult::False);
- return TPR == TPResult::True;
+ // We reached the end. If it can now be some kind of decl, then it is.
+ if (State.CanBeCondition && Tok.is(tok::r_paren))
+ return ConditionOrInitStatement::ConditionDecl;
+ else if (State.CanBeInitStatement && Tok.is(tok::semi))
+ return ConditionOrInitStatement::InitStmtDecl;
+ else
+ return ConditionOrInitStatement::Expression;
}
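Illustrative C++17 inputs for the three outcomes, mirroring the doc comment above (hypothetical snippets; f(), use(), and x are placeholders):

if (int x = f()) use(x);      // ConditionDecl: has an '=' initializer
if (int(x); x > 0) use(x);    // InitStmtDecl: 'int(x);' is a declaration
if (int(x)) use(x);           // Expression: 'int(x))' has no initializer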
/// \brief Determine whether the next set of tokens contains a type-id.
@@ -423,7 +501,7 @@ bool Parser::isCXXTypeId(TentativeCXXTypeIdContext Context, bool &isAmbiguous) {
// Ok, we have a simple-type-specifier/typename-specifier followed by a '('.
// We need tentative parsing...
- TentativeParsingAction PA(*this);
+ RevertingTentativeParsingAction PA(*this);
// type-specifier-seq
TryConsumeDeclarationSpecifier();
@@ -456,8 +534,6 @@ bool Parser::isCXXTypeId(TentativeCXXTypeIdContext Context, bool &isAmbiguous) {
TPR = TPResult::False;
}
- PA.Revert();
-
assert(TPR == TPResult::True || TPR == TPResult::False);
return TPR == TPResult::True;
}
@@ -508,7 +584,7 @@ Parser::isCXX11AttributeSpecifier(bool Disambiguate,
if (!Disambiguate && !getLangOpts().ObjC1)
return CAK_AttributeSpecifier;
- TentativeParsingAction PA(*this);
+ RevertingTentativeParsingAction PA(*this);
// Opening brackets were checked for above.
ConsumeBracket();
@@ -520,8 +596,6 @@ Parser::isCXX11AttributeSpecifier(bool Disambiguate,
bool IsAttribute = SkipUntil(tok::r_square);
IsAttribute &= Tok.is(tok::r_square);
- PA.Revert();
-
return IsAttribute ? CAK_AttributeSpecifier : CAK_InvalidAttributeSpecifier;
}
@@ -542,8 +616,6 @@ Parser::isCXX11AttributeSpecifier(bool Disambiguate,
// A lambda cannot end with ']]', and an attribute must.
bool IsAttribute = Tok.is(tok::r_square);
- PA.Revert();
-
if (IsAttribute)
// Case 1: C++11 attribute.
return CAK_AttributeSpecifier;
@@ -564,7 +636,6 @@ Parser::isCXX11AttributeSpecifier(bool Disambiguate,
while (Tok.isNot(tok::r_square)) {
if (Tok.is(tok::comma)) {
// Case 1: Stray commas can only occur in attributes.
- PA.Revert();
return CAK_AttributeSpecifier;
}
@@ -611,8 +682,6 @@ Parser::isCXX11AttributeSpecifier(bool Disambiguate,
}
}
- PA.Revert();
-
if (IsAttribute)
// Case 1: C++11 statement attribute.
return CAK_AttributeSpecifier;
@@ -833,7 +902,7 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
// '(' abstract-declarator ')'
if (Tok.isOneOf(tok::kw___attribute, tok::kw___declspec, tok::kw___cdecl,
tok::kw___stdcall, tok::kw___fastcall, tok::kw___thiscall,
- tok::kw___vectorcall, tok::kw___unaligned))
+ tok::kw___vectorcall))
return TPResult::True; // attributes indicate declaration
TPResult TPR = TryParseDeclarator(mayBeAbstract, mayHaveIdentifier);
if (TPR != TPResult::Ambiguous)
@@ -946,6 +1015,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) {
case tok::kw_char:
case tok::kw_const:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_enum:
case tok::kw_half:
case tok::kw_float:
@@ -987,6 +1057,8 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) {
case tok::kw___pixel:
case tok::kw___bool:
case tok::kw__Atomic:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
case tok::kw___unknown_anytype:
return TPResult::False;
@@ -1317,7 +1389,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
Tok.getAnnotationRange(),
SS);
if (SS.getScopeRep() && SS.getScopeRep()->isDependent()) {
- TentativeParsingAction PA(*this);
+ RevertingTentativeParsingAction PA(*this);
ConsumeToken();
ConsumeToken();
bool isIdentifier = Tok.is(tok::identifier);
@@ -1325,7 +1397,6 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
if (!isIdentifier)
TPR = isCXXDeclarationSpecifier(BracedCastResult,
HasMissingTypename);
- PA.Revert();
if (isIdentifier ||
TPR == TPResult::True || TPR == TPResult::Error)
@@ -1337,6 +1408,8 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
*HasMissingTypename = true;
return TPResult::Ambiguous;
}
+
+ // FIXME: Fails to either revert or commit the tentative parse!
} else {
// Try to resolve the name. If it doesn't exist, assume it was
// intended to name a type and keep disambiguating.
@@ -1388,15 +1461,13 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
// In Objective-C, we might have a protocol-qualified type.
if (getLangOpts().ObjC1 && NextToken().is(tok::less)) {
// Tentatively parse the protocol qualifiers.
- TentativeParsingAction PA(*this);
+ RevertingTentativeParsingAction PA(*this);
ConsumeToken(); // The type token
TPResult TPR = TryParseProtocolQualifiers();
bool isFollowedByParen = Tok.is(tok::l_paren);
bool isFollowedByBrace = Tok.is(tok::l_brace);
- PA.Revert();
-
if (TPR == TPResult::Error)
return TPResult::Error;
@@ -1424,6 +1495,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_void:
case tok::annot_decltype:
if (NextToken().is(tok::l_paren))
@@ -1448,14 +1520,12 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
if (NextToken().isNot(tok::l_paren))
return TPResult::True;
- TentativeParsingAction PA(*this);
+ RevertingTentativeParsingAction PA(*this);
TPResult TPR = TryParseTypeofSpecifier();
bool isFollowedByParen = Tok.is(tok::l_paren);
bool isFollowedByBrace = Tok.is(tok::l_brace);
- PA.Revert();
-
if (TPR == TPResult::Error)
return TPResult::Error;
@@ -1515,6 +1585,7 @@ bool Parser::isCXXDeclarationSpecifierAType() {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_void:
case tok::kw___unknown_anytype:
case tok::kw___auto_type:
@@ -1594,7 +1665,7 @@ bool Parser::isCXXFunctionDeclarator(bool *IsAmbiguous) {
// ambiguities mentioned in 6.8, the resolution is to consider any construct
// that could possibly be a declaration a declaration.
- TentativeParsingAction PA(*this);
+ RevertingTentativeParsingAction PA(*this);
ConsumeParen();
bool InvalidAsDeclaration = false;
@@ -1618,8 +1689,6 @@ bool Parser::isCXXFunctionDeclarator(bool *IsAmbiguous) {
}
}
- PA.Revert();
-
if (IsAmbiguous && TPR == TPResult::Ambiguous)
*IsAmbiguous = true;
diff --git a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
index ccefb3dd3f5d..f968f995d53f 100644
--- a/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
+++ b/contrib/llvm/tools/clang/lib/Parse/Parser.cpp
@@ -491,6 +491,8 @@ void Parser::Initialize() {
Ident_deprecated = nullptr;
Ident_obsoleted = nullptr;
Ident_unavailable = nullptr;
+ Ident_strict = nullptr;
+ Ident_replacement = nullptr;
Ident__except = nullptr;
@@ -545,7 +547,7 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result) {
if (PP.isIncrementalProcessingEnabled() && Tok.is(tok::eof))
ConsumeToken();
- Result = DeclGroupPtrTy();
+ Result = nullptr;
switch (Tok.getKind()) {
case tok::annot_pragma_unused:
HandlePragmaUnused();
@@ -625,52 +627,54 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
if (PP.isCodeCompletionReached()) {
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
Decl *SingleDecl = nullptr;
switch (Tok.getKind()) {
case tok::annot_pragma_vis:
HandlePragmaVisibility();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_pack:
HandlePragmaPack();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_msstruct:
HandlePragmaMSStruct();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_align:
HandlePragmaAlign();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_weak:
HandlePragmaWeak();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_weakalias:
HandlePragmaWeakAlias();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_redefine_extname:
HandlePragmaRedefineExtname();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_fp_contract:
HandlePragmaFPContract();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_opencl_extension:
HandlePragmaOpenCLExtension();
- return DeclGroupPtrTy();
- case tok::annot_pragma_openmp:
- return ParseOpenMPDeclarativeDirective();
+ return nullptr;
+ case tok::annot_pragma_openmp: {
+ AccessSpecifier AS = AS_none;
+ return ParseOpenMPDeclarativeDirectiveWithExtDecl(AS, attrs);
+ }
case tok::annot_pragma_ms_pointers_to_members:
HandlePragmaMSPointersToMembers();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_ms_vtordisp:
HandlePragmaMSVtorDisp();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_ms_pragma:
HandlePragmaMSPragma();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::annot_pragma_dump:
HandlePragmaDump();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::semi:
// Either a C++11 empty-declaration or attribute-declaration.
SingleDecl = Actions.ActOnEmptyDeclaration(getCurScope(),
@@ -681,10 +685,10 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
case tok::r_brace:
Diag(Tok, diag::err_extraneous_closing_brace);
ConsumeBrace();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::eof:
Diag(Tok, diag::err_expected_external_declaration);
- return DeclGroupPtrTy();
+ return nullptr;
case tok::kw___extension__: {
// __extension__ silences extension warnings in the subexpression.
ExtensionRAIIObject O(Diags); // Use RAII to do this.
@@ -712,7 +716,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
"top-level asm block");
if (Result.isInvalid())
- return DeclGroupPtrTy();
+ return nullptr;
SingleDecl = Actions.ActOnFileScopeAsmDecl(Result.get(), StartLoc, EndLoc);
break;
}
@@ -723,7 +727,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
if (!getLangOpts().ObjC1) {
Diag(Tok, diag::err_expected_external_declaration);
ConsumeToken();
- return DeclGroupPtrTy();
+ return nullptr;
}
SingleDecl = ParseObjCMethodDefinition();
break;
@@ -732,7 +736,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
CurParsedObjCImpl? Sema::PCC_ObjCImplementation
: Sema::PCC_Namespace);
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
case tok::kw_using:
case tok::kw_namespace:
case tok::kw_typedef:
@@ -796,8 +800,8 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
case tok::kw___if_exists:
case tok::kw___if_not_exists:
ParseMicrosoftIfExistsExternalDeclaration();
- return DeclGroupPtrTy();
-
+ return nullptr;
+
default:
dont_know:
// We can't tell whether this is a function-definition or declaration yet.
@@ -876,15 +880,21 @@ Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs,
// may get this far before the problem becomes obvious.
if (DS.hasTagDefinition() &&
DiagnoseMissingSemiAfterTagDefinition(DS, AS, DSC_top_level))
- return DeclGroupPtrTy();
+ return nullptr;
// C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
// declaration-specifiers init-declarator-list[opt] ';'
if (Tok.is(tok::semi)) {
ProhibitAttributes(attrs);
ConsumeToken();
- Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS, DS);
+ RecordDecl *AnonRecord = nullptr;
+ Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
+ DS, AnonRecord);
DS.complete(TheDecl);
+ if (AnonRecord) {
+ Decl* decls[] = {AnonRecord, TheDecl};
+ return Actions.BuildDeclaratorGroup(decls, /*TypeMayContainAuto=*/false);
+ }
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
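For context, a sketch of the file-scope input that makes AnonRecord non-null here (illustrative):

static union {   // anonymous union: the implicit RecordDecl and the
  int i;         // freestanding declaration are now returned together
  float f;       // in a single declaration group
};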
@@ -899,7 +909,7 @@ Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs,
!Tok.isObjCAtKeyword(tok::objc_protocol)) {
Diag(Tok, diag::err_objc_unexpected_attr);
SkipUntil(tok::semi); // FIXME: better skip?
- return DeclGroupPtrTy();
+ return nullptr;
}
DS.abort();
@@ -1034,6 +1044,12 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
D.complete(DP);
D.getMutableDeclSpec().abort();
+ if (SkipFunctionBodies && (!DP || Actions.canSkipFunctionBody(DP)) &&
+ trySkippingFunctionBody()) {
+ BodyScope.Exit();
+ return Actions.ActOnSkippedFunctionBody(DP);
+ }
+
CachedTokens Toks;
LexTemplateFunctionForLateParsing(Toks);
@@ -1126,6 +1142,13 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D,
return Res;
}
+ if (SkipFunctionBodies && (!Res || Actions.canSkipFunctionBody(Res)) &&
+ trySkippingFunctionBody()) {
+ BodyScope.Exit();
+ Actions.ActOnSkippedFunctionBody(Res);
+ return Actions.ActOnFinishFunctionBody(Res, nullptr, false);
+ }
+
if (Tok.is(tok::kw_try))
return ParseFunctionTryBlock(Res, BodyScope);
@@ -1400,7 +1423,7 @@ Parser::TryAnnotateName(bool IsAddressOfOperand,
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus &&
- ParseOptionalCXXScopeSpecifier(SS, ParsedType(), EnteringContext))
+ ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return ANK_Error;
if (Tok.isNot(tok::identifier) || SS.isInvalid()) {
@@ -1588,9 +1611,9 @@ bool Parser::TryAnnotateTypeOrScopeToken(bool EnteringContext, bool NeedType) {
// simple-template-id
SourceLocation TypenameLoc = ConsumeToken();
CXXScopeSpec SS;
- if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/ParsedType(),
- /*EnteringContext=*/false,
- nullptr, /*IsTypename*/ true))
+ if (ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr,
+ /*EnteringContext=*/false, nullptr,
+ /*IsTypename*/ true))
return true;
if (!SS.isSet()) {
if (Tok.is(tok::identifier) || Tok.is(tok::annot_template_id) ||
@@ -1646,7 +1669,7 @@ bool Parser::TryAnnotateTypeOrScopeToken(bool EnteringContext, bool NeedType) {
SourceLocation EndLoc = Tok.getLastLoc();
Tok.setKind(tok::annot_typename);
- setTypeAnnotation(Tok, Ty.isInvalid() ? ParsedType() : Ty.get());
+ setTypeAnnotation(Tok, Ty.isInvalid() ? nullptr : Ty.get());
Tok.setAnnotationEndLoc(EndLoc);
Tok.setLocation(TypenameLoc);
PP.AnnotateCachedTokens(Tok);
@@ -1658,7 +1681,7 @@ bool Parser::TryAnnotateTypeOrScopeToken(bool EnteringContext, bool NeedType) {
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus)
- if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), EnteringContext))
+ if (ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return true;
return TryAnnotateTypeOrScopeTokenAfterScopeSpec(EnteringContext, NeedType,
@@ -1675,15 +1698,12 @@ bool Parser::TryAnnotateTypeOrScopeTokenAfterScopeSpec(bool EnteringContext,
if (Tok.is(tok::identifier)) {
IdentifierInfo *CorrectedII = nullptr;
// Determine whether the identifier is a type name.
- if (ParsedType Ty = Actions.getTypeName(*Tok.getIdentifierInfo(),
- Tok.getLocation(), getCurScope(),
- &SS, false,
- NextToken().is(tok::period),
- ParsedType(),
- /*IsCtorOrDtorName=*/false,
- /*NonTrivialTypeSourceInfo*/ true,
- NeedType ? &CorrectedII
- : nullptr)) {
+ if (ParsedType Ty = Actions.getTypeName(
+ *Tok.getIdentifierInfo(), Tok.getLocation(), getCurScope(), &SS,
+ false, NextToken().is(tok::period), nullptr,
+ /*IsCtorOrDtorName=*/false,
+ /*NonTrivialTypeSourceInfo*/ true,
+ NeedType ? &CorrectedII : nullptr)) {
// A FixIt was applied as a result of typo correction
if (CorrectedII)
Tok.setIdentifierInfo(CorrectedII);
@@ -1734,12 +1754,11 @@ bool Parser::TryAnnotateTypeOrScopeTokenAfterScopeSpec(bool EnteringContext,
UnqualifiedId TemplateName;
TemplateName.setIdentifier(Tok.getIdentifierInfo(), Tok.getLocation());
bool MemberOfUnknownSpecialization;
- if (TemplateNameKind TNK
- = Actions.isTemplateName(getCurScope(), SS,
- /*hasTemplateKeyword=*/false, TemplateName,
- /*ObjectType=*/ ParsedType(),
- EnteringContext,
- Template, MemberOfUnknownSpecialization)) {
+ if (TemplateNameKind TNK =
+ Actions.isTemplateName(getCurScope(), SS,
+ /*hasTemplateKeyword=*/false, TemplateName,
+ /*ObjectType=*/nullptr, EnteringContext,
+ Template, MemberOfUnknownSpecialization)) {
// Consume the identifier.
ConsumeToken();
if (AnnotateTemplateIdToken(Template, TNK, SS, SourceLocation(),
@@ -1793,7 +1812,7 @@ bool Parser::TryAnnotateCXXScopeToken(bool EnteringContext) {
"Cannot be a type or scope token!");
CXXScopeSpec SS;
- if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), EnteringContext))
+ if (ParseOptionalCXXScopeSpecifier(SS, nullptr, EnteringContext))
return true;
if (SS.isEmpty())
return false;
@@ -1897,7 +1916,7 @@ bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) {
// Parse nested-name-specifier.
if (getLangOpts().CPlusPlus)
- ParseOptionalCXXScopeSpecifier(Result.SS, ParsedType(),
+ ParseOptionalCXXScopeSpecifier(Result.SS, nullptr,
/*EnteringContext=*/false);
// Check nested-name specifier.
@@ -1908,8 +1927,8 @@ bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) {
// Parse the unqualified-id.
SourceLocation TemplateKWLoc; // FIXME: parsed, but unused.
- if (ParseUnqualifiedId(Result.SS, false, true, true, ParsedType(),
- TemplateKWLoc, Result.Name)) {
+ if (ParseUnqualifiedId(Result.SS, false, true, true, nullptr, TemplateKWLoc,
+ Result.Name)) {
T.skipToEnd();
return true;
}
@@ -1990,12 +2009,12 @@ Parser::DeclGroupPtrTy Parser::ParseModuleImport(SourceLocation AtLoc) {
if (Tok.is(tok::code_completion)) {
Actions.CodeCompleteModuleImport(ImportLoc, Path);
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
Diag(Tok, diag::err_module_expected_ident);
SkipUntil(tok::semi);
- return DeclGroupPtrTy();
+ return nullptr;
}
// Record this part of the module path.
@@ -2013,14 +2032,14 @@ Parser::DeclGroupPtrTy Parser::ParseModuleImport(SourceLocation AtLoc) {
if (PP.hadModuleLoaderFatalFailure()) {
// With a fatal failure in the module loader, we abort parsing.
cutOffParsing();
- return DeclGroupPtrTy();
+ return nullptr;
}
DeclResult Import = Actions.ActOnModuleImport(AtLoc, ImportLoc, Path);
ExpectAndConsumeSemi(diag::err_module_expected_semi);
if (Import.isInvalid())
- return DeclGroupPtrTy();
-
+ return nullptr;
+
return Actions.ConvertDeclToDeclGroup(Import.get());
}
diff --git a/contrib/llvm/tools/clang/lib/Rewrite/HTMLRewrite.cpp b/contrib/llvm/tools/clang/lib/Rewrite/HTMLRewrite.cpp
index 275fbd0ebca2..2d82d8fd4bd1 100644
--- a/contrib/llvm/tools/clang/lib/Rewrite/HTMLRewrite.cpp
+++ b/contrib/llvm/tools/clang/lib/Rewrite/HTMLRewrite.cpp
@@ -502,7 +502,7 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
// Enter the tokens we just lexed. This will cause them to be macro expanded
// but won't enter sub-files (because we removed #'s).
- TmpPP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false);
+ TmpPP.EnterTokenStream(TokenStream, false);
TokenConcatenation ConcatInfo(TmpPP);
diff --git a/contrib/llvm/tools/clang/lib/Rewrite/RewriteRope.cpp b/contrib/llvm/tools/clang/lib/Rewrite/RewriteRope.cpp
index 451ad07e4e84..030ab7732fc3 100644
--- a/contrib/llvm/tools/clang/lib/Rewrite/RewriteRope.cpp
+++ b/contrib/llvm/tools/clang/lib/Rewrite/RewriteRope.cpp
@@ -350,8 +350,10 @@ void RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) {
PieceOffs += getPiece(i).size();
// If we exactly include the last one, include it in the region to delete.
- if (Offset+NumBytes == PieceOffs+getPiece(i).size())
- PieceOffs += getPiece(i).size(), ++i;
+ if (Offset+NumBytes == PieceOffs+getPiece(i).size()) {
+ PieceOffs += getPiece(i).size();
+ ++i;
+ }
// If we completely cover some RopePieces, erase them now.
if (i != StartPiece) {
diff --git a/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp b/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 5f74343fbd95..67762bde3439 100644
--- a/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -889,7 +889,7 @@ static bool DiagnoseUninitializedUse(Sema &S, const VarDecl *VD,
// the initializer of that declaration & we didn't already suggest
// an initialization fixit.
if (!SuggestInitializationFixit(S, VD))
- S.Diag(VD->getLocStart(), diag::note_uninit_var_def)
+ S.Diag(VD->getLocStart(), diag::note_var_declared_here)
<< VD->getDeclName();
return true;
@@ -1071,6 +1071,34 @@ namespace {
};
} // anonymous namespace
+static StringRef getFallthroughAttrSpelling(Preprocessor &PP,
+ SourceLocation Loc) {
+ TokenValue FallthroughTokens[] = {
+ tok::l_square, tok::l_square,
+ PP.getIdentifierInfo("fallthrough"),
+ tok::r_square, tok::r_square
+ };
+
+ TokenValue ClangFallthroughTokens[] = {
+ tok::l_square, tok::l_square, PP.getIdentifierInfo("clang"),
+ tok::coloncolon, PP.getIdentifierInfo("fallthrough"),
+ tok::r_square, tok::r_square
+ };
+
+ bool PreferClangAttr = !PP.getLangOpts().CPlusPlus1z;
+
+ StringRef MacroName;
+ if (PreferClangAttr)
+ MacroName = PP.getLastMacroWithSpelling(Loc, ClangFallthroughTokens);
+ if (MacroName.empty())
+ MacroName = PP.getLastMacroWithSpelling(Loc, FallthroughTokens);
+ if (MacroName.empty() && !PreferClangAttr)
+ MacroName = PP.getLastMacroWithSpelling(Loc, ClangFallthroughTokens);
+ if (MacroName.empty())
+ MacroName = PreferClangAttr ? "[[clang::fallthrough]]" : "[[fallthrough]]";
+ return MacroName;
+}
+
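The two spellings the helper chooses between, in context (illustrative; the standard form needs -std=c++1z, and n/step() are placeholders):

switch (n) {
case 0:
  step();
  [[clang::fallthrough]];  // spelling suggested when not compiling as C++1z
case 1:
  step();
  [[fallthrough]];         // standard C++1z spelling, preferred there
default:
  break;
}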
static void DiagnoseSwitchLabelsFallthrough(Sema &S, AnalysisDeclContext &AC,
bool PerFunction) {
// Only perform this analysis when using C++11. There is no good workflow
@@ -1129,15 +1157,7 @@ static void DiagnoseSwitchLabelsFallthrough(Sema &S, AnalysisDeclContext &AC,
}
if (!(B->empty() && Term && isa<BreakStmt>(Term))) {
Preprocessor &PP = S.getPreprocessor();
- TokenValue Tokens[] = {
- tok::l_square, tok::l_square, PP.getIdentifierInfo("clang"),
- tok::coloncolon, PP.getIdentifierInfo("fallthrough"),
- tok::r_square, tok::r_square
- };
- StringRef AnnotationSpelling = "[[clang::fallthrough]]";
- StringRef MacroName = PP.getLastMacroWithSpelling(L, Tokens);
- if (!MacroName.empty())
- AnnotationSpelling = MacroName;
+ StringRef AnnotationSpelling = getFallthroughAttrSpelling(PP, L);
SmallString<64> TextToInsert(AnnotationSpelling);
TextToInsert += "; ";
S.Diag(L, diag::note_insert_fallthrough_fixit) <<
@@ -1151,7 +1171,7 @@ static void DiagnoseSwitchLabelsFallthrough(Sema &S, AnalysisDeclContext &AC,
}
for (const auto *F : FM.getFallthroughStmts())
- S.Diag(F->getLocStart(), diag::warn_fallthrough_attr_invalid_placement);
+ S.Diag(F->getLocStart(), diag::err_fallthrough_attr_invalid_placement);
}
static bool isInLoop(const ASTContext &Ctx, const ParentMap &PM,
@@ -1302,21 +1322,27 @@ static void diagnoseRepeatedUseOfWeak(Sema &S,
Ivar
} ObjectKind;
- const NamedDecl *D = Key.getProperty();
- if (isa<VarDecl>(D))
+ const NamedDecl *KeyProp = Key.getProperty();
+ if (isa<VarDecl>(KeyProp))
ObjectKind = Variable;
- else if (isa<ObjCPropertyDecl>(D))
+ else if (isa<ObjCPropertyDecl>(KeyProp))
ObjectKind = Property;
- else if (isa<ObjCMethodDecl>(D))
+ else if (isa<ObjCMethodDecl>(KeyProp))
ObjectKind = ImplicitProperty;
- else if (isa<ObjCIvarDecl>(D))
+ else if (isa<ObjCIvarDecl>(KeyProp))
ObjectKind = Ivar;
else
llvm_unreachable("Unexpected weak object kind!");
+ // Do not warn about IBOutlet weak property receivers being set to null
+ // since they are typically only used from the main thread.
+ if (const ObjCPropertyDecl *Prop = dyn_cast<ObjCPropertyDecl>(KeyProp))
+ if (Prop->hasAttr<IBOutletAttr>())
+ continue;
+
// Show the first time the object was read.
S.Diag(FirstRead->getLocStart(), DiagKind)
- << int(ObjectKind) << D << int(FunctionKind)
+ << int(ObjectKind) << KeyProp << int(FunctionKind)
<< FirstRead->getSourceRange();
// Print all the other accesses as notes.
@@ -1871,7 +1897,7 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
if (cast<DeclContext>(D)->isDependentContext())
return;
- if (Diags.hasUncompilableErrorOccurred() || Diags.hasFatalErrorOccurred()) {
+ if (Diags.hasUncompilableErrorOccurred()) {
// Flush out any possibly unreachable diagnostics.
flushDiagnostics(S, fscope);
return;
@@ -2038,7 +2064,8 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
!Diags.isIgnored(diag::warn_unannotated_fallthrough, D->getLocStart());
bool FallThroughDiagPerFunction = !Diags.isIgnored(
diag::warn_unannotated_fallthrough_per_function, D->getLocStart());
- if (FallThroughDiagFull || FallThroughDiagPerFunction) {
+ if (FallThroughDiagFull || FallThroughDiagPerFunction ||
+ fscope->HasFallthroughStmt) {
DiagnoseSwitchLabelsFallthrough(S, AC, !FallThroughDiagFull);
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/AttributeList.cpp b/contrib/llvm/tools/clang/lib/Sema/AttributeList.cpp
index 3c61c95ad8ec..cae9393f9f3a 100644
--- a/contrib/llvm/tools/clang/lib/Sema/AttributeList.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/AttributeList.cpp
@@ -159,6 +159,7 @@ struct ParsedAttrInfo {
unsigned HasCustomParsing : 1;
unsigned IsTargetSpecific : 1;
unsigned IsType : 1;
+ unsigned IsStmt : 1;
unsigned IsKnownToGCC : 1;
bool (*DiagAppertainsToDecl)(Sema &S, const AttributeList &Attr,
@@ -204,6 +205,10 @@ bool AttributeList::isTypeAttr() const {
return getInfo(*this).IsType;
}
+bool AttributeList::isStmtAttr() const {
+ return getInfo(*this).IsStmt;
+}
+
bool AttributeList::existsInTarget(const TargetInfo &Target) const {
return getInfo(*this).ExistsInTarget(Target);
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/CodeCompleteConsumer.cpp b/contrib/llvm/tools/clang/lib/Sema/CodeCompleteConsumer.cpp
index 18e9a5911641..9a4f0d921bf4 100644
--- a/contrib/llvm/tools/clang/lib/Sema/CodeCompleteConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/CodeCompleteConsumer.cpp
@@ -309,7 +309,7 @@ StringRef CodeCompletionTUInfo::getParentName(const DeclContext *DC) {
if (!Interface) {
// Assign an empty StringRef but with non-null data to distinguish
// between empty because we didn't process the DeclContext yet.
- CachedParentName = StringRef((const char *)~0U, 0);
+ CachedParentName = StringRef((const char *)(uintptr_t)~0U, 0);
return StringRef();
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp
index 6f6c4ca5848f..b9d2843b0558 100644
--- a/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/DeclSpec.cpp
@@ -15,10 +15,10 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
+#include "clang/AST/LocInfoType.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Sema/LocInfoType.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
@@ -289,6 +289,7 @@ bool Declarator::isDeclarationOfFunction() const {
case TST_decimal32:
case TST_decimal64:
case TST_double:
+ case TST_float128:
case TST_enum:
case TST_error:
case TST_float:
@@ -302,6 +303,8 @@ bool Declarator::isDeclarationOfFunction() const {
case TST_unspecified:
case TST_void:
case TST_wchar:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case TST_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
return false;
case TST_decltype_auto:
@@ -455,6 +458,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T,
case DeclSpec::TST_half: return "half";
case DeclSpec::TST_float: return "float";
case DeclSpec::TST_double: return "double";
+ case DeclSpec::TST_float128: return "__float128";
case DeclSpec::TST_bool: return Policy.Bool ? "bool" : "_Bool";
case DeclSpec::TST_decimal32: return "_Decimal32";
case DeclSpec::TST_decimal64: return "_Decimal64";
@@ -474,6 +478,10 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T,
case DeclSpec::TST_underlyingType: return "__underlying_type";
case DeclSpec::TST_unknown_anytype: return "__unknown_anytype";
case DeclSpec::TST_atomic: return "_Atomic";
+#define GENERIC_IMAGE_TYPE(ImgType, Id) \
+ case DeclSpec::TST_##ImgType##_t: \
+ return #ImgType "_t";
+#include "clang/Basic/OpenCLImageTypes.def"
case DeclSpec::TST_error: return "(error)";
}
llvm_unreachable("Unknown typespec!");
@@ -486,6 +494,7 @@ const char *DeclSpec::getSpecifierName(TQ T) {
case DeclSpec::TQ_restrict: return "restrict";
case DeclSpec::TQ_volatile: return "volatile";
case DeclSpec::TQ_atomic: return "_Atomic";
+ case DeclSpec::TQ_unaligned: return "__unaligned";
}
llvm_unreachable("Unknown typespec!");
}
@@ -787,6 +796,7 @@ bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec,
case TQ_const: TQ_constLoc = Loc; return false;
case TQ_restrict: TQ_restrictLoc = Loc; return false;
case TQ_volatile: TQ_volatileLoc = Loc; return false;
+ case TQ_unaligned: TQ_unalignedLoc = Loc; return false;
case TQ_atomic: TQ_atomicLoc = Loc; return false;
}
@@ -953,10 +963,10 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
TypeSpecSign != TSS_unspecified ||
TypeAltiVecVector || TypeAltiVecPixel || TypeAltiVecBool ||
TypeQualifiers)) {
- const unsigned NumLocs = 8;
+ const unsigned NumLocs = 9;
SourceLocation ExtraLocs[NumLocs] = {
TSWLoc, TSCLoc, TSSLoc, AltiVecLoc,
- TQ_constLoc, TQ_restrictLoc, TQ_volatileLoc, TQ_atomicLoc
+ TQ_constLoc, TQ_restrictLoc, TQ_volatileLoc, TQ_atomicLoc, TQ_unalignedLoc
};
FixItHint Hints[NumLocs];
SourceLocation FirstLoc;
diff --git a/contrib/llvm/tools/clang/lib/Sema/IdentifierResolver.cpp b/contrib/llvm/tools/clang/lib/Sema/IdentifierResolver.cpp
index 53263bac546f..0bdb19490bc5 100644
--- a/contrib/llvm/tools/clang/lib/Sema/IdentifierResolver.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/IdentifierResolver.cpp
@@ -381,7 +381,7 @@ void IdentifierResolver::updatingIdentifier(IdentifierInfo &II) {
PP.getExternalSource()->updateOutOfDateIdentifier(II);
if (II.isFromAST())
- II.setChangedSinceDeserialization();
+ II.setFETokenInfoChangedSinceDeserialization();
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/Sema/JumpDiagnostics.cpp b/contrib/llvm/tools/clang/lib/Sema/JumpDiagnostics.cpp
index c394d24d5fdc..bdbe06c4969d 100644
--- a/contrib/llvm/tools/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/JumpDiagnostics.cpp
@@ -270,7 +270,8 @@ void JumpScopeChecker::BuildScopeInformation(VarDecl *D,
/// coherent VLA scope with a specified parent node. Walk through the
/// statements, adding any labels or gotos to LabelAndGotoScopes and recursively
/// walking the AST as needed.
-void JumpScopeChecker::BuildScopeInformation(Stmt *S, unsigned &origParentScope) {
+void JumpScopeChecker::BuildScopeInformation(Stmt *S,
+ unsigned &origParentScope) {
// If this is a statement, rather than an expression, scopes within it don't
// propagate out into the enclosing scope. Otherwise we have to worry
// about block literals, which have the lifetime of their enclosing statement.
@@ -278,7 +279,7 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S, unsigned &origParentScope)
unsigned &ParentScope = ((isa<Expr>(S) && !isa<StmtExpr>(S))
? origParentScope : independentParentScope);
- bool SkipFirstSubStmt = false;
+ unsigned StmtsToSkip = 0u;
// If we found a label, remember that it is in ParentScope scope.
switch (S->getStmtClass()) {
@@ -303,11 +304,15 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S, unsigned &origParentScope)
break;
case Stmt::SwitchStmtClass:
- // Evaluate the condition variable before entering the scope of the switch
- // statement.
+ // Evaluate the C++17 init stmt and condition variable
+ // before entering the scope of the switch statement.
+ if (Stmt *Init = cast<SwitchStmt>(S)->getInit()) {
+ BuildScopeInformation(Init, ParentScope);
+ ++StmtsToSkip;
+ }
if (VarDecl *Var = cast<SwitchStmt>(S)->getConditionVariable()) {
BuildScopeInformation(Var, ParentScope);
- SkipFirstSubStmt = true;
+ ++StmtsToSkip;
}
// Fall through
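A sketch of the C++17 form whose init-statement is now walked first (hypothetical; init() is a placeholder):

switch (int v = init(); v) {  // 'int v = init();' and the condition variable
case 0:                       // are handled before the switch's own scope
  break;
default:
  break;
}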
@@ -318,199 +323,248 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S, unsigned &origParentScope)
Jumps.push_back(S);
break;
+ case Stmt::IfStmtClass: {
+ IfStmt *IS = cast<IfStmt>(S);
+ if (!IS->isConstexpr())
+ break;
+
+ if (VarDecl *Var = IS->getConditionVariable())
+ BuildScopeInformation(Var, ParentScope);
+
+ // Cannot jump into the middle of the condition.
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_constexpr_if, 0,
+ IS->getLocStart()));
+ BuildScopeInformation(IS->getCond(), NewParentScope);
+
+ // Jumps into either arm of an 'if constexpr' are not allowed.
+ NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_constexpr_if, 0,
+ IS->getLocStart()));
+ BuildScopeInformation(IS->getThen(), NewParentScope);
+ if (Stmt *Else = IS->getElse()) {
+ NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_constexpr_if, 0,
+ IS->getLocStart()));
+ BuildScopeInformation(Else, NewParentScope);
+ }
+ return;
+ }
+
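A sketch of the ill-formed jump the new scopes make diagnosable (hypothetical code):

template <typename T> void g() {
  goto inside;                    // error: cannot jump into either arm of
  if constexpr (sizeof(T) == 4) { // an 'if constexpr'
  inside:;
  }
}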
case Stmt::CXXTryStmtClass: {
CXXTryStmt *TS = cast<CXXTryStmt>(S);
- unsigned newParentScope;
- Scopes.push_back(GotoScope(ParentScope,
- diag::note_protected_by_cxx_try,
- diag::note_exits_cxx_try,
- TS->getSourceRange().getBegin()));
- if (Stmt *TryBlock = TS->getTryBlock())
- BuildScopeInformation(TryBlock, (newParentScope = Scopes.size()-1));
+ {
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_cxx_try,
+ diag::note_exits_cxx_try,
+ TS->getSourceRange().getBegin()));
+ if (Stmt *TryBlock = TS->getTryBlock())
+ BuildScopeInformation(TryBlock, NewParentScope);
+ }
// Jump from the catch into the try is not allowed either.
for (unsigned I = 0, E = TS->getNumHandlers(); I != E; ++I) {
CXXCatchStmt *CS = TS->getHandler(I);
+ unsigned NewParentScope = Scopes.size();
Scopes.push_back(GotoScope(ParentScope,
diag::note_protected_by_cxx_catch,
diag::note_exits_cxx_catch,
CS->getSourceRange().getBegin()));
- BuildScopeInformation(CS->getHandlerBlock(),
- (newParentScope = Scopes.size()-1));
+ BuildScopeInformation(CS->getHandlerBlock(), NewParentScope);
}
return;
}
case Stmt::SEHTryStmtClass: {
SEHTryStmt *TS = cast<SEHTryStmt>(S);
- unsigned newParentScope;
- Scopes.push_back(GotoScope(ParentScope,
- diag::note_protected_by_seh_try,
- diag::note_exits_seh_try,
- TS->getSourceRange().getBegin()));
- if (Stmt *TryBlock = TS->getTryBlock())
- BuildScopeInformation(TryBlock, (newParentScope = Scopes.size()-1));
+ {
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_seh_try,
+ diag::note_exits_seh_try,
+ TS->getSourceRange().getBegin()));
+ if (Stmt *TryBlock = TS->getTryBlock())
+ BuildScopeInformation(TryBlock, NewParentScope);
+ }
// Jump from __except or __finally into the __try are not allowed either.
if (SEHExceptStmt *Except = TS->getExceptHandler()) {
+ unsigned NewParentScope = Scopes.size();
Scopes.push_back(GotoScope(ParentScope,
diag::note_protected_by_seh_except,
diag::note_exits_seh_except,
Except->getSourceRange().getBegin()));
- BuildScopeInformation(Except->getBlock(),
- (newParentScope = Scopes.size()-1));
+ BuildScopeInformation(Except->getBlock(), NewParentScope);
} else if (SEHFinallyStmt *Finally = TS->getFinallyHandler()) {
+ unsigned NewParentScope = Scopes.size();
Scopes.push_back(GotoScope(ParentScope,
diag::note_protected_by_seh_finally,
diag::note_exits_seh_finally,
Finally->getSourceRange().getBegin()));
- BuildScopeInformation(Finally->getBlock(),
- (newParentScope = Scopes.size()-1));
+ BuildScopeInformation(Finally->getBlock(), NewParentScope);
}
return;
}
- default:
- break;
- }
-
- for (Stmt *SubStmt : S->children()) {
- if (SkipFirstSubStmt) {
- SkipFirstSubStmt = false;
- continue;
- }
-
- if (!SubStmt) continue;
-
- // Cases, labels, and defaults aren't "scope parents". It's also
- // important to handle these iteratively instead of recursively in
- // order to avoid blowing out the stack.
- while (true) {
- Stmt *Next;
- if (CaseStmt *CS = dyn_cast<CaseStmt>(SubStmt))
- Next = CS->getSubStmt();
- else if (DefaultStmt *DS = dyn_cast<DefaultStmt>(SubStmt))
- Next = DS->getSubStmt();
- else if (LabelStmt *LS = dyn_cast<LabelStmt>(SubStmt))
- Next = LS->getSubStmt();
- else
- break;
-
- LabelAndGotoScopes[SubStmt] = ParentScope;
- SubStmt = Next;
- }
-
+ case Stmt::DeclStmtClass: {
// If this is a declstmt with a VLA definition, it defines a scope from here
// to the end of the containing context.
- if (DeclStmt *DS = dyn_cast<DeclStmt>(SubStmt)) {
- // The decl statement creates a scope if any of the decls in it are VLAs
- // or have the cleanup attribute.
- for (auto *I : DS->decls())
- BuildScopeInformation(I, ParentScope);
- continue;
- }
+ DeclStmt *DS = cast<DeclStmt>(S);
+ // The decl statement creates a scope if any of the decls in it are VLAs
+ // or have the cleanup attribute.
+ for (auto *I : DS->decls())
+ BuildScopeInformation(I, origParentScope);
+ return;
+ }
+
+ case Stmt::ObjCAtTryStmtClass: {
// Disallow jumps into any part of an @try statement by pushing a scope and
// walking all sub-stmts in that scope.
- if (ObjCAtTryStmt *AT = dyn_cast<ObjCAtTryStmt>(SubStmt)) {
- unsigned newParentScope;
- // Recursively walk the AST for the @try part.
+ ObjCAtTryStmt *AT = cast<ObjCAtTryStmt>(S);
+ // Recursively walk the AST for the @try part.
+ {
+ unsigned NewParentScope = Scopes.size();
Scopes.push_back(GotoScope(ParentScope,
diag::note_protected_by_objc_try,
diag::note_exits_objc_try,
AT->getAtTryLoc()));
if (Stmt *TryPart = AT->getTryBody())
- BuildScopeInformation(TryPart, (newParentScope = Scopes.size()-1));
-
- // Jump from the catch to the finally or try is not valid.
- for (unsigned I = 0, N = AT->getNumCatchStmts(); I != N; ++I) {
- ObjCAtCatchStmt *AC = AT->getCatchStmt(I);
- Scopes.push_back(GotoScope(ParentScope,
- diag::note_protected_by_objc_catch,
- diag::note_exits_objc_catch,
- AC->getAtCatchLoc()));
- // @catches are nested and it isn't
- BuildScopeInformation(AC->getCatchBody(),
- (newParentScope = Scopes.size()-1));
- }
+ BuildScopeInformation(TryPart, NewParentScope);
+ }
- // Jump from the finally to the try or catch is not valid.
- if (ObjCAtFinallyStmt *AF = AT->getFinallyStmt()) {
- Scopes.push_back(GotoScope(ParentScope,
- diag::note_protected_by_objc_finally,
- diag::note_exits_objc_finally,
- AF->getAtFinallyLoc()));
- BuildScopeInformation(AF, (newParentScope = Scopes.size()-1));
- }
+ // Jump from the catch to the finally or try is not valid.
+ for (unsigned I = 0, N = AT->getNumCatchStmts(); I != N; ++I) {
+ ObjCAtCatchStmt *AC = AT->getCatchStmt(I);
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_objc_catch,
+ diag::note_exits_objc_catch,
+ AC->getAtCatchLoc()));
+      // @catch blocks are nested scopes; jumping between them isn't valid.
+ BuildScopeInformation(AC->getCatchBody(), NewParentScope);
+ }
- continue;
+ // Jump from the finally to the try or catch is not valid.
+ if (ObjCAtFinallyStmt *AF = AT->getFinallyStmt()) {
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_objc_finally,
+ diag::note_exits_objc_finally,
+ AF->getAtFinallyLoc()));
+ BuildScopeInformation(AF, NewParentScope);
}
- unsigned newParentScope;
+ return;
+ }
+
+ case Stmt::ObjCAtSynchronizedStmtClass: {
// Disallow jumps into the protected statement of an @synchronized, but
// allow jumps into the object expression it protects.
- if (ObjCAtSynchronizedStmt *AS =
- dyn_cast<ObjCAtSynchronizedStmt>(SubStmt)) {
- // Recursively walk the AST for the @synchronized object expr, it is
- // evaluated in the normal scope.
- BuildScopeInformation(AS->getSynchExpr(), ParentScope);
-
- // Recursively walk the AST for the @synchronized part, protected by a new
- // scope.
- Scopes.push_back(GotoScope(ParentScope,
- diag::note_protected_by_objc_synchronized,
- diag::note_exits_objc_synchronized,
- AS->getAtSynchronizedLoc()));
- BuildScopeInformation(AS->getSynchBody(),
- (newParentScope = Scopes.size()-1));
- continue;
- }
+ ObjCAtSynchronizedStmt *AS = cast<ObjCAtSynchronizedStmt>(S);
+ // Recursively walk the AST for the @synchronized object expr, it is
+ // evaluated in the normal scope.
+ BuildScopeInformation(AS->getSynchExpr(), ParentScope);
+
+ // Recursively walk the AST for the @synchronized part, protected by a new
+ // scope.
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_objc_synchronized,
+ diag::note_exits_objc_synchronized,
+ AS->getAtSynchronizedLoc()));
+ BuildScopeInformation(AS->getSynchBody(), NewParentScope);
+ return;
+ }
+ case Stmt::ObjCAutoreleasePoolStmtClass: {
// Disallow jumps into the protected statement of an @autoreleasepool.
- if (ObjCAutoreleasePoolStmt *AS =
- dyn_cast<ObjCAutoreleasePoolStmt>(SubStmt)) {
- // Recursively walk the AST for the @autoreleasepool part, protected by a
- // new scope.
- Scopes.push_back(GotoScope(ParentScope,
- diag::note_protected_by_objc_autoreleasepool,
- diag::note_exits_objc_autoreleasepool,
- AS->getAtLoc()));
- BuildScopeInformation(AS->getSubStmt(),
- (newParentScope = Scopes.size() - 1));
- continue;
- }
+ ObjCAutoreleasePoolStmt *AS = cast<ObjCAutoreleasePoolStmt>(S);
+ // Recursively walk the AST for the @autoreleasepool part, protected by a
+ // new scope.
+ unsigned NewParentScope = Scopes.size();
+ Scopes.push_back(GotoScope(ParentScope,
+ diag::note_protected_by_objc_autoreleasepool,
+ diag::note_exits_objc_autoreleasepool,
+ AS->getAtLoc()));
+ BuildScopeInformation(AS->getSubStmt(), NewParentScope);
+ return;
+ }
+ case Stmt::ExprWithCleanupsClass: {
// Disallow jumps past full-expressions that use blocks with
// non-trivial cleanups of their captures. This is theoretically
// implementable but a lot of work which we haven't felt up to doing.
- if (ExprWithCleanups *EWC = dyn_cast<ExprWithCleanups>(SubStmt)) {
- for (unsigned i = 0, e = EWC->getNumObjects(); i != e; ++i) {
- const BlockDecl *BDecl = EWC->getObject(i);
- for (const auto &CI : BDecl->captures()) {
- VarDecl *variable = CI.getVariable();
- BuildScopeInformation(variable, BDecl, ParentScope);
- }
+ ExprWithCleanups *EWC = cast<ExprWithCleanups>(S);
+ for (unsigned i = 0, e = EWC->getNumObjects(); i != e; ++i) {
+ const BlockDecl *BDecl = EWC->getObject(i);
+ for (const auto &CI : BDecl->captures()) {
+ VarDecl *variable = CI.getVariable();
+ BuildScopeInformation(variable, BDecl, origParentScope);
}
}
+ break;
+ }
+ case Stmt::MaterializeTemporaryExprClass: {
// Disallow jumps out of scopes containing temporaries lifetime-extended to
// automatic storage duration.
- if (MaterializeTemporaryExpr *MTE =
- dyn_cast<MaterializeTemporaryExpr>(SubStmt)) {
- if (MTE->getStorageDuration() == SD_Automatic) {
- SmallVector<const Expr *, 4> CommaLHS;
- SmallVector<SubobjectAdjustment, 4> Adjustments;
- const Expr *ExtendedObject =
- MTE->GetTemporaryExpr()->skipRValueSubobjectAdjustments(
- CommaLHS, Adjustments);
- if (ExtendedObject->getType().isDestructedType()) {
- Scopes.push_back(GotoScope(ParentScope, 0,
- diag::note_exits_temporary_dtor,
- ExtendedObject->getExprLoc()));
- ParentScope = Scopes.size()-1;
- }
+ MaterializeTemporaryExpr *MTE = cast<MaterializeTemporaryExpr>(S);
+ if (MTE->getStorageDuration() == SD_Automatic) {
+ SmallVector<const Expr *, 4> CommaLHS;
+ SmallVector<SubobjectAdjustment, 4> Adjustments;
+ const Expr *ExtendedObject =
+ MTE->GetTemporaryExpr()->skipRValueSubobjectAdjustments(
+ CommaLHS, Adjustments);
+ if (ExtendedObject->getType().isDestructedType()) {
+ Scopes.push_back(GotoScope(ParentScope, 0,
+ diag::note_exits_temporary_dtor,
+ ExtendedObject->getExprLoc()));
+ origParentScope = Scopes.size()-1;
}
}
+ break;
+ }
+
+ case Stmt::CaseStmtClass:
+ case Stmt::DefaultStmtClass:
+ case Stmt::LabelStmtClass:
+ LabelAndGotoScopes[S] = ParentScope;
+ break;
+
+ default:
+ break;
+ }
+
+ for (Stmt *SubStmt : S->children()) {
+ if (!SubStmt)
+ continue;
+ if (StmtsToSkip) {
+ --StmtsToSkip;
+ continue;
+ }
+
+ // Cases, labels, and defaults aren't "scope parents". It's also
+ // important to handle these iteratively instead of recursively in
+ // order to avoid blowing out the stack.
+ while (true) {
+ Stmt *Next;
+ if (CaseStmt *CS = dyn_cast<CaseStmt>(SubStmt))
+ Next = CS->getSubStmt();
+ else if (DefaultStmt *DS = dyn_cast<DefaultStmt>(SubStmt))
+ Next = DS->getSubStmt();
+ else if (LabelStmt *LS = dyn_cast<LabelStmt>(SubStmt))
+ Next = LS->getSubStmt();
+ else
+ break;
+
+ LabelAndGotoScopes[SubStmt] = ParentScope;
+ SubStmt = Next;
+ }
// Recursively walk the AST.
BuildScopeInformation(SubStmt, ParentScope);
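The hunk above finishes converting BuildScopeInformation from a chain of dyn_cast checks into a switch over the statement class, and the trailing loop peels Case/Default/Label wrappers iteratively rather than recursing once per wrapper. A minimal, self-contained sketch of that peeling pattern, using hypothetical Node types rather than clang's Stmt hierarchy:

  #include <vector>

  // Hypothetical stand-ins for clang's Stmt/LabelStmt, only to illustrate
  // the iterative unwrapping used above.
  struct Node {
    bool IsLabel = false;
    Node *Sub = nullptr;          // wrapped statement, for labels
    std::vector<Node *> Children; // sub-statements, for everything else
  };

  void buildScopeInformation(Node *S, unsigned ParentScope) {
    for (Node *SubStmt : S->Children) {
      if (!SubStmt)
        continue;
      // Labels are transparent for scoping: record them and step inside in
      // a loop, so a pathological chain of labels can't overflow the stack.
      while (SubStmt && SubStmt->IsLabel)
        SubStmt = SubStmt->Sub; // a real impl would record the scope here
      if (SubStmt)
        buildScopeInformation(SubStmt, ParentScope);
    }
  }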
diff --git a/contrib/llvm/tools/clang/lib/Sema/MultiplexExternalSemaSource.cpp b/contrib/llvm/tools/clang/lib/Sema/MultiplexExternalSemaSource.cpp
index 0f93421ac21b..eee4c00324ba 100644
--- a/contrib/llvm/tools/clang/lib/Sema/MultiplexExternalSemaSource.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/MultiplexExternalSemaSource.cpp
@@ -197,6 +197,11 @@ void MultiplexExternalSemaSource::ReadMethodPool(Selector Sel) {
Sources[i]->ReadMethodPool(Sel);
}
+void MultiplexExternalSemaSource::updateOutOfDateSelector(Selector Sel) {
+ for(size_t i = 0; i < Sources.size(); ++i)
+ Sources[i]->updateOutOfDateSelector(Sel);
+}
+
void MultiplexExternalSemaSource::ReadKnownNamespaces(
SmallVectorImpl<NamespaceDecl*> &Namespaces){
for(size_t i = 0; i < Sources.size(); ++i)
@@ -204,7 +209,7 @@ void MultiplexExternalSemaSource::ReadKnownNamespaces(
}
void MultiplexExternalSemaSource::ReadUndefinedButUsed(
- llvm::DenseMap<NamedDecl*, SourceLocation> &Undefined){
+ llvm::MapVector<NamedDecl *, SourceLocation> &Undefined) {
for(size_t i = 0; i < Sources.size(); ++i)
Sources[i]->ReadUndefinedButUsed(Undefined);
}
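Switching ReadUndefinedButUsed from llvm::DenseMap to llvm::MapVector matters because MapVector iterates in insertion order, which is deterministic across runs; that is also what lets this patch delete the compensating std::sort in Sema::getUndefinedButUsed further down. A small sketch of the difference, assuming LLVM's ADT headers are on the include path:

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/MapVector.h"
  #include "llvm/Support/raw_ostream.h"

  int main() {
    llvm::DenseMap<int, int> DM;  // iteration order is hash-dependent
    llvm::MapVector<int, int> MV; // iteration order is insertion order
    for (int Key : {30, 10, 20}) {
      DM[Key] = 0;
      MV[Key] = 0;
    }
    for (const auto &KV : MV)
      llvm::outs() << KV.first << ' '; // always prints "30 10 20"
    llvm::outs() << '\n';
  }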
diff --git a/contrib/llvm/tools/clang/lib/Sema/Scope.cpp b/contrib/llvm/tools/clang/lib/Sema/Scope.cpp
index 7c70048acfbf..ae5b181c6728 100644
--- a/contrib/llvm/tools/clang/lib/Sema/Scope.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/Scope.cpp
@@ -18,7 +18,7 @@
using namespace clang;
-void Scope::Init(Scope *parent, unsigned flags) {
+void Scope::setFlags(Scope *parent, unsigned flags) {
AnyParent = parent;
Flags = flags;
@@ -83,6 +83,10 @@ void Scope::Init(Scope *parent, unsigned flags) {
else
incrementMSManglingNumber();
}
+}
+
+void Scope::Init(Scope *parent, unsigned flags) {
+ setFlags(parent, flags);
DeclsInScope.clear();
UsingDirectives.clear();
@@ -130,7 +134,7 @@ void Scope::mergeNRVOIntoParent() {
getParent()->addNRVOCandidate(NRVO.getPointer());
}
-void Scope::dump() const { dumpImpl(llvm::errs()); }
+LLVM_DUMP_METHOD void Scope::dump() const { dumpImpl(llvm::errs()); }
void Scope::dumpImpl(raw_ostream &OS) const {
unsigned Flags = getFlags();
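Tagging Scope::dump with LLVM_DUMP_METHOD keeps the symbol from being optimized away in builds where nothing calls it, so it stays invocable from a debugger. A rough approximation of what the macro does on GCC/Clang-style compilers (a sketch; the real definition lives in llvm/Support/Compiler.h):

  #if !defined(NDEBUG)
  #define DUMP_METHOD __attribute__((noinline)) __attribute__((used))
  #else
  #define DUMP_METHOD
  #endif

  struct Widget {
    // 'used' keeps the definition even with no callers; 'noinline' keeps a
    // standalone function address you can call from a debugger prompt.
    DUMP_METHOD void dump() const {}
  };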
diff --git a/contrib/llvm/tools/clang/lib/Sema/ScopeInfo.cpp b/contrib/llvm/tools/clang/lib/Sema/ScopeInfo.cpp
index cbd7ef7abb41..4b2e13e20deb 100644
--- a/contrib/llvm/tools/clang/lib/Sema/ScopeInfo.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/ScopeInfo.cpp
@@ -28,6 +28,7 @@ void FunctionScopeInfo::Clear() {
HasBranchIntoScope = false;
HasIndirectGoto = false;
HasDroppedStmt = false;
+ HasOMPDeclareReductionCombiner = false;
ObjCShouldCallSuper = false;
ObjCIsDesignatedInit = false;
ObjCWarnForNoDesignatedInitChain = false;
@@ -85,11 +86,13 @@ FunctionScopeInfo::WeakObjectProfileTy::getBaseInfo(const Expr *E) {
if (BaseProp) {
D = getBestPropertyDecl(BaseProp);
- const Expr *DoubleBase = BaseProp->getBase();
- if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(DoubleBase))
- DoubleBase = OVE->getSourceExpr();
+ if (BaseProp->isObjectReceiver()) {
+ const Expr *DoubleBase = BaseProp->getBase();
+ if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(DoubleBase))
+ DoubleBase = OVE->getSourceExpr();
- IsExact = DoubleBase->isObjCSelfExpr();
+ IsExact = DoubleBase->isObjCSelfExpr();
+ }
}
break;
}
@@ -212,7 +215,7 @@ void FunctionScopeInfo::markSafeWeakUse(const Expr *E) {
// Has there been a read from the object using this Expr?
FunctionScopeInfo::WeakUseVector::reverse_iterator ThisUse =
- std::find(Uses->second.rbegin(), Uses->second.rend(), WeakUseTy(E, true));
+ llvm::find(llvm::reverse(Uses->second), WeakUseTy(E, true));
if (ThisUse == Uses->second.rend())
return;
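The markSafeWeakUse change is a pure spelling cleanup: llvm::find over llvm::reverse(Range) is the range form of std::find on rbegin()/rend(). An equivalence sketch, assuming LLVM's STLExtras header:

  #include "llvm/ADT/STLExtras.h" // llvm::find, llvm::reverse
  #include <algorithm>
  #include <cassert>
  #include <vector>

  int main() {
    std::vector<int> V = {1, 2, 3, 2};
    auto A = std::find(V.rbegin(), V.rend(), 2); // iterator-pair form
    auto B = llvm::find(llvm::reverse(V), 2);    // range form
    assert(A == B && *A == 2); // both point at the last 2
  }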
diff --git a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp
index 39b8cc9f0c63..777747606304 100644
--- a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp
@@ -52,13 +52,14 @@ ModuleLoader &Sema::getModuleLoader() const { return PP.getModuleLoader(); }
PrintingPolicy Sema::getPrintingPolicy(const ASTContext &Context,
const Preprocessor &PP) {
PrintingPolicy Policy = Context.getPrintingPolicy();
+ // Our printing policy is copied over the ASTContext printing policy whenever
+ // a diagnostic is emitted, so recompute it.
Policy.Bool = Context.getLangOpts().Bool;
if (!Policy.Bool) {
- if (const MacroInfo *
- BoolMacro = PP.getMacroInfo(&Context.Idents.get("bool"))) {
+ if (const MacroInfo *BoolMacro = PP.getMacroInfo(Context.getBoolName())) {
Policy.Bool = BoolMacro->isObjectLike() &&
- BoolMacro->getNumTokens() == 1 &&
- BoolMacro->getReplacementToken(0).is(tok::kw__Bool);
+ BoolMacro->getNumTokens() == 1 &&
+ BoolMacro->getReplacementToken(0).is(tok::kw__Bool);
}
}
@@ -79,14 +80,15 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
Diags(PP.getDiagnostics()), SourceMgr(PP.getSourceManager()),
CollectStats(false), CodeCompleter(CodeCompleter),
CurContext(nullptr), OriginalLexicalContext(nullptr),
- PackContext(nullptr), MSStructPragmaOn(false),
+ MSStructPragmaOn(false),
MSPointerToMemberRepresentationMethod(
LangOpts.getMSPointerToMemberRepresentationMethod()),
- VtorDispModeStack(1, MSVtorDispAttr::Mode(LangOpts.VtorDispMode)),
- DataSegStack(nullptr), BSSSegStack(nullptr), ConstSegStack(nullptr),
- CodeSegStack(nullptr), CurInitSeg(nullptr), VisContext(nullptr),
+ VtorDispStack(MSVtorDispAttr::Mode(LangOpts.VtorDispMode)),
+ PackStack(0), DataSegStack(nullptr), BSSSegStack(nullptr),
+ ConstSegStack(nullptr), CodeSegStack(nullptr), CurInitSeg(nullptr),
+ VisContext(nullptr),
IsBuildingRecoveryCallExpr(false),
- ExprNeedsCleanups(false), LateTemplateParser(nullptr),
+ Cleanup{}, LateTemplateParser(nullptr),
LateTemplateParserCleanup(nullptr),
OpaqueParser(nullptr), IdResolver(pp), StdInitializerList(nullptr),
CXXTypeInfoDecl(nullptr), MSVCGuidDecl(nullptr),
@@ -122,7 +124,8 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
// Tell diagnostics how to render things from the AST library.
Diags.SetArgToStringFn(&FormatASTNodeDiagnosticArgument, &Context);
- ExprEvalContexts.emplace_back(PotentiallyEvaluated, 0, false, nullptr, false);
+ ExprEvalContexts.emplace_back(PotentiallyEvaluated, 0, CleanupInfo{}, nullptr,
+ false);
FunctionScopes.push_back(new FunctionScopeInfo(Diags));
@@ -191,6 +194,11 @@ void Sema::Initialize() {
PushOnScopeChains(Context.getObjCProtocolDecl(), TUScope);
}
+ // Create the internal type for the *StringMakeConstantString builtins.
+ DeclarationName ConstantString = &Context.Idents.get("__NSConstantString");
+ if (IdResolver.begin(ConstantString) == IdResolver.end())
+ PushOnScopeChains(Context.getCFConstantStringDecl(), TUScope);
+
// Initialize Microsoft "predefined C++ types".
if (getLangOpts().MSVCCompat) {
if (getLangOpts().CPlusPlus &&
@@ -201,25 +209,17 @@ void Sema::Initialize() {
addImplicitTypedef("size_t", Context.getSizeType());
}
- // Initialize predefined OpenCL types.
+ // Initialize predefined OpenCL types and supported optional core features.
if (getLangOpts().OpenCL) {
- addImplicitTypedef("image1d_t", Context.OCLImage1dTy);
- addImplicitTypedef("image1d_array_t", Context.OCLImage1dArrayTy);
- addImplicitTypedef("image1d_buffer_t", Context.OCLImage1dBufferTy);
- addImplicitTypedef("image2d_t", Context.OCLImage2dTy);
- addImplicitTypedef("image2d_array_t", Context.OCLImage2dArrayTy);
- addImplicitTypedef("image3d_t", Context.OCLImage3dTy);
+#define OPENCLEXT(Ext) \
+ if (Context.getTargetInfo().getSupportedOpenCLOpts().is_##Ext##_supported_core( \
+ getLangOpts().OpenCLVersion)) \
+ getOpenCLOptions().Ext = 1;
+#include "clang/Basic/OpenCLExtensions.def"
+
addImplicitTypedef("sampler_t", Context.OCLSamplerTy);
addImplicitTypedef("event_t", Context.OCLEventTy);
if (getLangOpts().OpenCLVersion >= 200) {
- addImplicitTypedef("image2d_depth_t", Context.OCLImage2dDepthTy);
- addImplicitTypedef("image2d_array_depth_t",
- Context.OCLImage2dArrayDepthTy);
- addImplicitTypedef("image2d_msaa_t", Context.OCLImage2dMSAATy);
- addImplicitTypedef("image2d_array_msaa_t", Context.OCLImage2dArrayMSAATy);
- addImplicitTypedef("image2d_msaa_depth_t", Context.OCLImage2dMSAADepthTy);
- addImplicitTypedef("image2d_array_msaa_depth_t",
- Context.OCLImage2dArrayMSAADepthTy);
addImplicitTypedef("clk_event_t", Context.OCLClkEventTy);
addImplicitTypedef("queue_t", Context.OCLQueueTy);
addImplicitTypedef("ndrange_t", Context.OCLNDRangeTy);
@@ -261,7 +261,6 @@ void Sema::Initialize() {
Sema::~Sema() {
llvm::DeleteContainerSeconds(LateParsedTemplateMap);
- if (PackContext) FreePackedContext();
if (VisContext) FreeVisContext();
// Kill all the active scopes.
for (unsigned I = 1, E = FunctionScopes.size(); I != E; ++I)
@@ -470,13 +469,12 @@ static bool ShouldRemoveFromUnused(Sema *SemaRef, const DeclaratorDecl *D) {
return false;
}
-/// Obtains a sorted list of functions that are undefined but ODR-used.
+/// Obtains a sorted list of functions and variables that are undefined but
+/// ODR-used.
void Sema::getUndefinedButUsed(
SmallVectorImpl<std::pair<NamedDecl *, SourceLocation> > &Undefined) {
- for (llvm::DenseMap<NamedDecl *, SourceLocation>::iterator
- I = UndefinedButUsed.begin(), E = UndefinedButUsed.end();
- I != E; ++I) {
- NamedDecl *ND = I->first;
+ for (const auto &UndefinedUse : UndefinedButUsed) {
+ NamedDecl *ND = UndefinedUse.first;
// Ignore attributes that have become invalid.
if (ND->isInvalidDecl()) continue;
@@ -491,30 +489,15 @@ void Sema::getUndefinedButUsed(
!FD->getMostRecentDecl()->isInlined())
continue;
} else {
- if (cast<VarDecl>(ND)->hasDefinition() != VarDecl::DeclarationOnly)
+ auto *VD = cast<VarDecl>(ND);
+ if (VD->hasDefinition() != VarDecl::DeclarationOnly)
continue;
- if (ND->isExternallyVisible())
+ if (VD->isExternallyVisible() && !VD->getMostRecentDecl()->isInline())
continue;
}
- Undefined.push_back(std::make_pair(ND, I->second));
+ Undefined.push_back(std::make_pair(ND, UndefinedUse.second));
}
-
- // Sort (in order of use site) so that we're not dependent on the iteration
- // order through an llvm::DenseMap.
- SourceManager &SM = Context.getSourceManager();
- std::sort(Undefined.begin(), Undefined.end(),
- [&SM](const std::pair<NamedDecl *, SourceLocation> &l,
- const std::pair<NamedDecl *, SourceLocation> &r) {
- if (l.second.isValid() && !r.second.isValid())
- return true;
- if (!l.second.isValid() && r.second.isValid())
- return false;
- if (l.second != r.second)
- return SM.isBeforeInTranslationUnit(l.second, r.second);
- return SM.isBeforeInTranslationUnit(l.first->getLocation(),
- r.first->getLocation());
- });
}
/// checkUndefinedButUsed - Check for undefined objects with internal linkage
@@ -541,14 +524,22 @@ static void checkUndefinedButUsed(Sema &S) {
if (!ND->isExternallyVisible()) {
S.Diag(ND->getLocation(), diag::warn_undefined_internal)
<< isa<VarDecl>(ND) << ND;
- } else {
- assert(cast<FunctionDecl>(ND)->getMostRecentDecl()->isInlined() &&
+ } else if (auto *FD = dyn_cast<FunctionDecl>(ND)) {
+ (void)FD;
+ assert(FD->getMostRecentDecl()->isInlined() &&
"used object requires definition but isn't inline or internal?");
+ // FIXME: This is ill-formed; we should reject.
S.Diag(ND->getLocation(), diag::warn_undefined_inline) << ND;
+ } else {
+ assert(cast<VarDecl>(ND)->getMostRecentDecl()->isInline() &&
+ "used var requires definition but isn't inline or internal?");
+ S.Diag(ND->getLocation(), diag::err_undefined_inline_var) << ND;
}
if (I->second.isValid())
S.Diag(I->second, diag::note_used_here);
}
+
+ S.UndefinedButUsed.clear();
}
void Sema::LoadExternalWeakUndeclaredIdentifiers() {
@@ -744,6 +735,12 @@ void Sema::ActOnEndOfTranslationUnit() {
!Diags.isIgnored(diag::warn_delegating_ctor_cycle, SourceLocation()))
CheckDelegatingCtorCycles();
+ if (!Diags.hasErrorOccurred()) {
+ if (ExternalSource)
+ ExternalSource->ReadUndefinedButUsed(UndefinedButUsed);
+ checkUndefinedButUsed(*this);
+ }
+
if (TUKind == TU_Module) {
// If we are building a module, resolve all of the exported declarations
// now.
@@ -877,10 +874,6 @@ void Sema::ActOnEndOfTranslationUnit() {
}
}
- if (ExternalSource)
- ExternalSource->ReadUndefinedButUsed(UndefinedButUsed);
- checkUndefinedButUsed(*this);
-
emitAndClearUnusedLocalTypedefWarnings();
}
@@ -1204,11 +1197,19 @@ BlockScopeInfo *Sema::getCurBlock() {
return CurBSI;
}
-LambdaScopeInfo *Sema::getCurLambda() {
+LambdaScopeInfo *Sema::getCurLambda(bool IgnoreCapturedRegions) {
if (FunctionScopes.empty())
return nullptr;
- auto CurLSI = dyn_cast<LambdaScopeInfo>(FunctionScopes.back());
+ auto I = FunctionScopes.rbegin();
+ if (IgnoreCapturedRegions) {
+ auto E = FunctionScopes.rend();
+ while (I != E && isa<CapturedRegionScopeInfo>(*I))
+ ++I;
+ if (I == E)
+ return nullptr;
+ }
+ auto *CurLSI = dyn_cast<LambdaScopeInfo>(*I);
if (CurLSI && CurLSI->Lambda &&
!CurLSI->Lambda->Encloses(CurContext)) {
// We have switched contexts due to template instantiation.
@@ -1260,14 +1261,14 @@ void Sema::ActOnComment(SourceRange Comment) {
ExternalSemaSource::~ExternalSemaSource() {}
void ExternalSemaSource::ReadMethodPool(Selector Sel) { }
+void ExternalSemaSource::updateOutOfDateSelector(Selector Sel) { }
void ExternalSemaSource::ReadKnownNamespaces(
SmallVectorImpl<NamespaceDecl *> &Namespaces) {
}
void ExternalSemaSource::ReadUndefinedButUsed(
- llvm::DenseMap<NamedDecl *, SourceLocation> &Undefined) {
-}
+ llvm::MapVector<NamedDecl *, SourceLocation> &Undefined) {}
void ExternalSemaSource::ReadMismatchingDeleteExpressions(llvm::MapVector<
FieldDecl *, llvm::SmallVector<std::pair<SourceLocation, bool>, 4>> &) {}
@@ -1281,10 +1282,10 @@ void PrettyDeclStackTraceEntry::print(raw_ostream &OS) const {
}
OS << Message;
- if (TheDecl && isa<NamedDecl>(TheDecl)) {
- std::string Name = cast<NamedDecl>(TheDecl)->getNameAsString();
- if (!Name.empty())
- OS << " '" << Name << '\'';
+ if (auto *ND = dyn_cast_or_null<NamedDecl>(TheDecl)) {
+ OS << " '";
+ ND->getNameForDiagnostic(OS, ND->getASTContext().getPrintingPolicy(), true);
+ OS << "'";
}
OS << '\n';
@@ -1509,7 +1510,8 @@ IdentifierInfo *Sema::getFloat128Identifier() const {
void Sema::PushCapturedRegionScope(Scope *S, CapturedDecl *CD, RecordDecl *RD,
CapturedRegionKind K) {
CapturingScopeInfo *CSI = new CapturedRegionScopeInfo(
- getDiagnostics(), S, CD, RD, CD->getContextParam(), K);
+ getDiagnostics(), S, CD, RD, CD->getContextParam(), K,
+ (getLangOpts().OpenMP && K == CR_OpenMP) ? getOpenMPNestingLevel() : 0);
CSI->ReturnType = Context.VoidTy;
FunctionScopes.push_back(CSI);
}
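The getCurLambda change above adds an IgnoreCapturedRegions flag so callers can look through OpenMP captured-region scopes that may sit between the current point and the enclosing lambda; the PushCapturedRegionScope hunk is what stacks those regions in the first place. A stand-alone sketch of the skip logic, with hypothetical scope types rather than clang's real classes:

  #include <vector>

  struct ScopeInfo { virtual ~ScopeInfo() = default; };
  struct CapturedRegionScopeInfo : ScopeInfo {};
  struct LambdaScopeInfo : ScopeInfo {};

  LambdaScopeInfo *getCurLambda(std::vector<ScopeInfo *> &FunctionScopes,
                                bool IgnoreCapturedRegions) {
    auto I = FunctionScopes.rbegin(), E = FunctionScopes.rend();
    if (IgnoreCapturedRegions)
      // Walk past any captured regions stacked on top of the lambda.
      while (I != E && dynamic_cast<CapturedRegionScopeInfo *>(*I))
        ++I;
    if (I == E)
      return nullptr;
    return dynamic_cast<LambdaScopeInfo *>(*I);
  }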
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaAccess.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaAccess.cpp
index e9772bc52049..98a918bd7d63 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaAccess.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaAccess.cpp
@@ -291,9 +291,10 @@ static AccessResult IsDerivedFromInclusive(const CXXRecordDecl *Derived,
SmallVector<const CXXRecordDecl*, 8> Queue; // actually a stack
while (true) {
- if (Derived->isDependentContext() && !Derived->hasDefinition())
+ if (Derived->isDependentContext() && !Derived->hasDefinition() &&
+ !Derived->isLambda())
return AR_dependent;
-
+
for (const auto &I : Derived->bases()) {
const CXXRecordDecl *RD;
@@ -410,14 +411,8 @@ static AccessResult MatchesFriend(Sema &S,
return AR_accessible;
if (EC.isDependent()) {
- CanQualType FriendTy
- = S.Context.getCanonicalType(S.Context.getTypeDeclType(Friend));
-
- for (EffectiveContext::record_iterator
- I = EC.Records.begin(), E = EC.Records.end(); I != E; ++I) {
- CanQualType ContextTy
- = S.Context.getCanonicalType(S.Context.getTypeDeclType(*I));
- if (MightInstantiateTo(S, ContextTy, FriendTy))
+ for (const CXXRecordDecl *Context : EC.Records) {
+ if (MightInstantiateTo(Context, Friend))
return AR_dependent;
}
}
@@ -1615,10 +1610,10 @@ Sema::AccessResult Sema::CheckDestructorAccess(SourceLocation Loc,
/// Checks access to a constructor.
Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc,
CXXConstructorDecl *Constructor,
+ DeclAccessPair Found,
const InitializedEntity &Entity,
- AccessSpecifier Access,
bool IsCopyBindingRefToTemp) {
- if (!getLangOpts().AccessControl || Access == AS_public)
+ if (!getLangOpts().AccessControl || Found.getAccess() == AS_public)
return AR_accessible;
PartialDiagnostic PD(PDiag());
@@ -1652,17 +1647,17 @@ Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc,
}
- return CheckConstructorAccess(UseLoc, Constructor, Entity, Access, PD);
+ return CheckConstructorAccess(UseLoc, Constructor, Found, Entity, PD);
}
/// Checks access to a constructor.
Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc,
CXXConstructorDecl *Constructor,
+ DeclAccessPair Found,
const InitializedEntity &Entity,
- AccessSpecifier Access,
const PartialDiagnostic &PD) {
if (!getLangOpts().AccessControl ||
- Access == AS_public)
+ Found.getAccess() == AS_public)
return AR_accessible;
CXXRecordDecl *NamingClass = Constructor->getParent();
@@ -1670,16 +1665,28 @@ Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc,
// Initializing a base sub-object is an instance method call on an
// object of the derived class. Otherwise, we have an instance method
// call on an object of the constructed type.
+ //
+ // FIXME: If we have a parent, we're initializing the base class subobject
+ // in aggregate initialization. It's not clear whether the object class
+ // should be the base class or the derived class in that case.
CXXRecordDecl *ObjectClass;
- if (Entity.getKind() == InitializedEntity::EK_Base) {
+ if ((Entity.getKind() == InitializedEntity::EK_Base ||
+ Entity.getKind() == InitializedEntity::EK_Delegating) &&
+ !Entity.getParent()) {
ObjectClass = cast<CXXConstructorDecl>(CurContext)->getParent();
+ } else if (auto *Shadow =
+ dyn_cast<ConstructorUsingShadowDecl>(Found.getDecl())) {
+ // If we're using an inheriting constructor to construct an object,
+ // the object class is the derived class, not the base class.
+ ObjectClass = Shadow->getParent();
} else {
ObjectClass = NamingClass;
}
- AccessTarget AccessEntity(Context, AccessTarget::Member, NamingClass,
- DeclAccessPair::make(Constructor, Access),
- Context.getTypeDeclType(ObjectClass));
+ AccessTarget AccessEntity(
+ Context, AccessTarget::Member, NamingClass,
+ DeclAccessPair::make(Constructor, Found.getAccess()),
+ Context.getTypeDeclType(ObjectClass));
AccessEntity.setDiag(PD);
return CheckAccess(*this, UseLoc, AccessEntity);
@@ -1767,9 +1774,9 @@ Sema::AccessResult Sema::CheckFriendAccess(NamedDecl *target) {
// while the ParsingDeclarator is active.
EffectiveContext EC(CurContext);
switch (CheckEffectiveAccess(*this, EC, target->getLocation(), entity)) {
- case AR_accessible: return Sema::AR_accessible;
- case AR_inaccessible: return Sema::AR_inaccessible;
- case AR_dependent: return Sema::AR_dependent;
+ case ::AR_accessible: return Sema::AR_accessible;
+ case ::AR_inaccessible: return Sema::AR_inaccessible;
+ case ::AR_dependent: return Sema::AR_dependent;
}
llvm_unreachable("invalid access result");
}
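The '::AR_accessible' spellings disambiguate the file-scope AccessResult enumerators from Sema's own members of the same name: inside a Sema member function, unqualified lookup finds the class-scope names first. A reduced illustration with hypothetical names:

  enum Result { R_Yes, R_No }; // file scope, like ::AR_accessible

  struct Sema {
    enum Result { R_Yes, R_No }; // class scope shadows the global one
    int check() {
      Result Class = R_Yes;      // finds Sema::R_Yes
      ::Result Global = ::R_Yes; // explicit '::' picks the global enum
      return int(Class) + int(Global);
    }
  };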
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaAttr.cpp
index 5a29bad29f4d..0d7fba5c6709 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaAttr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaAttr.cpp
@@ -25,103 +25,37 @@ using namespace clang;
// Pragma 'pack' and 'options align'
//===----------------------------------------------------------------------===//
-namespace {
- struct PackStackEntry {
- // We just use a sentinel to represent when the stack is set to mac68k
- // alignment.
- static const unsigned kMac68kAlignmentSentinel = ~0U;
-
- unsigned Alignment;
- IdentifierInfo *Name;
- };
-
- /// PragmaPackStack - Simple class to wrap the stack used by #pragma
- /// pack.
- class PragmaPackStack {
- typedef std::vector<PackStackEntry> stack_ty;
-
- /// Alignment - The current user specified alignment.
- unsigned Alignment;
-
- /// Stack - Entries in the #pragma pack stack, consisting of saved
- /// alignments and optional names.
- stack_ty Stack;
-
- public:
- PragmaPackStack() : Alignment(0) {}
-
- void setAlignment(unsigned A) { Alignment = A; }
- unsigned getAlignment() { return Alignment; }
-
- /// push - Push the current alignment onto the stack, optionally
- /// using the given \arg Name for the record, if non-zero.
- void push(IdentifierInfo *Name) {
- PackStackEntry PSE = { Alignment, Name };
- Stack.push_back(PSE);
- }
-
- /// pop - Pop a record from the stack and restore the current
- /// alignment to the previous value. If \arg Name is non-zero then
- /// the first such named record is popped, otherwise the top record
- /// is popped. Returns true if the pop succeeded.
- bool pop(IdentifierInfo *Name, bool IsReset);
- };
-} // end anonymous namespace.
-
-bool PragmaPackStack::pop(IdentifierInfo *Name, bool IsReset) {
- // If name is empty just pop top.
- if (!Name) {
- // An empty stack is a special case...
- if (Stack.empty()) {
- // If this isn't a reset, it is always an error.
- if (!IsReset)
- return false;
-
- // Otherwise, it is an error only if some alignment has been set.
- if (!Alignment)
- return false;
-
- // Otherwise, reset to the default alignment.
- Alignment = 0;
- } else {
- Alignment = Stack.back().Alignment;
- Stack.pop_back();
- }
-
- return true;
- }
-
- // Otherwise, find the named record.
- for (unsigned i = Stack.size(); i != 0; ) {
- --i;
- if (Stack[i].Name == Name) {
- // Found it, pop up to and including this record.
- Alignment = Stack[i].Alignment;
- Stack.erase(Stack.begin() + i, Stack.end());
- return true;
- }
+Sema::PragmaStackSentinelRAII::PragmaStackSentinelRAII(Sema &S,
+ StringRef SlotLabel,
+ bool ShouldAct)
+ : S(S), SlotLabel(SlotLabel), ShouldAct(ShouldAct) {
+ if (ShouldAct) {
+ S.VtorDispStack.SentinelAction(PSK_Push, SlotLabel);
+ S.DataSegStack.SentinelAction(PSK_Push, SlotLabel);
+ S.BSSSegStack.SentinelAction(PSK_Push, SlotLabel);
+ S.ConstSegStack.SentinelAction(PSK_Push, SlotLabel);
+ S.CodeSegStack.SentinelAction(PSK_Push, SlotLabel);
}
-
- return false;
}
-
-/// FreePackedContext - Deallocate and null out PackContext.
-void Sema::FreePackedContext() {
- delete static_cast<PragmaPackStack*>(PackContext);
- PackContext = nullptr;
+Sema::PragmaStackSentinelRAII::~PragmaStackSentinelRAII() {
+ if (ShouldAct) {
+ S.VtorDispStack.SentinelAction(PSK_Pop, SlotLabel);
+ S.DataSegStack.SentinelAction(PSK_Pop, SlotLabel);
+ S.BSSSegStack.SentinelAction(PSK_Pop, SlotLabel);
+ S.ConstSegStack.SentinelAction(PSK_Pop, SlotLabel);
+ S.CodeSegStack.SentinelAction(PSK_Pop, SlotLabel);
+ }
}
void Sema::AddAlignmentAttributesForRecord(RecordDecl *RD) {
- // If there is no pack context, we don't need any attributes.
- if (!PackContext)
+ // If there is no pack value, we don't need any attributes.
+ if (!PackStack.CurrentValue)
return;
- PragmaPackStack *Stack = static_cast<PragmaPackStack*>(PackContext);
-
// Otherwise, check to see if we need a max field alignment attribute.
- if (unsigned Alignment = Stack->getAlignment()) {
- if (Alignment == PackStackEntry::kMac68kAlignmentSentinel)
+ if (unsigned Alignment = PackStack.CurrentValue) {
+ if (Alignment == Sema::kMac68kAlignmentSentinel)
RD->addAttr(AlignMac68kAttr::CreateImplicit(Context));
else
RD->addAttr(MaxFieldAlignmentAttr::CreateImplicit(Context,
@@ -136,18 +70,15 @@ void Sema::AddMsStructLayoutForRecord(RecordDecl *RD) {
// FIXME: We should merge AddAlignmentAttributesForRecord with
// AddMsStructLayoutForRecord into AddPragmaAttributesForRecord, which takes
// all active pragmas and applies them as attributes to class definitions.
- if (VtorDispModeStack.back() != getLangOpts().VtorDispMode)
+ if (VtorDispStack.CurrentValue != getLangOpts().VtorDispMode)
RD->addAttr(
- MSVtorDispAttr::CreateImplicit(Context, VtorDispModeStack.back()));
+ MSVtorDispAttr::CreateImplicit(Context, VtorDispStack.CurrentValue));
}
void Sema::ActOnPragmaOptionsAlign(PragmaOptionsAlignKind Kind,
SourceLocation PragmaLoc) {
- if (!PackContext)
- PackContext = new PragmaPackStack();
-
- PragmaPackStack *Context = static_cast<PragmaPackStack*>(PackContext);
-
+ PragmaMsStackAction Action = Sema::PSK_Reset;
+ unsigned Alignment = 0;
switch (Kind) {
// For all targets we support native and natural are the same.
//
@@ -155,15 +86,15 @@ void Sema::ActOnPragmaOptionsAlign(PragmaOptionsAlignKind Kind,
case POAK_Native:
case POAK_Power:
case POAK_Natural:
- Context->push(nullptr);
- Context->setAlignment(0);
+ Action = Sema::PSK_Push_Set;
+ Alignment = 0;
break;
// Note that '#pragma options align=packed' is not equivalent to attribute
// packed, it has a different precedence relative to attribute aligned.
case POAK_Packed:
- Context->push(nullptr);
- Context->setAlignment(1);
+ Action = Sema::PSK_Push_Set;
+ Alignment = 1;
break;
case POAK_Mac68k:
@@ -172,24 +103,31 @@ void Sema::ActOnPragmaOptionsAlign(PragmaOptionsAlignKind Kind,
Diag(PragmaLoc, diag::err_pragma_options_align_mac68k_target_unsupported);
return;
}
- Context->push(nullptr);
- Context->setAlignment(PackStackEntry::kMac68kAlignmentSentinel);
+ Action = Sema::PSK_Push_Set;
+ Alignment = Sema::kMac68kAlignmentSentinel;
break;
case POAK_Reset:
// Reset just pops the top of the stack, or resets the current alignment to
// default.
- if (!Context->pop(nullptr, /*IsReset=*/true)) {
- Diag(PragmaLoc, diag::warn_pragma_options_align_reset_failed)
- << "stack empty";
+ Action = Sema::PSK_Pop;
+ if (PackStack.Stack.empty()) {
+ if (PackStack.CurrentValue) {
+ Action = Sema::PSK_Reset;
+ } else {
+ Diag(PragmaLoc, diag::warn_pragma_options_align_reset_failed)
+ << "stack empty";
+ return;
+ }
}
break;
}
+
+ PackStack.Act(PragmaLoc, Action, StringRef(), Alignment);
}
-void Sema::ActOnPragmaPack(PragmaPackKind Kind, IdentifierInfo *Name,
- Expr *alignment, SourceLocation PragmaLoc,
- SourceLocation LParenLoc, SourceLocation RParenLoc) {
+void Sema::ActOnPragmaPack(SourceLocation PragmaLoc, PragmaMsStackAction Action,
+ StringRef SlotLabel, Expr *alignment) {
Expr *Alignment = static_cast<Expr *>(alignment);
// If specified then alignment must be a "small" power of two.
@@ -210,87 +148,48 @@ void Sema::ActOnPragmaPack(PragmaPackKind Kind, IdentifierInfo *Name,
AlignmentVal = (unsigned) Val.getZExtValue();
}
-
- if (!PackContext)
- PackContext = new PragmaPackStack();
-
- PragmaPackStack *Context = static_cast<PragmaPackStack*>(PackContext);
-
- switch (Kind) {
- case Sema::PPK_Default: // pack([n])
- Context->setAlignment(AlignmentVal);
- break;
-
- case Sema::PPK_Show: // pack(show)
+ if (Action == Sema::PSK_Show) {
// Show the current alignment, making sure to show the right value
// for the default.
- AlignmentVal = Context->getAlignment();
// FIXME: This should come from the target.
+ AlignmentVal = PackStack.CurrentValue;
if (AlignmentVal == 0)
AlignmentVal = 8;
- if (AlignmentVal == PackStackEntry::kMac68kAlignmentSentinel)
+ if (AlignmentVal == Sema::kMac68kAlignmentSentinel)
Diag(PragmaLoc, diag::warn_pragma_pack_show) << "mac68k";
else
Diag(PragmaLoc, diag::warn_pragma_pack_show) << AlignmentVal;
- break;
-
- case Sema::PPK_Push: // pack(push [, id] [, [n])
- Context->push(Name);
- // Set the new alignment if specified.
- if (Alignment)
- Context->setAlignment(AlignmentVal);
- break;
-
- case Sema::PPK_Pop: // pack(pop [, id] [, n])
- // MSDN, C/C++ Preprocessor Reference > Pragma Directives > pack:
- // "#pragma pack(pop, identifier, n) is undefined"
- if (Alignment && Name)
+ }
+ // MSDN, C/C++ Preprocessor Reference > Pragma Directives > pack:
+ // "#pragma pack(pop, identifier, n) is undefined"
+ if (Action & Sema::PSK_Pop) {
+ if (Alignment && !SlotLabel.empty())
Diag(PragmaLoc, diag::warn_pragma_pack_pop_identifer_and_alignment);
-
- // Do the pop.
- if (!Context->pop(Name, /*IsReset=*/false)) {
- // If a name was specified then failure indicates the name
- // wasn't found. Otherwise failure indicates the stack was
- // empty.
- Diag(PragmaLoc, diag::warn_pragma_pop_failed)
- << "pack" << (Name ? "no record matching name" : "stack empty");
-
- // FIXME: Warn about popping named records as MSVC does.
- } else {
- // Pop succeeded, set the new alignment if specified.
- if (Alignment)
- Context->setAlignment(AlignmentVal);
- }
- break;
+ if (PackStack.Stack.empty())
+ Diag(PragmaLoc, diag::warn_pragma_pop_failed) << "pack" << "stack empty";
}
+
+ PackStack.Act(PragmaLoc, Action, SlotLabel, AlignmentVal);
}
void Sema::ActOnPragmaMSStruct(PragmaMSStructKind Kind) {
MSStructPragmaOn = (Kind == PMSST_ON);
}
-void Sema::ActOnPragmaMSComment(PragmaMSCommentKind Kind, StringRef Arg) {
- // FIXME: Serialize this.
- switch (Kind) {
- case PCK_Unknown:
- llvm_unreachable("unexpected pragma comment kind");
- case PCK_Linker:
- Consumer.HandleLinkerOptionPragma(Arg);
- return;
- case PCK_Lib:
- Consumer.HandleDependentLibrary(Arg);
- return;
- case PCK_Compiler:
- case PCK_ExeStr:
- case PCK_User:
- return; // We ignore all of these.
- }
- llvm_unreachable("invalid pragma comment kind");
+void Sema::ActOnPragmaMSComment(SourceLocation CommentLoc,
+ PragmaMSCommentKind Kind, StringRef Arg) {
+ auto *PCD = PragmaCommentDecl::Create(
+ Context, Context.getTranslationUnitDecl(), CommentLoc, Kind, Arg);
+ Context.getTranslationUnitDecl()->addDecl(PCD);
+ Consumer.HandleTopLevelDecl(DeclGroupRef(PCD));
}
-void Sema::ActOnPragmaDetectMismatch(StringRef Name, StringRef Value) {
- // FIXME: Serialize this.
- Consumer.HandleDetectMismatch(Name, Value);
+void Sema::ActOnPragmaDetectMismatch(SourceLocation Loc, StringRef Name,
+ StringRef Value) {
+ auto *PDMD = PragmaDetectMismatchDecl::Create(
+ Context, Context.getTranslationUnitDecl(), Loc, Name, Value);
+ Context.getTranslationUnitDecl()->addDecl(PDMD);
+ Consumer.HandleTopLevelDecl(DeclGroupRef(PDMD));
}
void Sema::ActOnPragmaMSPointersToMembers(
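Instead of forwarding #pragma comment and #pragma detect_mismatch straight to the ASTConsumer (the old code's "FIXME: Serialize this"), the pragmas are now recorded as PragmaCommentDecl / PragmaDetectMismatchDecl nodes in the translation unit, so they can travel through AST serialization like any other decl. For reference, the user-facing pragmas these represent (MSVC extensions):

  // Link against a library; recorded as a PragmaCommentDecl (PCK_Lib).
  #pragma comment(lib, "ws2_32.lib")

  // Fail at link time if two TUs disagree on the value; recorded as a
  // PragmaDetectMismatchDecl.
  #pragma detect_mismatch("my_lib_version", "9")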
@@ -300,29 +199,13 @@ void Sema::ActOnPragmaMSPointersToMembers(
ImplicitMSInheritanceAttrLoc = PragmaLoc;
}
-void Sema::ActOnPragmaMSVtorDisp(PragmaVtorDispKind Kind,
+void Sema::ActOnPragmaMSVtorDisp(PragmaMsStackAction Action,
SourceLocation PragmaLoc,
MSVtorDispAttr::Mode Mode) {
- switch (Kind) {
- case PVDK_Set:
- VtorDispModeStack.back() = Mode;
- break;
- case PVDK_Push:
- VtorDispModeStack.push_back(Mode);
- break;
- case PVDK_Reset:
- VtorDispModeStack.clear();
- VtorDispModeStack.push_back(MSVtorDispAttr::Mode(LangOpts.VtorDispMode));
- break;
- case PVDK_Pop:
- VtorDispModeStack.pop_back();
- if (VtorDispModeStack.empty()) {
- Diag(PragmaLoc, diag::warn_pragma_pop_failed) << "vtordisp"
- << "stack empty";
- VtorDispModeStack.push_back(MSVtorDispAttr::Mode(LangOpts.VtorDispMode));
- }
- break;
- }
+ if (Action & PSK_Pop && VtorDispStack.Stack.empty())
+ Diag(PragmaLoc, diag::warn_pragma_pop_failed) << "vtordisp"
+ << "stack empty";
+ VtorDispStack.Act(PragmaLoc, Action, StringRef(), Mode);
}
template<typename ValueType>
@@ -331,7 +214,7 @@ void Sema::PragmaStack<ValueType>::Act(SourceLocation PragmaLocation,
llvm::StringRef StackSlotLabel,
ValueType Value) {
if (Action == PSK_Reset) {
- CurrentValue = nullptr;
+ CurrentValue = DefaultValue;
return;
}
if (Action & PSK_Push)
@@ -339,8 +222,9 @@ void Sema::PragmaStack<ValueType>::Act(SourceLocation PragmaLocation,
else if (Action & PSK_Pop) {
if (!StackSlotLabel.empty()) {
// If we've got a label, try to find it and jump there.
- auto I = std::find_if(Stack.rbegin(), Stack.rend(),
- [&](const Slot &x) { return x.StackSlotLabel == StackSlotLabel; });
+ auto I = llvm::find_if(llvm::reverse(Stack), [&](const Slot &x) {
+ return x.StackSlotLabel == StackSlotLabel;
+ });
      // If we found the label, pop from there.
if (I != Stack.rend()) {
CurrentValue = I->Value;
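The templated Sema::PragmaStack<T> seen here generalizes the old one-off PragmaPackStack: a current value plus a stack of saved slots, where a slot may carry a label so that 'pop, label' unwinds to the matching 'push, label'. A stripped-down, STL-only sketch of that data structure (hypothetical, not clang's full Act interface):

  #include <algorithm>
  #include <iterator>
  #include <string>
  #include <utility>
  #include <vector>

  template <typename T> struct PragmaStack {
    struct Slot { std::string Label; T Value; };
    std::vector<Slot> Stack;
    T CurrentValue{};

    void push(std::string Label) {
      Stack.push_back({std::move(Label), CurrentValue});
    }
    void pop(const std::string &Label) {
      if (Label.empty()) {
        if (Stack.empty())
          return; // clang diagnoses "stack empty" here
        CurrentValue = Stack.back().Value;
        Stack.pop_back();
        return;
      }
      // Labeled pop: unwind to the most recent matching push, if any.
      auto I = std::find_if(Stack.rbegin(), Stack.rend(),
                            [&](const Slot &S) { return S.Label == Label; });
      if (I == Stack.rend())
        return; // no such label: leave the stack alone
      CurrentValue = I->Value;
      Stack.erase(std::prev(I.base()), Stack.end());
    }
  };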
@@ -467,7 +351,8 @@ void Sema::ActOnPragmaUnused(const Token &IdTok, Scope *curScope,
if (VD->isUsed())
Diag(PragmaLoc, diag::warn_used_but_marked_unused) << Name;
- VD->addAttr(UnusedAttr::CreateImplicit(Context, IdTok.getLocation()));
+ VD->addAttr(UnusedAttr::CreateImplicit(Context, UnusedAttr::GNU_unused,
+ IdTok.getLocation()));
}
void Sema::AddCFAuditedAttribute(Decl *D) {
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp
index 61dfdd3f7206..90af6d5a927f 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp
@@ -11,11 +11,14 @@
///
//===----------------------------------------------------------------------===//
-#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
+#include "clang/Sema/Lookup.h"
+#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;
@@ -67,33 +70,30 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
// Ph - preference in host mode
// Pd - preference in device mode
// H - handled in (x)
-// Preferences: b-best, f-fallback, l-last resort, n-never.
+// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
-// | F | T | Ph | Pd | H |
-// |----+----+----+----+-----+
-// | d | d | b | b | (b) |
-// | d | g | n | n | (a) |
-// | d | h | l | l | (e) |
-// | d | hd | f | f | (c) |
-// | g | d | b | b | (b) |
-// | g | g | n | n | (a) |
-// | g | h | l | l | (e) |
-// | g | hd | f | f | (c) |
-// | h | d | l | l | (e) |
-// | h | g | b | b | (b) |
-// | h | h | b | b | (b) |
-// | h | hd | f | f | (c) |
-// | hd | d | l | f | (d) |
-// | hd | g | f | n |(d/a)|
-// | hd | h | f | l | (d) |
-// | hd | hd | b | b | (b) |
+// | F | T | Ph | Pd | H |
+// |----+----+-----+-----+-----+
+// | d | d | N | N | (c) |
+// | d | g | -- | -- | (a) |
+// | d | h | -- | -- | (e) |
+// | d | hd | HD | HD | (b) |
+// | g | d | N | N | (c) |
+// | g | g | -- | -- | (a) |
+// | g | h | -- | -- | (e) |
+// | g | hd | HD | HD | (b) |
+// | h | d | -- | -- | (e) |
+// | h | g | N | N | (c) |
+// | h | h | N | N | (c) |
+// | h | hd | HD | HD | (b) |
+// | hd | d | WS | SS | (d) |
+// | hd | g | SS | -- |(d/a)|
+// | hd | h | SS | WS | (d) |
+// | hd | hd | HD | HD | (b) |
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
const FunctionDecl *Callee) {
- assert(getLangOpts().CUDATargetOverloads &&
- "Should not be called w/o enabled target overloads.");
-
assert(Callee && "Callee must be valid.");
CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
CUDAFunctionTarget CallerTarget =
@@ -111,130 +111,62 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
(CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
return CFP_Never;
- // (b) Best case scenarios
+ // (b) Calling HostDevice is OK for everyone.
+ if (CalleeTarget == CFT_HostDevice)
+ return CFP_HostDevice;
+
+ // (c) Best case scenarios
if (CalleeTarget == CallerTarget ||
(CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
- return CFP_Best;
-
- // (c) Calling HostDevice is OK as a fallback that works for everyone.
- if (CalleeTarget == CFT_HostDevice)
- return CFP_Fallback;
-
- // Figure out what should be returned 'last resort' cases. Normally
- // those would not be allowed, but we'll consider them if
- // CUDADisableTargetCallChecks is true.
- CUDAFunctionPreference QuestionableResult =
- getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;
+ return CFP_Native;
// (d) HostDevice behavior depends on compilation mode.
if (CallerTarget == CFT_HostDevice) {
- // Calling a function that matches compilation mode is OK.
- // Calling a function from the other side is frowned upon.
- if (getLangOpts().CUDAIsDevice)
- return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
- else
- return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
- ? CFP_Fallback
- : QuestionableResult;
+ // It's OK to call a compilation-mode matching function from an HD one.
+ if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
+ (!getLangOpts().CUDAIsDevice &&
+ (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
+ return CFP_SameSide;
+
+ // Calls from HD to non-mode-matching functions (i.e., to host functions
+ // when compiling in device mode or to device functions when compiling in
+ // host mode) are allowed at the sema level, but eventually rejected if
+ // they're ever codegened. TODO: Reject said calls earlier.
+ return CFP_WrongSide;
}
// (e) Calling across device/host boundary is not something you should do.
if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
(CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
(CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
- return QuestionableResult;
+ return CFP_Never;
llvm_unreachable("All cases should've been handled by now.");
}
-bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
- const FunctionDecl *Callee) {
- // With target overloads enabled, we only disallow calling
- // combinations with CFP_Never.
- if (getLangOpts().CUDATargetOverloads)
- return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;
-
- // The CUDADisableTargetCallChecks short-circuits this check: we assume all
- // cross-target calls are valid.
- if (getLangOpts().CUDADisableTargetCallChecks)
- return false;
-
- CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
- CalleeTarget = IdentifyCUDATarget(Callee);
-
- // If one of the targets is invalid, the check always fails, no matter what
- // the other target is.
- if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
- return true;
-
- // CUDA B.1.1 "The __device__ qualifier declares a function that is [...]
- // Callable from the device only."
- if (CallerTarget == CFT_Host && CalleeTarget == CFT_Device)
- return true;
-
- // CUDA B.1.2 "The __global__ qualifier declares a function that is [...]
- // Callable from the host only."
- // CUDA B.1.3 "The __host__ qualifier declares a function that is [...]
- // Callable from the host only."
- if ((CallerTarget == CFT_Device || CallerTarget == CFT_Global) &&
- (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global))
- return true;
-
- // CUDA B.1.3 "The __device__ and __host__ qualifiers can be used together
- // however, in which case the function is compiled for both the host and the
- // device. The __CUDA_ARCH__ macro [...] can be used to differentiate code
- // paths between host and device."
- if (CallerTarget == CFT_HostDevice && CalleeTarget != CFT_HostDevice) {
- // If the caller is implicit then the check always passes.
- if (Caller->isImplicit()) return false;
-
- bool InDeviceMode = getLangOpts().CUDAIsDevice;
- if (!InDeviceMode && CalleeTarget != CFT_Host)
- return true;
- if (InDeviceMode && CalleeTarget != CFT_Device) {
- // Allow host device functions to call host functions if explicitly
- // requested.
- if (CalleeTarget == CFT_Host &&
- getLangOpts().CUDAAllowHostCallsFromHostDevice) {
- Diag(Caller->getLocation(),
- diag::warn_host_calls_from_host_device)
- << Callee->getNameAsString() << Caller->getNameAsString();
- return false;
- }
-
- return true;
- }
- }
-
- return false;
-}
-
-template <typename T, typename FetchDeclFn>
-static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
- llvm::SmallVectorImpl<T> &Matches,
- FetchDeclFn FetchDecl) {
- assert(S.getLangOpts().CUDATargetOverloads &&
- "Should not be called w/o enabled target overloads.");
+template <typename T>
+static void EraseUnwantedCUDAMatchesImpl(
+ Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
+ std::function<const FunctionDecl *(const T &)> FetchDecl) {
if (Matches.size() <= 1)
return;
+ // Gets the CUDA function preference for a call from Caller to Match.
+ auto GetCFP = [&](const T &Match) {
+ return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
+ };
+
// Find the best call preference among the functions in Matches.
- Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
- for (auto const &Match : Matches) {
- P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
- if (P > BestCFP)
- BestCFP = P;
- }
+ Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
+ Matches.begin(), Matches.end(),
+ [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));
// Erase all functions with lower priority.
- for (unsigned I = 0, N = Matches.size(); I != N;)
- if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
- Matches[I] = Matches[--N];
- Matches.resize(N);
- } else {
- ++I;
- }
+ Matches.erase(
+ llvm::remove_if(Matches,
+ [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
+ Matches.end());
}
void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
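EraseUnwantedCUDAMatchesImpl now expresses the pruning as max_element plus remove_if: find the best preference among the candidates, then drop everything that scores below it. An STL-only sketch of the idiom, with a placeholder scoring function standing in for IdentifyCUDAPreference:

  #include <algorithm>
  #include <vector>

  void keepBest(std::vector<int> &Matches) {
    if (Matches.size() <= 1)
      return;
    auto Score = [](int M) { return M % 7; }; // placeholder preference
    int Best = Score(*std::max_element(
        Matches.begin(), Matches.end(),
        [&](int A, int B) { return Score(A) < Score(B); }));
    // Erase all candidates with a lower score than the best one.
    Matches.erase(std::remove_if(Matches.begin(), Matches.end(),
                                 [&](int M) { return Score(M) < Best; }),
                  Matches.end());
  }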
@@ -273,12 +205,9 @@ static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
Sema::CUDAFunctionTarget Target2,
Sema::CUDAFunctionTarget *ResolvedTarget) {
- if (Target1 == Sema::CFT_Global && Target2 == Sema::CFT_Global) {
- // TODO: this shouldn't happen, really. Methods cannot be marked __global__.
- // Clang should detect this earlier and produce an error. Then this
- // condition can be changed to an assertion.
- return true;
- }
+ // Only free functions and static member functions may be global.
+ assert(Target1 != Sema::CFT_Global);
+ assert(Target2 != Sema::CFT_Global);
if (Target1 == Sema::CFT_HostDevice) {
*ResolvedTarget = Target2;
@@ -422,3 +351,132 @@ bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
return false;
}
+
+bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
+ if (!CD->isDefined() && CD->isTemplateInstantiation())
+ InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
+
+ // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
+ // empty at a point in the translation unit, if it is either a
+ // trivial constructor
+ if (CD->isTrivial())
+ return true;
+
+ // ... or it satisfies all of the following conditions:
+ // The constructor function has been defined.
+ // The constructor function has no parameters,
+ // and the function body is an empty compound statement.
+ if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
+ return false;
+
+ // Its class has no virtual functions and no virtual base classes.
+ if (CD->getParent()->isDynamicClass())
+ return false;
+
+ // The only form of initializer allowed is an empty constructor.
+  // This will recursively check all base classes and member initializers.
+ if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
+ if (const CXXConstructExpr *CE =
+ dyn_cast<CXXConstructExpr>(CI->getInit()))
+ return isEmptyCudaConstructor(Loc, CE->getConstructor());
+ return false;
+ }))
+ return false;
+
+ return true;
+}
+
+bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
+ // No destructor -> no problem.
+ if (!DD)
+ return true;
+
+ if (!DD->isDefined() && DD->isTemplateInstantiation())
+ InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
+
+ // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
+ // empty at a point in the translation unit, if it is either a
+  // trivial destructor
+ if (DD->isTrivial())
+ return true;
+
+ // ... or it satisfies all of the following conditions:
+ // The destructor function has been defined.
+ // and the function body is an empty compound statement.
+ if (!DD->hasTrivialBody())
+ return false;
+
+ const CXXRecordDecl *ClassDecl = DD->getParent();
+
+ // Its class has no virtual functions and no virtual base classes.
+ if (ClassDecl->isDynamicClass())
+ return false;
+
+ // Only empty destructors are allowed. This will recursively check
+ // destructors for all base classes...
+ if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
+ if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
+ return isEmptyCudaDestructor(Loc, RD->getDestructor());
+ return true;
+ }))
+ return false;
+
+ // ... and member fields.
+ if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
+ if (CXXRecordDecl *RD = Field->getType()
+ ->getBaseElementTypeUnsafe()
+ ->getAsCXXRecordDecl())
+ return isEmptyCudaDestructor(Loc, RD->getDestructor());
+ return true;
+ }))
+ return false;
+
+ return true;
+}
+
+// With -fcuda-host-device-constexpr, an unattributed constexpr function is
+// treated as implicitly __host__ __device__, unless:
+// * it is a variadic function (device-side variadic functions are not
+// allowed), or
+// * a __device__ function with this signature was already declared, in which
+//   case we output an error, unless the __device__ decl is in a
+// system header, in which case we leave the constexpr function unattributed.
+void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
+ const LookupResult &Previous) {
+ assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
+ if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
+ NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
+ NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
+ return;
+
+ // Is D a __device__ function with the same signature as NewD, ignoring CUDA
+ // attributes?
+ auto IsMatchingDeviceFn = [&](NamedDecl *D) {
+ if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
+ D = Using->getTargetDecl();
+ FunctionDecl *OldD = D->getAsFunction();
+ return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
+ !OldD->hasAttr<CUDAHostAttr>() &&
+ !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
+ /* ConsiderCudaAttrs = */ false);
+ };
+ auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
+ if (It != Previous.end()) {
+ // We found a __device__ function with the same name and signature as NewD
+ // (ignoring CUDA attrs). This is an error unless that function is defined
+ // in a system header, in which case we simply return without making NewD
+ // host+device.
+ NamedDecl *Match = *It;
+ if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
+ Diag(NewD->getLocation(),
+ diag::err_cuda_unattributed_constexpr_cannot_overload_device)
+ << NewD->getName();
+ Diag(Match->getLocation(),
+ diag::note_cuda_conflicting_device_function_declared_here);
+ }
+ return;
+ }
+
+ NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+}
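For intuition, here is a hypothetical C++ illustration of what isEmptyCudaConstructor accepts: trivial constructors, or defined parameterless constructors with empty bodies whose classes have no virtuals and whose initializers only invoke other empty constructors:

  struct A {
    A() {} // defined, no parameters, empty body: "empty"
  };
  struct B {
    B() : a() {} // initializer invokes A's empty ctor: still "empty"
    A a;
  };
  struct C {
    C() : x(42) {} // non-constructor initializer: not "empty"
    int x;
  };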
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCXXScopeSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCXXScopeSpec.cpp
index f7aace625a92..949263d24897 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCXXScopeSpec.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCXXScopeSpec.cpp
@@ -117,8 +117,18 @@ DeclContext *Sema::computeDeclContext(const CXXScopeSpec &SS,
// specializations, we're entering into the definition of that
// class template partial specialization.
if (ClassTemplatePartialSpecializationDecl *PartialSpec
- = ClassTemplate->findPartialSpecialization(ContextType))
+ = ClassTemplate->findPartialSpecialization(ContextType)) {
+ // A declaration of the partial specialization must be visible.
+ // We can always recover here, because this only happens when we're
+ // entering the context, and that can't happen in a SFINAE context.
+ assert(!isSFINAEContext() &&
+ "partial specialization scope specifier in SFINAE context?");
+ if (!hasVisibleDeclaration(PartialSpec))
+ diagnoseMissingImport(SS.getLastQualifierNameLoc(), PartialSpec,
+ MissingImportKind::PartialSpecialization,
+ /*Recover*/true);
return PartialSpec;
+ }
}
} else if (const RecordType *RecordT = NNSType->getAs<RecordType>()) {
// The nested name specifier refers to a member of a class template.
@@ -195,6 +205,8 @@ bool Sema::RequireCompleteDeclContext(CXXScopeSpec &SS,
TagDecl *tag = dyn_cast<TagDecl>(DC);
// If this is a dependent type, then we consider it complete.
+ // FIXME: This is wrong; we should require a (visible) definition to
+ // exist in this case too.
if (!tag || tag->isDependentContext())
return false;
@@ -218,10 +230,23 @@ bool Sema::RequireCompleteDeclContext(CXXScopeSpec &SS,
// Fixed enum types are complete, but they aren't valid as scopes
// until we see a definition, so awkwardly pull out this special
// case.
- // FIXME: The definition might not be visible; complain if it is not.
const EnumType *enumType = dyn_cast_or_null<EnumType>(tagType);
- if (!enumType || enumType->getDecl()->isCompleteDefinition())
+ if (!enumType)
return false;
+ if (enumType->getDecl()->isCompleteDefinition()) {
+ // If we know about the definition but it is not visible, complain.
+ NamedDecl *SuggestedDef = nullptr;
+ if (!hasVisibleDefinition(enumType->getDecl(), &SuggestedDef,
+ /*OnlyNeedComplete*/false)) {
+ // If the user is going to see an error here, recover by making the
+ // definition visible.
+ bool TreatAsComplete = !isSFINAEContext();
+ diagnoseMissingImport(loc, SuggestedDef, MissingImportKind::Definition,
+ /*Recover*/TreatAsComplete);
+ return !TreatAsComplete;
+ }
+ return false;
+ }
// Try to instantiate the definition, if this is a specialization of an
// enumeration temploid.
@@ -606,6 +631,10 @@ bool Sema::BuildCXXNestedNameSpecifier(Scope *S,
diagnoseTypo(Corrected, PDiag(diag::err_undeclared_var_use_suggest)
<< Name);
+ if (Corrected.getCorrectionSpecifier())
+ SS.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
+ SourceRange(Found.getNameLoc()));
+
if (NamedDecl *ND = Corrected.getFoundDecl())
Found.addDecl(ND);
Found.setLookupName(Corrected.getCorrection());
@@ -777,7 +806,7 @@ bool Sema::BuildCXXNestedNameSpecifier(Scope *S,
if (!Found.empty()) {
if (TypeDecl *TD = Found.getAsSingle<TypeDecl>())
Diag(IdentifierLoc, diag::err_expected_class_or_namespace)
- << QualType(TD->getTypeForDecl(), 0) << getLangOpts().CPlusPlus;
+ << Context.getTypeDeclType(TD) << getLangOpts().CPlusPlus;
else {
Diag(IdentifierLoc, diag::err_expected_class_or_namespace)
<< &Identifier << getLangOpts().CPlusPlus;
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp
index ad1d7da4d070..e83dd0716780 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCast.cpp
@@ -22,6 +22,7 @@
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Initialization.h"
#include "llvm/ADT/SmallVector.h"
#include <set>
@@ -640,8 +641,8 @@ void CastOperation::CheckDynamicCast() {
// If we're dynamic_casting from a prvalue to an rvalue reference, we need
// to materialize the prvalue before we bind the reference to it.
if (SrcExpr.get()->isRValue())
- SrcExpr = new (Self.Context) MaterializeTemporaryExpr(
- SrcType, SrcExpr.get(), /*IsLValueReference*/false);
+ SrcExpr = Self.CreateMaterializeTemporaryExpr(
+ SrcType, SrcExpr.get(), /*IsLValueReference*/ false);
SrcPointee = SrcType;
}
@@ -1313,16 +1314,13 @@ TryStaticDowncast(Sema &Self, CanQualType SrcType, CanQualType DestType,
}
std::string PathDisplayStr;
std::set<unsigned> DisplayedPaths;
- for (CXXBasePaths::paths_iterator PI = Paths.begin(), PE = Paths.end();
- PI != PE; ++PI) {
- if (DisplayedPaths.insert(PI->back().SubobjectNumber).second) {
+ for (clang::CXXBasePath &Path : Paths) {
+ if (DisplayedPaths.insert(Path.back().SubobjectNumber).second) {
// We haven't displayed a path to this particular base
// class subobject yet.
PathDisplayStr += "\n ";
- for (CXXBasePath::const_reverse_iterator EI = PI->rbegin(),
- EE = PI->rend();
- EI != EE; ++EI)
- PathDisplayStr += EI->Base->getType().getAsString() + " -> ";
+ for (CXXBasePathElement &PE : llvm::reverse(Path))
+ PathDisplayStr += PE.Base->getType().getAsString() + " -> ";
PathDisplayStr += QualType(DestType).getAsString();
}
}
@@ -1402,8 +1400,10 @@ TryStaticMemberPointerUpcast(Sema &Self, ExprResult &SrcExpr, QualType SrcType,
// Lock down the inheritance model right now in MS ABI, whether or not the
// pointee types are the same.
- if (Self.Context.getTargetInfo().getCXXABI().isMicrosoft())
+ if (Self.Context.getTargetInfo().getCXXABI().isMicrosoft()) {
(void)Self.isCompleteType(OpRange.getBegin(), SrcType);
+ (void)Self.isCompleteType(OpRange.getBegin(), DestType);
+ }
// T == T, modulo cv
if (!Self.Context.hasSameUnqualifiedType(SrcMemPtr->getPointeeType(),
@@ -1646,8 +1646,8 @@ static TryCastResult TryConstCast(Sema &Self, ExprResult &SrcExpr,
if (NeedToMaterializeTemporary)
// This is a const_cast from a class prvalue to an rvalue reference type.
// Materialize a temporary to store the result of the conversion.
- SrcExpr = new (Self.Context) MaterializeTemporaryExpr(
- SrcType, SrcExpr.get(), /*IsLValueReference*/ false);
+ SrcExpr = Self.CreateMaterializeTemporaryExpr(SrcType, SrcExpr.get(),
+ /*IsLValueReference*/ false);
return TC_Success;
}
@@ -1724,6 +1724,97 @@ static void DiagnoseCastOfObjCSEL(Sema &Self, const ExprResult &SrcExpr,
}
}
+/// Diagnose casts that change the calling convention of a pointer to a function
+/// defined in the current TU.
+static void DiagnoseCallingConvCast(Sema &Self, const ExprResult &SrcExpr,
+ QualType DstType, SourceRange OpRange) {
+ // Check if this cast would change the calling convention of a function
+ // pointer type.
+ QualType SrcType = SrcExpr.get()->getType();
+ if (Self.Context.hasSameType(SrcType, DstType) ||
+ !SrcType->isFunctionPointerType() || !DstType->isFunctionPointerType())
+ return;
+ const auto *SrcFTy =
+ SrcType->castAs<PointerType>()->getPointeeType()->castAs<FunctionType>();
+ const auto *DstFTy =
+ DstType->castAs<PointerType>()->getPointeeType()->castAs<FunctionType>();
+ CallingConv SrcCC = SrcFTy->getCallConv();
+ CallingConv DstCC = DstFTy->getCallConv();
+ if (SrcCC == DstCC)
+ return;
+
+ // We have a calling convention cast. Check if the source is a pointer to a
+ // known, specific function that has already been defined.
+ Expr *Src = SrcExpr.get()->IgnoreParenImpCasts();
+ if (auto *UO = dyn_cast<UnaryOperator>(Src))
+ if (UO->getOpcode() == UO_AddrOf)
+ Src = UO->getSubExpr()->IgnoreParenImpCasts();
+ auto *DRE = dyn_cast<DeclRefExpr>(Src);
+ if (!DRE)
+ return;
+ auto *FD = dyn_cast<FunctionDecl>(DRE->getDecl());
+ const FunctionDecl *Definition;
+ if (!FD || !FD->hasBody(Definition))
+ return;
+
+ // Only warn if we are casting from the default convention to a non-default
+ // convention. This can happen when the programmer forgot to apply the calling
+ // convention to the function definition and then inserted this cast to
+ // satisfy the type system.
+ CallingConv DefaultCC = Self.getASTContext().getDefaultCallingConvention(
+ FD->isVariadic(), FD->isCXXInstanceMember());
+ if (DstCC == DefaultCC || SrcCC != DefaultCC)
+ return;
+
+ // Diagnose this cast, as it is probably bad.
+ StringRef SrcCCName = FunctionType::getNameForCallConv(SrcCC);
+ StringRef DstCCName = FunctionType::getNameForCallConv(DstCC);
+ Self.Diag(OpRange.getBegin(), diag::warn_cast_calling_conv)
+ << SrcCCName << DstCCName << OpRange;
+
+ // The checks above are cheaper than checking if the diagnostic is enabled.
+ // However, it's worth checking if the warning is enabled before we construct
+ // a fixit.
+ if (Self.Diags.isIgnored(diag::warn_cast_calling_conv, OpRange.getBegin()))
+ return;
+
+ // Try to suggest a fixit to change the calling convention of the function
+ // whose address was taken. Try to use the latest macro for the convention.
+ // For example, users probably want to write "WINAPI" instead of "__stdcall"
+ // to match the Windows header declarations.
+ SourceLocation NameLoc = Definition->getNameInfo().getLoc();
+ Preprocessor &PP = Self.getPreprocessor();
+ SmallVector<TokenValue, 6> AttrTokens;
+ SmallString<64> CCAttrText;
+ llvm::raw_svector_ostream OS(CCAttrText);
+ if (Self.getLangOpts().MicrosoftExt) {
+ // __stdcall or __vectorcall
+ OS << "__" << DstCCName;
+ IdentifierInfo *II = PP.getIdentifierInfo(OS.str());
+ AttrTokens.push_back(II->isKeyword(Self.getLangOpts())
+ ? TokenValue(II->getTokenID())
+ : TokenValue(II));
+ } else {
+ // __attribute__((stdcall)) or __attribute__((vectorcall))
+ OS << "__attribute__((" << DstCCName << "))";
+ AttrTokens.push_back(tok::kw___attribute);
+ AttrTokens.push_back(tok::l_paren);
+ AttrTokens.push_back(tok::l_paren);
+ IdentifierInfo *II = PP.getIdentifierInfo(DstCCName);
+ AttrTokens.push_back(II->isKeyword(Self.getLangOpts())
+ ? TokenValue(II->getTokenID())
+ : TokenValue(II));
+ AttrTokens.push_back(tok::r_paren);
+ AttrTokens.push_back(tok::r_paren);
+ }
+ StringRef AttrSpelling = PP.getLastMacroWithSpelling(NameLoc, AttrTokens);
+ if (!AttrSpelling.empty())
+ CCAttrText = AttrSpelling;
+ OS << ' ';
+ Self.Diag(NameLoc, diag::note_change_calling_conv_fixit)
+ << FD << DstCCName << FixItHint::CreateInsertion(NameLoc, CCAttrText);
+}
+
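As a rough illustration (not part of the change itself; the names are invented, and WINAPI is assumed to expand to __stdcall as in the Windows headers), the pattern this diagnostic targets looks like:

    // 32-bit Windows sketch: the callback was defined without a calling
    // convention, then cast to satisfy the typedef.
    typedef void (__stdcall *EventFn)(int);
    void OnEvent(int code) {}             // defaults to __cdecl
    EventFn Handler = (EventFn)&OnEvent;  // warns about the convention change
    // The fixit note suggests defining 'void __stdcall OnEvent(int code)',
    // preferring a macro spelling such as WINAPI when one is in scope.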
static void checkIntToPointerCast(bool CStyle, SourceLocation Loc,
const Expr *SrcExpr, QualType DestType,
Sema &Self) {
@@ -1750,6 +1841,32 @@ static void checkIntToPointerCast(bool CStyle, SourceLocation Loc,
}
}
+static bool fixOverloadedReinterpretCastExpr(Sema &Self, QualType DestType,
+ ExprResult &Result) {
+ // We can only fix an overloaded reinterpret_cast if
+ // - it is a template with explicit arguments that resolves to an lvalue
+ // unambiguously, or
+ // - it is the only function in an overload set that may have its address
+ // taken.
+
+ Expr *E = Result.get();
+ // TODO: what if this fails because of DiagnoseUseOfDecl or something
+ // like it?
+ if (Self.ResolveAndFixSingleFunctionTemplateSpecialization(
+ Result,
+ Expr::getValueKindForType(DestType) == VK_RValue // Convert Fun to Ptr
+ ) &&
+ Result.isUsable())
+ return true;
+
+ // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
+ // preserves Result.
+ Result = E;
+ if (!Self.resolveAndFixAddressOfOnlyViableOverloadCandidate(Result))
+ return false;
+ return Result.isUsable();
+}
+
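Roughly, the two routes this helper tries correspond to source like the following sketch (hypothetical names; the second case relies on clang's enable_if attribute):

    // Route 1: an explicit template-id that resolves to one specialization.
    template <typename T> T twice(T x) { return x + x; }
    auto p = reinterpret_cast<int (*)(int)>(&twice<int>);
    // Route 2: an overload set where only one candidate may have its
    // address taken, because the other is disabled via enable_if.
    void pick(int) __attribute__((enable_if(false, "never viable")));
    void pick(long);
    auto q = reinterpret_cast<void (*)(long)>(&pick);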
static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
QualType DestType, bool CStyle,
SourceRange OpRange,
@@ -1761,21 +1878,15 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
QualType SrcType = SrcExpr.get()->getType();
// Is the source an overloaded name? (i.e. &foo)
- // If so, reinterpret_cast can not help us here (13.4, p1, bullet 5) ...
+  // If so, reinterpret_cast generally cannot help us here (13.4, p1, bullet 5).
if (SrcType == Self.Context.OverloadTy) {
- // ... unless foo<int> resolves to an lvalue unambiguously.
- // TODO: what if this fails because of DiagnoseUseOfDecl or something
- // like it?
- ExprResult SingleFunctionExpr = SrcExpr;
- if (Self.ResolveAndFixSingleFunctionTemplateSpecialization(
- SingleFunctionExpr,
- Expr::getValueKindForType(DestType) == VK_RValue // Convert Fun to Ptr
- ) && SingleFunctionExpr.isUsable()) {
- SrcExpr = SingleFunctionExpr;
- SrcType = SrcExpr.get()->getType();
- } else {
+ ExprResult FixedExpr = SrcExpr;
+ if (!fixOverloadedReinterpretCastExpr(Self, DestType, FixedExpr))
return TC_NotApplicable;
- }
+
+ assert(FixedExpr.isUsable() && "Invalid result fixing overloaded expr");
+ SrcExpr = FixedExpr;
+ SrcType = SrcExpr.get()->getType();
}
if (const ReferenceType *DestTypeTmp = DestType->getAs<ReferenceType>()) {
@@ -2008,7 +2119,9 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
}
if (CStyle)
DiagnoseCastOfObjCSEL(Self, SrcExpr, DestType);
-
+
+ DiagnoseCallingConvCast(Self, SrcExpr, DestType, OpRange);
+
// Not casting away constness, so the only remaining check is for compatible
// pointer categories.
@@ -2313,6 +2426,22 @@ void CastOperation::CheckCStyleCast() {
return;
}
+ // OpenCL v2.0 s6.13.10 - Allow casts from '0' to event_t type.
+ if (Self.getLangOpts().OpenCL && DestType->isEventT()) {
+ llvm::APSInt CastInt;
+ if (SrcExpr.get()->EvaluateAsInt(CastInt, Self.Context)) {
+ if (0 == CastInt) {
+ Kind = CK_ZeroToOCLEvent;
+ return;
+ }
+ Self.Diag(OpRange.getBegin(),
+ diag::error_opencl_cast_non_zero_to_event_t)
+ << CastInt.toString(10) << SrcExpr.get()->getSourceRange();
+ SrcExpr = ExprError();
+ return;
+ }
+ }
+
// Reject any other conversions to non-scalar types.
Self.Diag(OpRange.getBegin(), diag::err_typecheck_cond_expect_scalar)
<< DestType << SrcExpr.get()->getSourceRange();
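In OpenCL source this accepts exactly one shape (a sketch, not from the commit):

    // OpenCL C: only a constant zero may be cast to event_t.
    kernel void wait_demo(void) {
      event_t e = (event_t)0;    // OK, emitted as CK_ZeroToOCLEvent
      // event_t f = (event_t)1; // error: cannot cast non-zero value
    }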
@@ -2427,6 +2556,7 @@ void CastOperation::CheckCStyleCast() {
}
DiagnoseCastOfObjCSEL(Self, SrcExpr, DestType);
+ DiagnoseCallingConvCast(Self, SrcExpr, DestType, OpRange);
DiagnoseBadFunctionCast(Self, SrcExpr, DestType);
Kind = Self.PrepareScalarCast(SrcExpr, DestType);
if (SrcExpr.isInvalid())
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp
index 6c2834b750ae..ef04d60f8d3d 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp
@@ -36,9 +36,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Locale.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
+
using namespace clang;
using namespace sema;
@@ -258,6 +261,459 @@ static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
return false;
}
+static inline bool isBlockPointer(Expr *Arg) {
+ return Arg->getType()->isBlockPointerType();
+}
+
+/// OpenCL C v2.0, s6.13.17.2 - Checks that the block parameters are all
+/// 'local void *', which is a requirement of device-side enqueue.
+static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) {
+ const BlockPointerType *BPT =
+ cast<BlockPointerType>(BlockArg->getType().getCanonicalType());
+ ArrayRef<QualType> Params =
+ BPT->getPointeeType()->getAs<FunctionProtoType>()->getParamTypes();
+ unsigned ArgCounter = 0;
+ bool IllegalParams = false;
+ // Iterate through the block parameters until either one is found that is not
+ // a local void*, or the block is valid.
+ for (ArrayRef<QualType>::iterator I = Params.begin(), E = Params.end();
+ I != E; ++I, ++ArgCounter) {
+ if (!(*I)->isPointerType() || !(*I)->getPointeeType()->isVoidType() ||
+ (*I)->getPointeeType().getQualifiers().getAddressSpace() !=
+ LangAS::opencl_local) {
+ // Get the location of the error. If a block literal has been passed
+ // (BlockExpr) then we can point straight to the offending argument,
+ // else we just point to the variable reference.
+ SourceLocation ErrorLoc;
+ if (isa<BlockExpr>(BlockArg)) {
+ BlockDecl *BD = cast<BlockExpr>(BlockArg)->getBlockDecl();
+ ErrorLoc = BD->getParamDecl(ArgCounter)->getLocStart();
+ } else if (isa<DeclRefExpr>(BlockArg)) {
+ ErrorLoc = cast<DeclRefExpr>(BlockArg)->getLocStart();
+ }
+ S.Diag(ErrorLoc,
+ diag::err_opencl_enqueue_kernel_blocks_non_local_void_args);
+ IllegalParams = true;
+ }
+ }
+
+ return IllegalParams;
+}
+
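A minimal sketch of what this accepts and rejects, assuming an OpenCL C 2.0 kernel (names invented):

    kernel void wgs_demo(void) {
      // OK: every block parameter is 'local void *'.
      uint n = get_kernel_work_group_size(^(local void *a, local void *b) {});
      // Rejected: the parameter is not a 'local void *'.
      // get_kernel_work_group_size(^(global int *p) {});
    }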
+/// OpenCL C v2.0, s6.13.17.6 - Check the argument to the
+/// get_kernel_work_group_size and
+/// get_kernel_preferred_work_group_size_multiple builtin functions.
+static bool SemaOpenCLBuiltinKernelWorkGroupSize(Sema &S, CallExpr *TheCall) {
+ if (checkArgCount(S, TheCall, 1))
+ return true;
+
+ Expr *BlockArg = TheCall->getArg(0);
+ if (!isBlockPointer(BlockArg)) {
+ S.Diag(BlockArg->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type) << "block";
+ return true;
+ }
+ return checkOpenCLBlockArgs(S, BlockArg);
+}
+
+static bool checkOpenCLEnqueueLocalSizeArgs(Sema &S, CallExpr *TheCall,
+ unsigned Start, unsigned End);
+
+/// OpenCL v2.0, s6.13.17.1 - Check that sizes are provided for all
+/// 'local void *' parameters of the passed block.
+static bool checkOpenCLEnqueueVariadicArgs(Sema &S, CallExpr *TheCall,
+ Expr *BlockArg,
+ unsigned NumNonVarArgs) {
+ const BlockPointerType *BPT =
+ cast<BlockPointerType>(BlockArg->getType().getCanonicalType());
+ unsigned NumBlockParams =
+ BPT->getPointeeType()->getAs<FunctionProtoType>()->getNumParams();
+ unsigned TotalNumArgs = TheCall->getNumArgs();
+
+ // For each argument passed to the block, a corresponding uint needs to
+ // be passed to describe the size of the local memory.
+ if (TotalNumArgs != NumBlockParams + NumNonVarArgs) {
+ S.Diag(TheCall->getLocStart(),
+ diag::err_opencl_enqueue_kernel_local_size_args);
+ return true;
+ }
+
+ // Check that the sizes of the local memory are specified by integers.
+ return checkOpenCLEnqueueLocalSizeArgs(S, TheCall, NumNonVarArgs,
+ TotalNumArgs - 1);
+}
+
+/// OpenCL C v2.0, s6.13.17 - The enqueue_kernel function comes in four
+/// different overload forms, specified in Table 6.13.17.1.
+/// int enqueue_kernel(queue_t queue,
+/// kernel_enqueue_flags_t flags,
+/// const ndrange_t ndrange,
+/// void (^block)(void))
+/// int enqueue_kernel(queue_t queue,
+/// kernel_enqueue_flags_t flags,
+/// const ndrange_t ndrange,
+/// uint num_events_in_wait_list,
+/// clk_event_t *event_wait_list,
+/// clk_event_t *event_ret,
+/// void (^block)(void))
+/// int enqueue_kernel(queue_t queue,
+/// kernel_enqueue_flags_t flags,
+/// const ndrange_t ndrange,
+/// void (^block)(local void*, ...),
+/// uint size0, ...)
+/// int enqueue_kernel(queue_t queue,
+/// kernel_enqueue_flags_t flags,
+/// const ndrange_t ndrange,
+/// uint num_events_in_wait_list,
+/// clk_event_t *event_wait_list,
+/// clk_event_t *event_ret,
+/// void (^block)(local void*, ...),
+/// uint size0, ...)
+static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) {
+ unsigned NumArgs = TheCall->getNumArgs();
+
+ if (NumArgs < 4) {
+ S.Diag(TheCall->getLocStart(), diag::err_typecheck_call_too_few_args);
+ return true;
+ }
+
+ Expr *Arg0 = TheCall->getArg(0);
+ Expr *Arg1 = TheCall->getArg(1);
+ Expr *Arg2 = TheCall->getArg(2);
+ Expr *Arg3 = TheCall->getArg(3);
+
+ // First argument always needs to be a queue_t type.
+ if (!Arg0->getType()->isQueueT()) {
+ S.Diag(TheCall->getArg(0)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type)
+ << S.Context.OCLQueueTy;
+ return true;
+ }
+
+ // Second argument always needs to be a kernel_enqueue_flags_t enum value.
+ if (!Arg1->getType()->isIntegerType()) {
+ S.Diag(TheCall->getArg(1)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type)
+ << "'kernel_enqueue_flags_t' (i.e. uint)";
+ return true;
+ }
+
+ // Third argument is always an ndrange_t type.
+ if (!Arg2->getType()->isNDRangeT()) {
+ S.Diag(TheCall->getArg(2)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type)
+ << S.Context.OCLNDRangeTy;
+ return true;
+ }
+
+ // With four arguments, there is only one form that the function could be
+ // called in: no events and no variable arguments.
+ if (NumArgs == 4) {
+    // Check that the last argument is the right block type.
+ if (!isBlockPointer(Arg3)) {
+ S.Diag(Arg3->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type)
+ << "block";
+ return true;
+ }
+    // We have a block type; check the prototype.
+ const BlockPointerType *BPT =
+ cast<BlockPointerType>(Arg3->getType().getCanonicalType());
+ if (BPT->getPointeeType()->getAs<FunctionProtoType>()->getNumParams() > 0) {
+ S.Diag(Arg3->getLocStart(),
+ diag::err_opencl_enqueue_kernel_blocks_no_args);
+ return true;
+ }
+ return false;
+ }
+  // We can have a block followed by varargs.
+ if (isBlockPointer(Arg3))
+ return (checkOpenCLBlockArgs(S, Arg3) ||
+ checkOpenCLEnqueueVariadicArgs(S, TheCall, Arg3, 4));
+  // The last two cases have either exactly 7 args, or 7 args plus varargs.
+ if (NumArgs >= 7) {
+    // Check the common block argument.
+ Expr *Arg6 = TheCall->getArg(6);
+ if (!isBlockPointer(Arg6)) {
+ S.Diag(Arg6->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type)
+ << "block";
+ return true;
+ }
+ if (checkOpenCLBlockArgs(S, Arg6))
+ return true;
+
+    // Fourth argument has to be an integer type.
+ if (!Arg3->getType()->isIntegerType()) {
+ S.Diag(TheCall->getArg(3)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type)
+ << "integer";
+ return true;
+ }
+    // Check the remaining common arguments.
+ Expr *Arg4 = TheCall->getArg(4);
+ Expr *Arg5 = TheCall->getArg(5);
+
+    // Fifth argument is always passed as a pointer to clk_event_t.
+ if (!Arg4->getType()->getPointeeOrArrayElementType()->isClkEventT()) {
+ S.Diag(TheCall->getArg(4)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type)
+ << S.Context.getPointerType(S.Context.OCLClkEventTy);
+ return true;
+ }
+
+    // Sixth argument is always passed as a pointer to clk_event_t.
+ if (!(Arg5->getType()->isPointerType() &&
+ Arg5->getType()->getPointeeType()->isClkEventT())) {
+ S.Diag(TheCall->getArg(5)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_expected_type)
+ << S.Context.getPointerType(S.Context.OCLClkEventTy);
+ return true;
+ }
+
+ if (NumArgs == 7)
+ return false;
+
+ return checkOpenCLEnqueueVariadicArgs(S, TheCall, Arg6, 7);
+ }
+
+  // None of the specific cases has been detected; give a generic error.
+ S.Diag(TheCall->getLocStart(),
+ diag::err_opencl_enqueue_kernel_incorrect_args);
+ return true;
+}
+
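Putting the checks together, the event-wait overload reads like this sketch in user code (assumed OpenCL C 2.0; runtime details elided):

    kernel void producer(void) {
      clk_event_t marker = create_user_event();
      clk_event_t out;
      int err = enqueue_kernel(get_default_queue(),
                               CLK_ENQUEUE_FLAGS_NO_WAIT,
                               ndrange_1D(64),
                               1, &marker, &out,  // wait list and out-event
                               ^(void) { /* work */ });
      release_event(marker);
      release_event(out);
    }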
+/// Returns the OpenCL access qualifier of the given declaration, if any.
+static OpenCLAccessAttr *getOpenCLArgAccess(const Decl *D) {
+ return D->getAttr<OpenCLAccessAttr>();
+}
+
+/// Returns true if the first argument is not a pipe, or if its access
+/// qualifier is incompatible with the builtin being called.
+static bool checkOpenCLPipeArg(Sema &S, CallExpr *Call) {
+ const Expr *Arg0 = Call->getArg(0);
+ // First argument type should always be pipe.
+ if (!Arg0->getType()->isPipeType()) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_first_arg)
+ << Call->getDirectCallee() << Arg0->getSourceRange();
+ return true;
+ }
+ OpenCLAccessAttr *AccessQual =
+ getOpenCLArgAccess(cast<DeclRefExpr>(Arg0)->getDecl());
+ // Validates the access qualifier is compatible with the call.
+ // OpenCL v2.0 s6.13.16 - The access qualifiers for pipe should only be
+ // read_only and write_only, and assumed to be read_only if no qualifier is
+ // specified.
+ switch (Call->getDirectCallee()->getBuiltinID()) {
+ case Builtin::BIread_pipe:
+ case Builtin::BIreserve_read_pipe:
+ case Builtin::BIcommit_read_pipe:
+ case Builtin::BIwork_group_reserve_read_pipe:
+ case Builtin::BIsub_group_reserve_read_pipe:
+ case Builtin::BIwork_group_commit_read_pipe:
+ case Builtin::BIsub_group_commit_read_pipe:
+ if (!(!AccessQual || AccessQual->isReadOnly())) {
+ S.Diag(Arg0->getLocStart(),
+ diag::err_opencl_builtin_pipe_invalid_access_modifier)
+ << "read_only" << Arg0->getSourceRange();
+ return true;
+ }
+ break;
+ case Builtin::BIwrite_pipe:
+ case Builtin::BIreserve_write_pipe:
+ case Builtin::BIcommit_write_pipe:
+ case Builtin::BIwork_group_reserve_write_pipe:
+ case Builtin::BIsub_group_reserve_write_pipe:
+ case Builtin::BIwork_group_commit_write_pipe:
+ case Builtin::BIsub_group_commit_write_pipe:
+ if (!(AccessQual && AccessQual->isWriteOnly())) {
+ S.Diag(Arg0->getLocStart(),
+ diag::err_opencl_builtin_pipe_invalid_access_modifier)
+ << "write_only" << Arg0->getSourceRange();
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// Returns true if the pipe element type is different from the pointed-to
+/// type of the given argument.
+static bool checkOpenCLPipePacketType(Sema &S, CallExpr *Call, unsigned Idx) {
+ const Expr *Arg0 = Call->getArg(0);
+ const Expr *ArgIdx = Call->getArg(Idx);
+ const PipeType *PipeTy = cast<PipeType>(Arg0->getType());
+ const QualType EltTy = PipeTy->getElementType();
+ const PointerType *ArgTy = ArgIdx->getType()->getAs<PointerType>();
+ // The Idx argument should be a pointer and the type of the pointer and
+ // the type of pipe element should also be the same.
+ if (!ArgTy ||
+ !S.Context.hasSameType(
+ EltTy, ArgTy->getPointeeType()->getCanonicalTypeInternal())) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_invalid_arg)
+ << Call->getDirectCallee() << S.Context.getPointerType(EltTy)
+ << ArgIdx->getType() << ArgIdx->getSourceRange();
+ return true;
+ }
+ return false;
+}
+
+// \brief Performs semantic analysis for the read/write_pipe call.
+// \param S Reference to the semantic analyzer.
+// \param Call A pointer to the builtin call.
+// \return True if a semantic error has been found, false otherwise.
+static bool SemaBuiltinRWPipe(Sema &S, CallExpr *Call) {
+ // OpenCL v2.0 s6.13.16.2 - The built-in read/write
+ // functions have two forms.
+ switch (Call->getNumArgs()) {
+ case 2: {
+ if (checkOpenCLPipeArg(S, Call))
+ return true;
+ // The call with 2 arguments should be
+ // read/write_pipe(pipe T, T*).
+ // Check packet type T.
+ if (checkOpenCLPipePacketType(S, Call, 1))
+ return true;
+ } break;
+
+ case 4: {
+ if (checkOpenCLPipeArg(S, Call))
+ return true;
+ // The call with 4 arguments should be
+ // read/write_pipe(pipe T, reserve_id_t, uint, T*).
+ // Check reserve_id_t.
+ if (!Call->getArg(1)->getType()->isReserveIDT()) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_invalid_arg)
+ << Call->getDirectCallee() << S.Context.OCLReserveIDTy
+ << Call->getArg(1)->getType() << Call->getArg(1)->getSourceRange();
+ return true;
+ }
+
+ // Check the index.
+ const Expr *Arg2 = Call->getArg(2);
+ if (!Arg2->getType()->isIntegerType() &&
+ !Arg2->getType()->isUnsignedIntegerType()) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_invalid_arg)
+ << Call->getDirectCallee() << S.Context.UnsignedIntTy
+ << Arg2->getType() << Arg2->getSourceRange();
+ return true;
+ }
+
+ // Check packet type T.
+ if (checkOpenCLPipePacketType(S, Call, 3))
+ return true;
+ } break;
+ default:
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_arg_num)
+ << Call->getDirectCallee() << Call->getSourceRange();
+ return true;
+ }
+
+ return false;
+}
+
+// \brief Performs semantic analysis on the
+// {work_group_|sub_group_|}reserve_{read|write}_pipe builtins.
+// \param S Reference to the semantic analyzer.
+// \param Call The call to the builtin function to be analyzed.
+// \return True if a semantic error was found, false otherwise.
+static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call) {
+ if (checkArgCount(S, Call, 2))
+ return true;
+
+ if (checkOpenCLPipeArg(S, Call))
+ return true;
+
+ // Check the reserve size.
+ if (!Call->getArg(1)->getType()->isIntegerType() &&
+ !Call->getArg(1)->getType()->isUnsignedIntegerType()) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_invalid_arg)
+ << Call->getDirectCallee() << S.Context.UnsignedIntTy
+ << Call->getArg(1)->getType() << Call->getArg(1)->getSourceRange();
+ return true;
+ }
+
+ return false;
+}
+
+// \brief Performs semantic analysis on the
+// {work_group_|sub_group_|}commit_{read|write}_pipe builtins.
+// \param S Reference to the semantic analyzer.
+// \param Call The call to the builtin function to be analyzed.
+// \return True if a semantic error was found, false otherwise.
+static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call) {
+ if (checkArgCount(S, Call, 2))
+ return true;
+
+ if (checkOpenCLPipeArg(S, Call))
+ return true;
+
+ // Check reserve_id_t.
+ if (!Call->getArg(1)->getType()->isReserveIDT()) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_invalid_arg)
+ << Call->getDirectCallee() << S.Context.OCLReserveIDTy
+ << Call->getArg(1)->getType() << Call->getArg(1)->getSourceRange();
+ return true;
+ }
+
+ return false;
+}
+
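Taken together, the pipe checks constrain user code along these lines (an OpenCL C 2.0 sketch):

    kernel void pump(read_only pipe int rp, write_only pipe int wp) {
      int v;
      if (read_pipe(rp, &v) == 0)      // 2-arg form; packet type is int
        write_pipe(wp, &v);
      reserve_id_t rid = reserve_read_pipe(rp, 1);
      if (is_valid_reserve_id(rid)) {
        read_pipe(rp, rid, 0, &v);     // 4-arg form with a reserve id
        commit_read_pipe(rp, rid);
      }
      // read_pipe(wp, &v);            // rejected: needs a read_only pipe
      // float f; read_pipe(rp, &f);   // rejected: packet type mismatch
    }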
+// \brief Performs semantic analysis on calls to the built-in pipe query
+// functions.
+// \param S Reference to the semantic analyzer.
+// \param Call The call to the builtin function to be analyzed.
+// \return True if a semantic error was found, false otherwise.
+static bool SemaBuiltinPipePackets(Sema &S, CallExpr *Call) {
+ if (checkArgCount(S, Call, 1))
+ return true;
+
+ if (!Call->getArg(0)->getType()->isPipeType()) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_pipe_first_arg)
+ << Call->getDirectCallee() << Call->getArg(0)->getSourceRange();
+ return true;
+ }
+
+ return false;
+}
+// \brief OpenCL v2.0 s6.13.9 - Address space qualifier functions.
+// Performs semantic analysis for the to_global/local/private call.
+// \param S Reference to the semantic analyzer.
+// \param BuiltinID ID of the builtin function.
+// \param Call A pointer to the builtin call.
+// \return True if a semantic error has been found, false otherwise.
+static bool SemaOpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID,
+ CallExpr *Call) {
+ if (Call->getNumArgs() != 1) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_to_addr_arg_num)
+ << Call->getDirectCallee() << Call->getSourceRange();
+ return true;
+ }
+
+ auto RT = Call->getArg(0)->getType();
+  if (!RT->isPointerType() ||
+      RT->getPointeeType().getAddressSpace() == LangAS::opencl_constant) {
+ S.Diag(Call->getLocStart(), diag::err_opencl_builtin_to_addr_invalid_arg)
+ << Call->getArg(0) << Call->getDirectCallee() << Call->getSourceRange();
+ return true;
+ }
+
+ RT = RT->getPointeeType();
+ auto Qual = RT.getQualifiers();
+ switch (BuiltinID) {
+ case Builtin::BIto_global:
+ Qual.setAddressSpace(LangAS::opencl_global);
+ break;
+ case Builtin::BIto_local:
+ Qual.setAddressSpace(LangAS::opencl_local);
+ break;
+ default:
+ Qual.removeAddressSpace();
+ }
+ Call->setType(S.Context.getPointerType(S.Context.getQualifiedType(
+ RT.getUnqualifiedType(), Qual)));
+
+ return false;
+}
+
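The practical effect is that the result of these builtins already carries the requested address space (an OpenCL C 2.0 sketch):

    kernel void fixup(global int *g) {
      int *p = g;                       // generic pointer
      global int *gp = to_global(p);    // non-NULL only if p is global
      local int *lp = to_local(p);      // NULL here: p points to global
      private int *pp = to_private(p);  // NULL here as well
    }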
ExprResult
Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
CallExpr *TheCall) {
@@ -530,27 +986,22 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
case Builtin::BI__builtin___vsnprintf_chk:
SemaBuiltinMemChkCall(*this, FDecl, TheCall, 1, 3);
break;
-
case Builtin::BI__builtin_call_with_static_chain:
if (SemaBuiltinCallWithStaticChain(*this, TheCall))
return ExprError();
break;
-
case Builtin::BI__exception_code:
- case Builtin::BI_exception_code: {
+ case Builtin::BI_exception_code:
if (SemaBuiltinSEHScopeCheck(*this, TheCall, Scope::SEHExceptScope,
diag::err_seh___except_block))
return ExprError();
break;
- }
case Builtin::BI__exception_info:
- case Builtin::BI_exception_info: {
+ case Builtin::BI_exception_info:
if (SemaBuiltinSEHScopeCheck(*this, TheCall, Scope::SEHFilterScope,
diag::err_seh___except_filter))
return ExprError();
break;
- }
-
case Builtin::BI__GetExceptionInfo:
if (checkArgCount(*this, TheCall, 1))
return ExprError();
@@ -563,7 +1014,56 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setType(Context.VoidPtrTy);
break;
-
+ // OpenCL v2.0, s6.13.16 - Pipe functions
+ case Builtin::BIread_pipe:
+ case Builtin::BIwrite_pipe:
+    // Since these two functions are declared variadic, we need a semantic
+    // check for the arguments.
+ if (SemaBuiltinRWPipe(*this, TheCall))
+ return ExprError();
+ break;
+ case Builtin::BIreserve_read_pipe:
+ case Builtin::BIreserve_write_pipe:
+ case Builtin::BIwork_group_reserve_read_pipe:
+ case Builtin::BIwork_group_reserve_write_pipe:
+ case Builtin::BIsub_group_reserve_read_pipe:
+ case Builtin::BIsub_group_reserve_write_pipe:
+ if (SemaBuiltinReserveRWPipe(*this, TheCall))
+ return ExprError();
+    // Since the return type of the reserve_read/write_pipe built-ins is
+    // reserve_id_t, which is not defined in the builtin def file, we use
+    // int as the return type and need to override it here.
+ TheCall->setType(Context.OCLReserveIDTy);
+ break;
+ case Builtin::BIcommit_read_pipe:
+ case Builtin::BIcommit_write_pipe:
+ case Builtin::BIwork_group_commit_read_pipe:
+ case Builtin::BIwork_group_commit_write_pipe:
+ case Builtin::BIsub_group_commit_read_pipe:
+ case Builtin::BIsub_group_commit_write_pipe:
+ if (SemaBuiltinCommitRWPipe(*this, TheCall))
+ return ExprError();
+ break;
+ case Builtin::BIget_pipe_num_packets:
+ case Builtin::BIget_pipe_max_packets:
+ if (SemaBuiltinPipePackets(*this, TheCall))
+ return ExprError();
+ break;
+ case Builtin::BIto_global:
+ case Builtin::BIto_local:
+ case Builtin::BIto_private:
+ if (SemaOpenCLBuiltinToAddr(*this, BuiltinID, TheCall))
+ return ExprError();
+ break;
+ // OpenCL v2.0, s6.13.17 - Enqueue kernel functions.
+ case Builtin::BIenqueue_kernel:
+ if (SemaOpenCLBuiltinEnqueueKernel(*this, TheCall))
+ return ExprError();
+ break;
+ case Builtin::BIget_kernel_work_group_size:
+ case Builtin::BIget_kernel_preferred_work_group_size_multiple:
+ if (SemaOpenCLBuiltinKernelWorkGroupSize(*this, TheCall))
+ return ExprError();
}
// Since the target specific builtins for each arch overlap, only check those
@@ -843,7 +1343,6 @@ bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
return true;
}
-
if (IsLdrex) {
TheCall->setType(ValType);
return false;
@@ -931,7 +1430,7 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
BuiltinID == AArch64::BI__builtin_arm_wsr64)
- return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, false);
+ return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
BuiltinID == AArch64::BI__builtin_arm_rsrp ||
@@ -955,8 +1454,17 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
}
+// CheckMipsBuiltinFunctionCall - Checks that the constant value passed to
+// the intrinsic is correct. The switch statement is ordered first by DSP,
+// then MSA. The ordering within DSP is unspecified. MSA is ordered by the
+// data format used by the underlying instruction, i.e., df/m, df/n, and
+// then by size.
+//
+// FIXME: The size tests here should instead be tablegen'd along with the
+// definitions from include/clang/Basic/BuiltinsMips.def.
+// FIXME: GCC is strict on signedness for some of these intrinsics, we should
+// be too.
bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
- unsigned i = 0, l = 0, u = 0;
+ unsigned i = 0, l = 0, u = 0, m = 0;
switch (BuiltinID) {
default: return false;
case Mips::BI__builtin_mips_wrdsp: i = 1; l = 0; u = 63; break;
@@ -966,9 +1474,168 @@ bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case Mips::BI__builtin_mips_precr_sra_ph_w: i = 2; l = 0; u = 31; break;
case Mips::BI__builtin_mips_precr_sra_r_ph_w: i = 2; l = 0; u = 31; break;
case Mips::BI__builtin_mips_prepend: i = 2; l = 0; u = 31; break;
- }
-
- return SemaBuiltinConstantArgRange(TheCall, i, l, u);
+  // MSA intrinsics. Instructions (which the intrinsics map to) that use the
+  // df/m field.
+ // These intrinsics take an unsigned 3 bit immediate.
+ case Mips::BI__builtin_msa_bclri_b:
+ case Mips::BI__builtin_msa_bnegi_b:
+ case Mips::BI__builtin_msa_bseti_b:
+ case Mips::BI__builtin_msa_sat_s_b:
+ case Mips::BI__builtin_msa_sat_u_b:
+ case Mips::BI__builtin_msa_slli_b:
+ case Mips::BI__builtin_msa_srai_b:
+ case Mips::BI__builtin_msa_srari_b:
+ case Mips::BI__builtin_msa_srli_b:
+ case Mips::BI__builtin_msa_srlri_b: i = 1; l = 0; u = 7; break;
+ case Mips::BI__builtin_msa_binsli_b:
+ case Mips::BI__builtin_msa_binsri_b: i = 2; l = 0; u = 7; break;
+ // These intrinsics take an unsigned 4 bit immediate.
+ case Mips::BI__builtin_msa_bclri_h:
+ case Mips::BI__builtin_msa_bnegi_h:
+ case Mips::BI__builtin_msa_bseti_h:
+ case Mips::BI__builtin_msa_sat_s_h:
+ case Mips::BI__builtin_msa_sat_u_h:
+ case Mips::BI__builtin_msa_slli_h:
+ case Mips::BI__builtin_msa_srai_h:
+ case Mips::BI__builtin_msa_srari_h:
+ case Mips::BI__builtin_msa_srli_h:
+ case Mips::BI__builtin_msa_srlri_h: i = 1; l = 0; u = 15; break;
+ case Mips::BI__builtin_msa_binsli_h:
+ case Mips::BI__builtin_msa_binsri_h: i = 2; l = 0; u = 15; break;
+  // These intrinsics take an unsigned 5 bit immediate.
+  // The first block of intrinsics actually has an unsigned 5 bit field,
+  // not a df/n field.
+ case Mips::BI__builtin_msa_clei_u_b:
+ case Mips::BI__builtin_msa_clei_u_h:
+ case Mips::BI__builtin_msa_clei_u_w:
+ case Mips::BI__builtin_msa_clei_u_d:
+ case Mips::BI__builtin_msa_clti_u_b:
+ case Mips::BI__builtin_msa_clti_u_h:
+ case Mips::BI__builtin_msa_clti_u_w:
+ case Mips::BI__builtin_msa_clti_u_d:
+ case Mips::BI__builtin_msa_maxi_u_b:
+ case Mips::BI__builtin_msa_maxi_u_h:
+ case Mips::BI__builtin_msa_maxi_u_w:
+ case Mips::BI__builtin_msa_maxi_u_d:
+ case Mips::BI__builtin_msa_mini_u_b:
+ case Mips::BI__builtin_msa_mini_u_h:
+ case Mips::BI__builtin_msa_mini_u_w:
+ case Mips::BI__builtin_msa_mini_u_d:
+ case Mips::BI__builtin_msa_addvi_b:
+ case Mips::BI__builtin_msa_addvi_h:
+ case Mips::BI__builtin_msa_addvi_w:
+ case Mips::BI__builtin_msa_addvi_d:
+ case Mips::BI__builtin_msa_bclri_w:
+ case Mips::BI__builtin_msa_bnegi_w:
+ case Mips::BI__builtin_msa_bseti_w:
+ case Mips::BI__builtin_msa_sat_s_w:
+ case Mips::BI__builtin_msa_sat_u_w:
+ case Mips::BI__builtin_msa_slli_w:
+ case Mips::BI__builtin_msa_srai_w:
+ case Mips::BI__builtin_msa_srari_w:
+ case Mips::BI__builtin_msa_srli_w:
+ case Mips::BI__builtin_msa_srlri_w:
+ case Mips::BI__builtin_msa_subvi_b:
+ case Mips::BI__builtin_msa_subvi_h:
+ case Mips::BI__builtin_msa_subvi_w:
+ case Mips::BI__builtin_msa_subvi_d: i = 1; l = 0; u = 31; break;
+ case Mips::BI__builtin_msa_binsli_w:
+ case Mips::BI__builtin_msa_binsri_w: i = 2; l = 0; u = 31; break;
+ // These intrinsics take an unsigned 6 bit immediate.
+ case Mips::BI__builtin_msa_bclri_d:
+ case Mips::BI__builtin_msa_bnegi_d:
+ case Mips::BI__builtin_msa_bseti_d:
+ case Mips::BI__builtin_msa_sat_s_d:
+ case Mips::BI__builtin_msa_sat_u_d:
+ case Mips::BI__builtin_msa_slli_d:
+ case Mips::BI__builtin_msa_srai_d:
+ case Mips::BI__builtin_msa_srari_d:
+ case Mips::BI__builtin_msa_srli_d:
+ case Mips::BI__builtin_msa_srlri_d: i = 1; l = 0; u = 63; break;
+ case Mips::BI__builtin_msa_binsli_d:
+ case Mips::BI__builtin_msa_binsri_d: i = 2; l = 0; u = 63; break;
+ // These intrinsics take a signed 5 bit immediate.
+ case Mips::BI__builtin_msa_ceqi_b:
+ case Mips::BI__builtin_msa_ceqi_h:
+ case Mips::BI__builtin_msa_ceqi_w:
+ case Mips::BI__builtin_msa_ceqi_d:
+ case Mips::BI__builtin_msa_clti_s_b:
+ case Mips::BI__builtin_msa_clti_s_h:
+ case Mips::BI__builtin_msa_clti_s_w:
+ case Mips::BI__builtin_msa_clti_s_d:
+ case Mips::BI__builtin_msa_clei_s_b:
+ case Mips::BI__builtin_msa_clei_s_h:
+ case Mips::BI__builtin_msa_clei_s_w:
+ case Mips::BI__builtin_msa_clei_s_d:
+ case Mips::BI__builtin_msa_maxi_s_b:
+ case Mips::BI__builtin_msa_maxi_s_h:
+ case Mips::BI__builtin_msa_maxi_s_w:
+ case Mips::BI__builtin_msa_maxi_s_d:
+ case Mips::BI__builtin_msa_mini_s_b:
+ case Mips::BI__builtin_msa_mini_s_h:
+ case Mips::BI__builtin_msa_mini_s_w:
+ case Mips::BI__builtin_msa_mini_s_d: i = 1; l = -16; u = 15; break;
+ // These intrinsics take an unsigned 8 bit immediate.
+ case Mips::BI__builtin_msa_andi_b:
+ case Mips::BI__builtin_msa_nori_b:
+ case Mips::BI__builtin_msa_ori_b:
+ case Mips::BI__builtin_msa_shf_b:
+ case Mips::BI__builtin_msa_shf_h:
+ case Mips::BI__builtin_msa_shf_w:
+ case Mips::BI__builtin_msa_xori_b: i = 1; l = 0; u = 255; break;
+ case Mips::BI__builtin_msa_bseli_b:
+ case Mips::BI__builtin_msa_bmnzi_b:
+ case Mips::BI__builtin_msa_bmzi_b: i = 2; l = 0; u = 255; break;
+ // df/n format
+ // These intrinsics take an unsigned 4 bit immediate.
+ case Mips::BI__builtin_msa_copy_s_b:
+ case Mips::BI__builtin_msa_copy_u_b:
+ case Mips::BI__builtin_msa_insve_b:
+ case Mips::BI__builtin_msa_splati_b: i = 1; l = 0; u = 15; break;
+ case Mips::BI__builtin_msa_sld_b:
+ case Mips::BI__builtin_msa_sldi_b: i = 2; l = 0; u = 15; break;
+ // These intrinsics take an unsigned 3 bit immediate.
+ case Mips::BI__builtin_msa_copy_s_h:
+ case Mips::BI__builtin_msa_copy_u_h:
+ case Mips::BI__builtin_msa_insve_h:
+ case Mips::BI__builtin_msa_splati_h: i = 1; l = 0; u = 7; break;
+ case Mips::BI__builtin_msa_sld_h:
+ case Mips::BI__builtin_msa_sldi_h: i = 2; l = 0; u = 7; break;
+ // These intrinsics take an unsigned 2 bit immediate.
+ case Mips::BI__builtin_msa_copy_s_w:
+ case Mips::BI__builtin_msa_copy_u_w:
+ case Mips::BI__builtin_msa_insve_w:
+ case Mips::BI__builtin_msa_splati_w: i = 1; l = 0; u = 3; break;
+ case Mips::BI__builtin_msa_sld_w:
+ case Mips::BI__builtin_msa_sldi_w: i = 2; l = 0; u = 3; break;
+ // These intrinsics take an unsigned 1 bit immediate.
+ case Mips::BI__builtin_msa_copy_s_d:
+ case Mips::BI__builtin_msa_copy_u_d:
+ case Mips::BI__builtin_msa_insve_d:
+ case Mips::BI__builtin_msa_splati_d: i = 1; l = 0; u = 1; break;
+ case Mips::BI__builtin_msa_sld_d:
+ case Mips::BI__builtin_msa_sldi_d: i = 2; l = 0; u = 1; break;
+ // Memory offsets and immediate loads.
+  // These intrinsics take a signed 10 bit immediate; ldi_b is further
+  // restricted to the signed 8 bit range of its byte elements.
+ case Mips::BI__builtin_msa_ldi_b: i = 0; l = -128; u = 127; break;
+ case Mips::BI__builtin_msa_ldi_h:
+ case Mips::BI__builtin_msa_ldi_w:
+ case Mips::BI__builtin_msa_ldi_d: i = 0; l = -512; u = 511; break;
+ case Mips::BI__builtin_msa_ld_b: i = 1; l = -512; u = 511; m = 16; break;
+ case Mips::BI__builtin_msa_ld_h: i = 1; l = -1024; u = 1022; m = 16; break;
+ case Mips::BI__builtin_msa_ld_w: i = 1; l = -2048; u = 2044; m = 16; break;
+ case Mips::BI__builtin_msa_ld_d: i = 1; l = -4096; u = 4088; m = 16; break;
+ case Mips::BI__builtin_msa_st_b: i = 2; l = -512; u = 511; m = 16; break;
+ case Mips::BI__builtin_msa_st_h: i = 2; l = -1024; u = 1022; m = 16; break;
+ case Mips::BI__builtin_msa_st_w: i = 2; l = -2048; u = 2044; m = 16; break;
+ case Mips::BI__builtin_msa_st_d: i = 2; l = -4096; u = 4088; m = 16; break;
+ }
+
+ if (!m)
+ return SemaBuiltinConstantArgRange(TheCall, i, l, u);
+
+ return SemaBuiltinConstantArgRange(TheCall, i, l, u) ||
+ SemaBuiltinConstantArgMultiple(TheCall, i, m);
}
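For the new memory-offset cases both constraints apply at once; a sketch in C (the vector typedef follows the usual MSA convention and is not from the commit):

    typedef signed char v16i8 __attribute__((vector_size(16)));
    v16i8 load16(void *p) {
      // __builtin_msa_ld_b(p, 20);      // rejected: not a multiple of 16
      // __builtin_msa_ld_b(p, 1024);    // rejected: outside [-512, 511]
      return __builtin_msa_ld_b(p, 32);  // OK: in range and 32 % 16 == 0
    }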
bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -1091,19 +1758,58 @@ static bool SemaBuiltinCpuSupports(Sema &S, CallExpr *TheCall) {
}
bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
- unsigned i = 0, l = 0, u = 0;
+ int i = 0, l = 0, u = 0;
switch (BuiltinID) {
- default: return false;
+ default:
+ return false;
case X86::BI__builtin_cpu_supports:
return SemaBuiltinCpuSupports(*this, TheCall);
case X86::BI__builtin_ms_va_start:
return SemaBuiltinMSVAStart(TheCall);
- case X86::BI_mm_prefetch: i = 1; l = 0; u = 3; break;
- case X86::BI__builtin_ia32_sha1rnds4: i = 2, l = 0; u = 3; break;
+ case X86::BI__builtin_ia32_extractf64x4_mask:
+ case X86::BI__builtin_ia32_extracti64x4_mask:
+ case X86::BI__builtin_ia32_extractf32x8_mask:
+ case X86::BI__builtin_ia32_extracti32x8_mask:
+ case X86::BI__builtin_ia32_extractf64x2_256_mask:
+ case X86::BI__builtin_ia32_extracti64x2_256_mask:
+ case X86::BI__builtin_ia32_extractf32x4_256_mask:
+ case X86::BI__builtin_ia32_extracti32x4_256_mask:
+ i = 1; l = 0; u = 1;
+ break;
+ case X86::BI_mm_prefetch:
+ case X86::BI__builtin_ia32_extractf32x4_mask:
+ case X86::BI__builtin_ia32_extracti32x4_mask:
+ case X86::BI__builtin_ia32_extractf64x2_512_mask:
+ case X86::BI__builtin_ia32_extracti64x2_512_mask:
+ i = 1; l = 0; u = 3;
+ break;
+ case X86::BI__builtin_ia32_insertf32x8_mask:
+ case X86::BI__builtin_ia32_inserti32x8_mask:
+ case X86::BI__builtin_ia32_insertf64x4_mask:
+ case X86::BI__builtin_ia32_inserti64x4_mask:
+ case X86::BI__builtin_ia32_insertf64x2_256_mask:
+ case X86::BI__builtin_ia32_inserti64x2_256_mask:
+ case X86::BI__builtin_ia32_insertf32x4_256_mask:
+ case X86::BI__builtin_ia32_inserti32x4_256_mask:
+ i = 2; l = 0; u = 1;
+ break;
+ case X86::BI__builtin_ia32_sha1rnds4:
+ case X86::BI__builtin_ia32_shuf_f32x4_256_mask:
+ case X86::BI__builtin_ia32_shuf_f64x2_256_mask:
+ case X86::BI__builtin_ia32_shuf_i32x4_256_mask:
+ case X86::BI__builtin_ia32_shuf_i64x2_256_mask:
+ case X86::BI__builtin_ia32_insertf64x2_512_mask:
+ case X86::BI__builtin_ia32_inserti64x2_512_mask:
+ case X86::BI__builtin_ia32_insertf32x4_mask:
+ case X86::BI__builtin_ia32_inserti32x4_mask:
+ i = 2; l = 0; u = 3;
+ break;
case X86::BI__builtin_ia32_vpermil2pd:
case X86::BI__builtin_ia32_vpermil2pd256:
case X86::BI__builtin_ia32_vpermil2ps:
- case X86::BI__builtin_ia32_vpermil2ps256: i = 3, l = 0; u = 3; break;
+ case X86::BI__builtin_ia32_vpermil2ps256:
+ i = 3; l = 0; u = 3;
+ break;
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpw128_mask:
case X86::BI__builtin_ia32_cmpd128_mask:
@@ -1127,29 +1833,205 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_ucmpb512_mask:
case X86::BI__builtin_ia32_ucmpw512_mask:
case X86::BI__builtin_ia32_ucmpd512_mask:
- case X86::BI__builtin_ia32_ucmpq512_mask: i = 2; l = 0; u = 7; break;
+ case X86::BI__builtin_ia32_ucmpq512_mask:
+ case X86::BI__builtin_ia32_vpcomub:
+ case X86::BI__builtin_ia32_vpcomuw:
+ case X86::BI__builtin_ia32_vpcomud:
+ case X86::BI__builtin_ia32_vpcomuq:
+ case X86::BI__builtin_ia32_vpcomb:
+ case X86::BI__builtin_ia32_vpcomw:
+ case X86::BI__builtin_ia32_vpcomd:
+ case X86::BI__builtin_ia32_vpcomq:
+ i = 2; l = 0; u = 7;
+ break;
case X86::BI__builtin_ia32_roundps:
case X86::BI__builtin_ia32_roundpd:
case X86::BI__builtin_ia32_roundps256:
- case X86::BI__builtin_ia32_roundpd256: i = 1, l = 0; u = 15; break;
+ case X86::BI__builtin_ia32_roundpd256:
+ i = 1; l = 0; u = 15;
+ break;
case X86::BI__builtin_ia32_roundss:
- case X86::BI__builtin_ia32_roundsd: i = 2, l = 0; u = 15; break;
+ case X86::BI__builtin_ia32_roundsd:
+ case X86::BI__builtin_ia32_rangepd128_mask:
+ case X86::BI__builtin_ia32_rangepd256_mask:
+ case X86::BI__builtin_ia32_rangepd512_mask:
+ case X86::BI__builtin_ia32_rangeps128_mask:
+ case X86::BI__builtin_ia32_rangeps256_mask:
+ case X86::BI__builtin_ia32_rangeps512_mask:
+ case X86::BI__builtin_ia32_getmantsd_round_mask:
+ case X86::BI__builtin_ia32_getmantss_round_mask:
+ i = 2; l = 0; u = 15;
+ break;
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpss:
case X86::BI__builtin_ia32_cmppd:
case X86::BI__builtin_ia32_cmpsd:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd512_mask: i = 2; l = 0; u = 31; break;
- case X86::BI__builtin_ia32_vpcomub:
- case X86::BI__builtin_ia32_vpcomuw:
- case X86::BI__builtin_ia32_vpcomud:
- case X86::BI__builtin_ia32_vpcomuq:
- case X86::BI__builtin_ia32_vpcomb:
- case X86::BI__builtin_ia32_vpcomw:
- case X86::BI__builtin_ia32_vpcomd:
- case X86::BI__builtin_ia32_vpcomq: i = 2; l = 0; u = 7; break;
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_cmpsd_mask:
+ case X86::BI__builtin_ia32_cmpss_mask:
+ i = 2; l = 0; u = 31;
+ break;
+ case X86::BI__builtin_ia32_xabort:
+ i = 0; l = -128; u = 255;
+ break;
+ case X86::BI__builtin_ia32_pshufw:
+ case X86::BI__builtin_ia32_aeskeygenassist128:
+ i = 1; l = -128; u = 255;
+ break;
+ case X86::BI__builtin_ia32_vcvtps2ph:
+ case X86::BI__builtin_ia32_vcvtps2ph256:
+ case X86::BI__builtin_ia32_rndscaleps_128_mask:
+ case X86::BI__builtin_ia32_rndscalepd_128_mask:
+ case X86::BI__builtin_ia32_rndscaleps_256_mask:
+ case X86::BI__builtin_ia32_rndscalepd_256_mask:
+ case X86::BI__builtin_ia32_rndscaleps_mask:
+ case X86::BI__builtin_ia32_rndscalepd_mask:
+ case X86::BI__builtin_ia32_reducepd128_mask:
+ case X86::BI__builtin_ia32_reducepd256_mask:
+ case X86::BI__builtin_ia32_reducepd512_mask:
+ case X86::BI__builtin_ia32_reduceps128_mask:
+ case X86::BI__builtin_ia32_reduceps256_mask:
+ case X86::BI__builtin_ia32_reduceps512_mask:
+ case X86::BI__builtin_ia32_prold512_mask:
+ case X86::BI__builtin_ia32_prolq512_mask:
+ case X86::BI__builtin_ia32_prold128_mask:
+ case X86::BI__builtin_ia32_prold256_mask:
+ case X86::BI__builtin_ia32_prolq128_mask:
+ case X86::BI__builtin_ia32_prolq256_mask:
+ case X86::BI__builtin_ia32_prord128_mask:
+ case X86::BI__builtin_ia32_prord256_mask:
+ case X86::BI__builtin_ia32_prorq128_mask:
+ case X86::BI__builtin_ia32_prorq256_mask:
+ case X86::BI__builtin_ia32_psllwi512_mask:
+ case X86::BI__builtin_ia32_psllwi128_mask:
+ case X86::BI__builtin_ia32_psllwi256_mask:
+ case X86::BI__builtin_ia32_psrldi128_mask:
+ case X86::BI__builtin_ia32_psrldi256_mask:
+ case X86::BI__builtin_ia32_psrldi512_mask:
+ case X86::BI__builtin_ia32_psrlqi128_mask:
+ case X86::BI__builtin_ia32_psrlqi256_mask:
+ case X86::BI__builtin_ia32_psrlqi512_mask:
+ case X86::BI__builtin_ia32_psrawi512_mask:
+ case X86::BI__builtin_ia32_psrawi128_mask:
+ case X86::BI__builtin_ia32_psrawi256_mask:
+ case X86::BI__builtin_ia32_psrlwi512_mask:
+ case X86::BI__builtin_ia32_psrlwi128_mask:
+ case X86::BI__builtin_ia32_psrlwi256_mask:
+ case X86::BI__builtin_ia32_psradi128_mask:
+ case X86::BI__builtin_ia32_psradi256_mask:
+ case X86::BI__builtin_ia32_psradi512_mask:
+ case X86::BI__builtin_ia32_psraqi128_mask:
+ case X86::BI__builtin_ia32_psraqi256_mask:
+ case X86::BI__builtin_ia32_psraqi512_mask:
+ case X86::BI__builtin_ia32_pslldi128_mask:
+ case X86::BI__builtin_ia32_pslldi256_mask:
+ case X86::BI__builtin_ia32_pslldi512_mask:
+ case X86::BI__builtin_ia32_psllqi128_mask:
+ case X86::BI__builtin_ia32_psllqi256_mask:
+ case X86::BI__builtin_ia32_psllqi512_mask:
+ case X86::BI__builtin_ia32_fpclasspd128_mask:
+ case X86::BI__builtin_ia32_fpclasspd256_mask:
+ case X86::BI__builtin_ia32_fpclassps128_mask:
+ case X86::BI__builtin_ia32_fpclassps256_mask:
+ case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_fpclasspd512_mask:
+ case X86::BI__builtin_ia32_fpclasssd_mask:
+ case X86::BI__builtin_ia32_fpclassss_mask:
+ i = 1; l = 0; u = 255;
+ break;
+ case X86::BI__builtin_ia32_palignr:
+ case X86::BI__builtin_ia32_insertps128:
+ case X86::BI__builtin_ia32_dpps:
+ case X86::BI__builtin_ia32_dppd:
+ case X86::BI__builtin_ia32_dpps256:
+ case X86::BI__builtin_ia32_mpsadbw128:
+ case X86::BI__builtin_ia32_mpsadbw256:
+ case X86::BI__builtin_ia32_pcmpistrm128:
+ case X86::BI__builtin_ia32_pcmpistri128:
+ case X86::BI__builtin_ia32_pcmpistria128:
+ case X86::BI__builtin_ia32_pcmpistric128:
+ case X86::BI__builtin_ia32_pcmpistrio128:
+ case X86::BI__builtin_ia32_pcmpistris128:
+ case X86::BI__builtin_ia32_pcmpistriz128:
+ case X86::BI__builtin_ia32_pclmulqdq128:
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256:
+ i = 2; l = -128; u = 255;
+ break;
+ case X86::BI__builtin_ia32_palignr128:
+ case X86::BI__builtin_ia32_palignr256:
+ case X86::BI__builtin_ia32_palignr128_mask:
+ case X86::BI__builtin_ia32_palignr256_mask:
+ case X86::BI__builtin_ia32_palignr512_mask:
+ case X86::BI__builtin_ia32_alignq512_mask:
+ case X86::BI__builtin_ia32_alignd512_mask:
+ case X86::BI__builtin_ia32_alignd128_mask:
+ case X86::BI__builtin_ia32_alignd256_mask:
+ case X86::BI__builtin_ia32_alignq128_mask:
+ case X86::BI__builtin_ia32_alignq256_mask:
+ case X86::BI__builtin_ia32_vcomisd:
+ case X86::BI__builtin_ia32_vcomiss:
+ case X86::BI__builtin_ia32_shuf_f32x4_mask:
+ case X86::BI__builtin_ia32_shuf_f64x2_mask:
+ case X86::BI__builtin_ia32_shuf_i32x4_mask:
+ case X86::BI__builtin_ia32_shuf_i64x2_mask:
+ case X86::BI__builtin_ia32_dbpsadbw128_mask:
+ case X86::BI__builtin_ia32_dbpsadbw256_mask:
+ case X86::BI__builtin_ia32_dbpsadbw512_mask:
+ i = 2; l = 0; u = 255;
+ break;
+ case X86::BI__builtin_ia32_fixupimmpd512_mask:
+ case X86::BI__builtin_ia32_fixupimmpd512_maskz:
+ case X86::BI__builtin_ia32_fixupimmps512_mask:
+ case X86::BI__builtin_ia32_fixupimmps512_maskz:
+ case X86::BI__builtin_ia32_fixupimmsd_mask:
+ case X86::BI__builtin_ia32_fixupimmsd_maskz:
+ case X86::BI__builtin_ia32_fixupimmss_mask:
+ case X86::BI__builtin_ia32_fixupimmss_maskz:
+ case X86::BI__builtin_ia32_fixupimmpd128_mask:
+ case X86::BI__builtin_ia32_fixupimmpd128_maskz:
+ case X86::BI__builtin_ia32_fixupimmpd256_mask:
+ case X86::BI__builtin_ia32_fixupimmpd256_maskz:
+ case X86::BI__builtin_ia32_fixupimmps128_mask:
+ case X86::BI__builtin_ia32_fixupimmps128_maskz:
+ case X86::BI__builtin_ia32_fixupimmps256_mask:
+ case X86::BI__builtin_ia32_fixupimmps256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ i = 3; l = 0; u = 255;
+ break;
+ case X86::BI__builtin_ia32_pcmpestrm128:
+ case X86::BI__builtin_ia32_pcmpestri128:
+ case X86::BI__builtin_ia32_pcmpestria128:
+ case X86::BI__builtin_ia32_pcmpestric128:
+ case X86::BI__builtin_ia32_pcmpestrio128:
+ case X86::BI__builtin_ia32_pcmpestris128:
+ case X86::BI__builtin_ia32_pcmpestriz128:
+ i = 4; l = -128; u = 255;
+ break;
+ case X86::BI__builtin_ia32_rndscalesd_round_mask:
+ case X86::BI__builtin_ia32_rndscaless_round_mask:
+ i = 4; l = 0; u = 255;
+ break;
}
return SemaBuiltinConstantArgRange(TheCall, i, l, u);
}
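In practice these ranges reject bad immediates at compile time, e.g. (a sketch assuming the SSE headers are available):

    #include <xmmintrin.h>
    void warm(const char *p) {
      _mm_prefetch(p, _MM_HINT_T0);  // OK: the hint is within [0, 3]
      // _mm_prefetch(p, 9);         // error: value must be from 0 to 3
    }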
@@ -1534,10 +2416,10 @@ bool Sema::CheckOtherCall(CallExpr *TheCall, const FunctionProtoType *Proto) {
}
static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) {
- if (Ordering < AtomicExpr::AO_ABI_memory_order_relaxed ||
- Ordering > AtomicExpr::AO_ABI_memory_order_seq_cst)
+ if (!llvm::isValidAtomicOrderingCABI(Ordering))
return false;
+ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering;
switch (Op) {
case AtomicExpr::AO__c11_atomic_init:
llvm_unreachable("There is no ordering argument for an init");
@@ -1545,15 +2427,15 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) {
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load:
- return Ordering != AtomicExpr::AO_ABI_memory_order_release &&
- Ordering != AtomicExpr::AO_ABI_memory_order_acq_rel;
+ return OrderingCABI != llvm::AtomicOrderingCABI::release &&
+ OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
- return Ordering != AtomicExpr::AO_ABI_memory_order_consume &&
- Ordering != AtomicExpr::AO_ABI_memory_order_acquire &&
- Ordering != AtomicExpr::AO_ABI_memory_order_acq_rel;
+ return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
+ OrderingCABI != llvm::AtomicOrderingCABI::acquire &&
+ OrderingCABI != llvm::AtomicOrderingCABI::acq_rel;
default:
return true;
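The behavior is unchanged; what the table forbids can be pictured as follows (a C sketch using the __c11 builtins):

    void roundtrip(_Atomic int *a) {
      int v = __c11_atomic_load(a, __ATOMIC_RELEASE); // diagnosed: release
                                                      // is not a load order
      __c11_atomic_store(a, v, __ATOMIC_ACQUIRE);     // diagnosed: acquire
                                                      // is not a store order
    }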
@@ -1572,6 +2454,8 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
// C __c11_atomic_load(A *, int)
Load,
// void __atomic_load(A *, CP, int)
+ LoadCopy,
+ // void __atomic_store(A *, CP, int)
Copy,
// C __c11_atomic_add(A *, M, int)
Arithmetic,
@@ -1584,8 +2468,8 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
// bool __atomic_compare_exchange(A *, C *, CP, bool, int, int)
GNUCmpXchg
} Form = Init;
- const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 4, 5, 6 };
- const unsigned NumVals[] = { 1, 0, 1, 1, 1, 2, 2, 3 };
+ const unsigned NumArgs[] = { 2, 2, 3, 3, 3, 3, 4, 5, 6 };
+ const unsigned NumVals[] = { 1, 0, 1, 1, 1, 1, 2, 2, 3 };
// where:
// C is an appropriate type,
// A is volatile _Atomic(C) for __c11 builtins and is C for GNU builtins,
@@ -1615,8 +2499,11 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
Form = Load;
break;
- case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__atomic_load:
+ Form = LoadCopy;
+ break;
+
+ case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
Form = Copy;
@@ -1680,7 +2567,11 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
// Inspect the first argument of the atomic operation.
Expr *Ptr = TheCall->getArg(0);
- Ptr = DefaultFunctionArrayLvalueConversion(Ptr).get();
+ ExprResult ConvertedPtr = DefaultFunctionArrayLvalueConversion(Ptr);
+ if (ConvertedPtr.isInvalid())
+ return ExprError();
+
+ Ptr = ConvertedPtr.get();
const PointerType *pointerType = Ptr->getType()->getAs<PointerType>();
if (!pointerType) {
Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer)
@@ -1703,7 +2594,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
return ExprError();
}
ValType = AtomTy->getAs<AtomicType>()->getValueType();
- } else if (Form != Load && Op != AtomicExpr::AO__atomic_load) {
+ } else if (Form != Load && Form != LoadCopy) {
if (ValType.isConstQualified()) {
Diag(DRE->getLocStart(), diag::err_atomic_op_needs_non_const_pointer)
<< Ptr->getType() << Ptr->getSourceRange();
@@ -1764,10 +2655,11 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
// atomic_fetch_or takes a pointer to a volatile 'A'. We shouldn't let the
// volatile-ness of the pointee-type inject itself into the result or the
- // other operands.
+ // other operands. Similarly atomic_load can take a pointer to a const 'A'.
ValType.removeLocalVolatile();
+ ValType.removeLocalConst();
QualType ResultType = ValType;
- if (Form == Copy || Form == GNUXchg || Form == Init)
+ if (Form == Copy || Form == LoadCopy || Form == GNUXchg || Form == Init)
ResultType = Context.VoidTy;
else if (Form == C11CmpXchg || Form == GNUCmpXchg)
ResultType = Context.BoolTy;
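With LoadCopy split out and const stripped from the value type, the GNU copy form now accepts a const source (a C sketch):

    void snapshot(const int *src, int *dst) {
      __atomic_load(src, dst, __ATOMIC_ACQUIRE); // accepted: the const
                                                 // pointee no longer leaks
                                                 // into the deduced type
    }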
@@ -1778,10 +2670,6 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
if (!IsC11 && !IsN)
ByValType = Ptr->getType();
- // FIXME: __atomic_load allows the first argument to be a a pointer to const
- // but not the second argument. We need to manually remove possible const
- // qualifiers.
-
// The first argument --- the pointer --- has a fixed type; we
// deduce the types of the rest of the arguments accordingly. Walk
// the remaining arguments, converting them to the deduced value type.
@@ -1848,6 +2736,7 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
case Load:
SubExprs.push_back(TheCall->getArg(1)); // Order
break;
+ case LoadCopy:
case Copy:
case Arithmetic:
case Xchg:
@@ -1897,7 +2786,6 @@ ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult,
return AE;
}
-
/// checkBuiltinArgument - Given a call to a builtin function, perform
/// normal type-checking on the given argument, updating the call in
/// place. This is useful when a builtin function requires custom
@@ -2443,6 +3331,7 @@ bool Sema::SemaBuiltinVAStartImpl(CallExpr *TheCall) {
// block.
QualType Type;
SourceLocation ParamLoc;
+ bool IsCRegister = false;
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
@@ -2450,24 +3339,39 @@ bool Sema::SemaBuiltinVAStartImpl(CallExpr *TheCall) {
// Get the last formal in the current function.
const ParmVarDecl *LastArg;
if (CurBlock)
- LastArg = *(CurBlock->TheDecl->param_end()-1);
+ LastArg = CurBlock->TheDecl->parameters().back();
else if (FunctionDecl *FD = getCurFunctionDecl())
- LastArg = *(FD->param_end()-1);
+ LastArg = FD->parameters().back();
else
- LastArg = *(getCurMethodDecl()->param_end()-1);
+ LastArg = getCurMethodDecl()->parameters().back();
SecondArgIsLastNamedArgument = PV == LastArg;
Type = PV->getType();
ParamLoc = PV->getLocation();
+ IsCRegister =
+ PV->getStorageClass() == SC_Register && !getLangOpts().CPlusPlus;
}
}
if (!SecondArgIsLastNamedArgument)
Diag(TheCall->getArg(1)->getLocStart(),
- diag::warn_second_parameter_of_va_start_not_last_named_argument);
- else if (Type->isReferenceType()) {
- Diag(Arg->getLocStart(),
- diag::warn_va_start_of_reference_type_is_undefined);
+ diag::warn_second_arg_of_va_start_not_last_named_param);
+ else if (IsCRegister || Type->isReferenceType() ||
+ Type->isSpecificBuiltinType(BuiltinType::Float) || [=] {
+ // Promotable integers are UB, but enumerations need a bit of
+ // extra checking to see what their promotable type actually is.
+ if (!Type->isPromotableIntegerType())
+ return false;
+ if (!Type->isEnumeralType())
+ return true;
+ const EnumDecl *ED = Type->getAs<EnumType>()->getDecl();
+ return !(ED &&
+ Context.typesAreCompatible(ED->getPromotionType(), Type));
+ }()) {
+ unsigned Reason = 0;
+ if (Type->isReferenceType()) Reason = 1;
+ else if (IsCRegister) Reason = 2;
+ Diag(Arg->getLocStart(), diag::warn_va_start_type_is_undefined) << Reason;
Diag(ParamLoc, diag::note_parameter_type) << Type;
}
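The widened check now covers several kinds of undefined last parameter; a C sketch (hypothetical functions):

    #include <stdarg.h>
    void take_reg(register int last, ...) { // C 'register' parameter
      va_list ap;
      va_start(ap, last);                   // warned: undefined behavior
      va_end(ap);
    }
    void take_float(float last, ...) {      // promoted to double: also warned
      va_list ap;
      va_start(ap, last);
      va_end(ap);
    }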
@@ -2662,8 +3566,7 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
// Determine which of the following types of shufflevector we're checking:
// 1) unary, vector mask: (lhs, mask)
- // 2) binary, vector mask: (lhs, rhs, mask)
- // 3) binary, scalar mask: (lhs, rhs, index, ..., index)
+ // 2) binary, scalar mask: (lhs, rhs, index, ..., index)
QualType resType = TheCall->getArg(0)->getType();
unsigned numElements = 0;
@@ -2879,6 +3782,28 @@ bool Sema::SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum,
return false;
}
+/// SemaBuiltinConstantArgMultiple - Handle a check if argument ArgNum of
+/// CallExpr TheCall is a constant expression that is a multiple of Num.
+bool Sema::SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum,
+ unsigned Num) {
+ llvm::APSInt Result;
+
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check constant-ness first.
+ if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
+ return true;
+
+ if (Result.getSExtValue() % Num != 0)
+ return Diag(TheCall->getLocStart(), diag::err_argument_not_multiple)
+ << Num << Arg->getSourceRange();
+
+ return false;
+}
+
/// SemaBuiltinARMSpecialReg - Handle a check if argument ArgNum of CallExpr
/// TheCall is an ARM/AArch64 special register string literal.
bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
@@ -3002,7 +3927,6 @@ bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) {
return false;
}
-
/// SemaBuiltinSetjmp - Handle __builtin_setjmp(void *env[5]).
/// This checks that the target supports __builtin_setjmp.
bool Sema::SemaBuiltinSetjmp(CallExpr *TheCall) {
@@ -3013,12 +3937,68 @@ bool Sema::SemaBuiltinSetjmp(CallExpr *TheCall) {
}
namespace {
+class UncoveredArgHandler {
+ enum { Unknown = -1, AllCovered = -2 };
+ signed FirstUncoveredArg;
+ SmallVector<const Expr *, 4> DiagnosticExprs;
+
+public:
+ UncoveredArgHandler() : FirstUncoveredArg(Unknown) { }
+
+ bool hasUncoveredArg() const {
+ return (FirstUncoveredArg >= 0);
+ }
+
+ unsigned getUncoveredArg() const {
+ assert(hasUncoveredArg() && "no uncovered argument");
+ return FirstUncoveredArg;
+ }
+
+ void setAllCovered() {
+ // A string has been found with all arguments covered, so clear out
+ // the diagnostics.
+ DiagnosticExprs.clear();
+ FirstUncoveredArg = AllCovered;
+ }
+
+ void Update(signed NewFirstUncoveredArg, const Expr *StrExpr) {
+ assert(NewFirstUncoveredArg >= 0 && "Outside range");
+
+ // Don't update if a previous string covers all arguments.
+ if (FirstUncoveredArg == AllCovered)
+ return;
+
+ // UncoveredArgHandler tracks the highest uncovered argument index
+ // and with it all the strings that match this index.
+ if (NewFirstUncoveredArg == FirstUncoveredArg)
+ DiagnosticExprs.push_back(StrExpr);
+ else if (NewFirstUncoveredArg > FirstUncoveredArg) {
+ DiagnosticExprs.clear();
+ DiagnosticExprs.push_back(StrExpr);
+ FirstUncoveredArg = NewFirstUncoveredArg;
+ }
+ }
+
+ void Diagnose(Sema &S, bool IsFunctionCall, const Expr *ArgExpr);
+};
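// Illustrative example (editor's note, not part of the patch): the handler
// above aggregates uncovered-argument state across all candidate format
// strings, so a diagnostic is emitted only when every string leaves some
// data argument unused:
//
//   printf(brief ? "%d %d\n" : "%d\n", x, y);
//     // the true branch covers both arguments, so setAllCovered()
//     // suppresses a -Wformat warning about 'y'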
+
enum StringLiteralCheckType {
SLCT_NotALiteral,
SLCT_UncheckedLiteral,
SLCT_CheckedLiteral
};
-}
+} // end anonymous namespace
+
+static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+ const Expr *OrigFormatExpr,
+ ArrayRef<const Expr *> Args,
+ bool HasVAListArg, unsigned format_idx,
+ unsigned firstDataArg,
+ Sema::FormatStringType Type,
+ bool inFunctionCall,
+ Sema::VariadicCallType CallType,
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg);
// Determine if an expression is a string literal or constant string.
// If this function returns false on the arguments to a function expecting a
@@ -3029,7 +4009,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
bool HasVAListArg, unsigned format_idx,
unsigned firstDataArg, Sema::FormatStringType Type,
Sema::VariadicCallType CallType, bool InFunctionCall,
- llvm::SmallBitVector &CheckedVarArgs) {
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg) {
tryAgain:
if (E->isTypeDependent() || E->isValueDependent())
return SLCT_NotALiteral;
@@ -3050,17 +4031,39 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
// completely checked only if both sub-expressions were checked.
const AbstractConditionalOperator *C =
cast<AbstractConditionalOperator>(E);
- StringLiteralCheckType Left =
- checkFormatStringExpr(S, C->getTrueExpr(), Args,
- HasVAListArg, format_idx, firstDataArg,
- Type, CallType, InFunctionCall, CheckedVarArgs);
- if (Left == SLCT_NotALiteral)
- return SLCT_NotALiteral;
+
+ // Determine whether it is necessary to check both sub-expressions, for
+ // example, because the condition expression is a constant that can be
+ // evaluated at compile time.
+ bool CheckLeft = true, CheckRight = true;
+
+ bool Cond;
+ if (C->getCond()->EvaluateAsBooleanCondition(Cond, S.getASTContext())) {
+ if (Cond)
+ CheckRight = false;
+ else
+ CheckLeft = false;
+ }
+
+ StringLiteralCheckType Left;
+ if (!CheckLeft)
+ Left = SLCT_UncheckedLiteral;
+ else {
+ Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
+ HasVAListArg, format_idx, firstDataArg,
+ Type, CallType, InFunctionCall,
+ CheckedVarArgs, UncoveredArg);
+ if (Left == SLCT_NotALiteral || !CheckRight)
+ return Left;
+ }
+
StringLiteralCheckType Right =
checkFormatStringExpr(S, C->getFalseExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
- Type, CallType, InFunctionCall, CheckedVarArgs);
- return Left < Right ? Left : Right;
+ Type, CallType, InFunctionCall, CheckedVarArgs,
+ UncoveredArg);
+
+ return (CheckLeft && Left < Right) ? Left : Right;
}
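// Illustrative example (editor's note, not part of the patch): when the
// condition folds to a constant, only the live branch is checked, so dead
// branches no longer trigger spurious -Wformat diagnostics:
//
//   printf(0 ? "%s\n" : "%d\n", 42);   // only "%d\n" is checked here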
case Stmt::ImplicitCastExprClass: {
@@ -3111,7 +4114,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Init, Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
- /*InFunctionCall*/false, CheckedVarArgs);
+ /*InFunctionCall*/false, CheckedVarArgs,
+ UncoveredArg);
}
}
@@ -3166,7 +4170,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Arg, Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
- CheckedVarArgs);
+ CheckedVarArgs, UncoveredArg);
} else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
@@ -3175,7 +4179,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
return checkFormatStringExpr(S, Arg, Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
- InFunctionCall, CheckedVarArgs);
+ InFunctionCall, CheckedVarArgs,
+ UncoveredArg);
}
}
}
@@ -3192,8 +4197,9 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
StrE = cast<StringLiteral>(E);
if (StrE) {
- S.CheckFormatString(StrE, E, Args, HasVAListArg, format_idx, firstDataArg,
- Type, InFunctionCall, CallType, CheckedVarArgs);
+ CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
+ firstDataArg, Type, InFunctionCall, CallType,
+ CheckedVarArgs, UncoveredArg);
return SLCT_CheckedLiteral;
}
@@ -3261,10 +4267,20 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// C string (e.g. "%d")
// ObjC string uses the same format specifiers as C string, so we can use
// the same format string checking logic for both ObjC and C strings.
+ UncoveredArgHandler UncoveredArg;
StringLiteralCheckType CT =
checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,
format_idx, firstDataArg, Type, CallType,
- /*IsFunctionCall*/true, CheckedVarArgs);
+ /*IsFunctionCall*/true, CheckedVarArgs,
+ UncoveredArg);
+
+ // Generate a diagnostic when an uncovered argument is detected.
+ if (UncoveredArg.hasUncoveredArg()) {
+ unsigned ArgIdx = UncoveredArg.getUncoveredArg() + firstDataArg;
+ assert(ArgIdx < Args.size() && "ArgIdx outside bounds");
+ UncoveredArg.Diagnose(*this, /*IsFunctionCall*/true, Args[ArgIdx]);
+ }
+
if (CT != SLCT_NotALiteral)
// Literal format string found, check done!
return CT == SLCT_CheckedLiteral;
@@ -3278,20 +4294,33 @@ bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
// format is either NSString or CFString. This is a hack to prevent
// diag when using the NSLocalizedString and CFCopyLocalizedString macros
// which are usually used in place of NS and CF string literals.
- if (Type == FST_NSString &&
- SourceMgr.isInSystemMacro(Args[format_idx]->getLocStart()))
+ SourceLocation FormatLoc = Args[format_idx]->getLocStart();
+ if (Type == FST_NSString && SourceMgr.isInSystemMacro(FormatLoc))
return false;
// If there are no arguments specified, warn with -Wformat-security, otherwise
// warn only with -Wformat-nonliteral.
- if (Args.size() == firstDataArg)
- Diag(Args[format_idx]->getLocStart(),
- diag::warn_format_nonliteral_noargs)
+ if (Args.size() == firstDataArg) {
+ Diag(FormatLoc, diag::warn_format_nonliteral_noargs)
<< OrigFormatExpr->getSourceRange();
- else
- Diag(Args[format_idx]->getLocStart(),
- diag::warn_format_nonliteral)
- << OrigFormatExpr->getSourceRange();
+ switch (Type) {
+ default:
+ break;
+ case FST_Kprintf:
+ case FST_FreeBSDKPrintf:
+ case FST_Printf:
+ Diag(FormatLoc, diag::note_format_security_fixit)
+ << FixItHint::CreateInsertion(FormatLoc, "\"%s\", ");
+ break;
+ case FST_NSString:
+ Diag(FormatLoc, diag::note_format_security_fixit)
+ << FixItHint::CreateInsertion(FormatLoc, "@\"%@\", ");
+ break;
+ }
+ } else {
+ Diag(FormatLoc, diag::warn_format_nonliteral)
+ << OrigFormatExpr->getSourceRange();
+ }
return false;
}
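// Illustrative example (editor's note, not part of the patch): the new
// note_format_security_fixit above suggests wrapping a non-literal format
// in a literal when no data arguments are passed:
//
//   printf(msg);    // -Wformat-security; fix-it: printf("%s", msg);
//   NSLog(str);     // NSString case;     fix-it: NSLog(@"%@", str);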
@@ -3313,6 +4342,8 @@ protected:
bool inFunctionCall;
Sema::VariadicCallType CallType;
llvm::SmallBitVector &CheckedVarArgs;
+ UncoveredArgHandler &UncoveredArg;
+
public:
CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
@@ -3320,14 +4351,15 @@ public:
ArrayRef<const Expr *> Args,
unsigned formatIdx, bool inFunctionCall,
Sema::VariadicCallType callType,
- llvm::SmallBitVector &CheckedVarArgs)
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg)
: S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
FirstDataArg(firstDataArg), NumDataArgs(numDataArgs),
Beg(beg), HasVAListArg(hasVAListArg),
Args(Args), FormatIdx(formatIdx),
usesPositionalArgs(false), atFirstArg(true),
inFunctionCall(inFunctionCall), CallType(callType),
- CheckedVarArgs(CheckedVarArgs) {
+ CheckedVarArgs(CheckedVarArgs), UncoveredArg(UncoveredArg) {
CoveredArgs.resize(numDataArgs);
CoveredArgs.reset();
}
@@ -3362,12 +4394,11 @@ public:
void HandleNullChar(const char *nullCharacter) override;
template <typename Range>
- static void EmitFormatDiagnostic(Sema &S, bool inFunctionCall,
- const Expr *ArgumentExpr,
- PartialDiagnostic PDiag,
- SourceLocation StringLoc,
- bool IsStringLocation, Range StringRange,
- ArrayRef<FixItHint> Fixit = None);
+ static void
+ EmitFormatDiagnostic(Sema &S, bool inFunctionCall, const Expr *ArgumentExpr,
+ const PartialDiagnostic &PDiag, SourceLocation StringLoc,
+ bool IsStringLocation, Range StringRange,
+ ArrayRef<FixItHint> Fixit = None);
protected:
bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc,
@@ -3396,7 +4427,7 @@ protected:
bool IsStringLocation, Range StringRange,
ArrayRef<FixItHint> Fixit = None);
};
-}
+} // end anonymous namespace
SourceRange CheckFormatHandler::getFormatStringRange() {
return OrigFormatExpr->getSourceRange();
@@ -3558,26 +4589,44 @@ const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
}
void CheckFormatHandler::DoneProcessing() {
- // Does the number of data arguments exceed the number of
- // format conversions in the format string?
+ // Does the number of data arguments exceed the number of
+ // format conversions in the format string?
if (!HasVAListArg) {
// Find any arguments that weren't covered.
CoveredArgs.flip();
signed notCoveredArg = CoveredArgs.find_first();
if (notCoveredArg >= 0) {
assert((unsigned)notCoveredArg < NumDataArgs);
- if (const Expr *E = getDataArg((unsigned) notCoveredArg)) {
- SourceLocation Loc = E->getLocStart();
- if (!S.getSourceManager().isInSystemMacro(Loc)) {
- EmitFormatDiagnostic(S.PDiag(diag::warn_printf_data_arg_not_used),
- Loc, /*IsStringLocation*/false,
- getFormatStringRange());
- }
- }
+ UncoveredArg.Update(notCoveredArg, OrigFormatExpr);
+ } else {
+ UncoveredArg.setAllCovered();
}
}
}
+void UncoveredArgHandler::Diagnose(Sema &S, bool IsFunctionCall,
+ const Expr *ArgExpr) {
+ assert(hasUncoveredArg() && DiagnosticExprs.size() > 0 &&
+ "Invalid state");
+
+ if (!ArgExpr)
+ return;
+
+ SourceLocation Loc = ArgExpr->getLocStart();
+
+ if (S.getSourceManager().isInSystemMacro(Loc))
+ return;
+
+ PartialDiagnostic PDiag = S.PDiag(diag::warn_printf_data_arg_not_used);
+ for (auto E : DiagnosticExprs)
+ PDiag << E->getSourceRange();
+
+ CheckFormatHandler::EmitFormatDiagnostic(
+ S, IsFunctionCall, DiagnosticExprs[0],
+ PDiag, Loc, /*IsStringLocation*/false,
+ DiagnosticExprs[0]->getSourceRange());
+}
+
bool
CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
SourceLocation Loc,
@@ -3585,7 +4634,6 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
unsigned specifierLen,
const char *csStart,
unsigned csLen) {
-
bool keepGoing = true;
if (argIndex < NumDataArgs) {
// Consider the argument covered, even though the specifier doesn't
@@ -3600,12 +4648,41 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
// gibberish when trying to match arguments.
keepGoing = false;
}
-
- EmitFormatDiagnostic(S.PDiag(diag::warn_format_invalid_conversion)
- << StringRef(csStart, csLen),
- Loc, /*IsStringLocation*/true,
- getSpecifierRange(startSpec, specifierLen));
-
+
+ StringRef Specifier(csStart, csLen);
+
+ // If the specifier is non-printable, it could be the first byte of a UTF-8
+ // sequence. In that case, print the UTF-8 code point; if not, print the
+ // byte's hex value.
+ std::string CodePointStr;
+ if (!llvm::sys::locale::isPrint(*csStart)) {
+ UTF32 CodePoint;
+ const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
+ const UTF8 *E =
+ reinterpret_cast<const UTF8 *>(csStart + csLen);
+ ConversionResult Result =
+ llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
+
+ if (Result != conversionOK) {
+ unsigned char FirstChar = *csStart;
+ CodePoint = (UTF32)FirstChar;
+ }
+
+ llvm::raw_string_ostream OS(CodePointStr);
+ if (CodePoint < 256)
+ OS << "\\x" << llvm::format("%02x", CodePoint);
+ else if (CodePoint <= 0xFFFF)
+ OS << "\\u" << llvm::format("%04x", CodePoint);
+ else
+ OS << "\\U" << llvm::format("%08x", CodePoint);
+ OS.flush();
+ Specifier = CodePointStr;
+ }
+
+ EmitFormatDiagnostic(
+ S.PDiag(diag::warn_format_invalid_conversion) << Specifier, Loc,
+ /*IsStringLocation*/ true, getSpecifierRange(startSpec, specifierLen));
+
return keepGoing;
}
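// Illustrative example (editor's note, not part of the patch): invalid
// conversion characters that are not printable are now escaped before
// being quoted in the warning, e.g.:
//
//   printf("%\001");         // diagnosed as invalid conversion '\x01'
//   printf("%\xe2\x98\x83"); // valid UTF-8 sequence: diagnosed as '\u2603'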
@@ -3632,6 +4709,10 @@ CheckFormatHandler::CheckNumArgs(
EmitFormatDiagnostic(
PDiag, getLocationOfByte(CS.getStart()), /*IsStringLocation*/true,
getSpecifierRange(startSpecifier, specifierLen));
+
+ // Since more arguments than conversion tokens were given, every argument
+ // is by extension covered, so mark them all as covered.
+ UncoveredArg.setAllCovered();
return false;
}
return true;
@@ -3674,14 +4755,11 @@ void CheckFormatHandler::EmitFormatDiagnostic(PartialDiagnostic PDiag,
/// templated so it can accept either a CharSourceRange or a SourceRange.
///
/// \param FixIt optional fix it hint for the format string.
-template<typename Range>
-void CheckFormatHandler::EmitFormatDiagnostic(Sema &S, bool InFunctionCall,
- const Expr *ArgumentExpr,
- PartialDiagnostic PDiag,
- SourceLocation Loc,
- bool IsStringLocation,
- Range StringRange,
- ArrayRef<FixItHint> FixIt) {
+template <typename Range>
+void CheckFormatHandler::EmitFormatDiagnostic(
+ Sema &S, bool InFunctionCall, const Expr *ArgumentExpr,
+ const PartialDiagnostic &PDiag, SourceLocation Loc, bool IsStringLocation,
+ Range StringRange, ArrayRef<FixItHint> FixIt) {
if (InFunctionCall) {
const Sema::SemaDiagnosticBuilder &D = S.Diag(Loc, PDiag);
D << StringRange;
@@ -3704,6 +4782,7 @@ void CheckFormatHandler::EmitFormatDiagnostic(Sema &S, bool InFunctionCall,
namespace {
class CheckPrintfHandler : public CheckFormatHandler {
bool ObjCContext;
+
public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr, unsigned firstDataArg,
@@ -3712,14 +4791,15 @@ public:
ArrayRef<const Expr *> Args,
unsigned formatIdx, bool inFunctionCall,
Sema::VariadicCallType CallType,
- llvm::SmallBitVector &CheckedVarArgs)
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg)
: CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
numDataArgs, beg, hasVAListArg, Args,
- formatIdx, inFunctionCall, CallType, CheckedVarArgs),
+ formatIdx, inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg),
ObjCContext(isObjC)
{}
-
bool HandleInvalidPrintfConversionSpecifier(
const analyze_printf::PrintfSpecifier &FS,
const char *startSpecifier,
@@ -3760,7 +4840,7 @@ public:
const char *conversionPosition)
override;
};
-}
+} // end anonymous namespace
bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
const analyze_printf::PrintfSpecifier &FS,
@@ -3779,7 +4859,6 @@ bool CheckPrintfHandler::HandleAmount(
const analyze_format_string::OptionalAmount &Amt,
unsigned k, const char *startSpecifier,
unsigned specifierLen) {
-
if (Amt.hasDataArgument()) {
if (!HasVAListArg) {
unsigned argIndex = Amt.getArgIndex();
@@ -3991,7 +5070,6 @@ CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
&FS,
const char *startSpecifier,
unsigned specifierLen) {
-
using namespace analyze_format_string;
using namespace analyze_printf;
const PrintfConversionSpecifier &CS = FS.getConversionSpecifier();
@@ -4361,7 +5439,6 @@ CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
E->getLocStart(),
/*IsStringLocation*/ false, SpecRange,
FixItHint::CreateReplacement(SpecRange, os.str()));
-
} else {
// The canonical type for formatting this value is different from the
// actual type of the expression. (This occurs, for example, with Darwin's
@@ -4500,11 +5577,12 @@ public:
ArrayRef<const Expr *> Args,
unsigned formatIdx, bool inFunctionCall,
Sema::VariadicCallType CallType,
- llvm::SmallBitVector &CheckedVarArgs)
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg)
: CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
numDataArgs, beg, hasVAListArg,
Args, formatIdx, inFunctionCall, CallType,
- CheckedVarArgs)
+ CheckedVarArgs, UncoveredArg)
{}
bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
@@ -4518,7 +5596,7 @@ public:
void HandleIncompleteScanList(const char *start, const char *end) override;
};
-}
+} // end anonymous namespace
void CheckScanfHandler::HandleIncompleteScanList(const char *start,
const char *end) {
@@ -4545,7 +5623,6 @@ bool CheckScanfHandler::HandleScanfSpecifier(
const analyze_scanf::ScanfSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen) {
-
using namespace analyze_scanf;
using namespace analyze_format_string;
@@ -4665,28 +5742,31 @@ bool CheckScanfHandler::HandleScanfSpecifier(
return true;
}
-void Sema::CheckFormatString(const StringLiteral *FExpr,
- const Expr *OrigFormatExpr,
- ArrayRef<const Expr *> Args,
- bool HasVAListArg, unsigned format_idx,
- unsigned firstDataArg, FormatStringType Type,
- bool inFunctionCall, VariadicCallType CallType,
- llvm::SmallBitVector &CheckedVarArgs) {
-
+static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+ const Expr *OrigFormatExpr,
+ ArrayRef<const Expr *> Args,
+ bool HasVAListArg, unsigned format_idx,
+ unsigned firstDataArg,
+ Sema::FormatStringType Type,
+ bool inFunctionCall,
+ Sema::VariadicCallType CallType,
+ llvm::SmallBitVector &CheckedVarArgs,
+ UncoveredArgHandler &UncoveredArg) {
// CHECK: is the format string a wide literal?
if (!FExpr->isAscii() && !FExpr->isUTF8()) {
CheckFormatHandler::EmitFormatDiagnostic(
- *this, inFunctionCall, Args[format_idx],
- PDiag(diag::warn_format_string_is_wide_literal), FExpr->getLocStart(),
+ S, inFunctionCall, Args[format_idx],
+ S.PDiag(diag::warn_format_string_is_wide_literal), FExpr->getLocStart(),
/*IsStringLocation*/true, OrigFormatExpr->getSourceRange());
return;
}
-
+
// Str - The format string. NOTE: this is NOT null-terminated!
StringRef StrRef = FExpr->getString();
const char *Str = StrRef.data();
// Account for cases where the string literal is truncated in a declaration.
- const ConstantArrayType *T = Context.getAsConstantArrayType(FExpr->getType());
+ const ConstantArrayType *T =
+ S.Context.getAsConstantArrayType(FExpr->getType());
assert(T && "String literal not of constant array type!");
size_t TypeSize = T->getSize().getZExtValue();
size_t StrLen = std::min(std::max(TypeSize, size_t(1)) - 1, StrRef.size());
@@ -4697,8 +5777,8 @@ void Sema::CheckFormatString(const StringLiteral *FExpr,
if (TypeSize <= StrRef.size() &&
StrRef.substr(0, TypeSize).find('\0') == StringRef::npos) {
CheckFormatHandler::EmitFormatDiagnostic(
- *this, inFunctionCall, Args[format_idx],
- PDiag(diag::warn_printf_format_string_not_null_terminated),
+ S, inFunctionCall, Args[format_idx],
+ S.PDiag(diag::warn_printf_format_string_not_null_terminated),
FExpr->getLocStart(),
/*IsStringLocation=*/true, OrigFormatExpr->getSourceRange());
return;
@@ -4707,32 +5787,35 @@ void Sema::CheckFormatString(const StringLiteral *FExpr,
// CHECK: empty format string?
if (StrLen == 0 && numDataArgs > 0) {
CheckFormatHandler::EmitFormatDiagnostic(
- *this, inFunctionCall, Args[format_idx],
- PDiag(diag::warn_empty_format_string), FExpr->getLocStart(),
+ S, inFunctionCall, Args[format_idx],
+ S.PDiag(diag::warn_empty_format_string), FExpr->getLocStart(),
/*IsStringLocation*/true, OrigFormatExpr->getSourceRange());
return;
}
-
- if (Type == FST_Printf || Type == FST_NSString ||
- Type == FST_FreeBSDKPrintf || Type == FST_OSTrace) {
- CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
- numDataArgs, (Type == FST_NSString || Type == FST_OSTrace),
+
+ if (Type == Sema::FST_Printf || Type == Sema::FST_NSString ||
+ Type == Sema::FST_FreeBSDKPrintf || Type == Sema::FST_OSTrace) {
+ CheckPrintfHandler H(S, FExpr, OrigFormatExpr, firstDataArg,
+ numDataArgs, (Type == Sema::FST_NSString ||
+ Type == Sema::FST_OSTrace),
Str, HasVAListArg, Args, format_idx,
- inFunctionCall, CallType, CheckedVarArgs);
-
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
+
if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
- getLangOpts(),
- Context.getTargetInfo(),
- Type == FST_FreeBSDKPrintf))
+ S.getLangOpts(),
+ S.Context.getTargetInfo(),
+ Type == Sema::FST_FreeBSDKPrintf))
H.DoneProcessing();
- } else if (Type == FST_Scanf) {
- CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, numDataArgs,
+ } else if (Type == Sema::FST_Scanf) {
+ CheckScanfHandler H(S, FExpr, OrigFormatExpr, firstDataArg, numDataArgs,
Str, HasVAListArg, Args, format_idx,
- inFunctionCall, CallType, CheckedVarArgs);
-
+ inFunctionCall, CallType, CheckedVarArgs,
+ UncoveredArg);
+
if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen,
- getLangOpts(),
- Context.getTargetInfo()))
+ S.getLangOpts(),
+ S.Context.getTargetInfo()))
H.DoneProcessing();
} // TODO: handle other formats
}
@@ -5145,7 +6228,6 @@ void Sema::CheckAbsoluteValueFunction(const CallExpr *Call,
emitReplacement(*this, Call->getExprLoc(),
Call->getCallee()->getSourceRange(), NewAbsKind, ArgType);
- return;
}
//===--- CHECK: Standard memory functions ---------------------------------===//
@@ -5191,7 +6273,7 @@ static const CXXRecordDecl *getContainedDynamicClass(QualType T,
const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
RD = RD ? RD->getDefinition() : nullptr;
- if (!RD)
+ if (!RD || RD->isInvalidDecl())
return nullptr;
if (RD->isDynamicClass())
@@ -5398,7 +6480,6 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call,
<< FixItHint::CreateInsertion(ArgRange.getBegin(), "(void*)"));
break;
}
-
}
// A little helper routine: ignore addition and subtraction of integer literals.
@@ -5613,10 +6694,12 @@ void Sema::CheckStrncatArguments(const CallExpr *CE,
//===--- CHECK: Return Address of Stack Variable --------------------------===//
-static Expr *EvalVal(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
- Decl *ParentDecl);
-static Expr *EvalAddr(Expr* E, SmallVectorImpl<DeclRefExpr *> &refVars,
- Decl *ParentDecl);
+static const Expr *EvalVal(const Expr *E,
+ SmallVectorImpl<const DeclRefExpr *> &refVars,
+ const Decl *ParentDecl);
+static const Expr *EvalAddr(const Expr *E,
+ SmallVectorImpl<const DeclRefExpr *> &refVars,
+ const Decl *ParentDecl);
/// CheckReturnStackAddr - Check if a return statement returns the address
/// of a stack variable.
@@ -5624,8 +6707,8 @@ static void
CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
SourceLocation ReturnLoc) {
- Expr *stackE = nullptr;
- SmallVector<DeclRefExpr *, 8> refVars;
+ const Expr *stackE = nullptr;
+ SmallVector<const DeclRefExpr *, 8> refVars;
// Perform checking for returned stack addresses, local blocks,
// label addresses or references to temporaries.
@@ -5639,6 +6722,12 @@ CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
if (!stackE)
return; // Nothing suspicious was found.
+ // Parameters are initialized in the calling scope, so taking the address
+ // of a parameter reference doesn't need a warning.
+ for (auto *DRE : refVars)
+ if (isa<ParmVarDecl>(DRE->getDecl()))
+ return;
+
SourceLocation diagLoc;
SourceRange diagRange;
if (refVars.empty()) {
@@ -5653,7 +6742,8 @@ CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
diagRange = refVars[0]->getSourceRange();
}
- if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(stackE)) { //address of local var.
+ if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(stackE)) {
+ // address of local var
S.Diag(diagLoc, diag::warn_ret_stack_addr_ref) << lhsType->isReferenceType()
<< DR->getDecl()->getDeclName() << diagRange;
} else if (isa<BlockExpr>(stackE)) { // local block.
@@ -5661,6 +6751,13 @@ CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
} else if (isa<AddrLabelExpr>(stackE)) { // address of label.
S.Diag(diagLoc, diag::warn_ret_addr_label) << diagRange;
} else { // local temporary.
+ // If there is an LValue->RValue conversion, then the value of the
+ // reference type is used, not the reference.
+ if (auto *ICE = dyn_cast<ImplicitCastExpr>(RetValExp)) {
+ if (ICE->getCastKind() == CK_LValueToRValue) {
+ return;
+ }
+ }
S.Diag(diagLoc, diag::warn_ret_local_temp_addr_ref)
<< lhsType->isReferenceType() << diagRange;
}
@@ -5668,12 +6765,12 @@ CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
// Display the "trail" of reference variables that we followed until we
// found the problematic expression using notes.
for (unsigned i = 0, e = refVars.size(); i != e; ++i) {
- VarDecl *VD = cast<VarDecl>(refVars[i]->getDecl());
+ const VarDecl *VD = cast<VarDecl>(refVars[i]->getDecl());
// If this var binds to another reference var, show the range of the next
// var, otherwise the var binds to the problematic expression, in which case
// show the range of the expression.
- SourceRange range = (i < e-1) ? refVars[i+1]->getSourceRange()
- : stackE->getSourceRange();
+ SourceRange range = (i < e - 1) ? refVars[i + 1]->getSourceRange()
+ : stackE->getSourceRange();
S.Diag(VD->getLocation(), diag::note_ref_var_local_bind)
<< VD->getDeclName() << range;
}
@@ -5705,8 +6802,9 @@ CheckReturnStackAddr(Sema &S, Expr *RetValExp, QualType lhsType,
/// * arbitrary interplay between "&" and "*" operators
/// * pointer arithmetic from an address of a stack variable
/// * taking the address of an array element where the array is on the stack
-static Expr *EvalAddr(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
- Decl *ParentDecl) {
+static const Expr *EvalAddr(const Expr *E,
+ SmallVectorImpl<const DeclRefExpr *> &refVars,
+ const Decl *ParentDecl) {
if (E->isTypeDependent())
return nullptr;
@@ -5723,13 +6821,13 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
// EvalAddr and EvalVal appropriately.
switch (E->getStmtClass()) {
case Stmt::DeclRefExprClass: {
- DeclRefExpr *DR = cast<DeclRefExpr>(E);
+ const DeclRefExpr *DR = cast<DeclRefExpr>(E);
// If we leave the immediate function, the lifetime isn't about to end.
if (DR->refersToEnclosingVariableOrCapture())
return nullptr;
- if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
+ if (const VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
// If this is a reference variable, follow through to the expression that
// it points to.
if (V->hasLocalStorage() &&
@@ -5745,44 +6843,44 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
case Stmt::UnaryOperatorClass: {
// The only unary operator that makes sense to handle here
// is AddrOf. All others don't make sense as pointers.
- UnaryOperator *U = cast<UnaryOperator>(E);
+ const UnaryOperator *U = cast<UnaryOperator>(E);
if (U->getOpcode() == UO_AddrOf)
return EvalVal(U->getSubExpr(), refVars, ParentDecl);
- else
- return nullptr;
+ return nullptr;
}
case Stmt::BinaryOperatorClass: {
// Handle pointer arithmetic. All other binary operators are not valid
// in this context.
- BinaryOperator *B = cast<BinaryOperator>(E);
+ const BinaryOperator *B = cast<BinaryOperator>(E);
BinaryOperatorKind op = B->getOpcode();
if (op != BO_Add && op != BO_Sub)
return nullptr;
- Expr *Base = B->getLHS();
+ const Expr *Base = B->getLHS();
// Determine which argument is the real pointer base. It could be
// the RHS argument instead of the LHS.
- if (!Base->getType()->isPointerType()) Base = B->getRHS();
+ if (!Base->getType()->isPointerType())
+ Base = B->getRHS();
- assert (Base->getType()->isPointerType());
+ assert(Base->getType()->isPointerType());
return EvalAddr(Base, refVars, ParentDecl);
}
// For conditional operators we need to see if either the LHS or RHS are
// valid DeclRefExpr*s. If one of them is valid, we return it.
case Stmt::ConditionalOperatorClass: {
- ConditionalOperator *C = cast<ConditionalOperator>(E);
+ const ConditionalOperator *C = cast<ConditionalOperator>(E);
// Handle the GNU extension for missing LHS.
// FIXME: That isn't a ConditionalOperator, so doesn't get here.
- if (Expr *LHSExpr = C->getLHS()) {
+ if (const Expr *LHSExpr = C->getLHS()) {
// In C++, we can have a throw-expression, which has 'void' type.
if (!LHSExpr->getType()->isVoidType())
- if (Expr *LHS = EvalAddr(LHSExpr, refVars, ParentDecl))
+ if (const Expr *LHS = EvalAddr(LHSExpr, refVars, ParentDecl))
return LHS;
}
@@ -5815,7 +6913,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
case Stmt::CXXDynamicCastExprClass:
case Stmt::CXXConstCastExprClass:
case Stmt::CXXReinterpretCastExprClass: {
- Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
+ const Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
switch (cast<CastExpr>(E)->getCastKind()) {
case CK_LValueToRValue:
case CK_NoOp:
@@ -5845,157 +6943,161 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
}
case Stmt::MaterializeTemporaryExprClass:
- if (Expr *Result = EvalAddr(
- cast<MaterializeTemporaryExpr>(E)->GetTemporaryExpr(),
- refVars, ParentDecl))
+ if (const Expr *Result =
+ EvalAddr(cast<MaterializeTemporaryExpr>(E)->GetTemporaryExpr(),
+ refVars, ParentDecl))
return Result;
-
return E;
-
+
// Everything else: we simply don't reason about them.
default:
return nullptr;
}
}
-
/// EvalVal - This function complements EvalAddr in the mutual recursion.
/// See the comments for EvalAddr for more details.
-static Expr *EvalVal(Expr *E, SmallVectorImpl<DeclRefExpr *> &refVars,
- Decl *ParentDecl) {
-do {
- // We should only be called for evaluating non-pointer expressions, or
- // expressions with a pointer type that are not used as references but instead
- // are l-values (e.g., DeclRefExpr with a pointer type).
-
- // Our "symbolic interpreter" is just a dispatch off the currently
- // viewed AST node. We then recursively traverse the AST by calling
- // EvalAddr and EvalVal appropriately.
-
- E = E->IgnoreParens();
- switch (E->getStmtClass()) {
- case Stmt::ImplicitCastExprClass: {
- ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
- if (IE->getValueKind() == VK_LValue) {
- E = IE->getSubExpr();
- continue;
+static const Expr *EvalVal(const Expr *E,
+ SmallVectorImpl<const DeclRefExpr *> &refVars,
+ const Decl *ParentDecl) {
+ do {
+ // We should only be called for evaluating non-pointer expressions, or
+ // expressions with a pointer type that are not used as references but
+ // instead are l-values (e.g., DeclRefExpr with a pointer type).
+
+ // Our "symbolic interpreter" is just a dispatch off the currently
+ // viewed AST node. We then recursively traverse the AST by calling
+ // EvalAddr and EvalVal appropriately.
+
+ E = E->IgnoreParens();
+ switch (E->getStmtClass()) {
+ case Stmt::ImplicitCastExprClass: {
+ const ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
+ if (IE->getValueKind() == VK_LValue) {
+ E = IE->getSubExpr();
+ continue;
+ }
+ return nullptr;
}
- return nullptr;
- }
-
- case Stmt::ExprWithCleanupsClass:
- return EvalVal(cast<ExprWithCleanups>(E)->getSubExpr(), refVars,ParentDecl);
- case Stmt::DeclRefExprClass: {
- // When we hit a DeclRefExpr we are looking at code that refers to a
- // variable's name. If it's not a reference variable we check if it has
- // local storage within the function, and if so, return the expression.
- DeclRefExpr *DR = cast<DeclRefExpr>(E);
+ case Stmt::ExprWithCleanupsClass:
+ return EvalVal(cast<ExprWithCleanups>(E)->getSubExpr(), refVars,
+ ParentDecl);
- // If we leave the immediate function, the lifetime isn't about to end.
- if (DR->refersToEnclosingVariableOrCapture())
- return nullptr;
+ case Stmt::DeclRefExprClass: {
+ // When we hit a DeclRefExpr we are looking at code that refers to a
+ // variable's name. If it's not a reference variable we check if it has
+ // local storage within the function, and if so, return the expression.
+ const DeclRefExpr *DR = cast<DeclRefExpr>(E);
- if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) {
- // Check if it refers to itself, e.g. "int& i = i;".
- if (V == ParentDecl)
- return DR;
+ // If we leave the immediate function, the lifetime isn't about to end.
+ if (DR->refersToEnclosingVariableOrCapture())
+ return nullptr;
- if (V->hasLocalStorage()) {
- if (!V->getType()->isReferenceType())
+ if (const VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) {
+ // Check if it refers to itself, e.g. "int& i = i;".
+ if (V == ParentDecl)
return DR;
- // Reference variable, follow through to the expression that
- // it points to.
- if (V->hasInit()) {
- // Add the reference variable to the "trail".
- refVars.push_back(DR);
- return EvalVal(V->getInit(), refVars, V);
+ if (V->hasLocalStorage()) {
+ if (!V->getType()->isReferenceType())
+ return DR;
+
+ // Reference variable, follow through to the expression that
+ // it points to.
+ if (V->hasInit()) {
+ // Add the reference variable to the "trail".
+ refVars.push_back(DR);
+ return EvalVal(V->getInit(), refVars, V);
+ }
}
}
- }
-
- return nullptr;
- }
- case Stmt::UnaryOperatorClass: {
- // The only unary operator that make sense to handle here
- // is Deref. All others don't resolve to a "name." This includes
- // handling all sorts of rvalues passed to a unary operator.
- UnaryOperator *U = cast<UnaryOperator>(E);
-
- if (U->getOpcode() == UO_Deref)
- return EvalAddr(U->getSubExpr(), refVars, ParentDecl);
+ return nullptr;
+ }
- return nullptr;
- }
+ case Stmt::UnaryOperatorClass: {
+ // The only unary operator that makes sense to handle here
+ // is Deref. All others don't resolve to a "name." This includes
+ // handling all sorts of rvalues passed to a unary operator.
+ const UnaryOperator *U = cast<UnaryOperator>(E);
- case Stmt::ArraySubscriptExprClass: {
- // Array subscripts are potential references to data on the stack. We
- // retrieve the DeclRefExpr* for the array variable if it indeed
- // has local storage.
- return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase(), refVars,ParentDecl);
- }
+ if (U->getOpcode() == UO_Deref)
+ return EvalAddr(U->getSubExpr(), refVars, ParentDecl);
- case Stmt::OMPArraySectionExprClass: {
- return EvalAddr(cast<OMPArraySectionExpr>(E)->getBase(), refVars,
- ParentDecl);
- }
+ return nullptr;
+ }
- case Stmt::ConditionalOperatorClass: {
- // For conditional operators we need to see if either the LHS or RHS are
- // non-NULL Expr's. If one is non-NULL, we return it.
- ConditionalOperator *C = cast<ConditionalOperator>(E);
+ case Stmt::ArraySubscriptExprClass: {
+ // Array subscripts are potential references to data on the stack. We
+ // retrieve the DeclRefExpr* for the array variable if it indeed
+ // has local storage.
+ const auto *ASE = cast<ArraySubscriptExpr>(E);
+ if (ASE->isTypeDependent())
+ return nullptr;
+ return EvalAddr(ASE->getBase(), refVars, ParentDecl);
+ }
- // Handle the GNU extension for missing LHS.
- if (Expr *LHSExpr = C->getLHS()) {
- // In C++, we can have a throw-expression, which has 'void' type.
- if (!LHSExpr->getType()->isVoidType())
- if (Expr *LHS = EvalVal(LHSExpr, refVars, ParentDecl))
- return LHS;
+ case Stmt::OMPArraySectionExprClass: {
+ return EvalAddr(cast<OMPArraySectionExpr>(E)->getBase(), refVars,
+ ParentDecl);
}
- // In C++, we can have a throw-expression, which has 'void' type.
- if (C->getRHS()->getType()->isVoidType())
- return nullptr;
+ case Stmt::ConditionalOperatorClass: {
+ // For conditional operators we need to see if either the LHS or RHS are
+ // non-NULL Expr's. If one is non-NULL, we return it.
+ const ConditionalOperator *C = cast<ConditionalOperator>(E);
+
+ // Handle the GNU extension for missing LHS.
+ if (const Expr *LHSExpr = C->getLHS()) {
+ // In C++, we can have a throw-expression, which has 'void' type.
+ if (!LHSExpr->getType()->isVoidType())
+ if (const Expr *LHS = EvalVal(LHSExpr, refVars, ParentDecl))
+ return LHS;
+ }
- return EvalVal(C->getRHS(), refVars, ParentDecl);
- }
+ // In C++, we can have a throw-expression, which has 'void' type.
+ if (C->getRHS()->getType()->isVoidType())
+ return nullptr;
- // Accesses to members are potential references to data on the stack.
- case Stmt::MemberExprClass: {
- MemberExpr *M = cast<MemberExpr>(E);
+ return EvalVal(C->getRHS(), refVars, ParentDecl);
+ }
- // Check for indirect access. We only want direct field accesses.
- if (M->isArrow())
- return nullptr;
+ // Accesses to members are potential references to data on the stack.
+ case Stmt::MemberExprClass: {
+ const MemberExpr *M = cast<MemberExpr>(E);
- // Check whether the member type is itself a reference, in which case
- // we're not going to refer to the member, but to what the member refers to.
- if (M->getMemberDecl()->getType()->isReferenceType())
- return nullptr;
+ // Check for indirect access. We only want direct field accesses.
+ if (M->isArrow())
+ return nullptr;
- return EvalVal(M->getBase(), refVars, ParentDecl);
- }
+ // Check whether the member type is itself a reference, in which case
+ // we're not going to refer to the member, but to what the member refers
+ // to.
+ if (M->getMemberDecl()->getType()->isReferenceType())
+ return nullptr;
- case Stmt::MaterializeTemporaryExprClass:
- if (Expr *Result = EvalVal(
- cast<MaterializeTemporaryExpr>(E)->GetTemporaryExpr(),
- refVars, ParentDecl))
- return Result;
-
- return E;
+ return EvalVal(M->getBase(), refVars, ParentDecl);
+ }
- default:
- // Check that we don't return or take the address of a reference to a
- // temporary. This is only useful in C++.
- if (!E->isTypeDependent() && E->isRValue())
+ case Stmt::MaterializeTemporaryExprClass:
+ if (const Expr *Result =
+ EvalVal(cast<MaterializeTemporaryExpr>(E)->GetTemporaryExpr(),
+ refVars, ParentDecl))
+ return Result;
return E;
- // Everything else: we simply don't reason about them.
- return nullptr;
- }
-} while (true);
+ default:
+ // Check that we don't return or take the address of a reference to a
+ // temporary. This is only useful in C++.
+ if (!E->isTypeDependent() && E->isRValue())
+ return E;
+
+ // Everything else: we simply don't reason about them.
+ return nullptr;
+ }
+ } while (true);
}
void
@@ -6047,7 +7149,6 @@ void Sema::CheckFloatComparison(SourceLocation Loc, Expr* LHS, Expr *RHS) {
if (DRL->getDecl() == DRR->getDecl())
return;
-
// Special case: check for comparisons against literals that can be exactly
// represented by APFloat. In such cases, do not emit a warning. This
// is a heuristic: often comparison against such literals are used to
@@ -6173,8 +7274,7 @@ struct IntRange {
}
};
-static IntRange GetValueRange(ASTContext &C, llvm::APSInt &value,
- unsigned MaxWidth) {
+IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) {
if (value.isSigned() && value.isNegative())
return IntRange(value.getMinSignedBits(), false);
@@ -6186,8 +7286,8 @@ static IntRange GetValueRange(ASTContext &C, llvm::APSInt &value,
return IntRange(value.getActiveBits(), true);
}
-static IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
- unsigned MaxWidth) {
+IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
+ unsigned MaxWidth) {
if (result.isInt())
return GetValueRange(C, result.getInt(), MaxWidth);
@@ -6215,7 +7315,7 @@ static IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
return IntRange(MaxWidth, Ty->isUnsignedIntegerOrEnumerationType());
}
-static QualType GetExprType(Expr *E) {
+QualType GetExprType(const Expr *E) {
QualType Ty = E->getType();
if (const AtomicType *AtomicRHS = Ty->getAs<AtomicType>())
Ty = AtomicRHS->getValueType();
@@ -6226,7 +7326,7 @@ static QualType GetExprType(Expr *E) {
/// range of values it might take.
///
/// \param MaxWidth - the width to which the value will be truncated
-static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
+IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth) {
E = E->IgnoreParens();
// Try a full evaluation first.
@@ -6237,7 +7337,7 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
// I think we only want to look through implicit casts here; if the
// user has an explicit widening cast, we should treat the value as
// being of the new, wider type.
- if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) {
+ if (const auto *CE = dyn_cast<ImplicitCastExpr>(E)) {
if (CE->getCastKind() == CK_NoOp || CE->getCastKind() == CK_LValueToRValue)
return GetExprRange(C, CE->getSubExpr(), MaxWidth);
@@ -6264,7 +7364,7 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
SubRange.NonNegative || OutputTypeRange.NonNegative);
}
- if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
+ if (const auto *CO = dyn_cast<ConditionalOperator>(E)) {
// If we can fold the condition, just take that operand.
bool CondResult;
if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C))
@@ -6278,7 +7378,7 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
return IntRange::join(L, R);
}
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
+ if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
switch (BO->getOpcode()) {
// Boolean-valued operations are single-bit and positive.
@@ -6418,7 +7518,7 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
return IntRange::join(L, R);
}
- if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
+ if (const auto *UO = dyn_cast<UnaryOperator>(E)) {
switch (UO->getOpcode()) {
// Boolean-valued operations are white-listed.
case UO_LNot:
@@ -6434,26 +7534,26 @@ static IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
}
}
- if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(E))
+ if (const auto *OVE = dyn_cast<OpaqueValueExpr>(E))
return GetExprRange(C, OVE->getSourceExpr(), MaxWidth);
- if (FieldDecl *BitField = E->getSourceBitField())
+ if (const auto *BitField = E->getSourceBitField())
return IntRange(BitField->getBitWidthValue(C),
BitField->getType()->isUnsignedIntegerOrEnumerationType());
return IntRange::forValueOfType(C, GetExprType(E));
}
-static IntRange GetExprRange(ASTContext &C, Expr *E) {
+IntRange GetExprRange(ASTContext &C, const Expr *E) {
return GetExprRange(C, E, C.getIntWidth(GetExprType(E)));
}
/// Checks whether the given value, which currently has the given
/// source semantics, has the same value when coerced through the
/// target semantics.
-static bool IsSameFloatAfterCast(const llvm::APFloat &value,
- const llvm::fltSemantics &Src,
- const llvm::fltSemantics &Tgt) {
+bool IsSameFloatAfterCast(const llvm::APFloat &value,
+ const llvm::fltSemantics &Src,
+ const llvm::fltSemantics &Tgt) {
llvm::APFloat truncated = value;
bool ignored;
@@ -6468,9 +7568,9 @@ static bool IsSameFloatAfterCast(const llvm::APFloat &value,
/// target semantics.
///
/// The value might be a vector of floats (or a complex number).
-static bool IsSameFloatAfterCast(const APValue &value,
- const llvm::fltSemantics &Src,
- const llvm::fltSemantics &Tgt) {
+bool IsSameFloatAfterCast(const APValue &value,
+ const llvm::fltSemantics &Src,
+ const llvm::fltSemantics &Tgt) {
if (value.isFloat())
return IsSameFloatAfterCast(value.getFloat(), Src, Tgt);
@@ -6486,9 +7586,9 @@ static bool IsSameFloatAfterCast(const APValue &value,
IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt));
}
-static void AnalyzeImplicitConversions(Sema &S, Expr *E, SourceLocation CC);
+void AnalyzeImplicitConversions(Sema &S, Expr *E, SourceLocation CC);
-static bool IsZero(Sema &S, Expr *E) {
+bool IsZero(Sema &S, Expr *E) {
// Suppress cases where we are comparing against an enum constant.
if (const DeclRefExpr *DR =
dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
@@ -6503,7 +7603,7 @@ static bool IsZero(Sema &S, Expr *E) {
return E->isIntegerConstantExpr(Value, S.Context) && Value == 0;
}
-static bool HasEnumType(Expr *E) {
+bool HasEnumType(Expr *E) {
// Strip off implicit integral promotions.
while (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) {
if (ICE->getCastKind() != CK_IntegralCast &&
@@ -6515,7 +7615,7 @@ static bool HasEnumType(Expr *E) {
return E->getType()->isEnumeralType();
}
-static void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) {
+void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) {
// Disable warning in template instantiations.
if (!S.ActiveTemplateInstantiations.empty())
return;
@@ -6543,10 +7643,9 @@ static void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) {
}
}
-static void DiagnoseOutOfRangeComparison(Sema &S, BinaryOperator *E,
- Expr *Constant, Expr *Other,
- llvm::APSInt Value,
- bool RhsConstant) {
+void DiagnoseOutOfRangeComparison(Sema &S, BinaryOperator *E, Expr *Constant,
+ Expr *Other, const llvm::APSInt &Value,
+ bool RhsConstant) {
// Disable warning in template instantiations.
if (!S.ActiveTemplateInstantiations.empty())
return;
@@ -6754,7 +7853,7 @@ static void DiagnoseOutOfRangeComparison(Sema &S, BinaryOperator *E,
/// Analyze the operands of the given comparison. Implements the
/// fallback case from AnalyzeComparison.
-static void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) {
+void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) {
AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc());
AnalyzeImplicitConversions(S, E->getRHS(), E->getOperatorLoc());
}
@@ -6762,7 +7861,7 @@ static void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) {
/// \brief Implements -Wsign-compare.
///
/// \param E the binary operator to check for warnings
-static void AnalyzeComparison(Sema &S, BinaryOperator *E) {
+void AnalyzeComparison(Sema &S, BinaryOperator *E) {
// The type the comparison is being performed in.
QualType T = E->getLHS()->getType();
@@ -6863,8 +7962,8 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) {
/// Analyzes an attempt to assign the given value to a bitfield.
///
/// Returns true if there was something fishy about the attempt.
-static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
- SourceLocation InitLoc) {
+bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
+ SourceLocation InitLoc) {
assert(Bitfield->isBitField());
if (Bitfield->isInvalidDecl())
return false;
@@ -6889,6 +7988,12 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
unsigned OriginalWidth = Value.getBitWidth();
unsigned FieldWidth = Bitfield->getBitWidthValue(S.Context);
+ if (Value.isSigned() && Value.isNegative())
+ if (UnaryOperator *UO = dyn_cast<UnaryOperator>(OriginalInit))
+ if (UO->getOpcode() == UO_Minus)
+ if (isa<IntegerLiteral>(UO->getSubExpr()))
+ OriginalWidth = Value.getMinSignedBits();
+
if (OriginalWidth <= FieldWidth)
return false;
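// Illustrative example (editor's note, not part of the patch): a negated
// integer literal is now measured by its minimal signed width, so the
// common "set all bits" idiom stops warning:
//
//   struct S { int bf : 3; };
//   void f(struct S *s) {
//     s->bf = -1; // OK now: getMinSignedBits() == 1 <= 3
//     s->bf = -8; // still warns: -8 needs 4 bits, truncates to 0
//   }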
@@ -6918,7 +8023,7 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init,
/// Analyze the given simple or compound assignment for warning-worthy
/// operations.
-static void AnalyzeAssignment(Sema &S, BinaryOperator *E) {
+void AnalyzeAssignment(Sema &S, BinaryOperator *E) {
// Just recurse on the LHS.
AnalyzeImplicitConversions(S, E->getLHS(), E->getOperatorLoc());
@@ -6937,9 +8042,9 @@ static void AnalyzeAssignment(Sema &S, BinaryOperator *E) {
}
/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion.
-static void DiagnoseImpCast(Sema &S, Expr *E, QualType SourceType, QualType T,
- SourceLocation CContext, unsigned diag,
- bool pruneControlFlow = false) {
+void DiagnoseImpCast(Sema &S, Expr *E, QualType SourceType, QualType T,
+ SourceLocation CContext, unsigned diag,
+ bool pruneControlFlow = false) {
if (pruneControlFlow) {
S.DiagRuntimeBehavior(E->getExprLoc(), E,
S.PDiag(diag)
@@ -6952,25 +8057,75 @@ static void DiagnoseImpCast(Sema &S, Expr *E, QualType SourceType, QualType T,
}
/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion.
-static void DiagnoseImpCast(Sema &S, Expr *E, QualType T,
- SourceLocation CContext, unsigned diag,
- bool pruneControlFlow = false) {
+void DiagnoseImpCast(Sema &S, Expr *E, QualType T, SourceLocation CContext,
+ unsigned diag, bool pruneControlFlow = false) {
DiagnoseImpCast(S, E, E->getType(), T, CContext, diag, pruneControlFlow);
}
-/// Diagnose an implicit cast from a literal expression. Does not warn when the
-/// cast wouldn't lose information.
-void DiagnoseFloatingLiteralImpCast(Sema &S, FloatingLiteral *FL, QualType T,
- SourceLocation CContext) {
- // Try to convert the literal exactly to an integer. If we can, don't warn.
+
+/// Diagnose an implicit cast from a floating point value to an integer value.
+void DiagnoseFloatingImpCast(Sema &S, Expr *E, QualType T,
+ SourceLocation CContext) {
+ const bool IsBool = T->isSpecificBuiltinType(BuiltinType::Bool);
+ const bool PruneWarnings = !S.ActiveTemplateInstantiations.empty();
+
+ Expr *InnerE = E->IgnoreParenImpCasts();
+ // We also want to warn on, e.g., "int i = -1.234"
+ if (UnaryOperator *UOp = dyn_cast<UnaryOperator>(InnerE))
+ if (UOp->getOpcode() == UO_Minus || UOp->getOpcode() == UO_Plus)
+ InnerE = UOp->getSubExpr()->IgnoreParenImpCasts();
+
+ const bool IsLiteral =
+ isa<FloatingLiteral>(E) || isa<FloatingLiteral>(InnerE);
+
+ llvm::APFloat Value(0.0);
+ bool IsConstant =
+ E->EvaluateAsFloat(Value, S.Context, Expr::SE_AllowSideEffects);
+ if (!IsConstant) {
+ return DiagnoseImpCast(S, E, T, CContext,
+ diag::warn_impcast_float_integer, PruneWarnings);
+ }
+
bool isExact = false;
- const llvm::APFloat &Value = FL->getValue();
+
llvm::APSInt IntegerValue(S.Context.getIntWidth(T),
T->hasUnsignedIntegerRepresentation());
- if (Value.convertToInteger(IntegerValue,
- llvm::APFloat::rmTowardZero, &isExact)
- == llvm::APFloat::opOK && isExact)
- return;
+ if (Value.convertToInteger(IntegerValue, llvm::APFloat::rmTowardZero,
+ &isExact) == llvm::APFloat::opOK &&
+ isExact) {
+ if (IsLiteral) return;
+ return DiagnoseImpCast(S, E, T, CContext, diag::warn_impcast_float_integer,
+ PruneWarnings);
+ }
+
+ unsigned DiagID = 0;
+ if (IsLiteral) {
+ // Warn on floating point literal to integer.
+ DiagID = diag::warn_impcast_literal_float_to_integer;
+ } else if (IntegerValue == 0) {
+ if (Value.isZero()) { // Skip -0.0 to 0 conversion.
+ return DiagnoseImpCast(S, E, T, CContext,
+ diag::warn_impcast_float_integer, PruneWarnings);
+ }
+ // Warn on non-zero to zero conversion.
+ DiagID = diag::warn_impcast_float_to_integer_zero;
+ } else {
+ if (IntegerValue.isUnsigned()) {
+ if (!IntegerValue.isMaxValue()) {
+ return DiagnoseImpCast(S, E, T, CContext,
+ diag::warn_impcast_float_integer, PruneWarnings);
+ }
+ } else { // IntegerValue.isSigned()
+ if (!IntegerValue.isMaxSignedValue() &&
+ !IntegerValue.isMinSignedValue()) {
+ return DiagnoseImpCast(S, E, T, CContext,
+ diag::warn_impcast_float_integer, PruneWarnings);
+ }
+ }
+ // Warn on evaluatable floating point expression to integer conversion.
+ DiagID = diag::warn_impcast_float_to_integer;
+ }
// FIXME: Force the precision of the source value down so we don't print
// digits which are usually useless (we don't really care here if we
@@ -6983,14 +8138,22 @@ void DiagnoseFloatingLiteralImpCast(Sema &S, FloatingLiteral *FL, QualType T,
Value.toString(PrettySourceValue, precision);
SmallString<16> PrettyTargetValue;
- if (T->isSpecificBuiltinType(BuiltinType::Bool))
+ if (IsBool)
PrettyTargetValue = Value.isZero() ? "false" : "true";
else
IntegerValue.toString(PrettyTargetValue);
- S.Diag(FL->getExprLoc(), diag::warn_impcast_literal_float_to_integer)
- << FL->getType() << T.getUnqualifiedType() << PrettySourceValue
- << PrettyTargetValue << FL->getSourceRange() << SourceRange(CContext);
+ if (PruneWarnings) {
+ S.DiagRuntimeBehavior(E->getExprLoc(), E,
+ S.PDiag(DiagID)
+ << E->getType() << T.getUnqualifiedType()
+ << PrettySourceValue << PrettyTargetValue
+ << E->getSourceRange() << SourceRange(CContext));
+ } else {
+ S.Diag(E->getExprLoc(), DiagID)
+ << E->getType() << T.getUnqualifiedType() << PrettySourceValue
+ << PrettyTargetValue << E->getSourceRange() << SourceRange(CContext);
+ }
}
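// Illustrative examples (editor's note, not part of the patch): the
// rewritten routine above picks among the diagnostics it defines; the
// example values and their classification are assumed:
//
//   int  a = 1.5;          // literal:  warn_impcast_literal_float_to_integer
//   int  b = 0.5 + 0.4;    // folds to 0.9, becomes 0:
//                          //   warn_impcast_float_to_integer_zero
//   char c = 200.0 * 2.0;  // folds to 400.0, saturates at CHAR_MAX:
//                          //   warn_impcast_float_to_integer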
std::string PrettyPrintInRange(const llvm::APSInt &Value, IntRange Range) {
@@ -7002,7 +8165,7 @@ std::string PrettyPrintInRange(const llvm::APSInt &Value, IntRange Range) {
return ValueInRange.toString(10);
}
-static bool IsImplicitBoolFloatConversion(Sema &S, Expr *Ex, bool ToBool) {
+bool IsImplicitBoolFloatConversion(Sema &S, Expr *Ex, bool ToBool) {
if (!isa<ImplicitCastExpr>(Ex))
return false;
@@ -7042,8 +8205,7 @@ void CheckImplicitArgumentConversions(Sema &S, CallExpr *TheCall,
}
}
-static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T,
- SourceLocation CC) {
+void DiagnoseNullConversion(Sema &S, Expr *E, QualType T, SourceLocation CC) {
if (S.Diags.isIgnored(diag::warn_impcast_null_pointer_to_integer,
E->getExprLoc()))
return;
@@ -7065,14 +8227,21 @@ static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T,
SourceLocation Loc = E->getSourceRange().getBegin();
+ // Venture through the macro stacks to get to the source of macro arguments.
+ // The new location is a better location than the complete location that was
+ // passed in.
+ while (S.SourceMgr.isMacroArgExpansion(Loc))
+ Loc = S.SourceMgr.getImmediateMacroCallerLoc(Loc);
+
+ while (S.SourceMgr.isMacroArgExpansion(CC))
+ CC = S.SourceMgr.getImmediateMacroCallerLoc(CC);
+
// __null is usually wrapped in a macro. Go up a macro if that is the case.
- if (NullKind == Expr::NPCK_GNUNull) {
- if (Loc.isMacroID()) {
- StringRef MacroName =
- Lexer::getImmediateMacroName(Loc, S.SourceMgr, S.getLangOpts());
- if (MacroName == "NULL")
- Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first;
- }
+ if (NullKind == Expr::NPCK_GNUNull && Loc.isMacroID()) {
+ StringRef MacroName = Lexer::getImmediateMacroNameForDiagnostics(
+ Loc, S.SourceMgr, S.getLangOpts());
+ if (MacroName == "NULL")
+ Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first;
}
// Only warn if the null and context location are in the same macro expansion.
@@ -7085,17 +8254,15 @@ static void DiagnoseNullConversion(Sema &S, Expr *E, QualType T,
S.getFixItZeroLiteralForType(T, Loc));
}
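// Illustrative example (editor's note, not part of the patch): walking up
// macro-argument expansions lets the null-conversion warning point at the
// use site instead of into a macro body. 'INIT' below is hypothetical:
//
//   #define INIT(x) do { int _v = (x); use(_v); } while (0)
//   INIT(NULL);   // warning now lands on this 'NULL' token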
-static void checkObjCArrayLiteral(Sema &S, QualType TargetType,
- ObjCArrayLiteral *ArrayLiteral);
-static void checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
- ObjCDictionaryLiteral *DictionaryLiteral);
+void checkObjCArrayLiteral(Sema &S, QualType TargetType,
+ ObjCArrayLiteral *ArrayLiteral);
+void checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
+ ObjCDictionaryLiteral *DictionaryLiteral);
/// Check a single element within a collection literal against the
/// target element type.
-static void checkObjCCollectionLiteralElement(Sema &S,
- QualType TargetElementType,
- Expr *Element,
- unsigned ElementKind) {
+void checkObjCCollectionLiteralElement(Sema &S, QualType TargetElementType,
+ Expr *Element, unsigned ElementKind) {
// Skip a bitcast to 'id' or qualified 'id'.
if (auto ICE = dyn_cast<ImplicitCastExpr>(Element)) {
if (ICE->getCastKind() == CK_BitCast &&
@@ -7124,8 +8291,8 @@ static void checkObjCCollectionLiteralElement(Sema &S,
/// Check an Objective-C array literal being converted to the given
/// target type.
-static void checkObjCArrayLiteral(Sema &S, QualType TargetType,
- ObjCArrayLiteral *ArrayLiteral) {
+void checkObjCArrayLiteral(Sema &S, QualType TargetType,
+ ObjCArrayLiteral *ArrayLiteral) {
if (!S.NSArrayDecl)
return;
@@ -7152,9 +8319,8 @@ static void checkObjCArrayLiteral(Sema &S, QualType TargetType,
/// Check an Objective-C dictionary literal being converted to the given
/// target type.
-static void checkObjCDictionaryLiteral(
- Sema &S, QualType TargetType,
- ObjCDictionaryLiteral *DictionaryLiteral) {
+void checkObjCDictionaryLiteral(Sema &S, QualType TargetType,
+ ObjCDictionaryLiteral *DictionaryLiteral) {
if (!S.NSDictionaryDecl)
return;
@@ -7180,6 +8346,32 @@ static void checkObjCDictionaryLiteral(
}
}
+// Helper function to filter out cases for the constant-width constant
+// conversion warning. Don't warn on char array initialization or for
+// non-decimal values.
+bool isSameWidthConstantConversion(Sema &S, Expr *E, QualType T,
+ SourceLocation CC) {
+ // If initializing from a constant, and the constant starts with '0',
+  // then it is a binary, octal, or hexadecimal constant. Allow such
+  // constants to fill all the bits, even if there is a sign change.
+ if (auto *IntLit = dyn_cast<IntegerLiteral>(E->IgnoreParenImpCasts())) {
+ const char FirstLiteralCharacter =
+ S.getSourceManager().getCharacterData(IntLit->getLocStart())[0];
+ if (FirstLiteralCharacter == '0')
+ return false;
+ }
+
+  // If the CC location points to a '{', and the type is char, then assume
+  // it is an array initialization.
+ if (CC.isValid() && T->isCharType()) {
+ const char FirstContextCharacter =
+ S.getSourceManager().getCharacterData(CC)[0];
+ if (FirstContextCharacter == '{')
+ return false;
+ }
+
+ return true;
+}
+
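A minimal sketch of how this filter feeds the new same-width constant warning added further down (illustrative user code, not part of the diff):

signed char a = 128;      // warned: the positive constant fills all 8 bits,
                          // so a negative value is stored
signed char b = 0x80;     // not warned: the literal starts with '0'
                          // (binary/octal/hex may deliberately fill the bits)
char name[] = {200, 0};   // not warned: the context location is a '{' with a
                          // char type, so this is treated as array init
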
void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
SourceLocation CC, bool *ICContext = nullptr) {
if (E->isTypeDependent() || E->isValueDependent()) return;
@@ -7284,7 +8476,6 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
return;
DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_float_precision);
-
}
// ... or possibly if we're increasing rank, too
else if (TargetBT->getKind() > SourceBT->getKind()) {
@@ -7296,22 +8487,12 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
return;
}
- // If the target is integral, always warn.
+ // If the target is integral, always warn.
if (TargetBT && TargetBT->isInteger()) {
if (S.SourceMgr.isInSystemMacro(CC))
return;
-
- Expr *InnerE = E->IgnoreParenImpCasts();
- // We also want to warn on, e.g., "int i = -1.234"
- if (UnaryOperator *UOp = dyn_cast<UnaryOperator>(InnerE))
- if (UOp->getOpcode() == UO_Minus || UOp->getOpcode() == UO_Plus)
- InnerE = UOp->getSubExpr()->IgnoreParenImpCasts();
-
- if (FloatingLiteral *FL = dyn_cast<FloatingLiteral>(InnerE)) {
- DiagnoseFloatingLiteralImpCast(S, FL, T, CC);
- } else {
- DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_float_integer);
- }
+
+ DiagnoseFloatingImpCast(S, E, T, CC);
}
// Detect the case where a call result is converted from floating-point to
@@ -7358,7 +8539,7 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
// If the source is a constant, use a default-on diagnostic.
// TODO: this should happen for bitfield stores, too.
llvm::APSInt Value(32);
- if (E->isIntegerConstantExpr(Value, S.Context)) {
+ if (E->EvaluateAsInt(Value, S.Context, Expr::SE_AllowSideEffects)) {
if (S.SourceMgr.isInSystemMacro(CC))
return;
@@ -7383,10 +8564,34 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
return DiagnoseImpCast(S, E, T, CC, diag::warn_impcast_integer_precision);
}
+ if (TargetRange.Width == SourceRange.Width && !TargetRange.NonNegative &&
+ SourceRange.NonNegative && Source->isSignedIntegerType()) {
+    // This is a signed-to-signed conversion; warn if the positive source
+    // value occupies the full width of the target type, since the value
+    // stored will then be negative.
+
+ llvm::APSInt Value;
+ if (E->EvaluateAsInt(Value, S.Context, Expr::SE_AllowSideEffects) &&
+ !S.SourceMgr.isInSystemMacro(CC)) {
+ if (isSameWidthConstantConversion(S, E, T, CC)) {
+ std::string PrettySourceValue = Value.toString(10);
+ std::string PrettyTargetValue = PrettyPrintInRange(Value, TargetRange);
+
+ S.DiagRuntimeBehavior(
+ E->getExprLoc(), E,
+ S.PDiag(diag::warn_impcast_integer_precision_constant)
+ << PrettySourceValue << PrettyTargetValue << E->getType() << T
+ << E->getSourceRange() << clang::SourceRange(CC));
+ return;
+ }
+ }
+
+ // Fall through for non-constants to give a sign conversion warning.
+ }
+
if ((TargetRange.NonNegative && !SourceRange.NonNegative) ||
(!TargetRange.NonNegative && SourceRange.NonNegative &&
SourceRange.Width == TargetRange.Width)) {
-
if (S.SourceMgr.isInSystemMacro(CC))
return;
@@ -7429,8 +8634,6 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
return DiagnoseImpCast(S, E, SourceType, T, CC,
diag::warn_impcast_different_enum_types);
}
-
- return;
}
void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
@@ -7446,7 +8649,6 @@ void CheckConditionalOperand(Sema &S, Expr *E, QualType T,
AnalyzeImplicitConversions(S, E, CC);
if (E->getType() != T)
return CheckImplicitConversion(S, E, T, CC, &ICContext);
- return;
}
void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
@@ -7479,7 +8681,7 @@ void CheckConditionalOperator(Sema &S, ConditionalOperator *E,
/// CheckBoolLikeConversion - Check conversion of given expression to boolean.
/// Input argument E is a logical expression.
-static void CheckBoolLikeConversion(Sema &S, Expr *E, SourceLocation CC) {
+void CheckBoolLikeConversion(Sema &S, Expr *E, SourceLocation CC) {
if (S.getLangOpts().Bool)
return;
CheckImplicitConversion(S, E->IgnoreParenImpCasts(), S.Context.BoolTy, CC);
@@ -7583,10 +8785,31 @@ void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC) {
} // end anonymous namespace
+static bool checkOpenCLEnqueueLocalSizeArgs(Sema &S, CallExpr *TheCall,
+ unsigned Start, unsigned End) {
+ bool IllegalParams = false;
+ for (unsigned I = Start; I <= End; ++I) {
+ QualType Ty = TheCall->getArg(I)->getType();
+    // Taking implicit conversions into account, allow any integer type
+    // that is no wider than 32 bits.
+ if (!Ty->isIntegerType() ||
+ S.Context.getTypeSizeInChars(Ty).getQuantity() > 4) {
+ S.Diag(TheCall->getArg(I)->getLocStart(),
+ diag::err_opencl_enqueue_kernel_invalid_local_size_type);
+ IllegalParams = true;
+ }
+ // Potentially emit standard warnings for implicit conversions if enabled
+ // using -Wconversion.
+ CheckImplicitConversion(S, TheCall->getArg(I), S.Context.UnsignedIntTy,
+ TheCall->getArg(I)->getLocStart());
+ }
+ return IllegalParams;
+}
+
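For concreteness, in OpenCL C user code (a sketch assuming the usual OpenCL 2.0 enqueue_kernel builtin; diagnostic text paraphrased), the local-size arguments validated above behave like:

kernel void host(queue_t q, ndrange_t r) {
  enqueue_kernel(q, CLK_ENQUEUE_FLAGS_NO_WAIT, r,
                 ^(local void *p) {}, 64);        // OK: fits within 32 bits
  enqueue_kernel(q, CLK_ENQUEUE_FLAGS_NO_WAIT, r,
                 ^(local void *p) {}, (long)64);  // error: 64-bit local size
}
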
// Helper function for Sema::DiagnoseAlwaysNonNullPointer.
// Returns true when emitting a warning about taking the address of a reference.
static bool CheckForReference(Sema &SemaRef, const Expr *E,
- PartialDiagnostic PD) {
+ const PartialDiagnostic &PD) {
E = E->IgnoreParenImpCasts();
const FunctionDecl *FD = nullptr;
@@ -7681,7 +8904,8 @@ void Sema::DiagnoseAlwaysNonNullPointer(Expr *E,
}
}
- auto ComplainAboutNonnullParamOrCall = [&](bool IsParam) {
+ auto ComplainAboutNonnullParamOrCall = [&](const Attr *NonnullAttr) {
+ bool IsParam = isa<NonNullAttr>(NonnullAttr);
std::string Str;
llvm::raw_string_ostream S(Str);
E->printPretty(S, nullptr, getPrintingPolicy());
@@ -7689,13 +8913,14 @@ void Sema::DiagnoseAlwaysNonNullPointer(Expr *E,
: diag::warn_cast_nonnull_to_bool;
Diag(E->getExprLoc(), DiagID) << IsParam << S.str()
<< E->getSourceRange() << Range << IsEqual;
+ Diag(NonnullAttr->getLocation(), diag::note_declared_nonnull) << IsParam;
};
// If we have a CallExpr that is tagged with returns_nonnull, we can complain.
if (auto *Call = dyn_cast<CallExpr>(E->IgnoreParenImpCasts())) {
if (auto *Callee = Call->getDirectCallee()) {
- if (Callee->hasAttr<ReturnsNonNullAttr>()) {
- ComplainAboutNonnullParamOrCall(false);
+ if (const Attr *A = Callee->getAttr<ReturnsNonNullAttr>()) {
+ ComplainAboutNonnullParamOrCall(A);
return;
}
}
@@ -7717,25 +8942,25 @@ void Sema::DiagnoseAlwaysNonNullPointer(Expr *E,
if (const auto* PV = dyn_cast<ParmVarDecl>(D)) {
if (getCurFunction() &&
!getCurFunction()->ModifiedNonNullParams.count(PV)) {
- if (PV->hasAttr<NonNullAttr>()) {
- ComplainAboutNonnullParamOrCall(true);
+ if (const Attr *A = PV->getAttr<NonNullAttr>()) {
+ ComplainAboutNonnullParamOrCall(A);
return;
}
if (const auto *FD = dyn_cast<FunctionDecl>(PV->getDeclContext())) {
- auto ParamIter = std::find(FD->param_begin(), FD->param_end(), PV);
+ auto ParamIter = llvm::find(FD->parameters(), PV);
assert(ParamIter != FD->param_end());
unsigned ParamNo = std::distance(FD->param_begin(), ParamIter);
for (const auto *NonNull : FD->specific_attrs<NonNullAttr>()) {
if (!NonNull->args_size()) {
- ComplainAboutNonnullParamOrCall(true);
+ ComplainAboutNonnullParamOrCall(NonNull);
return;
}
for (unsigned ArgNo : NonNull->args()) {
if (ArgNo == ParamNo) {
- ComplainAboutNonnullParamOrCall(true);
+ ComplainAboutNonnullParamOrCall(NonNull);
return;
}
}
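A sketch of the user-visible effect (illustrative; exact diagnostic wording assumed): because the lambda now receives the attribute itself, the always-true warning gains a note at the nonnull declaration.

__attribute__((returns_nonnull)) int *always();
void use() {
  if (always() != nullptr) {}  // warning: comparison of nonnull return value
                               // is always true; the note (new in this
                               // change) points at the returns_nonnull attr
}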
@@ -7817,7 +9042,6 @@ void Sema::DiagnoseAlwaysNonNullPointer(Expr *E,
<< FixItHint::CreateInsertion(getLocForEndOfToken(E->getLocEnd()), "()");
}
-
/// Diagnoses "dangerous" implicit conversions within the given
/// expression (which is a full expression). Implements -Wconversion
/// and -Wsign-compare.
@@ -7852,12 +9076,20 @@ void Sema::CheckBoolLikeConversion(Expr *E, SourceLocation CC) {
/// Diagnose when expression is an integer constant expression and its evaluation
/// results in integer overflow
void Sema::CheckForIntOverflow (Expr *E) {
- if (isa<BinaryOperator>(E->IgnoreParenCasts()))
- E->IgnoreParenCasts()->EvaluateForOverflow(Context);
- else if (auto InitList = dyn_cast<InitListExpr>(E))
- for (Expr *E : InitList->inits())
- if (isa<BinaryOperator>(E->IgnoreParenCasts()))
- E->IgnoreParenCasts()->EvaluateForOverflow(Context);
+ // Use a work list to deal with nested struct initializers.
+ SmallVector<Expr *, 2> Exprs(1, E);
+
+ do {
+ Expr *E = Exprs.pop_back_val();
+
+ if (isa<BinaryOperator>(E->IgnoreParenCasts())) {
+ E->IgnoreParenCasts()->EvaluateForOverflow(Context);
+ continue;
+ }
+
+ if (auto InitList = dyn_cast<InitListExpr>(E))
+ Exprs.append(InitList->inits().begin(), InitList->inits().end());
+ } while (!Exprs.empty());
}
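Illustratively (a sketch, not from the diff), the work list now reaches overflow buried in nested initializer lists, which the old two-level code missed:

struct Inner { int v; };
struct Outer { struct Inner i; };
struct Outer o = { { 2147483647 + 1 } };  // now evaluated for overflow and
                                          // diagnosed at any nesting depth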
namespace {
@@ -7875,7 +9107,7 @@ class SequenceChecker : public EvaluatedExprVisitor<SequenceChecker> {
struct Value {
explicit Value(unsigned Parent) : Parent(Parent), Merged(false) {}
unsigned Parent : 31;
- bool Merged : 1;
+ unsigned Merged : 1;
};
SmallVector<Value, 8> Values;
@@ -7987,12 +9219,11 @@ class SequenceChecker : public EvaluatedExprVisitor<SequenceChecker> {
Self.ModAsSideEffect = &ModAsSideEffect;
}
~SequencedSubexpression() {
- for (auto MI = ModAsSideEffect.rbegin(), ME = ModAsSideEffect.rend();
- MI != ME; ++MI) {
- UsageInfo &U = Self.UsageMap[MI->first];
+ for (auto &M : llvm::reverse(ModAsSideEffect)) {
+ UsageInfo &U = Self.UsageMap[M.first];
auto &SideEffectUsage = U.Uses[UK_ModAsSideEffect];
- Self.addUsage(U, MI->first, SideEffectUsage.Use, UK_ModAsValue);
- SideEffectUsage = MI->second;
+ Self.addUsage(U, M.first, SideEffectUsage.Use, UK_ModAsValue);
+ SideEffectUsage = M.second;
}
Self.ModAsSideEffect = OldModAsSideEffect;
}
@@ -8195,6 +9426,7 @@ public:
notePostMod(O, BO, SemaRef.getLangOpts().CPlusPlus ? UK_ModAsValue
: UK_ModAsSideEffect);
}
+
void VisitCompoundAssignOperator(CompoundAssignOperator *CAO) {
VisitBinAssign(CAO);
}
@@ -8344,7 +9576,7 @@ public:
Tree.merge(Elts[I]);
}
};
-}
+} // end anonymous namespace
void Sema::CheckUnsequencedOperations(Expr *E) {
SmallVector<Expr *, 8> WorkList;
@@ -8358,7 +9590,8 @@ void Sema::CheckUnsequencedOperations(Expr *E) {
void Sema::CheckCompletedExpr(Expr *E, SourceLocation CheckLoc,
bool IsConstexpr) {
CheckImplicitConversions(E, CheckLoc);
- CheckUnsequencedOperations(E);
+ if (!E->isInstantiationDependent())
+ CheckUnsequencedOperations(E);
if (!IsConstexpr && !E->isValueDependent())
CheckForIntOverflow(E);
}
@@ -8403,13 +9636,10 @@ static void diagnoseArrayStarInParamType(Sema &S, QualType PType,
/// takes care of any checks that cannot be performed on the
/// declaration itself, e.g., that the types of each of the function
/// parameters are complete.
-bool Sema::CheckParmsForFunctionDef(ParmVarDecl *const *P,
- ParmVarDecl *const *PEnd,
+bool Sema::CheckParmsForFunctionDef(ArrayRef<ParmVarDecl *> Parameters,
bool CheckParameterNames) {
bool HasInvalidParm = false;
- for (; P != PEnd; ++P) {
- ParmVarDecl *Param = *P;
-
+ for (ParmVarDecl *Param : Parameters) {
// C99 6.7.5.3p4: the parameters in a parameter type list in a
// function declarator that is part of a function definition of
// that function shall not have incomplete type.
@@ -8517,21 +9747,12 @@ void Sema::CheckCastAlign(Expr *Op, QualType T, SourceRange TRange) {
<< TRange << Op->getSourceRange();
}
-static const Type* getElementType(const Expr *BaseExpr) {
- const Type* EltType = BaseExpr->getType().getTypePtr();
- if (EltType->isAnyPointerType())
- return EltType->getPointeeType().getTypePtr();
- else if (EltType->isArrayType())
- return EltType->getBaseElementTypeUnsafe();
- return EltType;
-}
-
/// \brief Check whether this array fits the idiom of a size-one tail padded
/// array member of a struct.
///
/// We avoid emitting out-of-bounds access warnings for such arrays as they are
/// commonly used to emulate flexible arrays in C89 code.
-static bool IsTailPaddedMemberArray(Sema &S, llvm::APInt Size,
+static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size,
const NamedDecl *ND) {
if (Size != 1 || !ND) return false;
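The C89 idiom being whitelisted, for concreteness (an illustrative snippet, not part of the diff):

struct pstring {
  int len;
  char data[1];  /* size-one tail array, allocated oversized and indexed past
                    its declared bound; indexing it stays unwarned */
};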
@@ -8580,7 +9801,8 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
if (IndexExpr->isValueDependent())
return;
- const Type *EffectiveType = getElementType(BaseExpr);
+ const Type *EffectiveType =
+ BaseExpr->getType()->getPointeeOrArrayElementType();
BaseExpr = BaseExpr->IgnoreParenCasts();
const ConstantArrayType *ArrayTy =
Context.getAsConstantArrayType(BaseExpr->getType());
@@ -8604,7 +9826,7 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
if (!size.isStrictlyPositive())
return;
- const Type* BaseType = getElementType(BaseExpr);
+ const Type *BaseType = BaseExpr->getType()->getPointeeOrArrayElementType();
if (BaseType != EffectiveType) {
// Make sure we're comparing apples to apples when comparing index to size
uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
@@ -8754,7 +9976,7 @@ namespace {
Range = e->getSourceRange();
}
};
-}
+} // end anonymous namespace
/// Consider whether capturing the given variable can possibly lead to
/// a retain cycle.
@@ -8900,7 +10122,7 @@ namespace {
}
}
};
-}
+} // end anonymous namespace
/// Check whether the given argument is a block which captures a
/// variable.
@@ -9136,7 +10358,6 @@ void Sema::CheckObjCCircularContainer(ObjCMessageExpr *Message) {
}
}
}
-
}
/// Check a message send to see if it's likely to cause a retain cycle.
@@ -9340,7 +10561,7 @@ bool ShouldDiagnoseEmptyStmtBody(const SourceManager &SourceMgr,
return true;
}
-} // Unnamed namespace
+} // end anonymous namespace
void Sema::DiagnoseEmptyStmtBody(SourceLocation StmtLoc,
const Stmt *Body,
@@ -9436,7 +10657,6 @@ void Sema::DiagnoseEmptyLoopBody(const Stmt *S,
/// DiagnoseSelfMove - Emits a warning if a value is moved to itself.
void Sema::DiagnoseSelfMove(const Expr *LHSExpr, const Expr *RHSExpr,
SourceLocation OpLoc) {
-
if (Diags.isIgnored(diag::warn_sizeof_pointer_expr_memaccess, OpLoc))
return;
@@ -9675,7 +10895,7 @@ bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) {
return false;
}
-}
+} // end anonymous namespace
//===--- CHECK: pointer_with_type_tag attribute: datatypes should match ----//
@@ -9806,7 +11026,7 @@ bool GetMatchingCType(
TypeInfo = I->second;
return true;
}
-} // unnamed namespace
+} // end anonymous namespace
void Sema::RegisterTypeTagForDatatype(const IdentifierInfo *ArgumentKind,
uint64_t MagicValue, QualType Type,
@@ -9839,7 +11059,7 @@ bool IsSameCharType(QualType T1, QualType T2) {
(T1Kind == BuiltinType::Char_U && T2Kind == BuiltinType::UChar) ||
(T1Kind == BuiltinType::Char_S && T2Kind == BuiltinType::SChar);
}
-} // unnamed namespace
+} // end anonymous namespace
void Sema::CheckArgumentWithTypeTag(const ArgumentWithTypeTagAttr *Attr,
const Expr * const *ExprArgs) {
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp
index 21cf62585142..36babc4bc0cd 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp
@@ -19,7 +19,6 @@
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/CodeCompleteConsumer.h"
-#include "clang/Sema/ExternalSemaSource.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/Scope.h"
@@ -482,12 +481,37 @@ getRequiredQualification(ASTContext &Context,
/// Determine whether \p Id is a name reserved for the implementation (C99
/// 7.1.3, C++ [lib.global.names]).
-static bool isReservedName(const IdentifierInfo *Id) {
+static bool isReservedName(const IdentifierInfo *Id,
+ bool doubleUnderscoreOnly = false) {
if (Id->getLength() < 2)
return false;
const char *Name = Id->getNameStart();
return Name[0] == '_' &&
- (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z'));
+ (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z' &&
+ !doubleUnderscoreOnly));
+}
+
+// Some declarations have reserved names that we don't want to ever show.
+// Filter out names reserved for the implementation if they come from a
+// system header.
+static bool shouldIgnoreDueToReservedName(const NamedDecl *ND, Sema &SemaRef) {
+ const IdentifierInfo *Id = ND->getIdentifier();
+ if (!Id)
+ return false;
+
+ // Ignore reserved names for compiler provided decls.
+ if (isReservedName(Id) && ND->getLocation().isInvalid())
+ return true;
+
+ // For system headers ignore only double-underscore names.
+ // This allows for system headers providing private symbols with a single
+ // underscore.
+ if (isReservedName(Id, /*doubleUnderscoreOnly=*/true) &&
+ SemaRef.SourceMgr.isInSystemHeader(
+ SemaRef.SourceMgr.getSpellingLoc(ND->getLocation())))
+ return true;
+
+ return false;
}
bool ResultBuilder::isInterestingDecl(const NamedDecl *ND,
@@ -514,17 +538,9 @@ bool ResultBuilder::isInterestingDecl(const NamedDecl *ND,
// Using declarations themselves are never added as results.
if (isa<UsingDecl>(ND))
return false;
-
- // Some declarations have reserved names that we don't want to ever show.
- // Filter out names reserved for the implementation if they come from a
- // system header.
- // TODO: Add a predicate for this.
- if (const IdentifierInfo *Id = ND->getIdentifier())
- if (isReservedName(Id) &&
- (ND->getLocation().isInvalid() ||
- SemaRef.SourceMgr.isInSystemHeader(
- SemaRef.SourceMgr.getSpellingLoc(ND->getLocation()))))
- return false;
+
+ if (shouldIgnoreDueToReservedName(ND, SemaRef))
+ return false;
if (Filter == &ResultBuilder::IsNestedNameSpecifier ||
(isa<NamespaceDecl>(ND) &&
@@ -1518,7 +1534,6 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC,
ResultBuilder &Results) {
CodeCompletionAllocator &Allocator = Results.getAllocator();
CodeCompletionBuilder Builder(Allocator, Results.getCodeCompletionTUInfo());
- PrintingPolicy Policy = getCompletionPrintingPolicy(SemaRef);
typedef CodeCompletionResult Result;
switch (CCC) {
@@ -3046,6 +3061,7 @@ CXCursorKind clang::getCursorKindForDecl(const Decl *D) {
case Decl::ClassTemplatePartialSpecialization:
return CXCursor_ClassTemplatePartialSpecialization;
case Decl::UsingDirective: return CXCursor_UsingDirective;
+ case Decl::StaticAssert: return CXCursor_StaticAssert;
case Decl::TranslationUnit: return CXCursor_TranslationUnit;
case Decl::Using:
@@ -3209,7 +3225,7 @@ static void MaybeAddOverrideCalls(Sema &S, DeclContext *InContext,
// We need to have names for all of the parameters, if we're going to
// generate a forwarding call.
- for (auto P : Method->params())
+ for (auto P : Method->parameters())
if (!P->getDeclName())
return;
@@ -3241,7 +3257,7 @@ static void MaybeAddOverrideCalls(Sema &S, DeclContext *InContext,
Overridden->getNameAsString()));
Builder.AddChunk(CodeCompletionString::CK_LeftParen);
bool FirstParam = true;
- for (auto P : Method->params()) {
+ for (auto P : Method->parameters()) {
if (FirstParam)
FirstParam = false;
else
@@ -3570,7 +3586,7 @@ static void AddObjCProperties(const CodeCompletionContext &CCContext,
Container = getContainerDef(Container);
// Add properties in this container.
- for (const auto *P : Container->properties())
+ for (const auto *P : Container->instance_properties())
if (AddedProperties.insert(P->getIdentifier()).second)
Results.MaybeAddResult(Result(P, Results.getBasePriority(P), nullptr),
CurContext);
@@ -3812,12 +3828,19 @@ void Sema::CodeCompleteTypeQualifiers(DeclSpec &DS) {
if (getLangOpts().C11 &&
!(DS.getTypeQualifiers() & DeclSpec::TQ_atomic))
Results.AddResult("_Atomic");
+ if (getLangOpts().MSVCCompat &&
+ !(DS.getTypeQualifiers() & DeclSpec::TQ_unaligned))
+ Results.AddResult("__unaligned");
Results.ExitScope();
HandleCodeCompleteResults(this, CodeCompleter,
Results.getCompletionContext(),
Results.data(), Results.size());
}
+void Sema::CodeCompleteBracketDeclarator(Scope *S) {
+ CodeCompleteExpression(S, QualType(getASTContext().getSizeType()));
+}
+
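A sketch of the new completion point (illustrative; '<caret>' marks where code completion is invoked, and kCount is an invented name):

#include <cstddef>
const std::size_t kCount = 16;
int ring[<caret>];  // completing inside the brackets now behaves like
                    // completing an expression of the size type, so
                    // size_t-typed candidates such as kCount rank higher
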
void Sema::CodeCompleteCase(Scope *S) {
if (getCurFunction()->SwitchStack.empty() || !CodeCompleter)
return;
@@ -6189,7 +6212,7 @@ void Sema::CodeCompleteObjCPropertySynthesizeIvar(Scope *S,
// Figure out which interface we're looking into.
ObjCInterfaceDecl *Class = nullptr;
if (ObjCImplementationDecl *ClassImpl
- = dyn_cast<ObjCImplementationDecl>(Container))
+ = dyn_cast<ObjCImplementationDecl>(Container))
Class = ClassImpl->getClassInterface();
else
Class = cast<ObjCCategoryImplDecl>(Container)->getCategoryDecl()
@@ -6198,8 +6221,8 @@ void Sema::CodeCompleteObjCPropertySynthesizeIvar(Scope *S,
// Determine the type of the property we're synthesizing.
QualType PropertyType = Context.getObjCIdType();
if (Class) {
- if (ObjCPropertyDecl *Property
- = Class->FindPropertyDeclaration(PropertyName)) {
+ if (ObjCPropertyDecl *Property = Class->FindPropertyDeclaration(
+ PropertyName, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
PropertyType
= Property->getType().getNonReferenceType().getUnqualifiedType();
@@ -7178,7 +7201,7 @@ void Sema::CodeCompleteObjCMethodDecl(Scope *S,
Containers.push_back(Cat);
for (unsigned I = 0, N = Containers.size(); I != N; ++I)
- for (auto *P : Containers[I]->properties())
+ for (auto *P : Containers[I]->instance_properties())
AddObjCKeyValueCompletions(P, IsInstanceMethod, ReturnType, Context,
KnownSelectors, Results);
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp
index 4b4fd6b16a06..c8715fff4159 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp
@@ -244,7 +244,7 @@ ExprResult Sema::BuildCoawaitExpr(SourceLocation Loc, Expr *E) {
// If the expression is a temporary, materialize it as an lvalue so that we
// can use it multiple times.
if (E->getValueKind() == VK_RValue)
- E = new (Context) MaterializeTemporaryExpr(E->getType(), E, true);
+ E = CreateMaterializeTemporaryExpr(E->getType(), E, true);
// Build the await_ready, await_suspend, await_resume calls.
ReadySuspendResumeResult RSS = buildCoawaitCalls(*this, Loc, E);
@@ -311,7 +311,7 @@ ExprResult Sema::BuildCoyieldExpr(SourceLocation Loc, Expr *E) {
// If the expression is a temporary, materialize it as an lvalue so that we
// can use it multiple times.
if (E->getValueKind() == VK_RValue)
- E = new (Context) MaterializeTemporaryExpr(E->getType(), E, true);
+ E = CreateMaterializeTemporaryExpr(E->getType(), E, true);
// Build the await_ready, await_suspend, await_resume calls.
ReadySuspendResumeResult RSS = buildCoawaitCalls(*this, Loc, E);
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp
index f95d1068cc59..41719d4e7b08 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp
@@ -47,6 +47,7 @@
#include <algorithm>
#include <cstring>
#include <functional>
+
using namespace clang;
using namespace sema;
@@ -88,7 +89,7 @@ class TypeNameValidatorCCC : public CorrectionCandidateCallback {
bool AllowClassTemplates;
};
-}
+} // end anonymous namespace
/// \brief Determine whether the token kind starts a simple-type-specifier.
bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
@@ -107,6 +108,7 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
case tok::kw_half:
case tok::kw_float:
case tok::kw_double:
+ case tok::kw___float128:
case tok::kw_wchar_t:
case tok::kw_bool:
case tok::kw___underlying_type:
@@ -134,7 +136,7 @@ enum class UnqualifiedTypeNameLookupResult {
FoundNonType,
FoundType
};
-} // namespace
+} // end anonymous namespace
/// \brief Tries to perform unqualified lookup of the type decls in bases for
/// dependent class.
@@ -161,11 +163,17 @@ lookupUnqualifiedTypeNameInBase(Sema &S, const IdentifierInfo &II,
auto *TD = TST->getTemplateName().getAsTemplateDecl();
if (!TD)
continue;
- auto *BasePrimaryTemplate =
- dyn_cast_or_null<CXXRecordDecl>(TD->getTemplatedDecl());
- if (!BasePrimaryTemplate)
- continue;
- BaseRD = BasePrimaryTemplate;
+ if (auto *BasePrimaryTemplate =
+ dyn_cast_or_null<CXXRecordDecl>(TD->getTemplatedDecl())) {
+ if (BasePrimaryTemplate->getCanonicalDecl() != RD->getCanonicalDecl())
+ BaseRD = BasePrimaryTemplate;
+ else if (auto *CTD = dyn_cast<ClassTemplateDecl>(TD)) {
+ if (const ClassTemplatePartialSpecializationDecl *PS =
+ CTD->findPartialSpecialization(Base.getType()))
+ if (PS->getCanonicalDecl() != RD->getCanonicalDecl())
+ BaseRD = PS;
+ }
+ }
}
if (BaseRD) {
for (NamedDecl *ND : BaseRD->lookup(&II)) {
@@ -207,7 +215,7 @@ static ParsedType recoverFromTypeInKnownDependentBase(Sema &S,
FoundTypeDecl = lookupUnqualifiedTypeNameInBase(S, II, NameLoc, RD);
}
if (FoundTypeDecl != UnqualifiedTypeNameLookupResult::FoundType)
- return ParsedType();
+ return nullptr;
// We found some types in dependent base classes. Recover as if the user
// wrote 'typename MyClass::II' instead of 'II'. We'll fully resolve the
@@ -266,25 +274,25 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
// We therefore do not perform any name lookup if the result would
// refer to a member of an unknown specialization.
if (!isClassName && !IsCtorOrDtorName)
- return ParsedType();
-
+ return nullptr;
+
// We know from the grammar that this name refers to a type,
// so build a dependent node to describe the type.
if (WantNontrivialTypeSourceInfo)
return ActOnTypenameType(S, SourceLocation(), *SS, II, NameLoc).get();
-
+
NestedNameSpecifierLoc QualifierLoc = SS->getWithLocInContext(Context);
QualType T = CheckTypenameType(ETK_None, SourceLocation(), QualifierLoc,
II, NameLoc);
return ParsedType::make(T);
}
-
- return ParsedType();
+
+ return nullptr;
}
-
+
if (!LookupCtx->isDependentContext() &&
RequireCompleteDeclContext(*SS, LookupCtx))
- return ParsedType();
+ return nullptr;
}
// FIXME: LookupNestedNameSpecifierName isn't the right kind of
@@ -302,7 +310,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
if (ObjectTypePtr && Result.empty()) {
// C++ [basic.lookup.classref]p3:
// If the unqualified-id is ~type-name, the type-name is looked up
- // in the context of the entire postfix-expression. If the type T of
+ // in the context of the entire postfix-expression. If the type T of
// the object expression is of a class type C, the type-name is also
// looked up in the scope of class C. At least one of the lookups shall
// find a name that refers to (possibly cv-qualified) T.
@@ -346,8 +354,8 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
// identifier is not a template (typo correction for template names
// is handled elsewhere).
!(getLangOpts().CPlusPlus && NewSSPtr &&
- isTemplateName(S, *NewSSPtr, false, TemplateName, ParsedType(),
- false, Template, MemberOfUnknownSpecialization))) {
+ isTemplateName(S, *NewSSPtr, false, TemplateName, nullptr, false,
+ Template, MemberOfUnknownSpecialization))) {
ParsedType Ty = getTypeName(*NewII, NameLoc, S, NewSSPtr,
isClassName, HasTrailingDot, ObjectTypePtr,
IsCtorOrDtorName,
@@ -367,7 +375,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
Result.suppressDiagnostics();
- return ParsedType();
+ return nullptr;
case LookupResult::Ambiguous:
// Recover from type-hiding ambiguities by hiding the type. We'll
@@ -377,7 +385,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
// that only makes sense if the identifier was treated like a type.
if (Result.getAmbiguityKind() == LookupResult::AmbiguousTagHiding) {
Result.suppressDiagnostics();
- return ParsedType();
+ return nullptr;
}
// Look to see if we have a type anywhere in the list of results.
@@ -399,7 +407,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
// will produce the ambiguity, or will complain that it expected
// a type name.
Result.suppressDiagnostics();
- return ParsedType();
+ return nullptr;
}
// We found a type within the ambiguous lookup; diagnose the
@@ -430,7 +438,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
// Construct a type with type-source information.
TypeLocBuilder Builder;
Builder.pushTypeSpec(T).setNameLoc(NameLoc);
-
+
T = getElaboratedType(ETK_None, *SS, T);
ElaboratedTypeLoc ElabTL = Builder.push<ElaboratedTypeLoc>(T);
ElabTL.setElaboratedKeywordLoc(SourceLocation());
@@ -449,7 +457,7 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
if (T.isNull()) {
// If it's not plausibly a type, suppress diagnostics.
Result.suppressDiagnostics();
- return ParsedType();
+ return nullptr;
}
return ParsedType::make(T);
}
@@ -471,17 +479,53 @@ synthesizeCurrentNestedNameSpecifier(ASTContext &Context, DeclContext *DC) {
llvm_unreachable("something isn't in TU scope?");
}
-ParsedType Sema::ActOnDelayedDefaultTemplateArg(const IdentifierInfo &II,
- SourceLocation NameLoc) {
- // Accepting an undeclared identifier as a default argument for a template
- // type parameter is a Microsoft extension.
- Diag(NameLoc, diag::ext_ms_delayed_template_argument) << &II;
+/// Find the parent class with dependent bases of the innermost enclosing method
+/// context. Do not look for enclosing CXXRecordDecls directly, or we will end
+/// up allowing unqualified dependent type names at class-level, which MSVC
+/// correctly rejects.
+static const CXXRecordDecl *
+findRecordWithDependentBasesOfEnclosingMethod(const DeclContext *DC) {
+ for (; DC && DC->isDependentContext(); DC = DC->getLookupParent()) {
+ DC = DC->getPrimaryContext();
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(DC))
+ if (MD->getParent()->hasAnyDependentBases())
+ return MD->getParent();
+ }
+ return nullptr;
+}
+
+ParsedType Sema::ActOnMSVCUnknownTypeName(const IdentifierInfo &II,
+ SourceLocation NameLoc,
+ bool IsTemplateTypeArg) {
+ assert(getLangOpts().MSVCCompat && "shouldn't be called in non-MSVC mode");
+
+ NestedNameSpecifier *NNS = nullptr;
+ if (IsTemplateTypeArg && getCurScope()->isTemplateParamScope()) {
+ // If we weren't able to parse a default template argument, delay lookup
+ // until instantiation time by making a non-dependent DependentTypeName. We
+ // pretend we saw a NestedNameSpecifier referring to the current scope, and
+ // lookup is retried.
+ // FIXME: This hurts our diagnostic quality, since we get errors like "no
+ // type named 'Foo' in 'current_namespace'" when the user didn't write any
+ // name specifiers.
+ NNS = synthesizeCurrentNestedNameSpecifier(Context, CurContext);
+ Diag(NameLoc, diag::ext_ms_delayed_template_argument) << &II;
+ } else if (const CXXRecordDecl *RD =
+ findRecordWithDependentBasesOfEnclosingMethod(CurContext)) {
+ // Build a DependentNameType that will perform lookup into RD at
+ // instantiation time.
+ NNS = NestedNameSpecifier::Create(Context, nullptr, RD->isTemplateDecl(),
+ RD->getTypeForDecl());
+
+ // Diagnose that this identifier was undeclared, and retry the lookup during
+ // template instantiation.
+ Diag(NameLoc, diag::ext_undeclared_unqual_id_with_dependent_base) << &II
+ << RD;
+ } else {
+ // This is not a situation that we should recover from.
+ return ParsedType();
+ }
- // Build a fake DependentNameType that will perform lookup into CurContext at
- // instantiation time. The name specifier isn't dependent, so template
- // instantiation won't transform it. It will retry the lookup, however.
- NestedNameSpecifier *NNS =
- synthesizeCurrentNestedNameSpecifier(Context, CurContext);
QualType T = Context.getDependentNameType(ETK_None, NNS, &II);
// Build type location information. We synthesized the qualifier, so we have
@@ -548,7 +592,7 @@ bool Sema::isMicrosoftMissingTypename(const CXXScopeSpec *SS, Scope *S) {
if (Context.hasSameUnqualifiedType(QualType(Ty, 1), Base.getType()))
return true;
return S->isFunctionPrototypeScope();
- }
+ }
return CurContext->isFunctionOrMethod() || S->isFunctionPrototypeScope();
}
@@ -559,8 +603,8 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
ParsedType &SuggestedType,
bool AllowClassTemplates) {
// We don't have anything to suggest (yet).
- SuggestedType = ParsedType();
-
+ SuggestedType = nullptr;
+
// There may have been a typo in the name of the type. Look up typo
// results, in case we have something that we can suggest.
if (TypoCorrection Corrected =
@@ -592,11 +636,11 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
if (Corrected.getCorrectionSpecifier())
tmpSS.MakeTrivial(Context, Corrected.getCorrectionSpecifier(),
SourceRange(IILoc));
- SuggestedType = getTypeName(*Corrected.getCorrectionAsIdentifierInfo(),
- IILoc, S, tmpSS.isSet() ? &tmpSS : SS, false,
- false, ParsedType(),
- /*IsCtorOrDtorName=*/false,
- /*NonTrivialTypeSourceInfo=*/true);
+ SuggestedType =
+ getTypeName(*Corrected.getCorrectionAsIdentifierInfo(), IILoc, S,
+ tmpSS.isSet() ? &tmpSS : SS, false, false, nullptr,
+ /*IsCtorOrDtorName=*/false,
+ /*NonTrivialTypeSourceInfo=*/true);
}
return;
}
@@ -609,7 +653,7 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
TemplateTy TemplateResult;
bool MemberOfUnknownSpecialization;
if (isTemplateName(S, SS ? *SS : EmptySS, /*hasTemplateKeyword=*/false,
- Name, ParsedType(), true, TemplateResult,
+ Name, nullptr, true, TemplateResult,
MemberOfUnknownSpecialization) == TNK_Type_template) {
TemplateName TplName = TemplateResult.get();
Diag(IILoc, diag::err_template_missing_args) << TplName;
@@ -623,11 +667,11 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
// FIXME: Should we move the logic that tries to recover from a missing tag
// (struct, union, enum) from Parser::ParseImplicitInt here, instead?
-
+
if (!SS || (!SS->isSet() && !SS->isInvalid()))
Diag(IILoc, diag::err_unknown_typename) << II;
else if (DeclContext *DC = computeDeclContext(*SS, false))
- Diag(IILoc, diag::err_typename_nested_not_found)
+ Diag(IILoc, diag::err_typename_nested_not_found)
<< II << DC << SS->getRange();
else if (isDependentScopeSpecifier(*SS)) {
unsigned DiagID = diag::err_typename_missing;
@@ -641,25 +685,25 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
SuggestedType = ActOnTypenameType(S, SourceLocation(),
*SS, *II, IILoc).get();
} else {
- assert(SS && SS->isInvalid() &&
+ assert(SS && SS->isInvalid() &&
"Invalid scope specifier has already been diagnosed");
}
}
/// \brief Determine whether the given result set contains either a type name
-/// or
+/// or a template name.
static bool isResultTypeOrTemplate(LookupResult &R, const Token &NextToken) {
bool CheckTemplate = R.getSema().getLangOpts().CPlusPlus &&
NextToken.is(tok::less);
-
+
for (LookupResult::iterator I = R.begin(), IEnd = R.end(); I != IEnd; ++I) {
if (isa<TypeDecl>(*I) || isa<ObjCInterfaceDecl>(*I))
return true;
-
+
if (CheckTemplate && isa<TemplateDecl>(*I))
return true;
}
-
+
return false;
}
@@ -751,7 +795,7 @@ Sema::ClassifyName(Scope *S, CXXScopeSpec &SS, IdentifierInfo *&Name,
return TypeInBase;
}
- // Perform lookup for Objective-C instance variables (including automatically
+ // Perform lookup for Objective-C instance variables (including automatically
// synthesized instance variables), if we're in an Objective-C method.
// FIXME: This lookup really, really needs to be folded in to the normal
// unqualified lookup mechanism.
@@ -760,10 +804,10 @@ Sema::ClassifyName(Scope *S, CXXScopeSpec &SS, IdentifierInfo *&Name,
if (E.get() || E.isInvalid())
return E;
}
-
+
bool SecondTry = false;
bool IsFilteredTemplateName = false;
-
+
Corrected:
switch (Result.getResultKind()) {
case LookupResult::NotFound:
@@ -774,18 +818,18 @@ Corrected:
// FIXME: Reference?
if (getLangOpts().CPlusPlus)
return BuildDeclarationNameExpr(SS, Result, /*ADL=*/true);
-
+
// C90 6.3.2.2:
- // If the expression that precedes the parenthesized argument list in a
- // function call consists solely of an identifier, and if no
- // declaration is visible for this identifier, the identifier is
+ // If the expression that precedes the parenthesized argument list in a
+ // function call consists solely of an identifier, and if no
+ // declaration is visible for this identifier, the identifier is
// implicitly declared exactly as if, in the innermost block containing
// the function call, the declaration
//
- // extern int identifier ();
+ // extern int identifier ();
+ //
+ // appeared.
//
- // appeared.
- //
// We also allow this in C99 as an extension.
if (NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *Name, S)) {
Result.addDecl(D);
@@ -793,9 +837,9 @@ Corrected:
return BuildDeclarationNameExpr(SS, Result, /*ADL=*/false);
}
}
-
- // In C, we first see whether there is a tag type by the same name, in
- // which case it's likely that the user just forgot to write "enum",
+
+ // In C, we first see whether there is a tag type by the same name, in
+ // which case it's likely that the user just forgot to write "enum",
// "struct", or "union".
if (!getLangOpts().CPlusPlus && !SecondTry &&
isTagTypeWithMissingTag(*this, Result, S, SS, Name, NameLoc)) {
@@ -807,7 +851,7 @@ Corrected:
if (!SecondTry && CCC) {
SecondTry = true;
if (TypoCorrection Corrected = CorrectTypo(Result.getLookupNameInfo(),
- Result.getLookupKind(), S,
+ Result.getLookupKind(), S,
&SS, std::move(CCC),
CTK_ErrorRecovery)) {
unsigned UnqualifiedDiag = diag::err_undeclared_var_use_suggest;
@@ -819,8 +863,8 @@ Corrected:
UnderlyingFirstDecl && isa<TemplateDecl>(UnderlyingFirstDecl)) {
UnqualifiedDiag = diag::err_no_template_suggest;
QualifiedDiag = diag::err_no_member_template_suggest;
- } else if (UnderlyingFirstDecl &&
- (isa<TypeDecl>(UnderlyingFirstDecl) ||
+ } else if (UnderlyingFirstDecl &&
+ (isa<TypeDecl>(UnderlyingFirstDecl) ||
isa<ObjCInterfaceDecl>(UnderlyingFirstDecl) ||
isa<ObjCCompatibleAliasDecl>(UnderlyingFirstDecl))) {
UnqualifiedDiag = diag::err_unknown_typename_suggest;
@@ -861,28 +905,28 @@ Corrected:
ExprResult E(LookupInObjCMethod(Result, S, Ivar->getIdentifier()));
return E;
}
-
+
goto Corrected;
}
}
-
+
// We failed to correct; just fall through and let the parser deal with it.
Result.suppressDiagnostics();
return NameClassification::Unknown();
-
+
case LookupResult::NotFoundInCurrentInstantiation: {
- // We performed name lookup into the current instantiation, and there were
+ // We performed name lookup into the current instantiation, and there were
// dependent bases, so we treat this result the same way as any other
// dependent nested-name-specifier.
-
+
// C++ [temp.res]p2:
- // A name used in a template declaration or definition and that is
- // dependent on a template-parameter is assumed not to name a type
- // unless the applicable name lookup finds a type name or the name is
+ // A name used in a template declaration or definition and that is
+ // dependent on a template-parameter is assumed not to name a type
+ // unless the applicable name lookup finds a type name or the name is
// qualified by the keyword typename.
//
// FIXME: If the next token is '<', we might want to ask the parser to
- // perform some heroics to see if we actually have a
+ // perform some heroics to see if we actually have a
// template-argument-list, which would indicate a missing 'template'
// keyword here.
return ActOnDependentIdExpression(SS, /*TemplateKWLoc=*/SourceLocation(),
@@ -894,7 +938,7 @@ Corrected:
case LookupResult::FoundOverloaded:
case LookupResult::FoundUnresolvedValue:
break;
-
+
case LookupResult::Ambiguous:
if (getLangOpts().CPlusPlus && NextToken.is(tok::less) &&
hasAnyAcceptableTemplateNames(Result)) {
@@ -915,29 +959,29 @@ Corrected:
break;
}
}
-
+
// Diagnose the ambiguity and return an error.
return NameClassification::Error();
}
-
+
if (getLangOpts().CPlusPlus && NextToken.is(tok::less) &&
(IsFilteredTemplateName || hasAnyAcceptableTemplateNames(Result))) {
// C++ [temp.names]p3:
// After name lookup (3.4) finds that a name is a template-name or that
// an operator-function-id or a literal- operator-id refers to a set of
- // overloaded functions any member of which is a function template if
+ // overloaded functions any member of which is a function template if
// this is followed by a <, the < is always taken as the delimiter of a
// template-argument-list and never as the less-than operator.
if (!IsFilteredTemplateName)
FilterAcceptableTemplateNames(Result);
-
+
if (!Result.empty()) {
bool IsFunctionTemplate;
bool IsVarTemplate;
TemplateName Template;
if (Result.end() - Result.begin() > 1) {
IsFunctionTemplate = true;
- Template = Context.getOverloadedTemplateName(Result.begin(),
+ Template = Context.getOverloadedTemplateName(Result.begin(),
Result.end());
} else {
TemplateDecl *TD
@@ -946,19 +990,19 @@ Corrected:
IsVarTemplate = isa<VarTemplateDecl>(TD);
if (SS.isSet() && !SS.isInvalid())
- Template = Context.getQualifiedTemplateName(SS.getScopeRep(),
+ Template = Context.getQualifiedTemplateName(SS.getScopeRep(),
/*TemplateKeyword=*/false,
TD);
else
Template = TemplateName(TD);
}
-
+
if (IsFunctionTemplate) {
// Function templates always go through overload resolution, at which
// point we'll perform the various checks (e.g., accessibility) we need
// to based on which function we selected.
Result.suppressDiagnostics();
-
+
return NameClassification::FunctionTemplate(Template);
}
@@ -984,17 +1028,17 @@ Corrected:
dyn_cast<ObjCCompatibleAliasDecl>(FirstDecl))
Class = Alias->getClassInterface();
}
-
+
if (Class) {
DiagnoseUseOfDecl(Class, NameLoc);
-
+
if (NextToken.is(tok::period)) {
// Interface. <something> is parsed as a property reference expression.
// Just return "unknown" as a fall-through for now.
Result.suppressDiagnostics();
return NameClassification::Unknown();
}
-
+
QualType T = Context.getObjCInterfaceType(Class);
return ParsedType::make(T);
}
@@ -1018,7 +1062,7 @@ Corrected:
return buildNestedType(*this, SS, T, NameLoc);
return ParsedType::make(T);
}
-
+
if (FirstDecl->isCXXClassMember())
return BuildPossibleImplicitMemberExpr(SS, SourceLocation(), Result,
nullptr, S);
@@ -1035,15 +1079,15 @@ DeclContext *Sema::getContainingDC(DeclContext *DC) {
// Functions defined inline within classes aren't parsed until we've
// finished parsing the top-level class, so the top-level class is
// the context we'll need to return to.
- // A Lambda call operator whose parent is a class must not be treated
+ // A Lambda call operator whose parent is a class must not be treated
// as an inline member function. A Lambda can be used legally
// either as an in-class member initializer or a default argument. These
// are parsed once the class has been marked complete and so the containing
// context would be the nested class (when the lambda is defined in one);
- // If the class is not complete, then the lambda is being used in an
+ // If the class is not complete, then the lambda is being used in an
// ill-formed fashion (such as to specify the width of a bit-field, or
- // in an array-bound) - in which case we still want to return the
- // lexically containing DC (which could be a nested class).
+ // in an array-bound) - in which case we still want to return the
+ // lexically containing DC (which could be a nested class).
if (isa<FunctionDecl>(DC) && !isLambdaCallOperator(DC)) {
DC = DC->getLexicalParent();
@@ -1143,7 +1187,6 @@ void Sema::ExitDeclaratorContext(Scope *S) {
// disappear.
}
-
void Sema::ActOnReenterFunctionContext(Scope* S, Decl *D) {
// We assume that the caller has already called
// ActOnReenterTemplateScope so getTemplatedDecl() works.
@@ -1168,7 +1211,6 @@ void Sema::ActOnReenterFunctionContext(Scope* S, Decl *D) {
}
}
-
void Sema::ActOnExitFunctionContext() {
// Same implementation as PopDeclContext, but returns to the lexical parent,
// rather than the top-level class.
@@ -1177,7 +1219,6 @@ void Sema::ActOnExitFunctionContext() {
assert(CurContext && "Popped translation unit!");
}
-
/// \brief Determine whether we allow overloading of the function
/// PrevDecl with another declaration.
///
@@ -1226,7 +1267,7 @@ void Sema::PushOnScopeChains(NamedDecl *D, Scope *S, bool AddToContext) {
cast<FunctionDecl>(D)->isFunctionTemplateSpecialization())
return;
- // If this replaces anything in the current scope,
+ // If this replaces anything in the current scope,
IdentifierResolver::iterator I = IdResolver.begin(D->getDeclName()),
IEnd = IdResolver.end();
for (; I != IEnd; ++I) {
@@ -1240,7 +1281,7 @@ void Sema::PushOnScopeChains(NamedDecl *D, Scope *S, bool AddToContext) {
}
S->AddDecl(D);
-
+
if (isa<LabelDecl>(D) && !cast<LabelDecl>(D)->isGnuLocal()) {
// Implicitly-generated labels may end up getting generated in an order that
// isn't strictly lexical, which breaks name lookup. Be careful to insert
@@ -1253,7 +1294,7 @@ void Sema::PushOnScopeChains(NamedDecl *D, Scope *S, bool AddToContext) {
} else if (IDC->Encloses(CurContext))
break;
}
-
+
IdResolver.InsertDeclAfter(I, D);
} else {
IdResolver.AddDecl(D);
@@ -1416,6 +1457,9 @@ bool Sema::ShouldWarnIfUnusedFileScopedDecl(const DeclaratorDecl *D) const {
if (VD->isStaticDataMember() &&
VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
return false;
+
+ if (VD->isInline() && !isMainFileLoc(*this, VD->getLocation()))
+ return false;
} else {
return false;
}
@@ -1469,7 +1513,7 @@ static bool ShouldDiagnoseUnusedDecl(const NamedDecl *D) {
if (isa<TypedefNameDecl>(D))
return true;
-
+
// White-list anything that isn't a local variable.
if (!isa<VarDecl>(D) || isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D))
return false;
@@ -1487,7 +1531,7 @@ static bool ShouldDiagnoseUnusedDecl(const NamedDecl *D) {
}
// If we failed to complete the type for some reason, or if the type is
- // dependent, don't diagnose the variable.
+ // dependent, don't diagnose the variable.
if (Ty->isIncompleteType() || Ty->isDependentType())
return false;
@@ -1517,7 +1561,7 @@ static bool ShouldDiagnoseUnusedDecl(const NamedDecl *D) {
// TODO: __attribute__((unused)) templates?
}
-
+
return true;
}
@@ -1531,7 +1575,6 @@ static void GenerateFixForUnusedDecl(const NamedDecl *D, ASTContext &Ctx,
Hint = FixItHint::CreateRemoval(CharSourceRange::
getCharRange(D->getLocStart(), AfterColon));
}
- return;
}
void Sema::DiagnoseUnusedNestedTypedefs(const RecordDecl *D) {
@@ -1558,7 +1601,7 @@ void Sema::DiagnoseUnusedDecl(const NamedDecl *D) {
UnusedLocalTypedefNameCandidates.insert(TD);
return;
}
-
+
FixItHint Hint;
GenerateFixForUnusedDecl(D, Context, Hint);
@@ -1608,13 +1651,23 @@ void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) {
if (const auto *RD = dyn_cast<RecordDecl>(D))
DiagnoseUnusedNestedTypedefs(RD);
}
-
+
// If this was a forward reference to a label, verify it was defined.
if (LabelDecl *LD = dyn_cast<LabelDecl>(D))
CheckPoppedLabel(LD, *this);
-
- // Remove this name from our lexical scope.
+
+ // Remove this name from our lexical scope, and warn on it if we haven't
+ // already.
IdResolver.RemoveDecl(D);
+ auto ShadowI = ShadowingDecls.find(D);
+ if (ShadowI != ShadowingDecls.end()) {
+ if (const auto *FD = dyn_cast<FieldDecl>(ShadowI->second)) {
+ Diag(D->getLocation(), diag::warn_ctor_parm_shadows_field)
+ << D << FD << FD->getParent();
+ Diag(FD->getLocation(), diag::note_previous_declaration);
+ }
+ ShadowingDecls.erase(ShadowI);
+ }
}
}
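A sketch of the deferred diagnostic (illustrative; the firing conditions follow the ShadowingDecls bookkeeping above, and the wording follows the diag names):

class Account {
  int balance;
public:
  Account(int balance) {}  // when the constructor scope is popped, warn that
                           // the parameter 'balance' shadows the field, with
                           // a note at the field's declaration
};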
@@ -1697,7 +1750,7 @@ static void LookupPredefedObjCSuperType(Sema &ThisSema, Scope *S,
if (!II->isStr("objc_msgSendSuper"))
return;
ASTContext &Context = ThisSema.Context;
-
+
LookupResult Result(ThisSema, &Context.Idents.get("objc_super"),
SourceLocation(), Sema::LookupTagName);
ThisSema.LookupName(Result, S);
@@ -1748,6 +1801,9 @@ NamedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID,
<< Context.BuiltinInfo.getName(ID);
}
+ if (R.isNull())
+ return nullptr;
+
DeclContext *Parent = Context.getTranslationUnitDecl();
if (getLangOpts().CPlusPlus) {
LinkageSpecDecl *CLinkageDecl =
@@ -1855,13 +1911,13 @@ bool Sema::isIncompatibleTypedef(TypeDecl *Old, TypedefNameDecl *New) {
if (Old->getLocation().isValid())
Diag(Old->getLocation(), diag::note_previous_definition);
New->setInvalidDecl();
- return true;
+ return true;
}
-
+
if (OldType != NewType &&
!OldType->isDependentType() &&
!NewType->isDependentType() &&
- !Context.hasSameType(OldType, NewType)) {
+ !Context.hasSameType(OldType, NewType)) {
int Kind = isa<TypeAliasDecl>(Old) ? 1 : 0;
Diag(New->getLocation(), diag::err_redefinition_different_typedef)
<< Kind << NewType << OldType;
@@ -2000,7 +2056,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
return;
// C++0x [dcl.typedef]p4:
- // In a given class scope, a typedef specifier can be used to redefine
+ // In a given class scope, a typedef specifier can be used to redefine
// any class-name declared in that scope that is not also a typedef-name
// to refer to the type to which it already refers.
//
@@ -2032,7 +2088,7 @@ void Sema::MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
// Modules always permit redefinition of typedefs, as does C11.
if (getLangOpts().Modules || getLangOpts().C11)
return;
-
+
// If we have a redefinition of a typedef in C, emit a warning. This warning
// is normally mapped to an error, but can be controlled with
// -Wtypedef-redefinition. If either the original or the redefinition is
@@ -2194,9 +2250,11 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
unsigned AttrSpellingListIndex = Attr->getSpellingListIndex();
if (const auto *AA = dyn_cast<AvailabilityAttr>(Attr))
NewAttr = S.mergeAvailabilityAttr(D, AA->getRange(), AA->getPlatform(),
- AA->getIntroduced(), AA->getDeprecated(),
+ AA->isImplicit(), AA->getIntroduced(),
+ AA->getDeprecated(),
AA->getObsoleted(), AA->getUnavailable(),
- AA->getMessage(), AMK,
+ AA->getMessage(), AA->getStrict(),
+ AA->getReplacement(), AMK,
AttrSpellingListIndex);
else if (const auto *VA = dyn_cast<VisibilityAttr>(Attr))
NewAttr = S.mergeVisibilityAttr(D, VA->getRange(), VA->getVisibility(),
@@ -2252,6 +2310,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
if (NewAttr) {
NewAttr->setInherited(true);
D->addAttr(NewAttr);
+ if (isa<MSInheritanceAttr>(NewAttr))
+ S.Consumer.AssignInheritanceModel(cast<CXXRecordDecl>(D));
return true;
}
@@ -2267,11 +2327,8 @@ static const Decl *getDefinition(const Decl *D) {
return Def;
return VD->getActingDefinition();
}
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
- const FunctionDecl* Def;
- if (FD->isDefined(Def))
- return Def;
- }
+ if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+ return FD->getDefinition();
return nullptr;
}
@@ -2296,7 +2353,7 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
for (unsigned I = 0, E = NewAttributes.size(); I != E;) {
const Attr *NewAttribute = NewAttributes[I];
- if (isa<AliasAttr>(NewAttribute)) {
+ if (isa<AliasAttr>(NewAttribute) || isa<IFuncAttr>(NewAttribute)) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(New)) {
Sema::SkipBodyInfo SkipBody;
S.CheckForFunctionRedefinition(FD, cast<FunctionDecl>(Def), &SkipBody);
@@ -2339,7 +2396,7 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
++I;
continue;
} else if (const AlignedAttr *AA = dyn_cast<AlignedAttr>(NewAttribute)) {
- if (AA->isAlignas()) {
+ if (AA->isAlignas()) {
// C++11 [dcl.align]p6:
// if any declaration of an entity has an alignment-specifier,
// every defining declaration of that entity shall specify an
@@ -2396,6 +2453,24 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old,
}
}
+ // Re-declaration cannot add abi_tag's.
+ if (const auto *NewAbiTagAttr = New->getAttr<AbiTagAttr>()) {
+ if (const auto *OldAbiTagAttr = Old->getAttr<AbiTagAttr>()) {
+ for (const auto &NewTag : NewAbiTagAttr->tags()) {
+ if (std::find(OldAbiTagAttr->tags_begin(), OldAbiTagAttr->tags_end(),
+ NewTag) == OldAbiTagAttr->tags_end()) {
+ Diag(NewAbiTagAttr->getLocation(),
+ diag::err_new_abi_tag_on_redeclaration)
+ << NewTag;
+ Diag(OldAbiTagAttr->getLocation(), diag::note_previous_declaration);
+ }
+ }
+ } else {
+ Diag(NewAbiTagAttr->getLocation(), diag::err_abi_tag_on_redeclaration);
+ Diag(Old->getLocation(), diag::note_previous_declaration);
+ }
+ }
+
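What the new abi_tag check accepts and rejects, sketched (illustrative):

__attribute__((abi_tag("v1"))) void f();
__attribute__((abi_tag("v1"))) void f();        // OK: no new tags introduced
__attribute__((abi_tag("v1", "v2"))) void f();  // error: redeclaration adds 'v2'
void g();
__attribute__((abi_tag("v1"))) void g();        // error: tag appears only on
                                                // the redeclaration
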
if (!Old->hasAttrs())
return;
@@ -2519,7 +2594,7 @@ struct GNUCompatibleParamWarning {
QualType PromotedType;
};
-}
+} // end anonymous namespace
/// getSpecialMember - get the special member enum for a method.
Sema::CXXSpecialMember Sema::getSpecialMember(const CXXMethodDecl *MD) {
@@ -2799,11 +2874,11 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
Diag(OldLocation, diag::note_previous_declaration);
return true;
}
-
+
NewTypeInfo = NewTypeInfo.withProducesResult(true);
RequiresAdjustment = true;
}
-
+
if (RequiresAdjustment) {
const FunctionType *AdjustedType = New->getType()->getAs<FunctionType>();
AdjustedType = Context.adjustFunctionType(AdjustedType, NewTypeInfo);
@@ -2956,11 +3031,11 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
NewMethod->setImplicit();
} else {
Diag(NewMethod->getLocation(),
- diag::err_definition_of_implicitly_declared_member)
+ diag::err_definition_of_implicitly_declared_member)
<< New << getSpecialMember(OldMethod);
return true;
}
- } else if (OldMethod->isExplicitlyDefaulted() && !isFriend) {
+ } else if (OldMethod->getFirstDecl()->isExplicitlyDefaulted() && !isFriend) {
Diag(NewMethod->getLocation(),
diag::err_definition_of_explicitly_defaulted_member)
<< getSpecialMember(OldMethod);
@@ -3221,10 +3296,8 @@ bool Sema::MergeCompatibleFunctionDecls(FunctionDecl *New, FunctionDecl *Old,
return false;
}
-
void Sema::mergeObjCMethodDecls(ObjCMethodDecl *newMethod,
ObjCMethodDecl *oldMethod) {
-
// Merge the attributes, including deprecated/unavailable
AvailabilityMergeKind MergeKind =
isa<ObjCProtocolDecl>(oldMethod->getDeclContext())
@@ -3245,6 +3318,22 @@ void Sema::mergeObjCMethodDecls(ObjCMethodDecl *newMethod,
CheckObjCMethodOverride(newMethod, oldMethod);
}
+static void diagnoseVarDeclTypeMismatch(Sema &S, VarDecl *New, VarDecl* Old) {
+ assert(!S.Context.hasSameType(New->getType(), Old->getType()));
+
+ S.Diag(New->getLocation(), New->isThisDeclarationADefinition()
+ ? diag::err_redefinition_different_type
+ : diag::err_redeclaration_different_type)
+ << New->getDeclName() << New->getType() << Old->getType();
+
+ diag::kind PrevDiag;
+ SourceLocation OldLocation;
+ std::tie(PrevDiag, OldLocation)
+ = getNoteDiagForInvalidRedeclaration(Old, New);
+ S.Diag(OldLocation, PrevDiag);
+ New->setInvalidDecl();
+}
+
/// MergeVarDeclTypes - We parsed a variable 'New' which has the same name and
/// scope as a previous declaration 'Old'. Figure out how to merge their types,
/// emitting diagnostics as appropriate.
@@ -3271,21 +3360,40 @@ void Sema::MergeVarDeclTypes(VarDecl *New, VarDecl *Old,
// object or function shall be identical, except that declarations for an
// array object can specify array types that differ by the presence or
// absence of a major array bound (8.3.4).
- else if (Old->getType()->isIncompleteArrayType() &&
- New->getType()->isArrayType()) {
- const ArrayType *OldArray = Context.getAsArrayType(Old->getType());
- const ArrayType *NewArray = Context.getAsArrayType(New->getType());
- if (Context.hasSameType(OldArray->getElementType(),
- NewArray->getElementType()))
- MergedT = New->getType();
- } else if (Old->getType()->isArrayType() &&
- New->getType()->isIncompleteArrayType()) {
+ else if (Old->getType()->isArrayType() && New->getType()->isArrayType()) {
const ArrayType *OldArray = Context.getAsArrayType(Old->getType());
const ArrayType *NewArray = Context.getAsArrayType(New->getType());
- if (Context.hasSameType(OldArray->getElementType(),
- NewArray->getElementType()))
- MergedT = Old->getType();
- } else if (New->getType()->isObjCObjectPointerType() &&
+
+      // We are merging a variable declaration New into Old. If New has an
+      // array bound that differs from the bound on any prior declaration,
+      // we should diagnose the mismatch.
+ if (!NewArray->isIncompleteArrayType()) {
+ for (VarDecl *PrevVD = Old->getMostRecentDecl(); PrevVD;
+ PrevVD = PrevVD->getPreviousDecl()) {
+ const ArrayType *PrevVDTy = Context.getAsArrayType(PrevVD->getType());
+ if (PrevVDTy->isIncompleteArrayType())
+ continue;
+
+ if (!Context.hasSameType(NewArray, PrevVDTy))
+ return diagnoseVarDeclTypeMismatch(*this, New, PrevVD);
+ }
+ }
+
+ if (OldArray->isIncompleteArrayType() && NewArray->isArrayType()) {
+ if (Context.hasSameType(OldArray->getElementType(),
+ NewArray->getElementType()))
+ MergedT = New->getType();
+ }
+ // FIXME: Check visibility. New is hidden but has a complete type. If New
+ // has no array bound, it should not inherit one from Old, if Old is not
+ // visible.
+ else if (OldArray->isArrayType() && NewArray->isIncompleteArrayType()) {
+ if (Context.hasSameType(OldArray->getElementType(),
+ NewArray->getElementType()))
+ MergedT = Old->getType();
+ }
+ }
+ else if (New->getType()->isObjCObjectPointerType() &&
Old->getType()->isObjCObjectPointerType()) {
MergedT = Context.mergeObjCGCQualifiers(New->getType(),
Old->getType());
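
The rewritten array-merging code above now walks every previous declaration of the variable and compares completed array bounds, so a mismatch is diagnosed even when the conflicting bounds were never merged directly with each other. An illustrative sketch of source the new loop rejects (the example comes from the FIXME removed further down in this hunk):

    extern int arr[];
    void f() { extern int arr[2]; }  // OK: completes the incomplete array type
    void g() { extern int arr[3]; }  // now diagnosed: bound conflicts with arr[2]
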
@@ -3311,27 +3419,7 @@ void Sema::MergeVarDeclTypes(VarDecl *New, VarDecl *Old,
New->setType(Context.DependentTy);
return;
}
-
- // FIXME: Even if this merging succeeds, some other non-visible declaration
- // of this variable might have an incompatible type. For instance:
- //
- // extern int arr[];
- // void f() { extern int arr[2]; }
- // void g() { extern int arr[3]; }
- //
- // Neither C nor C++ requires a diagnostic for this, but we should still try
- // to diagnose it.
- Diag(New->getLocation(), New->isThisDeclarationADefinition()
- ? diag::err_redefinition_different_type
- : diag::err_redeclaration_different_type)
- << New->getDeclName() << New->getType() << Old->getType();
-
- diag::kind PrevDiag;
- SourceLocation OldLocation;
- std::tie(PrevDiag, OldLocation) =
- getNoteDiagForInvalidRedeclaration(Old, New);
- Diag(OldLocation, PrevDiag);
- return New->setInvalidDecl();
+ return diagnoseVarDeclTypeMismatch(*this, New, Old);
}
// Don't actually update the type on the new declaration if the old
@@ -3425,17 +3513,17 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
// C++ [class.mem]p1:
// A member shall not be declared twice in the member-specification [...]
- //
+ //
// Here, we need only consider static data members.
if (Old->isStaticDataMember() && !New->isOutOfLine()) {
- Diag(New->getLocation(), diag::err_duplicate_member)
+ Diag(New->getLocation(), diag::err_duplicate_member)
<< New->getIdentifier();
Diag(Old->getLocation(), diag::note_previous_declaration);
New->setInvalidDecl();
}
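
For reference, the static-data-member check above fires on a sketch like this:

    struct S {
      static int x;
      static int x;  // err_duplicate_member: declared twice in the
                     // member-specification
    };
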
-
+
mergeDeclAttributes(New, Old);
- // Warn if an already-declared variable is made a weak_import in a subsequent
+ // Warn if an already-declared variable is made a weak_import in a subsequent
// declaration
if (New->hasAttr<WeakImportAttr>() &&
Old->getStorageClass() == SC_None &&
@@ -3533,6 +3621,23 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
return New->setInvalidDecl();
}
+ if (New->isInline() && !Old->getMostRecentDecl()->isInline()) {
+ if (VarDecl *Def = Old->getDefinition()) {
+ // C++1z [dcl.fcn.spec]p4:
+ // If the definition of a variable appears in a translation unit before
+ // its first declaration as inline, the program is ill-formed.
+ Diag(New->getLocation(), diag::err_inline_decl_follows_def) << New;
+ Diag(Def->getLocation(), diag::note_previous_definition);
+ }
+ }
+
+ // If this redeclaration makes the function inline, we may need to add it to
+ // UndefinedButUsed.
+ if (!Old->isInline() && New->isInline() && Old->isUsed(false) &&
+ !Old->getDefinition() && !New->isThisDeclarationADefinition())
+ UndefinedButUsed.insert(std::make_pair(Old->getCanonicalDecl(),
+ SourceLocation()));
+
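
A minimal sketch of the two situations the new code above handles, assuming C++1z inline variables:

    int n;                // definition
    extern inline int n;  // err_inline_decl_follows_def: the definition appeared
                          // before the first inline declaration

    extern int v;
    int use() { return v; }  // v is used
    extern inline int v;     // v is now inline, still used, and still undefined,
                             // so it is recorded in UndefinedButUsed
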
if (New->getTLSKind() != Old->getTLSKind()) {
if (!Old->getTLSKind()) {
Diag(New->getLocation(), diag::err_thread_non_thread) << New->getDeclName();
@@ -3564,6 +3669,12 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
New->getDeclContext()->isDependentContext())) {
// The previous definition is hidden, and multiple definitions are
// permitted (in separate TUs). Form another definition of it.
+ } else if (Old->isStaticDataMember() &&
+ Old->getCanonicalDecl()->isInline() &&
+ Old->getCanonicalDecl()->isConstexpr()) {
+ // This definition won't be a definition any more once it's been merged.
+ Diag(New->getLocation(),
+ diag::warn_deprecated_redundant_constexpr_static_def);
} else {
Diag(New->getLocation(), diag::err_redefinition) << New;
Diag(Def->getLocation(), diag::note_previous_definition);
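
The new branch above turns a formerly required pattern into a deprecation warning; a sketch:

    struct S { static constexpr int x = 1; };  // implicitly inline in C++1z
    constexpr int S::x;  // needed for odr-use before C++1z; now redundant and
                         // flagged by warn_deprecated_redundant_constexpr_static_def
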
@@ -3592,13 +3703,18 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
New->setAccess(Old->getAccess());
if (NewTemplate)
NewTemplate->setAccess(New->getAccess());
+
+ if (Old->isInline())
+ New->setImplicitlyInline();
}
/// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with
/// no declarator (e.g. "struct foo;") is parsed.
-Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
- DeclSpec &DS) {
- return ParsedFreeStandingDeclSpec(S, AS, DS, MultiTemplateParamsArg());
+Decl *
+Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS, DeclSpec &DS,
+ RecordDecl *&AnonRecord) {
+ return ParsedFreeStandingDeclSpec(S, AS, DS, MultiTemplateParamsArg(), false,
+ AnonRecord);
}
// The MS ABI changed between VS2013 and VS2015 with regard to numbers used to
@@ -3704,10 +3820,11 @@ static unsigned GetDiagnosticTypeSpecifierID(DeclSpec::TST T) {
/// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with
/// no declarator (e.g. "struct foo;") is parsed. It also accepts template
/// parameters to cope with template friend declarations.
-Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
- DeclSpec &DS,
- MultiTemplateParamsArg TemplateParams,
- bool IsExplicitInstantiation) {
+Decl *
+Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS, DeclSpec &DS,
+ MultiTemplateParamsArg TemplateParams,
+ bool IsExplicitInstantiation,
+ RecordDecl *&AnonRecord) {
Decl *TagD = nullptr;
TagDecl *Tag = nullptr;
if (DS.getTypeSpecType() == DeclSpec::TST_class ||
@@ -3745,6 +3862,10 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
<< DS.getSourceRange();
}
+ if (DS.isInlineSpecified())
+ Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
+ << getLangOpts().CPlusPlus1z;
+
if (DS.isConstexprSpecified()) {
// C++0x [dcl.constexpr]p1: constexpr can only be applied to declarations
// and definitions of functions and variables.
@@ -3802,9 +3923,19 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
if (!Record->getDeclName() && Record->isCompleteDefinition() &&
DS.getStorageClassSpec() != DeclSpec::SCS_typedef) {
if (getLangOpts().CPlusPlus ||
- Record->getDeclContext()->isRecord())
+ Record->getDeclContext()->isRecord()) {
+ // If CurContext is a DeclContext that can contain statements,
+ // RecursiveASTVisitor won't visit the decls that
+ // BuildAnonymousStructOrUnion() will put into CurContext.
+ // Also store them here so that they can be part of the
+ // DeclStmt that gets created in this case.
+ // FIXME: Also return the IndirectFieldDecls created by
+      // BuildAnonymousStructOrUnion(), for the same reason?
+ if (CurContext->isFunctionOrMethod())
+ AnonRecord = Record;
return BuildAnonymousStructOrUnion(S, DS, AS, Record,
Context.getPrintingPolicy());
+ }
DeclaresAnything = false;
}
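
The AnonRecord out-parameter lets the parser fold block-scope anonymous members into the DeclStmt it builds; the construct in question, sketched:

    void f() {
      union { int i; float fl; };  // members are injected into f's scope
      i = 42;                      // names a member of the unnamed union object
    }
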
@@ -3926,6 +4057,8 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
// Restrict is covered above.
if (DS.getTypeQualifiers() & DeclSpec::TQ_atomic)
Diag(DS.getAtomicSpecLoc(), DiagID) << "_Atomic";
+ if (DS.getTypeQualifiers() & DeclSpec::TQ_unaligned)
+ Diag(DS.getUnalignedSpecLoc(), DiagID) << "__unaligned";
}
// Warn about ignored type attributes, for example:
@@ -3992,12 +4125,10 @@ static bool CheckAnonMemberRedeclaration(Sema &SemaRef,
///
/// This routine is recursive, injecting the names of nested anonymous
/// structs/unions into the owning context and scope as well.
-static bool InjectAnonymousStructOrUnionMembers(Sema &SemaRef, Scope *S,
- DeclContext *Owner,
- RecordDecl *AnonRecord,
- AccessSpecifier AS,
- SmallVectorImpl<NamedDecl *> &Chaining,
- bool MSAnonStruct) {
+static bool
+InjectAnonymousStructOrUnionMembers(Sema &SemaRef, Scope *S, DeclContext *Owner,
+ RecordDecl *AnonRecord, AccessSpecifier AS,
+ SmallVectorImpl<NamedDecl *> &Chaining) {
bool Invalid = false;
  // Look at every FieldDecl and IndirectFieldDecl with a name.
@@ -4033,7 +4164,7 @@ static bool InjectAnonymousStructOrUnionMembers(Sema &SemaRef, Scope *S,
IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create(
SemaRef.Context, Owner, VD->getLocation(), VD->getIdentifier(),
- VD->getType(), NamedChain, Chaining.size());
+ VD->getType(), {NamedChain, Chaining.size()});
for (const auto *Attr : VD->attrs())
IndirectField->addAttr(Attr->clone(SemaRef.Context));
@@ -4143,7 +4274,7 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
cast<NamespaceDecl>(Owner)->getDeclName()))) {
Diag(Record->getLocation(), diag::err_anonymous_union_not_static)
<< FixItHint::CreateInsertion(Record->getLocation(), "static ");
-
+
// Recover by adding 'static'.
DS.SetStorageClassSpec(*this, DeclSpec::SCS_static, SourceLocation(),
PrevSpec, DiagID, Policy);
@@ -4156,9 +4287,9 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
Diag(DS.getStorageClassSpecLoc(),
diag::err_anonymous_union_with_storage_spec)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
-
+
// Recover by removing the storage specifier.
- DS.SetStorageClassSpec(*this, DeclSpec::SCS_unspecified,
+ DS.SetStorageClassSpec(*this, DeclSpec::SCS_unspecified,
SourceLocation(),
PrevSpec, DiagID, Context.getPrintingPolicy());
}
@@ -4185,6 +4316,11 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
diag::ext_anonymous_struct_union_qualified)
<< Record->isUnion() << "_Atomic"
<< FixItHint::CreateRemoval(DS.getAtomicSpecLoc());
+ if (DS.getTypeQualifiers() & DeclSpec::TQ_unaligned)
+ Diag(DS.getUnalignedSpecLoc(),
+ diag::ext_anonymous_struct_union_qualified)
+ << Record->isUnion() << "__unaligned"
+ << FixItHint::CreateRemoval(DS.getUnalignedSpecLoc());
DS.ClearTypeQualifiers();
}
@@ -4254,7 +4390,7 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
DK = diag::err_anonymous_record_with_function;
else if (isa<VarDecl>(Mem))
DK = diag::err_anonymous_record_with_static;
-
+
// Visual C++ allows type definition in anonymous struct or union.
if (getLangOpts().MicrosoftExt &&
DK == diag::err_anonymous_record_with_type)
@@ -4340,8 +4476,7 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
SmallVector<NamedDecl*, 2> Chain;
Chain.push_back(Anon);
- if (InjectAnonymousStructOrUnionMembers(*this, S, Owner, Record, AS,
- Chain, false))
+ if (InjectAnonymousStructOrUnionMembers(*this, S, Owner, Record, AS, Chain))
Invalid = true;
if (VarDecl *NewVD = dyn_cast<VarDecl>(Anon)) {
@@ -4413,7 +4548,7 @@ Decl *Sema::BuildMicrosoftCAnonymousStruct(Scope *S, DeclSpec &DS,
if (RequireCompleteType(Anon->getLocation(), RecTy,
diag::err_field_incomplete) ||
InjectAnonymousStructOrUnionMembers(*this, S, CurContext, RecordDef,
- AS_none, Chain, true)) {
+ AS_none, Chain)) {
Anon->setInvalidDecl();
ParentDecl->setInvalidDecl();
}
@@ -4662,7 +4797,7 @@ Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) {
}
/// DiagnoseClassNameShadow - Implement C++ [class.mem]p13:
-/// If T is the name of a class, then each of the following shall have a
+/// If T is the name of a class, then each of the following shall have a
/// name different from T:
/// - every static data member of class T;
/// - every member function of class T
@@ -4683,12 +4818,12 @@ bool Sema::DiagnoseClassNameShadow(DeclContext *DC,
return false;
}
-/// \brief Diagnose a declaration whose declarator-id has the given
+/// \brief Diagnose a declaration whose declarator-id has the given
/// nested-name-specifier.
///
/// \param SS The nested-name-specifier of the declarator-id.
///
-/// \param DC The declaration context to which the nested-name-specifier
+/// \param DC The declaration context to which the nested-name-specifier
/// resolves.
///
/// \param Name The name of the entity being declared.
@@ -4734,15 +4869,15 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
Diag(Loc, diag::err_invalid_declarator_global_scope)
<< Name << SS.getRange();
else if (isa<FunctionDecl>(Cur))
- Diag(Loc, diag::err_invalid_declarator_in_function)
+ Diag(Loc, diag::err_invalid_declarator_in_function)
<< Name << SS.getRange();
else if (isa<BlockDecl>(Cur))
- Diag(Loc, diag::err_invalid_declarator_in_block)
+ Diag(Loc, diag::err_invalid_declarator_in_block)
<< Name << SS.getRange();
else
Diag(Loc, diag::err_invalid_declarator_scope)
<< Name << cast<NamedDecl>(Cur) << cast<NamedDecl>(DC) << SS.getRange();
-
+
return true;
}
@@ -4751,7 +4886,7 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
Diag(Loc, diag::err_member_qualification)
<< Name << SS.getRange();
SS.clear();
-
+
// C++ constructors and destructors with incorrect scopes can break
// our AST invariants by having the wrong underlying types. If
// that's the case, then drop this declaration entirely.
@@ -4760,10 +4895,10 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
!Context.hasSameType(Name.getCXXNameType(),
Context.getTypeDeclType(cast<CXXRecordDecl>(Cur))))
return true;
-
+
return false;
}
-
+
// C++11 [dcl.meaning]p1:
// [...] "The nested-name-specifier of the qualified declarator-id shall
// not begin with a decltype-specifer"
@@ -4805,7 +4940,7 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
if (D.getCXXScopeSpec().isInvalid())
D.setInvalidType();
else if (D.getCXXScopeSpec().isSet()) {
- if (DiagnoseUnexpandedParameterPack(D.getCXXScopeSpec(),
+ if (DiagnoseUnexpandedParameterPack(D.getCXXScopeSpec(),
UPPC_DeclarationQualifier))
return nullptr;
@@ -4824,7 +4959,7 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
}
bool IsDependentContext = DC->isDependentContext();
- if (!IsDependentContext &&
+ if (!IsDependentContext &&
RequireCompleteDeclContext(D.getCXXScopeSpec(), DC))
return nullptr;
@@ -4904,11 +5039,11 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
LookupQualifiedName(Previous, DC);
// C++ [dcl.meaning]p1:
- // When the declarator-id is qualified, the declaration shall refer to a
- // previously declared member of the class or namespace to which the
+ // When the declarator-id is qualified, the declaration shall refer to a
+ // previously declared member of the class or namespace to which the
// qualifier refers (or, in the case of a namespace, of an element of the
// inline namespace set of that namespace (7.3.1)) or to a specialization
- // thereof; [...]
+ // thereof; [...]
//
// Note that we already checked the context above, and that we do not have
// enough information to make sure that Previous contains the declaration
@@ -4924,10 +5059,10 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
// In this case, Previous will point to the overload set
// containing the two f's declared in X, but neither of them
// matches.
-
+
// C++ [dcl.meaning]p1:
- // [...] the member shall not merely have been introduced by a
- // using-declaration in the scope of the class or namespace nominated by
+ // [...] the member shall not merely have been introduced by a
+ // using-declaration in the scope of the class or namespace nominated by
// the nested-name-specifier of the declarator-id.
RemoveUsingDecls(Previous);
}
@@ -4995,10 +5130,9 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
if (!New)
return nullptr;
- // If this has an identifier and is not an invalid redeclaration or
- // function template specialization, add it to the scope stack.
- if (New->getDeclName() && AddToScope &&
- !(D.isRedeclaration() && New->isInvalidDecl())) {
+ // If this has an identifier and is not a function template specialization,
+ // add it to the scope stack.
+ if (New->getDeclName() && AddToScope) {
// Only make a locally-scoped extern declaration visible if it is the first
// declaration of this entity. Qualified lookup for such an entity should
// only find this declaration if there is no visible declaration of it.
@@ -5008,6 +5142,9 @@ NamedDecl *Sema::HandleDeclarator(Scope *S, Declarator &D,
CurContext->addHiddenDecl(New);
}
+ if (isInOpenMPDeclareTargetContext())
+ checkDeclIsAllowedInOpenMPTarget(nullptr, New);
+
return New;
}
@@ -5024,10 +5161,10 @@ static QualType TryToFixInvalidVariablyModifiedType(QualType T,
// constant expression folding, like struct {char x[(int)(char*)2];}
SizeIsNegative = false;
Oversized = 0;
-
+
if (T->isDependentType())
return QualType();
-
+
QualifierCollector Qs;
const Type *Ty = Qs.strip(T);
@@ -5076,7 +5213,7 @@ static QualType TryToFixInvalidVariablyModifiedType(QualType T,
Oversized = Res;
return QualType();
}
-
+
return Context.getConstantArrayType(VLATy->getElementType(),
Res, ArrayType::Normal, 0);
}
@@ -5154,11 +5291,7 @@ NamedDecl *Sema::findLocallyScopedExternCDecl(DeclarationName Name) {
/// does not identify a function.
void Sema::DiagnoseFunctionSpecifiers(const DeclSpec &DS) {
// FIXME: We should probably indicate the identifier in question to avoid
- // confusion for constructs like "inline int a(), b;"
- if (DS.isInlineSpecified())
- Diag(DS.getInlineSpecLoc(),
- diag::err_inline_non_function);
-
+ // confusion for constructs like "virtual int a(), b;"
if (DS.isVirtualSpecified())
Diag(DS.getVirtualSpecLoc(),
diag::err_virtual_non_function);
@@ -5187,6 +5320,9 @@ Sema::ActOnTypedefDeclarator(Scope* S, Declarator& D, DeclContext* DC,
DiagnoseFunctionSpecifiers(D.getDeclSpec());
+ if (D.getDeclSpec().isInlineSpecified())
+ Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
+ << getLangOpts().CPlusPlus1z;
if (D.getDeclSpec().isConstexprSpecified())
Diag(D.getDeclSpec().getConstexprSpecLoc(), diag::err_invalid_constexpr)
<< 1;
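
The diagnostic added above rejects 'inline' on a typedef; the getLangOpts().CPlusPlus1z argument only adjusts the %select wording, since C++1z also permits 'inline' on variables. Sketch:

    inline typedef int T;  // err_inline_non_function: 'inline' applies only to
                           // functions (and, in C++1z, variables)
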
@@ -5241,7 +5377,7 @@ Sema::CheckTypedefForVariablyModifiedType(Scope *S, TypedefNameDecl *NewTD) {
else if (T->isVariableArrayType())
Diag(NewTD->getLocation(), diag::err_vla_decl_in_file_scope);
else if (Oversized.getBoolValue())
- Diag(NewTD->getLocation(), diag::err_array_too_large)
+ Diag(NewTD->getLocation(), diag::err_array_too_large)
<< Oversized.toString(10);
else
Diag(NewTD->getLocation(), diag::err_vm_decl_in_file_scope);
@@ -5251,7 +5387,6 @@ Sema::CheckTypedefForVariablyModifiedType(Scope *S, TypedefNameDecl *NewTD) {
}
}
-
/// ActOnTypedefNameDecl - Perform semantic checking for a declaration which
/// declares a typedef-name, either using the 'typedef' type specifier or via
/// a C++0x [dcl.typedef]p2 alias-declaration: 'using T = A;'.
@@ -5323,12 +5458,12 @@ isOutOfScopePreviousDeclaration(NamedDecl *PrevDecl, DeclContext *DC,
if (!OuterContext->isFunctionOrMethod())
// This rule only applies to block-scope declarations.
return false;
-
+
DeclContext *PrevOuterContext = PrevDecl->getDeclContext();
if (PrevOuterContext->isRecord())
// We found a member function: ignore it.
return false;
-
+
// Find the innermost enclosing namespace for the new and
// previous declarations.
OuterContext = OuterContext->getEnclosingNamespaceContext();
@@ -5379,7 +5514,7 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
type = Context.getLifetimeQualifiedType(type, lifetime);
decl->setType(type);
}
-
+
if (VarDecl *var = dyn_cast<VarDecl>(decl)) {
// Thread-local variables cannot have lifetime.
if (lifetime && lifetime != Qualifiers::OCL_ExplicitNone &&
@@ -5389,7 +5524,7 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
return true;
}
}
-
+
return false;
}
@@ -5418,7 +5553,7 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) {
if (const auto *Attr = VD->getAttr<AliasAttr>()) {
assert(VD->isThisDeclarationADefinition() &&
!VD->isExternallyVisible() && "Broken AliasAttr handled late!");
- S.Diag(Attr->getLocation(), diag::err_alias_is_definition) << VD;
+ S.Diag(Attr->getLocation(), diag::err_alias_is_definition) << VD << 0;
VD->dropAttr<AliasAttr>();
}
}
@@ -5458,9 +5593,13 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) {
static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl,
NamedDecl *NewDecl,
- bool IsSpecialization) {
- if (TemplateDecl *OldTD = dyn_cast<TemplateDecl>(OldDecl))
+ bool IsSpecialization,
+ bool IsDefinition) {
+ if (TemplateDecl *OldTD = dyn_cast<TemplateDecl>(OldDecl)) {
OldDecl = OldTD->getTemplatedDecl();
+ if (!IsSpecialization)
+ IsDefinition = false;
+ }
if (TemplateDecl *NewTD = dyn_cast<TemplateDecl>(NewDecl))
NewDecl = NewTD->getTemplatedDecl();
@@ -5516,14 +5655,17 @@ static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl,
// A redeclaration is not allowed to drop a dllimport attribute, the only
// exceptions being inline function definitions, local extern declarations,
- // and qualified friend declarations.
- // NB: MSVC converts such a declaration to dllexport.
+  // qualified friend declarations, or a special MSVC extension: in the last case,
+ // the declaration is treated as if it were marked dllexport.
bool IsInline = false, IsStaticDataMember = false, IsQualifiedFriend = false;
- if (const auto *VD = dyn_cast<VarDecl>(NewDecl))
+ bool IsMicrosoft = S.Context.getTargetInfo().getCXXABI().isMicrosoft();
+ if (const auto *VD = dyn_cast<VarDecl>(NewDecl)) {
// Ignore static data because out-of-line definitions are diagnosed
// separately.
IsStaticDataMember = VD->isStaticDataMember();
- else if (const auto *FD = dyn_cast<FunctionDecl>(NewDecl)) {
+ IsDefinition = VD->isThisDeclarationADefinition(S.Context) !=
+ VarDecl::DeclarationOnly;
+ } else if (const auto *FD = dyn_cast<FunctionDecl>(NewDecl)) {
IsInline = FD->isInlined();
IsQualifiedFriend = FD->getQualifier() &&
FD->getFriendObjectKind() == Decl::FOK_Declared;
@@ -5531,15 +5673,25 @@ static void checkDLLAttributeRedeclaration(Sema &S, NamedDecl *OldDecl,
if (OldImportAttr && !HasNewAttr && !IsInline && !IsStaticDataMember &&
!NewDecl->isLocalExternDecl() && !IsQualifiedFriend) {
- S.Diag(NewDecl->getLocation(),
- diag::warn_redeclaration_without_attribute_prev_attribute_ignored)
- << NewDecl << OldImportAttr;
- S.Diag(OldDecl->getLocation(), diag::note_previous_declaration);
- S.Diag(OldImportAttr->getLocation(), diag::note_previous_attribute);
- OldDecl->dropAttr<DLLImportAttr>();
- NewDecl->dropAttr<DLLImportAttr>();
- } else if (IsInline && OldImportAttr &&
- !S.Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+ if (IsMicrosoft && IsDefinition) {
+ S.Diag(NewDecl->getLocation(),
+ diag::warn_redeclaration_without_import_attribute)
+ << NewDecl;
+ S.Diag(OldDecl->getLocation(), diag::note_previous_declaration);
+ NewDecl->dropAttr<DLLImportAttr>();
+ NewDecl->addAttr(::new (S.Context) DLLExportAttr(
+ NewImportAttr->getRange(), S.Context,
+ NewImportAttr->getSpellingListIndex()));
+ } else {
+ S.Diag(NewDecl->getLocation(),
+ diag::warn_redeclaration_without_attribute_prev_attribute_ignored)
+ << NewDecl << OldImportAttr;
+ S.Diag(OldDecl->getLocation(), diag::note_previous_declaration);
+ S.Diag(OldImportAttr->getLocation(), diag::note_previous_attribute);
+ OldDecl->dropAttr<DLLImportAttr>();
+ NewDecl->dropAttr<DLLImportAttr>();
+ }
+ } else if (IsInline && OldImportAttr && !IsMicrosoft) {
// In MinGW, seeing a function declared inline drops the dllimport attribute.
OldDecl->dropAttr<DLLImportAttr>();
NewDecl->dropAttr<DLLImportAttr>();
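
A sketch of the MS-ABI path added above: a dllimport entity redeclared without the attribute at its definition now warns and is re-marked dllexport rather than silently dropping the attribute.

    __declspec(dllimport) void f();
    void f() {}  // warn_redeclaration_without_import_attribute: the definition is
                 // treated as if it were marked __declspec(dllexport)
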
@@ -5605,10 +5757,9 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
if (!D->isInExternCContext() || D->template hasAttr<OverloadableAttr>())
return false;
- // So do CUDA's host/device attributes if overloading is enabled.
- if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
- (D->template hasAttr<CUDADeviceAttr>() ||
- D->template hasAttr<CUDAHostAttr>()))
+ // So do CUDA's host/device attributes.
+ if (S.getLangOpts().CUDA && (D->template hasAttr<CUDADeviceAttr>() ||
+ D->template hasAttr<CUDAHostAttr>()))
return false;
}
return D->isExternC();
@@ -5616,7 +5767,7 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) {
static bool shouldConsiderLinkage(const VarDecl *VD) {
const DeclContext *DC = VD->getDeclContext()->getRedeclContext();
- if (DC->isFunctionOrMethod())
+ if (DC->isFunctionOrMethod() || isa<OMPDeclareReductionDecl>(DC))
return VD->hasExternalStorage();
if (DC->isFileContext())
return true;
@@ -5627,7 +5778,8 @@ static bool shouldConsiderLinkage(const VarDecl *VD) {
static bool shouldConsiderLinkage(const FunctionDecl *FD) {
const DeclContext *DC = FD->getDeclContext()->getRedeclContext();
- if (DC->isFileContext() || DC->isFunctionOrMethod())
+ if (DC->isFileContext() || DC->isFunctionOrMethod() ||
+ isa<OMPDeclareReductionDecl>(DC))
return true;
if (DC->isRecord())
return false;
@@ -5701,6 +5853,17 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
QualType R = TInfo->getType();
DeclarationName Name = GetNameForDeclarator(D).getName();
+ // OpenCL v2.0 s6.9.b - Image type can only be used as a function argument.
+ // OpenCL v2.0 s6.13.16.1 - Pipe type can only be used as a function
+ // argument.
+ if (getLangOpts().OpenCL && (R->isImageType() || R->isPipeType())) {
+ Diag(D.getIdentifierLoc(),
+ diag::err_opencl_type_can_only_be_used_as_function_parameter)
+ << R;
+ D.setInvalidType();
+ return nullptr;
+ }
+
DeclSpec::SCS SCSpec = D.getDeclSpec().getStorageClassSpec();
StorageClass SC = StorageClassSpecToVarDeclStorageClass(D.getDeclSpec());
@@ -5847,7 +6010,7 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
case SC_PrivateExtern:
llvm_unreachable("C storage class in c++!");
}
- }
+ }
if (SC == SC_Static && CurContext->isRecord()) {
if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(DC)) {
@@ -5964,11 +6127,18 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
NewVD->setTemplateParameterListsInfo(
Context, TemplateParamLists.drop_back(VDTemplateParamLists));
- if (D.getDeclSpec().isConstexprSpecified())
+ if (D.getDeclSpec().isConstexprSpecified()) {
NewVD->setConstexpr(true);
+ // C++1z [dcl.spec.constexpr]p1:
+ // A static data member declared with the constexpr specifier is
+ // implicitly an inline variable.
+ if (NewVD->isStaticDataMember() && getLangOpts().CPlusPlus1z)
+ NewVD->setImplicitlyInline();
+ }
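
The setImplicitlyInline() call implements the quoted C++1z rule; a sketch:

    struct S {
      static constexpr int x = 42;  // implicitly an inline variable in C++1z
    };
    const int *p = &S::x;  // odr-use no longer needs an out-of-line definition
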
if (D.getDeclSpec().isConceptSpecified()) {
- NewVD->setConcept(true);
+ if (VarTemplateDecl *VTD = NewVD->getDescribedVarTemplate())
+ VTD->setConcept();
// C++ Concepts TS [dcl.spec.concept]p2: A concept definition shall not
// be declared with the thread_local, inline, friend, or constexpr
@@ -5986,6 +6156,41 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
<< 0 << 3;
NewVD->setInvalidDecl(true);
}
+
+ // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
+ // applied only to the definition of a function template or variable
+ // template, declared in namespace scope.
+ if (IsVariableTemplateSpecialization) {
+ Diag(D.getDeclSpec().getConceptSpecLoc(),
+ diag::err_concept_specified_specialization)
+ << (IsPartialSpecialization ? 2 : 1);
+ }
+
+ // C++ Concepts TS [dcl.spec.concept]p6: A variable concept has the
+ // following restrictions:
+ // - The declared type shall have the type bool.
+ if (!Context.hasSameType(NewVD->getType(), Context.BoolTy) &&
+ !NewVD->isInvalidDecl()) {
+ Diag(D.getIdentifierLoc(), diag::err_variable_concept_bool_decl);
+ NewVD->setInvalidDecl(true);
+ }
+ }
+ }
+
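
The Concepts TS checks above, sketched with the TS's concept-specifier syntax (names are illustrative):

    template <typename T> concept bool Small = sizeof(T) <= 4;  // OK: type is bool
    template <typename T> concept int Wrong = 0;  // err_variable_concept_bool_decl
    // Writing the concept specifier on an explicit or partial specialization is
    // likewise rejected (err_concept_specified_specialization).
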
+ if (D.getDeclSpec().isInlineSpecified()) {
+ if (!getLangOpts().CPlusPlus) {
+ Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
+ << 0;
+ } else if (CurContext->isFunctionOrMethod()) {
+ // 'inline' is not allowed on block scope variable declaration.
+ Diag(D.getDeclSpec().getInlineSpecLoc(),
+ diag::err_inline_declaration_block_scope) << Name
+ << FixItHint::CreateRemoval(D.getDeclSpec().getInlineSpecLoc());
+ } else {
+ Diag(D.getDeclSpec().getInlineSpecLoc(),
+ getLangOpts().CPlusPlus1z ? diag::warn_cxx14_compat_inline_variable
+ : diag::ext_inline_variable);
+ NewVD->setInlineSpecified();
}
}
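
The new inline-variable branch above, sketched by language mode:

    inline int counter = 0;  // C++1z: warn_cxx14_compat_inline_variable;
                             // earlier C++: ext_inline_variable; C: an error
    void g() {
      inline int x = 0;      // err_inline_declaration_block_scope
    }
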
@@ -6209,6 +6414,25 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (!IsVariableTemplateSpecialization)
D.setRedeclaration(CheckVariableDeclaration(NewVD, Previous));
+ // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare [...]
+ // an explicit specialization (14.8.3) or a partial specialization of a
+ // concept definition.
+ if (IsVariableTemplateSpecialization &&
+ !D.getDeclSpec().isConceptSpecified() && !Previous.empty() &&
+ Previous.isSingleResult()) {
+ NamedDecl *PreviousDecl = Previous.getFoundDecl();
+ if (VarTemplateDecl *VarTmpl = dyn_cast<VarTemplateDecl>(PreviousDecl)) {
+ if (VarTmpl->isConcept()) {
+ Diag(NewVD->getLocation(), diag::err_concept_specialized)
+ << 1 /*variable*/
+ << (IsPartialSpecialization ? 2 /*partially specialized*/
+ : 1 /*explicitly specialized*/);
+ Diag(VarTmpl->getLocation(), diag::note_previous_declaration);
+ NewVD->setInvalidDecl();
+ }
+ }
+ }
+
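
A sketch of the specialization the new check above rejects:

    template <typename T> concept bool C = true;
    template <> bool C<int> = false;  // err_concept_specialized: a concept
                                      // definition cannot be specialized
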
if (NewTemplate) {
VarTemplateDecl *PrevVarTemplate =
NewVD->getPreviousDecl()
@@ -6274,7 +6498,7 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (D.isRedeclaration() && !Previous.empty()) {
checkDLLAttributeRedeclaration(
*this, dyn_cast<NamedDecl>(Previous.getRepresentativeDecl()), NewVD,
- IsExplicitSpecialization);
+ IsExplicitSpecialization, D.isFunctionDefinition());
}
if (NewTemplate) {
@@ -6287,6 +6511,17 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
return NewVD;
}
+/// Enum describing the %select options in diag::warn_decl_shadow.
+enum ShadowedDeclKind { SDK_Local, SDK_Global, SDK_StaticMember, SDK_Field };
+
+/// Determine what kind of declaration we're shadowing.
+static ShadowedDeclKind computeShadowedDeclKind(const NamedDecl *ShadowedDecl,
+ const DeclContext *OldDC) {
+ if (isa<RecordDecl>(OldDC))
+ return isa<FieldDecl>(ShadowedDecl) ? SDK_Field : SDK_StaticMember;
+ return OldDC->isFileContext() ? SDK_Global : SDK_Local;
+}
+
/// \brief Diagnose variable or built-in function shadowing. Implements
/// -Wshadow.
///
@@ -6315,12 +6550,23 @@ void Sema::CheckShadow(Scope *S, VarDecl *D, const LookupResult& R) {
if (!isa<VarDecl>(ShadowedDecl) && !isa<FieldDecl>(ShadowedDecl))
return;
- // Fields are not shadowed by variables in C++ static methods.
- if (isa<FieldDecl>(ShadowedDecl))
+ if (FieldDecl *FD = dyn_cast<FieldDecl>(ShadowedDecl)) {
+ // Fields are not shadowed by variables in C++ static methods.
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(NewDC))
if (MD->isStatic())
return;
+ // Fields shadowed by constructor parameters are a special case. Usually
+ // the constructor initializes the field with the parameter.
+ if (isa<CXXConstructorDecl>(NewDC) && isa<ParmVarDecl>(D)) {
+ // Remember that this was shadowed so we can either warn about its
+ // modification or its existence depending on warning settings.
+ D = D->getCanonicalDecl();
+ ShadowingDecls.insert({D, FD});
+ return;
+ }
+ }
+
if (VarDecl *shadowedVar = dyn_cast<VarDecl>(ShadowedDecl))
if (shadowedVar->isExternC()) {
// For shadowing external vars, make sure that we point to the global
@@ -6342,29 +6588,19 @@ void Sema::CheckShadow(Scope *S, VarDecl *D, const LookupResult& R) {
// TODO: should we warn about static data members shadowing
// static data members from base classes?
-
+
// TODO: don't diagnose for inaccessible shadowed members.
// This is hard to do perfectly because we might friend the
// shadowing context, but that's just a false negative.
}
- // Determine what kind of declaration we're shadowing.
- unsigned Kind;
- if (isa<RecordDecl>(OldDC)) {
- if (isa<FieldDecl>(ShadowedDecl))
- Kind = 3; // field
- else
- Kind = 2; // static data member
- } else if (OldDC->isFileContext())
- Kind = 1; // global
- else
- Kind = 0; // local
DeclarationName Name = R.getLookupName();
// Emit warning and note.
if (getSourceManager().isInSystemMacro(R.getNameLoc()))
return;
+ ShadowedDeclKind Kind = computeShadowedDeclKind(ShadowedDecl, OldDC);
Diag(R.getNameLoc(), diag::warn_decl_shadow) << Name << Kind << OldDC;
Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
}
@@ -6380,6 +6616,30 @@ void Sema::CheckShadow(Scope *S, VarDecl *D) {
CheckShadow(S, D, R);
}
+/// Check if 'E', which is an expression that is about to be modified, refers
+/// to a constructor parameter that shadows a field.
+void Sema::CheckShadowingDeclModification(Expr *E, SourceLocation Loc) {
+ // Quickly ignore expressions that can't be shadowing ctor parameters.
+ if (!getLangOpts().CPlusPlus || ShadowingDecls.empty())
+ return;
+ E = E->IgnoreParenImpCasts();
+ auto *DRE = dyn_cast<DeclRefExpr>(E);
+ if (!DRE)
+ return;
+ const NamedDecl *D = cast<NamedDecl>(DRE->getDecl()->getCanonicalDecl());
+ auto I = ShadowingDecls.find(D);
+ if (I == ShadowingDecls.end())
+ return;
+ const NamedDecl *ShadowedDecl = I->second;
+ const DeclContext *OldDC = ShadowedDecl->getDeclContext();
+ Diag(Loc, diag::warn_modifying_shadowing_decl) << D << OldDC;
+ Diag(D->getLocation(), diag::note_var_declared_here) << D;
+ Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration);
+
+ // Avoid issuing multiple warnings about the same decl.
+ ShadowingDecls.erase(I);
+}
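
Together, the two additions defer the constructor-parameter shadowing warning until the parameter is actually modified; a sketch:

    struct A {
      int n;
      A(int n) : n(n) {  // shadowing parameter is remembered, not yet warned
        n = 1;           // modification triggers warn_modifying_shadowing_decl
      }
    };
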
+
/// Check for conflict between this global or extern "C" declaration and
/// previous global or extern "C" declarations. This is only used in C++.
template<typename T>
@@ -6530,7 +6790,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
return;
}
- // OpenCL v1.2 s6.8 -- The static qualifier is valid only in program
+ // OpenCL v1.2 s6.8 - The static qualifier is valid only in program
// scope.
if (getLangOpts().OpenCLVersion == 120 &&
!getOpenCLOptions().cl_clang_storage_class_specifiers &&
@@ -6540,40 +6800,64 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
return;
}
- // OpenCL v1.2 s6.5 - All program scope variables must be declared in the
- // __constant address space.
- // OpenCL v2.0 s6.5.1 - Variables defined at program scope and static
- // variables inside a function can also be declared in the global
- // address space.
if (getLangOpts().OpenCL) {
- if (NewVD->isFileVarDecl()) {
+ // OpenCL v2.0 s6.12.5 - The __block storage type is not supported.
+ if (NewVD->hasAttr<BlocksAttr>()) {
+ Diag(NewVD->getLocation(), diag::err_opencl_block_storage_type);
+ return;
+ }
+
+ if (T->isBlockPointerType()) {
+ // OpenCL v2.0 s6.12.5 - Any block declaration must be const qualified and
+ // can't use 'extern' storage class.
+ if (!T.isConstQualified()) {
+ Diag(NewVD->getLocation(), diag::err_opencl_invalid_block_declaration)
+ << 0 /*const*/;
+ NewVD->setInvalidDecl();
+ return;
+ }
+ if (NewVD->hasExternalStorage()) {
+ Diag(NewVD->getLocation(), diag::err_opencl_extern_block_declaration);
+ NewVD->setInvalidDecl();
+ return;
+ }
+ // OpenCL v2.0 s6.12.5 - Blocks with variadic arguments are not supported.
+      // TODO: this check is not enough as it doesn't diagnose the typedef case.
+ const BlockPointerType *BlkTy = T->getAs<BlockPointerType>();
+ const FunctionProtoType *FTy =
+ BlkTy->getPointeeType()->getAs<FunctionProtoType>();
+ if (FTy && FTy->isVariadic()) {
+ Diag(NewVD->getLocation(), diag::err_opencl_block_proto_variadic)
+ << T << NewVD->getSourceRange();
+ NewVD->setInvalidDecl();
+ return;
+ }
+ }
+ // OpenCL v1.2 s6.5 - All program scope variables must be declared in the
+ // __constant address space.
+ // OpenCL v2.0 s6.5.1 - Variables defined at program scope and static
+ // variables inside a function can also be declared in the global
+ // address space.
+ if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() ||
+ NewVD->hasExternalStorage()) {
if (!T->isSamplerT() &&
!(T.getAddressSpace() == LangAS::opencl_constant ||
(T.getAddressSpace() == LangAS::opencl_global &&
getLangOpts().OpenCLVersion == 200))) {
+ int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1;
if (getLangOpts().OpenCLVersion == 200)
Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
- << "global or constant";
+ << Scope << "global or constant";
else
Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
- << "constant";
+ << Scope << "constant";
NewVD->setInvalidDecl();
return;
}
} else {
- // OpenCL v2.0 s6.5.1 - Variables defined at program scope and static
- // variables inside a function can also be declared in the global
- // address space.
- if (NewVD->isStaticLocal() &&
- !(T.getAddressSpace() == LangAS::opencl_constant ||
- (T.getAddressSpace() == LangAS::opencl_global &&
- getLangOpts().OpenCLVersion == 200))) {
- if (getLangOpts().OpenCLVersion == 200)
- Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
- << "global or constant";
- else
- Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
- << "constant";
+ if (T.getAddressSpace() == LangAS::opencl_global) {
+ Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
+ << 1 /*is any function*/ << "global";
NewVD->setInvalidDecl();
return;
}
@@ -6584,11 +6868,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
FunctionDecl *FD = getCurFunctionDecl();
if (FD && !FD->hasAttr<OpenCLKernelAttr>()) {
if (T.getAddressSpace() == LangAS::opencl_constant)
- Diag(NewVD->getLocation(), diag::err_opencl_non_kernel_variable)
- << "constant";
+ Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
+ << 0 /*non-kernel only*/ << "constant";
else
- Diag(NewVD->getLocation(), diag::err_opencl_non_kernel_variable)
- << "local";
+ Diag(NewVD->getLocation(), diag::err_opencl_function_variable)
+ << 0 /*non-kernel only*/ << "local";
NewVD->setInvalidDecl();
return;
}
@@ -6605,7 +6889,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
Diag(NewVD->getLocation(), diag::warn_attribute_weak_on_local);
}
}
-
+
bool isVM = T->isVariablyModifiedType();
if (isVM || NewVD->hasAttr<CleanupAttr>() ||
NewVD->hasAttr<BlocksAttr>())
@@ -6821,7 +7105,7 @@ namespace {
MultiTemplateParamsArg TemplateParamLists;
bool AddToScope;
};
-}
+} // end anonymous namespace
namespace {
@@ -6865,7 +7149,7 @@ class DifferentNameValidatorCCC : public CorrectionCandidateCallback {
CXXRecordDecl *ExpectedParent;
};
-}
+} // end anonymous namespace
/// \brief Generate diagnostics for an invalid function redeclaration.
///
@@ -7072,9 +7356,9 @@ static FunctionDecl* CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
(D.isFunctionDeclarator() && D.getFunctionTypeInfo().hasPrototype) ||
(!isa<FunctionType>(R.getTypePtr()) && R->isFunctionProtoType());
- NewFD = FunctionDecl::Create(SemaRef.Context, DC,
- D.getLocStart(), NameInfo, R,
- TInfo, SC, isInline,
+ NewFD = FunctionDecl::Create(SemaRef.Context, DC,
+ D.getLocStart(), NameInfo, R,
+ TInfo, SC, isInline,
HasPrototype, false);
if (D.isInvalidType())
NewFD->setInvalidDecl();
@@ -7483,8 +7767,8 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
Diag(NewFD->getLocation(), diag::err_destructor_template);
NewFD->setInvalidDecl();
}
-
- // If we're adding a template to a dependent context, we may need to
+
+ // If we're adding a template to a dependent context, we may need to
    // rebuild some of the types used within the template parameter list,
// now that we know what the current instantiation is.
if (DC->isDependentContext()) {
@@ -7492,7 +7776,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (RebuildTemplateParamsInCurrentInstantiation(TemplateParams))
Invalid = true;
}
-
FunctionTemplate = FunctionTemplateDecl::Create(Context, DC,
NewFD->getLocation(),
@@ -7561,7 +7844,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
diag::err_virtual_non_function);
} else if (!CurContext->isRecord()) {
// 'virtual' was specified outside of the class.
- Diag(D.getDeclSpec().getVirtualSpecLoc(),
+ Diag(D.getDeclSpec().getVirtualSpecLoc(),
diag::err_virtual_out_of_class)
<< FixItHint::CreateRemoval(D.getDeclSpec().getVirtualSpecLoc());
} else if (NewFD->getDescribedFunctionTemplate()) {
@@ -7599,12 +7882,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
}
// C++ [dcl.fct.spec]p3:
- // The inline specifier shall not appear on a block scope function
+ // The inline specifier shall not appear on a block scope function
// declaration.
if (isInline && !NewFD->isInvalidDecl()) {
if (CurContext->isFunctionOrMethod()) {
// 'inline' is not allowed on block scope function declaration.
- Diag(D.getDeclSpec().getInlineSpecLoc(),
+ Diag(D.getDeclSpec().getInlineSpecLoc(),
diag::err_inline_declaration_block_scope) << Name
<< FixItHint::CreateRemoval(D.getDeclSpec().getInlineSpecLoc());
}
@@ -7612,22 +7895,22 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// C++ [dcl.fct.spec]p6:
// The explicit specifier shall be used only in the declaration of a
- // constructor or conversion function within its class definition;
+ // constructor or conversion function within its class definition;
// see 12.3.1 and 12.3.2.
if (isExplicit && !NewFD->isInvalidDecl()) {
if (!CurContext->isRecord()) {
// 'explicit' was specified outside of the class.
- Diag(D.getDeclSpec().getExplicitSpecLoc(),
+ Diag(D.getDeclSpec().getExplicitSpecLoc(),
diag::err_explicit_out_of_class)
<< FixItHint::CreateRemoval(D.getDeclSpec().getExplicitSpecLoc());
- } else if (!isa<CXXConstructorDecl>(NewFD) &&
+ } else if (!isa<CXXConstructorDecl>(NewFD) &&
!isa<CXXConversionDecl>(NewFD)) {
// 'explicit' was specified on a function that wasn't a constructor
// or conversion function.
Diag(D.getDeclSpec().getExplicitSpecLoc(),
diag::err_explicit_non_ctor_or_conv_function)
<< FixItHint::CreateRemoval(D.getDeclSpec().getExplicitSpecLoc());
- }
+ }
}
if (isConstexpr) {
@@ -7643,6 +7926,10 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
}
if (isConcept) {
+ // This is a function concept.
+ if (FunctionTemplateDecl *FTD = NewFD->getDescribedFunctionTemplate())
+ FTD->setConcept();
+
// C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
// applied only to the definition of a function template [...]
if (!D.isFunctionDefinition()) {
@@ -7668,6 +7955,14 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// C++ Concepts TS [dcl.spec.concept]p5: A function concept has the
// following restrictions:
+ // - The declared return type shall have the type bool.
+ if (!Context.hasSameType(FPT->getReturnType(), Context.BoolTy)) {
+ Diag(D.getIdentifierLoc(), diag::err_function_concept_bool_ret);
+ NewFD->setInvalidDecl();
+ }
+
+ // C++ Concepts TS [dcl.spec.concept]p5: A function concept has the
+ // following restrictions:
// - The declaration's parameter list shall be equivalent to an empty
// parameter list.
if (FPT->getNumParams() > 0 || FPT->isVariadic())
@@ -7701,6 +7996,16 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
<< 1 << 3;
NewFD->setInvalidDecl(true);
}
+
+ // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
+ // applied only to the definition of a function template or variable
+ // template, declared in namespace scope.
+ if (isFunctionTemplateSpecialization) {
+ Diag(D.getDeclSpec().getConceptSpecLoc(),
+ diag::err_concept_specified_specialization) << 1;
+ NewFD->setInvalidDecl(true);
+ return NewFD;
+ }
}
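
The function-concept restrictions enforced above, sketched in Concepts TS syntax (names are illustrative):

    template <typename T> concept bool Fc() { return true; }  // OK
    template <typename T> concept int Bad() { return 0; }     // return type must
                                                               // be bool
    template <typename T> concept bool Bad2(int) { return true; }  // parameter
                                                                    // list must be empty
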
// If __module_private__ was specified, mark the function accordingly.
@@ -7734,11 +8039,11 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
case FDK_Declaration:
case FDK_Definition:
break;
-
+
case FDK_Defaulted:
NewFD->setDefaulted();
break;
-
+
case FDK_Deleted:
NewFD->setDeletedAsWritten();
break;
@@ -7747,7 +8052,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (isa<CXXMethodDecl>(NewFD) && DC == CurContext &&
D.isFunctionDefinition()) {
// C++ [class.mfct]p2:
- // A member function may be defined (8.4) in its class definition, in
+ // A member function may be defined (8.4) in its class definition, in
// which case it is an inline member function (7.1.2)
NewFD->setImplicitlyInline();
}
@@ -7825,7 +8130,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
NewFD->setInvalidDecl();
}
}
-
} else if (const FunctionProtoType *FT = R->getAs<FunctionProtoType>()) {
// When we're declaring a function with a typedef, typeof, etc as in the
// following example, we'll need to synthesize (unnamed)
@@ -7890,6 +8194,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
+ if (getLangOpts().CUDA)
+ maybeAddCUDAHostDeviceAttrs(S, NewFD, Previous);
+
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error.
@@ -7952,7 +8259,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
diag::ext_operator_new_delete_declared_inline)
<< NewFD->getDeclName();
- // If the declarator is a template-id, translate the parser's template
+ // If the declarator is a template-id, translate the parser's template
// argument list into our AST format.
if (D.getName().getKind() == UnqualifiedId::IK_TemplateId) {
TemplateIdAnnotation *TemplateId = D.getName().TemplateId;
@@ -7962,9 +8269,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
TemplateId->NumArgs);
translateTemplateArguments(TemplateArgsPtr,
TemplateArgs);
-
+
HasExplicitTemplateArgs = true;
-
+
if (NewFD->isInvalidDecl()) {
HasExplicitTemplateArgs = false;
} else if (FunctionTemplate) {
@@ -8000,7 +8307,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (isFunctionTemplateSpecialization && isFriend &&
(NewFD->getType()->isDependentType() || DC->isDependentContext() ||
TemplateSpecializationType::anyDependentTemplateArguments(
- TemplateArgs.getArgumentArray(), TemplateArgs.size(),
+ TemplateArgs,
InstantiationDependent))) {
assert(HasExplicitTemplateArgs &&
"friend function specialization without template args");
@@ -8008,10 +8315,10 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
Previous))
NewFD->setInvalidDecl();
} else if (isFunctionTemplateSpecialization) {
- if (CurContext->isDependentContext() && CurContext->isRecord()
+ if (CurContext->isDependentContext() && CurContext->isRecord()
&& !isFriend) {
isDependentClassScopeExplicitSpecialization = true;
- Diag(NewFD->getLocation(), getLangOpts().MicrosoftExt ?
+ Diag(NewFD->getLocation(), getLangOpts().MicrosoftExt ?
diag::ext_function_specialization_in_class :
diag::err_function_specialization_in_class)
<< NewFD->getDeclName();
@@ -8020,7 +8327,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
: nullptr),
Previous))
NewFD->setInvalidDecl();
-
+
// C++ [dcl.stc]p1:
// A storage-class-specifier shall not be specified in an explicit
// specialization (14.7.3)
@@ -8033,14 +8340,13 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
<< SC
<< FixItHint::CreateRemoval(
D.getDeclSpec().getStorageClassSpecLoc());
-
+
else
- Diag(NewFD->getLocation(),
+ Diag(NewFD->getLocation(),
diag::ext_explicit_specialization_storage_class)
<< FixItHint::CreateRemoval(
D.getDeclSpec().getStorageClassSpecLoc());
}
-
} else if (isExplicitSpecialization && isa<CXXMethodDecl>(NewFD)) {
if (CheckMemberSpecialization(NewFD, Previous))
NewFD->setInvalidDecl();
@@ -8086,7 +8392,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// If we have a function template, check the template parameter
// list. This will check and merge default template arguments.
if (FunctionTemplate) {
- FunctionTemplateDecl *PrevTemplate =
+ FunctionTemplateDecl *PrevTemplate =
FunctionTemplate->getPreviousDecl();
CheckTemplateParameterList(FunctionTemplate->getTemplateParameters(),
PrevTemplate ? PrevTemplate->getTemplateParameters()
@@ -8095,8 +8401,8 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
? (D.isFunctionDefinition()
? TPC_FriendFunctionTemplateDefinition
: TPC_FriendFunctionTemplate)
- : (D.getCXXScopeSpec().isSet() &&
- DC && DC->isRecord() &&
+ : (D.getCXXScopeSpec().isSet() &&
+ DC && DC->isRecord() &&
DC->isDependentContext())
? TPC_ClassTemplateMember
: TPC_FunctionTemplate);
@@ -8159,7 +8465,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
return Result;
}
}
-
} else if (!D.isFunctionDefinition() &&
isa<CXXMethodDecl>(NewFD) && NewFD->isOutOfLine() &&
!isFriend && !isFunctionTemplateSpecialization &&
@@ -8168,8 +8473,8 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// definition (C++ [class.mfct]p2).
// Note that this is not the case for explicit specializations of
// function templates or member functions of class templates, per
- // C++ [temp.expl.spec]p2. We also allow these declarations as an
- // extension for compatibility with old SWIG code which likes to
+ // C++ [temp.expl.spec]p2. We also allow these declarations as an
+ // extension for compatibility with old SWIG code which likes to
// generate them.
Diag(NewFD->getLocation(), diag::ext_out_of_line_declaration)
<< D.getCXXScopeSpec().getRange();
@@ -8181,7 +8486,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
AddKnownFunctionAttributes(NewFD);
- if (NewFD->hasAttr<OverloadableAttr>() &&
+ if (NewFD->hasAttr<OverloadableAttr>() &&
!NewFD->getType()->getAs<FunctionProtoType>()) {
Diag(NewFD->getLocation(),
diag::err_attribute_overloadable_no_prototype)
@@ -8224,7 +8529,30 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (D.isRedeclaration() && !Previous.empty()) {
checkDLLAttributeRedeclaration(
*this, dyn_cast<NamedDecl>(Previous.getRepresentativeDecl()), NewFD,
- isExplicitSpecialization || isFunctionTemplateSpecialization);
+ isExplicitSpecialization || isFunctionTemplateSpecialization,
+ D.isFunctionDefinition());
+ }
+
+ if (getLangOpts().CUDA) {
+ IdentifierInfo *II = NewFD->getIdentifier();
+ if (II && II->isStr("cudaConfigureCall") && !NewFD->isInvalidDecl() &&
+ NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
+ if (!R->getAs<FunctionType>()->getReturnType()->isScalarType())
+ Diag(NewFD->getLocation(), diag::err_config_scalar_return);
+
+ Context.setcudaConfigureCallDecl(NewFD);
+ }
+
+ // Variadic functions, other than a *declaration* of printf, are not allowed
+ // in device-side CUDA code, unless someone passed
+ // -fcuda-allow-variadic-functions.
+ if (!getLangOpts().CUDAAllowVariadicFunctions && NewFD->isVariadic() &&
+ (NewFD->hasAttr<CUDADeviceAttr>() ||
+ NewFD->hasAttr<CUDAGlobalAttr>()) &&
+ !(II && II->isStr("printf") && NewFD->isExternC() &&
+ !D.isFunctionDefinition())) {
+ Diag(NewFD->getLocation(), diag::err_variadic_device_fn);
+ }
}
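
A sketch of the consolidated CUDA handling above (function names are illustrative):

    __device__ void log_all(const char *fmt, ...);  // err_variadic_device_fn unless
                                                    // -fcuda-allow-variadic-functions
    extern "C" __device__ int printf(const char *, ...);  // permitted: a mere
                                                          // declaration of printf
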
if (getLangOpts().CPlusPlus) {
@@ -8242,7 +8570,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
Diag(D.getIdentifierLoc(), diag::err_static_kernel);
D.setInvalidType();
}
-
+
// OpenCL v1.2, s6.9 -- Kernels can only have return type void.
if (!NewFD->getReturnType()->isVoidType()) {
SourceRange RTRange = NewFD->getReturnTypeSourceRange();
@@ -8253,12 +8581,10 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
}
llvm::SmallPtrSet<const Type *, 16> ValidTypes;
- for (auto Param : NewFD->params())
+ for (auto Param : NewFD->parameters())
checkIsValidOpenCLKernelParameter(*this, D, Param, ValidTypes);
}
- for (FunctionDecl::param_iterator PI = NewFD->param_begin(),
- PE = NewFD->param_end(); PI != PE; ++PI) {
- ParmVarDecl *Param = *PI;
+ for (const ParmVarDecl *Param : NewFD->parameters()) {
QualType PT = Param->getType();
// OpenCL 2.0 pipe restrictions forbids pipe packet types to be non-value
@@ -8276,25 +8602,13 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
MarkUnusedFileScopedDecl(NewFD);
- if (getLangOpts().CUDA)
- if (IdentifierInfo *II = NewFD->getIdentifier())
- if (!NewFD->isInvalidDecl() &&
- NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
- if (II->isStr("cudaConfigureCall")) {
- if (!R->getAs<FunctionType>()->getReturnType()->isScalarType())
- Diag(NewFD->getLocation(), diag::err_config_scalar_return);
-
- Context.setcudaConfigureCallDecl(NewFD);
- }
- }
-
  // Here we have a function template explicit specialization at class scope.
  // The actual specialization will be postponed to template instantiation
// time via the ClassScopeFunctionSpecializationDecl node.
if (isDependentClassScopeExplicitSpecialization) {
ClassScopeFunctionSpecializationDecl *NewSpec =
ClassScopeFunctionSpecializationDecl::Create(
- Context, CurContext, SourceLocation(),
+ Context, CurContext, SourceLocation(),
cast<CXXMethodDecl>(NewFD),
HasExplicitTemplateArgs, TemplateArgs);
CurContext->addDecl(NewSpec);
@@ -8464,20 +8778,28 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
FunctionTemplateDecl *NewTemplateDecl
= NewFD->getDescribedFunctionTemplate();
assert(NewTemplateDecl && "Template/non-template mismatch");
- if (CXXMethodDecl *Method
+ if (CXXMethodDecl *Method
= dyn_cast<CXXMethodDecl>(NewTemplateDecl->getTemplatedDecl())) {
Method->setAccess(OldTemplateDecl->getAccess());
NewTemplateDecl->setAccess(OldTemplateDecl->getAccess());
}
-
+
// If this is an explicit specialization of a member that is a function
// template, mark it as a member specialization.
- if (IsExplicitSpecialization &&
+ if (IsExplicitSpecialization &&
NewTemplateDecl->getInstantiatedFromMemberTemplate()) {
NewTemplateDecl->setMemberSpecialization();
assert(OldTemplateDecl->isMemberSpecialization());
+ // Explicit specializations of a member template do not inherit deleted
+ // status from the parent member template that they are specializing.
+ if (OldTemplateDecl->getTemplatedDecl()->isDeleted()) {
+ FunctionDecl *const OldTemplatedDecl =
+ OldTemplateDecl->getTemplatedDecl();
+ assert(OldTemplatedDecl->getCanonicalDecl() == OldTemplatedDecl);
+ OldTemplatedDecl->setDeletedAsWritten(false);
+ }
}
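
The new block above lets an explicit member specialization be usable even when the member template it specializes is deleted; a sketch:

    template <typename U> struct S {
      template <typename T> void f(T) = delete;
    };
    template <> template <> void S<int>::f<int>(int) {}  // OK: does not inherit
                                                         // the deleted status
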
-
+
} else {
// This needs to happen first so that 'inline' propagates.
NewFD->setPreviousDeclaration(cast<FunctionDecl>(OldDecl));
@@ -8493,11 +8815,11 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
// C++-specific checks.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(NewFD)) {
CheckConstructor(Constructor);
- } else if (CXXDestructorDecl *Destructor =
+ } else if (CXXDestructorDecl *Destructor =
dyn_cast<CXXDestructorDecl>(NewFD)) {
CXXRecordDecl *Record = Destructor->getParent();
QualType ClassType = Context.getTypeDeclType(Record);
-
+
// FIXME: Shouldn't we be able to perform this check even when the class
// type is dependent? Both gcc and edg can handle that.
if (!ClassType->isDependentType()) {
@@ -8517,7 +8839,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
// Find any virtual functions that this function overrides.
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(NewFD)) {
- if (!Method->isFunctionTemplateSpecialization() &&
+ if (!Method->isFunctionTemplateSpecialization() &&
!Method->getDescribedFunctionTemplate() &&
Method->isCanonicalDecl()) {
if (AddOverriddenMethods(Method->getParent(), Method)) {
@@ -8527,7 +8849,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
}
}
}
-
+
if (Method->isStatic())
checkThisInStaticMemberFunctionType(Method);
}
@@ -8553,7 +8875,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
CheckCXXDefaultArguments(NewFD);
// If this function declares a builtin function, check the type of this
- // declaration against the expected type for the builtin.
+ // declaration against the expected type for the builtin.
if (unsigned BuiltinID = NewFD->getBuiltinID()) {
ASTContext::GetBuiltinTypeError Error;
LookupPredefedObjCSuperType(*this, S, NewFD->getIdentifier());
@@ -8565,7 +8887,7 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
}
}
- // If this function is declared as being extern "C", then check to see if
+ // If this function is declared as being extern "C", then check to see if
// the function returns a UDT (class, struct, or union type) that is not C
// compatible, and if it does, warn the user.
// But, issue any diagnostic on the first declaration only.
@@ -8591,11 +8913,11 @@ void Sema::CheckMain(FunctionDecl* FD, const DeclSpec& DS) {
// static main is not an error under C99, but we should warn about it.
// We accept _Noreturn main as an extension.
if (FD->getStorageClass() == SC_Static)
- Diag(DS.getStorageClassSpecLoc(), getLangOpts().CPlusPlus
- ? diag::err_static_main : diag::warn_static_main)
+ Diag(DS.getStorageClassSpecLoc(), getLangOpts().CPlusPlus
+ ? diag::err_static_main : diag::warn_static_main)
<< FixItHint::CreateRemoval(DS.getStorageClassSpecLoc());
if (FD->isInlineSpecified())
- Diag(DS.getInlineSpecLoc(), diag::err_inline_main)
+ Diag(DS.getInlineSpecLoc(), diag::err_inline_main)
<< FixItHint::CreateRemoval(DS.getInlineSpecLoc());
if (DS.isNoreturnSpecified()) {
SourceLocation NoreturnLoc = DS.getNoreturnSpecLoc();
@@ -8722,7 +9044,7 @@ void Sema::CheckMain(FunctionDecl* FD, const DeclSpec& DS) {
if (nparams == 1 && !FD->isInvalidDecl()) {
Diag(FD->getLocation(), diag::warn_main_one_arg);
}
-
+
if (!FD->isInvalidDecl() && FD->getDescribedFunctionTemplate()) {
Diag(FD->getLocation(), diag::err_mainlike_template_decl) << FD;
FD->setInvalidDecl();
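Roughly, the declarations CheckMain rejects here (each line taken in isolation; hypothetical snippets):

    static int main() { return 0; }    // error in C++, warning under C99
    inline int main() { return 0; }    // error: 'main' is not allowed to be inline
    template <typename T> int main();  // error: 'main' cannot be a template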
@@ -8779,6 +9101,7 @@ namespace {
bool isInitList;
llvm::SmallVector<unsigned, 4> InitFieldIndex;
+
public:
typedef EvaluatedExprVisitor<SelfReferenceChecker> Inherited;
@@ -9000,7 +9323,7 @@ namespace {
Inherited::VisitUnaryOperator(E);
}
- void VisitObjCMessageExpr(ObjCMessageExpr *E) { return; }
+ void VisitObjCMessageExpr(ObjCMessageExpr *E) {}
void VisitCXXConstructExpr(CXXConstructExpr *E) {
if (E->getConstructor()->isCopyConstructor()) {
@@ -9096,7 +9419,7 @@ namespace {
SelfReferenceChecker(S, OrigDecl).CheckExpr(E);
}
-}
+} // end anonymous namespace
QualType Sema::deduceVarTypeFromInitializer(VarDecl *VDecl,
DeclarationName Name, QualType Type,
@@ -9292,9 +9615,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
}
VarDecl *Def;
- if ((Def = VDecl->getDefinition()) && Def != VDecl) {
+ if ((Def = VDecl->getDefinition()) && Def != VDecl &&
+ (!VDecl->isStaticDataMember() || VDecl->isOutOfLine())) {
NamedDecl *Hidden = nullptr;
- if (!hasVisibleDefinition(Def, &Hidden) &&
+ if (!hasVisibleDefinition(Def, &Hidden) &&
(VDecl->getFormalLinkage() == InternalLinkage ||
VDecl->getDescribedVarTemplate() ||
VDecl->getNumTemplateParameterLists() ||
@@ -9330,7 +9654,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
diag::note_previous_initializer)
<< 0;
return;
- }
+ }
if (VDecl->hasLocalStorage())
getCurFunction()->setHasBranchProtectedScope();
@@ -9352,7 +9676,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
// Get the decls type and save a reference for later, since
// CheckInitializerTypes may change it.
QualType DclT = VDecl->getType(), SavT = DclT;
-
+
// Expressions default to 'id' when we're in a debugger
// and we are assigning it to a variable of Objective-C pointer type.
if (getLangOpts().DebuggerCastResultToId && DclT->isObjCObjectPointerType() &&
@@ -9400,7 +9724,9 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
if (VDecl->isInvalidDecl())
return;
- InitializationSequence InitSeq(*this, Entity, Kind, Args);
+ InitializationSequence InitSeq(*this, Entity, Kind, Args,
+ /*TopLevelOfInitList=*/false,
+ /*TreatUnavailableAsInvalid=*/false);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Args, &DclT);
if (Result.isInvalid()) {
VDecl->setInvalidDecl();
@@ -9486,7 +9812,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
diag::ext_aggregate_init_not_constant)
<< Culprit->getSourceRange();
}
- } else if (VDecl->isStaticDataMember() &&
+ } else if (VDecl->isStaticDataMember() && !VDecl->isInline() &&
VDecl->getLexicalDeclContext()->isRecord()) {
// This is an in-class initialization for a static data member, e.g.,
//
@@ -9500,8 +9826,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
// const enumeration type, see 9.4.2.
//
// C++11 [class.static.data]p3:
- // If a non-volatile const static data member is of integral or
- // enumeration type, its declaration in the class definition can
+ // If a non-volatile non-inline const static data member is of integral
+ // or enumeration type, its declaration in the class definition can
    // specify a brace-or-equal-initializer in which every initializer-clause
// that is an assignment-expression is a constant expression. A static
// data member of literal type can be declared in the class definition
@@ -9641,7 +9967,7 @@ void Sema::ActOnInitializerError(Decl *D) {
if (Ty->isDependentType()) return;
// Require a complete type.
- if (RequireCompleteType(VD->getLocation(),
+ if (RequireCompleteType(VD->getLocation(),
Context.getBaseElementType(Ty),
diag::err_typecheck_decl_incomplete_type)) {
VD->setInvalidDecl();
@@ -9684,23 +10010,32 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl,
// the definition of a variable [...] or the declaration of a static data
// member.
if (Var->isConstexpr() && !Var->isThisDeclarationADefinition()) {
- if (Var->isStaticDataMember())
- Diag(Var->getLocation(),
- diag::err_constexpr_static_mem_var_requires_init)
- << Var->getDeclName();
- else
+ if (Var->isStaticDataMember()) {
+ // C++1z removes the relevant rule; the in-class declaration is always
+ // a definition there.
+ if (!getLangOpts().CPlusPlus1z) {
+ Diag(Var->getLocation(),
+ diag::err_constexpr_static_mem_var_requires_init)
+ << Var->getDeclName();
+ Var->setInvalidDecl();
+ return;
+ }
+ } else {
Diag(Var->getLocation(), diag::err_invalid_constexpr_var_decl);
- Var->setInvalidDecl();
- return;
+ Var->setInvalidDecl();
+ return;
+ }
}
// C++ Concepts TS [dcl.spec.concept]p1: [...] A variable template
// definition having the concept specifier is called a variable concept. A
// concept definition refers to [...] a variable concept and its initializer.
- if (Var->isConcept()) {
- Diag(Var->getLocation(), diag::err_var_concept_not_initialized);
- Var->setInvalidDecl();
- return;
+ if (VarTemplateDecl *VTD = Var->getDescribedVarTemplate()) {
+ if (VTD->isConcept()) {
+ Diag(Var->getLocation(), diag::err_var_concept_not_initialized);
+ Var->setInvalidDecl();
+ return;
+ }
}
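A sketch of the declarations this branch now distinguishes (hypothetical names; in C++1z the in-class declaration is itself a definition, so the first diagnostic is skipped there):

    struct S {
      static constexpr int m;  // error before C++1z: constexpr static data
                               // member requires an initializer
    };
    constexpr int v;           // error: uninitialized constexpr variable declaration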
// OpenCL v1.1 s6.5.3: variables declared in the constant address space must
@@ -9720,17 +10055,17 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl,
// We have an out-of-line definition of a static data member
// that has an in-class initializer, so we type-check this like
- // a declaration.
+ // a declaration.
//
// Fall through
-
+
case VarDecl::DeclarationOnly:
- // It's only a declaration.
+ // It's only a declaration.
// Block scope. C99 6.7p7: If an identifier for an object is
// declared with no linkage (C99 6.2.2p6), the type for the
// object shall be complete.
- if (!Type->isDependentType() && Var->isLocalVarDecl() &&
+ if (!Type->isDependentType() && Var->isLocalVarDecl() &&
!Var->hasLinkage() && !Var->isInvalidDecl() &&
RequireCompleteType(Var->getLocation(), Type,
diag::err_typecheck_decl_incomplete_type))
@@ -9747,7 +10082,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl,
Diag(Var->getLocation(), diag::warn_private_extern);
Diag(Var->getLocation(), diag::note_private_extern);
}
-
+
return;
case VarDecl::TentativeDefinition:
@@ -9852,7 +10187,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl,
getCurFunction()->setHasBranchProtectedScope();
}
}
-
+
// C++03 [dcl.init]p9:
// If no initializer is specified for an object, and the
// object is of (possibly cv-qualified) non-POD class type (or
@@ -9886,6 +10221,10 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl,
}
void Sema::ActOnCXXForRangeDecl(Decl *D) {
+ // If there is no declaration, there was an error parsing it. Ignore it.
+ if (!D)
+ return;
+
VarDecl *VD = dyn_cast<VarDecl>(D);
if (!VD) {
Diag(D->getLocation(), diag::err_for_range_decl_must_be_var);
@@ -9957,6 +10296,18 @@ Sema::ActOnCXXForRangeIdentifier(Scope *S, SourceLocation IdentLoc,
void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
if (var->isInvalidDecl()) return;
+ if (getLangOpts().OpenCL) {
+ // OpenCL v2.0 s6.12.5 - Every block variable declaration must have an
+    // initializer.
+ if (var->getTypeSourceInfo()->getType()->isBlockPointerType() &&
+ !var->hasInit()) {
+ Diag(var->getLocation(), diag::err_opencl_invalid_block_declaration)
+ << 1 /*Init*/;
+ var->setInvalidDecl();
+ return;
+ }
+ }
+
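A minimal OpenCL C sketch of the new check (assuming OpenCL 2.0 block syntax):

    int (^good)(void) = ^int(void) { return 1; };  // OK: declared with an initializer
    int (^bad)(void);                              // error: block variable without an initializer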
// In Objective-C, don't allow jumps past the implicit initialization of a
// local retaining variable.
if (getLangOpts().ObjC1 &&
@@ -10014,7 +10365,6 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
if (getLangOpts().CPlusPlus11)
Diag(var->getLocation(), diag::note_use_thread_local);
}
-
}
// Apply section attributes and pragmas to global variables.
@@ -10165,15 +10515,63 @@ Sema::FinalizeDeclaration(Decl *ThisDecl) {
}
}
- // Static locals inherit dll attributes from their function.
if (VD->isStaticLocal()) {
if (FunctionDecl *FD =
dyn_cast_or_null<FunctionDecl>(VD->getParentFunctionOrMethod())) {
+ // Static locals inherit dll attributes from their function.
if (Attr *A = getDLLAttr(FD)) {
auto *NewAttr = cast<InheritableAttr>(A->clone(getASTContext()));
NewAttr->setInherited(true);
VD->addAttr(NewAttr);
}
+ // CUDA E.2.9.4: Within the body of a __device__ or __global__
+ // function, only __shared__ variables may be declared with
+ // static storage class.
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+ (FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAGlobalAttr>()) &&
+ !VD->hasAttr<CUDASharedAttr>()) {
+ Diag(VD->getLocation(), diag::err_device_static_local_var);
+ VD->setInvalidDecl();
+ }
+ }
+ }
+
+ // Perform check for initializers of device-side global variables.
+ // CUDA allows empty constructors as initializers (see E.2.3.1, CUDA
+ // 7.5). We must also apply the same checks to all __shared__
+ // variables whether they are local or not. CUDA also allows
+ // constant initializers for __constant__ and __device__ variables.
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ const Expr *Init = VD->getInit();
+ if (Init && VD->hasGlobalStorage() &&
+ (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
+ VD->hasAttr<CUDASharedAttr>())) {
+ assert((!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>()));
+ bool AllowedInit = false;
+ if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
+ AllowedInit =
+ isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
+      // We'll allow constant initializers even when the constructor is not
+      // empty by CUDA's rules. This deviates from NVCC, but allows us to
+      // handle things like constexpr constructors.
+ if (!AllowedInit &&
+ (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
+ AllowedInit = VD->getInit()->isConstantInitializer(
+ Context, VD->getType()->isReferenceType());
+
+ // Also make sure that destructor, if there is one, is empty.
+ if (AllowedInit)
+ if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
+ AllowedInit =
+ isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
+
+ if (!AllowedInit) {
+ Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
+ ? diag::err_shared_var_init
+ : diag::err_dynamic_var_init)
+ << Init->getSourceRange();
+ VD->setInvalidDecl();
+ }
}
}
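A CUDA C++ sketch of the two checks added above (hypothetical names; device-side compilation):

    __device__ void helper() {
      static int counter;             // error: non-__shared__ static local in device code
      static __shared__ int buf[32];  // OK per CUDA E.2.9.4
    }
    __device__ int simple = 42;       // OK: constant initializer
    // A __device__ global whose initializer runs a non-empty constructor
    // would be rejected with err_dynamic_var_init.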
@@ -10416,6 +10814,9 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) {
if (DeclSpec::TSCS TSCS = DS.getThreadStorageClassSpec())
Diag(DS.getThreadStorageClassSpecLoc(), diag::err_invalid_thread)
<< DeclSpec::getSpecifierName(TSCS);
+ if (DS.isInlineSpecified())
+ Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
+ << getLangOpts().CPlusPlus1z;
if (DS.isConstexprSpecified())
Diag(DS.getConstexprSpecLoc(), diag::err_invalid_constexpr)
<< 0;
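The parameter declarations these diagnostics reject look roughly like (hypothetical names):

    void f(inline int x);     // error: 'inline' can only appear on functions
    void g(constexpr int n);  // error: a parameter cannot be constexpr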
@@ -10431,7 +10832,7 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) {
// Check that there are no default arguments inside the type of this
// parameter.
CheckExtraCXXDefaultArguments(D);
-
+
// Parameter declarators cannot be qualified (C++ [dcl.meaning]p1).
if (D.getCXXScopeSpec().isSet()) {
Diag(D.getIdentifierLoc(), diag::err_qualified_param_declarator)
@@ -10491,7 +10892,7 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) {
assert(S->getFunctionPrototypeDepth() >= 1);
New->setScopeInfo(S->getFunctionPrototypeDepth() - 1,
S->getNextFunctionPrototypeIndex());
-
+
// Add the parameter declaration into this scope.
S->AddDecl(New);
if (II)
@@ -10526,26 +10927,23 @@ ParmVarDecl *Sema::BuildParmVarDeclForTypedef(DeclContext *DC,
return Param;
}
-void Sema::DiagnoseUnusedParameters(ParmVarDecl * const *Param,
- ParmVarDecl * const *ParamEnd) {
+void Sema::DiagnoseUnusedParameters(ArrayRef<ParmVarDecl *> Parameters) {
// Don't diagnose unused-parameter errors in template instantiations; we
// will already have done so in the template itself.
if (!ActiveTemplateInstantiations.empty())
return;
- for (; Param != ParamEnd; ++Param) {
- if (!(*Param)->isReferenced() && (*Param)->getDeclName() &&
- !(*Param)->hasAttr<UnusedAttr>()) {
- Diag((*Param)->getLocation(), diag::warn_unused_parameter)
- << (*Param)->getDeclName();
+ for (const ParmVarDecl *Parameter : Parameters) {
+ if (!Parameter->isReferenced() && Parameter->getDeclName() &&
+ !Parameter->hasAttr<UnusedAttr>()) {
+ Diag(Parameter->getLocation(), diag::warn_unused_parameter)
+ << Parameter->getDeclName();
}
}
}
-void Sema::DiagnoseSizeOfParametersAndReturnValue(ParmVarDecl * const *Param,
- ParmVarDecl * const *ParamEnd,
- QualType ReturnTy,
- NamedDecl *D) {
+void Sema::DiagnoseSizeOfParametersAndReturnValue(
+ ArrayRef<ParmVarDecl *> Parameters, QualType ReturnTy, NamedDecl *D) {
if (LangOpts.NumLargeByValueCopy == 0) // No check.
return;
@@ -10560,14 +10958,14 @@ void Sema::DiagnoseSizeOfParametersAndReturnValue(ParmVarDecl * const *Param,
// Warn if any parameter is pass-by-value and larger than the specified
// threshold.
- for (; Param != ParamEnd; ++Param) {
- QualType T = (*Param)->getType();
+ for (const ParmVarDecl *Parameter : Parameters) {
+ QualType T = Parameter->getType();
if (T->isDependentType() || !T.isPODType(Context))
continue;
unsigned Size = Context.getTypeSizeInChars(T).getQuantity();
if (Size > LangOpts.NumLargeByValueCopy)
- Diag((*Param)->getLocation(), diag::warn_parameter_size)
- << (*Param)->getDeclName() << Size;
+ Diag(Parameter->getLocation(), diag::warn_parameter_size)
+ << Parameter->getDeclName() << Size;
}
}
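The ArrayRef refactoring lets call sites shrink from iterator pairs to a single range, as later hunks in this diff show:

    DiagnoseUnusedParameters(FD->param_begin(), FD->param_end());  // before
    DiagnoseUnusedParameters(FD->parameters());                    // after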
@@ -10599,7 +10997,7 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc,
}
ParmVarDecl *New = ParmVarDecl::Create(Context, DC, StartLoc, NameLoc, Name,
- Context.getAdjustedParameterType(T),
+ Context.getAdjustedParameterType(T),
TSInfo, SC, nullptr);
// Parameters can not be abstract class types.
@@ -10613,7 +11011,8 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc,
// Parameter declarators cannot be interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
- SourceLocation TypeEndLoc = TSInfo->getTypeLoc().getLocEnd();
+ SourceLocation TypeEndLoc =
+ getLocForEndOfToken(TSInfo->getTypeLoc().getLocEnd());
Diag(NameLoc,
diag::err_object_cannot_be_passed_returned_by_value) << 1 << T
<< FixItHint::CreateInsertion(TypeEndLoc, "*");
@@ -10621,7 +11020,7 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc,
New->setType(T);
}
- // ISO/IEC TR 18037 S6.7.3: "The type of an object with automatic storage
+ // ISO/IEC TR 18037 S6.7.3: "The type of an object with automatic storage
// duration shall not be qualified by an address-space qualifier."
  // Since all parameters have automatic storage duration, they cannot have
// an address space.
@@ -10632,7 +11031,7 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc,
Diag(NameLoc, diag::err_arg_with_address_space);
New->setInvalidDecl();
}
- }
+ }
return New;
}
@@ -10686,11 +11085,11 @@ Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Declarator &D,
return ActOnStartOfFunctionDef(FnBodyScope, DP, SkipBody);
}
-void Sema::ActOnFinishInlineMethodDef(CXXMethodDecl *D) {
- Consumer.HandleInlineMethodDefinition(D);
+void Sema::ActOnFinishInlineFunctionDef(FunctionDecl *D) {
+ Consumer.HandleInlineFunctionDefinition(D);
}
-static bool ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
+static bool ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
const FunctionDecl*& PossibleZeroParamPrototype) {
// Don't warn about invalid declarations.
if (FD->isInvalidDecl())
@@ -10786,11 +11185,10 @@ Sema::CheckForFunctionRedefinition(FunctionDecl *FD,
FD->setInvalidDecl();
}
-
-static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
+static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
Sema &S) {
CXXRecordDecl *const LambdaClass = CallOperator->getParent();
-
+
LambdaScopeInfo *LSI = S.PushLambdaScope();
LSI->CallOperator = CallOperator;
LSI->Lambda = LambdaClass;
@@ -10804,12 +11202,12 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
else if (LCD == LCD_ByRef)
LSI->ImpCaptureStyle = CapturingScopeInfo::ImpCap_LambdaByref;
DeclarationNameInfo DNI = CallOperator->getNameInfo();
-
- LSI->IntroducerRange = DNI.getCXXOperatorNameRange();
+
+ LSI->IntroducerRange = DNI.getCXXOperatorNameRange();
LSI->Mutable = !CallOperator->isConst();
// Add the captures to the LSI so they can be noted as already
- // captured within tryCaptureVar.
+ // captured within tryCaptureVar.
auto I = LambdaClass->field_begin();
for (const auto &C : LambdaClass->captures()) {
if (C.capturesVariable()) {
@@ -10818,15 +11216,16 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
S.CurrentInstantiationScope->InstantiatedLocal(VD, VD);
QualType CaptureType = VD->getType();
const bool ByRef = C.getCaptureKind() == LCK_ByRef;
- LSI->addCapture(VD, /*IsBlock*/false, ByRef,
+ LSI->addCapture(VD, /*IsBlock*/false, ByRef,
/*RefersToEnclosingVariableOrCapture*/true, C.getLocation(),
- /*EllipsisLoc*/C.isPackExpansion()
+ /*EllipsisLoc*/C.isPackExpansion()
? C.getEllipsisLoc() : SourceLocation(),
CaptureType, /*Expr*/ nullptr);
} else if (C.capturesThis()) {
- LSI->addThisCapture(/*Nested*/ false, C.getLocation(),
- S.getCurrentThisType(), /*Expr*/ nullptr);
+ LSI->addThisCapture(/*Nested*/ false, C.getLocation(),
+ /*Expr*/ nullptr,
+ C.getCaptureKind() == LCK_StarThis);
} else {
LSI->addVLATypeCapture(C.getLocation(), I->getType());
}
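The new LCK_StarThis case corresponds to the C++1z by-value capture of the enclosing object, roughly (hypothetical names):

    struct Widget {
      int n = 0;
      auto snapshot() {
        return [*this] { return n; };  // captures a copy of *this
      }
    };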
@@ -10838,7 +11237,7 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
SkipBodyInfo *SkipBody) {
// Clear the last template instantiation error context.
LastTemplateInstantiationErrorContext = ActiveTemplateInstantiation();
-
+
if (!D)
return D;
FunctionDecl *FD = nullptr;
@@ -10859,16 +11258,16 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
// If we are instantiating a generic lambda call operator, push
// a LambdaScopeInfo onto the function stack. But use the information
- // that's already been calculated (ActOnLambdaExpr) to prime the current
- // LambdaScopeInfo.
+ // that's already been calculated (ActOnLambdaExpr) to prime the current
+ // LambdaScopeInfo.
  // When the template operator is being specialized, the LambdaScopeInfo
  // has to be properly restored so that tryCaptureVariable doesn't try
  // to capture any new variables. In addition, when calculating potential
- // captures during transformation of nested lambdas, it is necessary to
- // have the LSI properly restored.
+ // captures during transformation of nested lambdas, it is necessary to
+ // have the LSI properly restored.
if (isGenericLambdaCallOperatorSpecialization(FD)) {
assert(ActiveTemplateInstantiations.size() &&
- "There should be an active template instantiation on the stack "
+ "There should be an active template instantiation on the stack "
"when instantiating a generic lambda!");
RebuildLambdaScopeInfo(cast<CXXMethodDecl>(D), *this);
}
@@ -10898,11 +11297,11 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
PushDeclContext(FnBodyScope, FD);
// Check the validity of our function parameters
- CheckParmsForFunctionDef(FD->param_begin(), FD->param_end(),
+ CheckParmsForFunctionDef(FD->parameters(),
/*CheckParameterNames=*/true);
// Introduce our parameters into the function scope
- for (auto Param : FD->params()) {
+ for (auto Param : FD->parameters()) {
Param->setOwningFunction(FD);
// If this has an identifier, add it to the scope stack.
@@ -10965,15 +11364,15 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D,
getCurLexicalContext()->getDeclKind() != Decl::ObjCCategoryImpl &&
getCurLexicalContext()->getDeclKind() != Decl::ObjCImplementation)
Diag(FD->getLocation(), diag::warn_function_def_in_objc_container);
-
+
return D;
}
/// \brief Given the set of return statements within a function body,
-/// compute the variables that are subject to the named return value
+/// compute the variables that are subject to the named return value
/// optimization.
///
-/// Each of the variables that is subject to the named return value
+/// Each of the variables that is subject to the named return value
/// optimization will be marked as NRVO variables in the AST, and any
/// return statement that has a marked NRVO variable as its NRVO candidate can
/// use the named return value optimization.
@@ -11033,7 +11432,7 @@ Decl *Sema::ActOnSkippedFunctionBody(Decl *Decl) {
FD->setHasSkippedBody();
else if (ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(Decl))
MD->setHasSkippedBody();
- return ActOnFinishFunctionBody(Decl, nullptr);
+ return Decl;
}
Decl *Sema::ActOnFinishFunctionBody(Decl *D, Stmt *BodyArg) {
@@ -11053,22 +11452,26 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
if (FD) {
FD->setBody(Body);
- if (getLangOpts().CPlusPlus14 && !FD->isInvalidDecl() && Body &&
- !FD->isDependentContext() && FD->getReturnType()->isUndeducedType()) {
- // If the function has a deduced result type but contains no 'return'
- // statements, the result type as written must be exactly 'auto', and
- // the deduced result type is 'void'.
- if (!FD->getReturnType()->getAs<AutoType>()) {
- Diag(dcl->getLocation(), diag::err_auto_fn_no_return_but_not_auto)
- << FD->getReturnType();
- FD->setInvalidDecl();
- } else {
- // Substitute 'void' for the 'auto' in the type.
- TypeLoc ResultType = getReturnTypeLoc(FD);
- Context.adjustDeducedFunctionResultType(
- FD, SubstAutoType(ResultType.getType(), Context.VoidTy));
+ if (getLangOpts().CPlusPlus14) {
+ if (!FD->isInvalidDecl() && Body && !FD->isDependentContext() &&
+ FD->getReturnType()->isUndeducedType()) {
+ // If the function has a deduced result type but contains no 'return'
+ // statements, the result type as written must be exactly 'auto', and
+ // the deduced result type is 'void'.
+ if (!FD->getReturnType()->getAs<AutoType>()) {
+ Diag(dcl->getLocation(), diag::err_auto_fn_no_return_but_not_auto)
+ << FD->getReturnType();
+ FD->setInvalidDecl();
+ } else {
+ // Substitute 'void' for the 'auto' in the type.
+ TypeLoc ResultType = getReturnTypeLoc(FD);
+ Context.adjustDeducedFunctionResultType(
+ FD, SubstAutoType(ResultType.getType(), Context.VoidTy));
+ }
}
} else if (getLangOpts().CPlusPlus11 && isLambdaCallOperator(FD)) {
+ // In C++11, we don't use 'auto' deduction rules for lambda call
+ // operators because we don't support return type deduction.
auto *LSI = getCurLambda();
if (LSI->HasImplicitReturnType) {
deduceClosureReturnType(*LSI);
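A sketch of the C++14 deduction rule restated in the comment above (each line in isolation):

    auto f() {}   // OK: no return statement, deduced return type is void
    auto *g() {}  // error: the return type as written must be exactly 'auto'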
@@ -11112,8 +11515,8 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
if (!FD->isInvalidDecl()) {
// Don't diagnose unused parameters of defaulted or deleted functions.
if (!FD->isDeleted() && !FD->isDefaulted())
- DiagnoseUnusedParameters(FD->param_begin(), FD->param_end());
- DiagnoseSizeOfParametersAndReturnValue(FD->param_begin(), FD->param_end(),
+ DiagnoseUnusedParameters(FD->parameters());
+ DiagnoseSizeOfParametersAndReturnValue(FD->parameters(),
FD->getReturnType(), FD);
// If this is a structor, we need a vtable.
@@ -11121,7 +11524,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
MarkVTableUsed(FD->getLocation(), Constructor->getParent());
else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(FD))
MarkVTableUsed(FD->getLocation(), Destructor->getParent());
-
+
// Try to apply the named return value optimization. We have to check
// if we can do this here because lambdas keep return statements around
// to deduce an implicit return type.
@@ -11184,8 +11587,8 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
assert(MD == getCurMethodDecl() && "Method parsing confused");
MD->setBody(Body);
if (!MD->isInvalidDecl()) {
- DiagnoseUnusedParameters(MD->param_begin(), MD->param_end());
- DiagnoseSizeOfParametersAndReturnValue(MD->param_begin(), MD->param_end(),
+ DiagnoseUnusedParameters(MD->parameters());
+ DiagnoseSizeOfParametersAndReturnValue(MD->parameters(),
MD->getReturnType(), MD);
if (Body)
@@ -11245,7 +11648,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
// Verify this.
if (FD && isa<CXXConstructorDecl>(FD) && isa<CXXTryStmt>(Body))
DiagnoseReturnInConstructorExceptionHandler(cast<CXXTryStmt>(Body));
-
+
// Verify that gotos and switch cases don't jump into scopes illegally.
if (getCurFunction()->NeedsScopeChecking() &&
!PP.isCodeCompletionEnabled())
@@ -11258,7 +11661,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
Destructor->getParent());
}
-
+
// If any errors have occurred, clear out any temporaries that may have
// been leftover. This ensures that these temporaries won't be picked up for
// deletion in some later function.
@@ -11292,11 +11695,11 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
assert(ExprCleanupObjects.size() ==
ExprEvalContexts.back().NumCleanupObjects &&
"Leftover temporaries in function");
- assert(!ExprNeedsCleanups && "Unaccounted cleanups in function");
+ assert(!Cleanup.exprNeedsCleanups() && "Unaccounted cleanups in function");
assert(MaybeODRUseExprs.empty() &&
"Leftover expressions for odr-use checking");
}
-
+
if (!IsInstantiation)
PopDeclContext();
@@ -11311,7 +11714,6 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
return dcl;
}
-
/// When we finish delayed parsing of an attribute, we must attach it to the
/// relevant Decl.
void Sema::ActOnFinishDelayedAttribute(Scope *S, Decl *D,
@@ -11319,14 +11721,13 @@ void Sema::ActOnFinishDelayedAttribute(Scope *S, Decl *D,
// Always attach attributes to the underlying decl.
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
D = TD->getTemplatedDecl();
- ProcessDeclAttributeList(S, D, Attrs.getList());
-
+ ProcessDeclAttributeList(S, D, Attrs.getList());
+
if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(D))
if (Method->isStatic())
checkThisInStaticMemberFunctionAttributes(Method);
}
-
/// ImplicitlyDefineFunction - An undeclared identifier was used in a function
/// call, forming a call to an implicitly defined function (per C99 6.5.1p2).
NamedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc,
@@ -11473,14 +11874,15 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) {
FD->getLocation()));
if (Context.BuiltinInfo.isNoThrow(BuiltinID) && !FD->hasAttr<NoThrowAttr>())
FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
+ if (Context.BuiltinInfo.isPure(BuiltinID) && !FD->hasAttr<PureAttr>())
+ FD->addAttr(PureAttr::CreateImplicit(Context, FD->getLocation()));
if (Context.BuiltinInfo.isConst(BuiltinID) && !FD->hasAttr<ConstAttr>())
FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
- if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads &&
- Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
+ if (getLangOpts().CUDA && Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
!FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAHostAttr>()) {
- // Assign appropriate attribute depending on CUDA compilation
- // mode and the target builtin belongs to. E.g. during host
- // compilation, aux builtins are __device__, the rest are __host__.
+ // Add the appropriate attribute, depending on the CUDA compilation mode
+ // and which target the builtin belongs to. For example, during host
+ // compilation, aux builtins are __device__, while the rest are __host__.
if (getLangOpts().CUDAIsDevice !=
Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
FD->addAttr(CUDADeviceAttr::CreateImplicit(Context, FD->getLocation()));
@@ -11489,6 +11891,16 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) {
}
}
+ // If C++ exceptions are enabled but we are told extern "C" functions cannot
+ // throw, add an implicit nothrow attribute to any extern "C" function we come
+ // across.
+ if (getLangOpts().CXXExceptions && getLangOpts().ExternCNoUnwind &&
+ FD->isExternC() && !FD->hasAttr<NoThrowAttr>()) {
+ const auto *FPT = FD->getType()->getAs<FunctionProtoType>();
+ if (!FPT || FPT->getExceptionSpecType() == EST_None)
+ FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
+ }
+
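A sketch of the effect, assuming a build where ExternCNoUnwind is set (this language option appears to be specific to this tree):

    extern "C" void legacy_api(void);  // with C++ exceptions enabled, implicitly
                                       // treated as __attribute__((nothrow))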
IdentifierInfo *Name = FD->getIdentifier();
if (!Name)
return;
@@ -11543,7 +11955,7 @@ TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, QualType T,
NewTD->setInvalidDecl();
return NewTD;
}
-
+
if (D.getDeclSpec().isModulePrivateSpecified()) {
if (CurContext->isFunctionOrMethod())
Diag(NewTD->getLocation(), diag::err_module_private_local)
@@ -11553,7 +11965,7 @@ TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, QualType T,
else
NewTD->setModulePrivate();
}
-
+
// C++ [dcl.typedef]p8:
// If the typedef declaration defines an unnamed class (or
// enum), the first typedef-name declared by the declaration
@@ -11578,7 +11990,6 @@ TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, QualType T,
return NewTD;
}
-
/// \brief Check that this is a valid underlying type for an enum declaration.
bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) {
SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc();
@@ -12006,7 +12417,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
}
// A tag 'foo::bar' must already exist.
- Diag(NameLoc, diag::err_not_tag_in_scope)
+ Diag(NameLoc, diag::err_not_tag_in_scope)
<< Kind << Name << DC << SS.getRange();
Name = nullptr;
Invalid = true;
@@ -12030,12 +12441,13 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
// When declaring or defining a tag, ignore ambiguities introduced
// by types using'ed into this scope.
- if (Previous.isAmbiguous() &&
+ if (Previous.isAmbiguous() &&
(TUK == TUK_Definition || TUK == TUK_Declaration)) {
LookupResult::Filter F = Previous.makeFilter();
while (F.hasNext()) {
NamedDecl *ND = F.next();
- if (ND->getDeclContext()->getRedeclContext() != SearchDC)
+ if (!ND->getDeclContext()->getRedeclContext()->Equals(
+ SearchDC->getRedeclContext()))
F.erase();
}
F.done();
@@ -12106,10 +12518,10 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
DC->Equals(getStdNamespace()) && Name->isStr("bad_alloc")) {
// This is a declaration of or a reference to "std::bad_alloc".
isStdBadAlloc = true;
-
+
if (Previous.empty() && StdBadAlloc) {
// std::bad_alloc has been implicitly declared (but made invisible to
- // name lookup). Fill in this implicit declaration as the previous
+ // name lookup). Fill in this implicit declaration as the previous
// declaration, so that the declarations get chained appropriately.
Previous.addDecl(getStdBadAlloc());
}
@@ -12422,7 +12834,6 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
// is non-NULL, it's a definition of the tag declared by
// PrevDecl. If it's NULL, we have a new definition.
-
// Otherwise, PrevDecl is not a tag, but was found with tag
// lookup. This is only actually possible in C++, where a few
// things like templates still live in the tag namespace.
@@ -12523,8 +12934,8 @@ CreateNewDecl:
else if (getLangOpts().CPlusPlus)
DiagID = diag::err_forward_ref_enum;
Diag(Loc, DiagID);
-
- // If this is a forward-declared reference to an enumeration, make a
+
+ // If this is a forward-declared reference to an enumeration, make a
// note of it; we won't actually be introducing the declaration into
// the declaration context.
if (TUK == TUK_Reference)
@@ -12540,7 +12951,6 @@ CreateNewDecl:
ED->setIntegerType(QualType(EnumUnderlying.get<const Type*>(), 0));
ED->setPromotionType(ED->getIntegerType());
}
-
} else {
// struct/union/class
@@ -12569,10 +12979,10 @@ CreateNewDecl:
// Maybe add qualifier info.
if (SS.isNotEmpty()) {
if (SS.isSet()) {
- // If this is either a declaration or a definition, check the
+ // If this is either a declaration or a definition, check the
// nested-name-specifier against the current context. We don't do this
// for explicit specializations, because they have similar checking
- // (with more specific diagnostics) in the call to
+ // (with more specific diagnostics) in the call to
// CheckMemberSpecialization, below.
if (!isExplicitSpecialization &&
(TUK == TUK_Definition || TUK == TUK_Declaration) &&
@@ -12681,7 +13091,6 @@ CreateNewDecl:
PushOnScopeChains(New, S, !IsForwardReference);
if (IsForwardReference)
SearchDC->makeDeclVisibleInContext(New);
-
} else {
CurContext->addDecl(New);
}
@@ -12709,7 +13118,7 @@ CreateNewDecl:
void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
AdjustDeclIfTemplate(TagD);
TagDecl *Tag = cast<TagDecl>(TagD);
-
+
// Enter the tag context.
PushDeclContext(S, Tag);
@@ -12721,7 +13130,7 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) {
}
Decl *Sema::ActOnObjCContainerStartDefinition(Decl *IDecl) {
- assert(isa<ObjCContainerDecl>(IDecl) &&
+ assert(isa<ObjCContainerDecl>(IDecl) &&
"ActOnObjCContainerStartDefinition - Not ObjCContainerDecl");
DeclContext *OCD = cast<DeclContext>(IDecl);
assert(getContainingDC(OCD) == CurContext &&
@@ -12768,10 +13177,10 @@ void Sema::ActOnStartCXXMemberDeclarations(Scope *S, Decl *TagD,
}
void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
- SourceLocation RBraceLoc) {
+ SourceRange BraceRange) {
AdjustDeclIfTemplate(TagD);
TagDecl *Tag = cast<TagDecl>(TagD);
- Tag->setRBraceLoc(RBraceLoc);
+ Tag->setBraceRange(BraceRange);
// Make sure we "complete" the definition even it is invalid.
if (Tag->isBeingDefined()) {
@@ -12826,7 +13235,7 @@ void Sema::ActOnTagDefinitionError(Scope *S, Decl *TagD) {
// ActOnStartCXXMemberDeclarations, so we don't have to mess with
// the FieldCollector.
- PopDeclContext();
+ PopDeclContext();
}
// Note that FieldName may be null for anonymous bitfields.
@@ -12961,15 +13370,19 @@ FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record,
D.setInvalidType();
}
- // OpenCL 1.2 spec, s6.9 r:
- // The event type cannot be used to declare a structure or union field.
- if (LangOpts.OpenCL && T->isEventT()) {
- Diag(Loc, diag::err_event_t_struct_field);
+ // OpenCL v1.2 s6.9b,r & OpenCL v2.0 s6.12.5 - The following types cannot be
+  // used as a structure or union field: image, sampler, event or block types.
+ if (LangOpts.OpenCL && (T->isEventT() || T->isImageType() ||
+ T->isSamplerT() || T->isBlockPointerType())) {
+ Diag(Loc, diag::err_opencl_type_struct_or_union_field) << T;
D.setInvalidType();
}
DiagnoseFunctionSpecifiers(D.getDeclSpec());
+ if (D.getDeclSpec().isInlineSpecified())
+ Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
+ << getLangOpts().CPlusPlus1z;
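An OpenCL C sketch of the field types this now rejects (hypothetical names):

    struct pixel_ctx {
      image2d_t img;  // error: image type cannot be used as a structure or union field
      sampler_t smp;  // error: likewise for sampler, event, and block types
    };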
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
@@ -12984,11 +13397,11 @@ FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record,
case LookupResult::FoundUnresolvedValue:
PrevDecl = Previous.getAsSingle<NamedDecl>();
break;
-
+
case LookupResult::FoundOverloaded:
PrevDecl = Previous.getRepresentativeDecl();
break;
-
+
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
@@ -13018,7 +13431,7 @@ FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record,
if (D.getDeclSpec().isModulePrivateSpecified())
NewFD->setModulePrivate();
-
+
if (NewFD->isInvalidDecl() && PrevDecl) {
// Don't introduce NewFD into scope; there's already something
// with the same name in the same scope.
@@ -13250,9 +13663,9 @@ bool Sema::CheckNontrivialField(FieldDecl *FD) {
if (!getLangOpts().CPlusPlus11 &&
getLangOpts().ObjCAutoRefCount && RDecl->hasObjectMember()) {
// Objective-C++ ARC: it is an error to have a non-trivial field of
- // a union. However, system headers in Objective-C programs
+ // a union. However, system headers in Objective-C programs
// occasionally have Objective-C lifetime objects within unions,
- // and rather than cause the program to fail, we make those
+ // and rather than cause the program to fail, we make those
// members unavailable.
SourceLocation Loc = FD->getLocation();
if (getSourceManager().isInSystemHeader(Loc)) {
@@ -13348,7 +13761,7 @@ Decl *Sema::ActOnIvar(Scope *S,
else
EnclosingContext = EnclosingDecl;
} else {
- if (ObjCCategoryDecl *CDecl =
+ if (ObjCCategoryDecl *CDecl =
dyn_cast<ObjCCategoryDecl>(EnclosingDecl)) {
if (LangOpts.ObjCRuntime.isFragile() || !CDecl->IsClassExtension()) {
Diag(Loc, diag::err_misplaced_ivar) << CDecl->IsClassExtension();
@@ -13386,33 +13799,33 @@ Decl *Sema::ActOnIvar(Scope *S,
if (D.getDeclSpec().isModulePrivateSpecified())
NewID->setModulePrivate();
-
+
if (II) {
// FIXME: When interfaces are DeclContexts, we'll need to add
// these to the interface.
S->AddDecl(NewID);
IdResolver.AddDecl(NewID);
}
-
+
if (LangOpts.ObjCRuntime.isNonFragile() &&
!NewID->isInvalidDecl() && isa<ObjCInterfaceDecl>(EnclosingDecl))
Diag(Loc, diag::warn_ivars_in_interface);
-
+
return NewID;
}
-/// ActOnLastBitfield - This routine handles synthesized bitfields rules for
-/// class and class extensions. For every class \@interface and class
-/// extension \@interface, if the last ivar is a bitfield of any type,
+/// ActOnLastBitfield - This routine handles synthesized bitfield rules for
+/// class and class extensions. For every class \@interface and class
+/// extension \@interface, if the last ivar is a bitfield of any type,
/// then add an implicit `char :0` ivar to the end of that interface.
void Sema::ActOnLastBitfield(SourceLocation DeclLoc,
SmallVectorImpl<Decl *> &AllIvarDecls) {
if (LangOpts.ObjCRuntime.isFragile() || AllIvarDecls.empty())
return;
-
+
Decl *ivarDecl = AllIvarDecls[AllIvarDecls.size()-1];
ObjCIvarDecl *Ivar = cast<ObjCIvarDecl>(ivarDecl);
-
+
if (!Ivar->isBitField() || Ivar->getBitWidthValue(Context) == 0)
return;
ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(CurContext);
@@ -13431,7 +13844,7 @@ void Sema::ActOnLastBitfield(SourceLocation DeclLoc,
Ivar = ObjCIvarDecl::Create(Context, cast<ObjCContainerDecl>(CurContext),
DeclLoc, DeclLoc, nullptr,
- Context.CharTy,
+ Context.CharTy,
Context.getTrivialTypeSourceInfo(Context.CharTy,
DeclLoc),
ObjCIvarDecl::Private, BW,
@@ -13460,7 +13873,7 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
break;
}
}
-
+
RecordDecl *Record = dyn_cast<RecordDecl>(EnclosingDecl);
// Start counting up the number of named members; make sure to include
@@ -13514,7 +13927,7 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
FD->setInvalidDecl();
EnclosingDecl->setInvalidDecl();
continue;
- } else if (FDTy->isIncompleteArrayType() && Record &&
+ } else if (FDTy->isIncompleteArrayType() && Record &&
((i + 1 == Fields.end() && !Record->isUnion()) ||
((getLangOpts().MicrosoftExt ||
getLangOpts().CPlusPlus) &&
@@ -13530,14 +13943,12 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
: getLangOpts().CPlusPlus
? diag::ext_flexible_array_union_gnu
: diag::err_flexible_array_union;
- else if (Fields.size() == 1)
+ else if (NumNamedMembers < 1)
DiagID = getLangOpts().MicrosoftExt
? diag::ext_flexible_array_empty_aggregate_ms
: getLangOpts().CPlusPlus
? diag::ext_flexible_array_empty_aggregate_gnu
- : NumNamedMembers < 1
- ? diag::err_flexible_array_empty_aggregate
- : 0;
+ : diag::err_flexible_array_empty_aggregate;
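The flexible-array cases being distinguished, roughly (hypothetical names; errors become extensions in the GNU and Microsoft modes named above):

    struct ok  { int n; int tail[]; };  // OK: flexible array member after a named member
    struct bad { int tail[]; };         // error: flexible array in an otherwise empty aggregate
    union  u   { int tail[]; };         // error: flexible array member in a union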
if (DiagID)
Diag(FD->getLocation(), DiagID) << FD->getDeclName()
@@ -13631,7 +14042,7 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
UnavailableAttr::IR_ARCFieldWithOwnership, loc));
}
} else {
- Diag(FD->getLocation(), diag::err_arc_objc_object_in_tag)
+ Diag(FD->getLocation(), diag::err_arc_objc_object_in_tag)
<< T->isBlockPointerType() << Record->getTagKind();
}
ARCErrReported = true;
@@ -13644,7 +14055,7 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
Record->setHasObjectMember(true);
else if (Context.getAsArrayType(FD->getType())) {
QualType BaseType = Context.getBaseElementType(FD->getType());
- if (BaseType->isRecordType() &&
+ if (BaseType->isRecordType() &&
BaseType->getAs<RecordType>()->getDecl()->hasObjectMember())
Record->setHasObjectMember(true);
else if (BaseType->isObjCObjectPointerType() ||
@@ -13669,51 +14080,53 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
I = CXXRecord->conversion_begin(),
E = CXXRecord->conversion_end(); I != E; ++I)
I.setAccess((*I)->getAccess());
-
- if (!CXXRecord->isDependentType()) {
- if (CXXRecord->hasUserDeclaredDestructor()) {
- // Adjust user-defined destructor exception spec.
- if (getLangOpts().CPlusPlus11)
- AdjustDestructorExceptionSpec(CXXRecord,
- CXXRecord->getDestructor());
- }
+ }
+ if (!CXXRecord->isDependentType()) {
+ if (CXXRecord->hasUserDeclaredDestructor()) {
+ // Adjust user-defined destructor exception spec.
+ if (getLangOpts().CPlusPlus11)
+ AdjustDestructorExceptionSpec(CXXRecord,
+ CXXRecord->getDestructor());
+ }
+
+ if (!CXXRecord->isInvalidDecl()) {
// Add any implicitly-declared members to this class.
AddImplicitlyDeclaredMembersToClass(CXXRecord);
- // If we have virtual base classes, we may end up finding multiple
- // final overriders for a given virtual function. Check for this
+ // If we have virtual base classes, we may end up finding multiple
+ // final overriders for a given virtual function. Check for this
// problem now.
if (CXXRecord->getNumVBases()) {
CXXFinalOverriderMap FinalOverriders;
CXXRecord->getFinalOverriders(FinalOverriders);
-
- for (CXXFinalOverriderMap::iterator M = FinalOverriders.begin(),
+
+ for (CXXFinalOverriderMap::iterator M = FinalOverriders.begin(),
MEnd = FinalOverriders.end();
M != MEnd; ++M) {
- for (OverridingMethods::iterator SO = M->second.begin(),
+ for (OverridingMethods::iterator SO = M->second.begin(),
SOEnd = M->second.end();
SO != SOEnd; ++SO) {
- assert(SO->second.size() > 0 &&
+ assert(SO->second.size() > 0 &&
"Virtual function without overridding functions?");
if (SO->second.size() == 1)
continue;
-
+
// C++ [class.virtual]p2:
// In a derived class, if a virtual member function of a base
// class subobject has more than one final overrider the
// program is ill-formed.
Diag(Record->getLocation(), diag::err_multiple_final_overriders)
<< (const NamedDecl *)M->first << Record;
- Diag(M->first->getLocation(),
+ Diag(M->first->getLocation(),
diag::note_overridden_virtual_function);
- for (OverridingMethods::overriding_iterator
- OM = SO->second.begin(),
+ for (OverridingMethods::overriding_iterator
+ OM = SO->second.begin(),
OMEnd = SO->second.end();
OM != OMEnd; ++OM)
Diag(OM->Method->getLocation(), diag::note_final_overrider)
<< (const NamedDecl *)M->first << OM->Method->getParent();
-
+
Record->setInvalidDecl();
}
}
@@ -13723,7 +14136,7 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
}
}
}
-
+
if (!Completed)
Record->completeDefinition();
@@ -13812,7 +14225,7 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
CheckImplementationIvars(IMPDecl, ClsFields, RecFields.size(), RBrac);
IMPDecl->setIvarLBraceLoc(LBrac);
IMPDecl->setIvarRBraceLoc(RBrac);
- } else if (ObjCCategoryDecl *CDecl =
+ } else if (ObjCCategoryDecl *CDecl =
dyn_cast<ObjCCategoryDecl>(EnclosingDecl)) {
// case of ivars in class extension; all other cases have been
// reported as errors elsewhere.
@@ -13823,18 +14236,18 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
ObjCInterfaceDecl *IDecl = CDecl->getClassInterface();
for (unsigned i = 0, e = RecFields.size(); i != e; ++i) {
if (IDecl) {
- if (const ObjCIvarDecl *ClsIvar =
+ if (const ObjCIvarDecl *ClsIvar =
IDecl->getIvarDecl(ClsFields[i]->getIdentifier())) {
- Diag(ClsFields[i]->getLocation(),
- diag::err_duplicate_ivar_declaration);
+ Diag(ClsFields[i]->getLocation(),
+ diag::err_duplicate_ivar_declaration);
Diag(ClsIvar->getLocation(), diag::note_previous_definition);
continue;
}
for (const auto *Ext : IDecl->known_extensions()) {
if (const ObjCIvarDecl *ClsExtIvar
= Ext->getIvarDecl(ClsFields[i]->getIdentifier())) {
- Diag(ClsFields[i]->getLocation(),
- diag::err_duplicate_ivar_declaration);
+ Diag(ClsFields[i]->getLocation(),
+ diag::err_duplicate_ivar_declaration);
Diag(ClsExtIvar->getLocation(), diag::note_previous_definition);
continue;
}
@@ -13859,37 +14272,37 @@ static bool isRepresentableIntegerValue(ASTContext &Context,
QualType T) {
assert(T->isIntegralType(Context) && "Integral type required!");
unsigned BitWidth = Context.getIntWidth(T);
-
+
if (Value.isUnsigned() || Value.isNonNegative()) {
- if (T->isSignedIntegerOrEnumerationType())
+ if (T->isSignedIntegerOrEnumerationType())
--BitWidth;
return Value.getActiveBits() <= BitWidth;
- }
+ }
return Value.getMinSignedBits() <= BitWidth;
}
// \brief Given an integral type, return the next larger integral type
// (or a NULL type if no such type exists).
static QualType getNextLargerIntegralType(ASTContext &Context, QualType T) {
- // FIXME: Int128/UInt128 support, which also needs to be introduced into
+ // FIXME: Int128/UInt128 support, which also needs to be introduced into
// enum checking below.
assert(T->isIntegralType(Context) && "Integral type required!");
const unsigned NumTypes = 4;
- QualType SignedIntegralTypes[NumTypes] = {
+ QualType SignedIntegralTypes[NumTypes] = {
Context.ShortTy, Context.IntTy, Context.LongTy, Context.LongLongTy
};
- QualType UnsignedIntegralTypes[NumTypes] = {
- Context.UnsignedShortTy, Context.UnsignedIntTy, Context.UnsignedLongTy,
+ QualType UnsignedIntegralTypes[NumTypes] = {
+ Context.UnsignedShortTy, Context.UnsignedIntTy, Context.UnsignedLongTy,
Context.UnsignedLongLongTy
};
-
+
unsigned BitWidth = Context.getTypeSize(T);
QualType *Types = T->isSignedIntegerOrEnumerationType()? SignedIntegralTypes
: UnsignedIntegralTypes;
for (unsigned I = 0; I != NumTypes; ++I)
if (Context.getTypeSize(Types[I]) > BitWidth)
return Types[I];
-
+
return QualType();
}
@@ -13945,12 +14358,15 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
} else
Diag(IdLoc, diag::err_enumerator_too_large) << EltTy;
} else
- Val = ImpCastExprToType(Val, EltTy, CK_IntegralCast).get();
+ Val = ImpCastExprToType(Val, EltTy,
+ EltTy->isBooleanType() ?
+ CK_IntegralToBoolean : CK_IntegralCast)
+ .get();
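The boolean-cast case arises for enumerations with a fixed underlying type of bool, e.g.:

    enum B : bool { Off = 0, On = 1 };  // initializers now use CK_IntegralToBoolean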
} else if (getLangOpts().CPlusPlus) {
// C++11 [dcl.enum]p5:
// If the underlying type is not fixed, the type of each enumerator
// is the type of its initializing value:
- // - If an initializer is specified for an enumerator, the
+ // - If an initializer is specified for an enumerator, the
// initializing value has the same type as the expression.
EltTy = Val->getType();
} else {
@@ -13981,10 +14397,10 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
// C++0x [dcl.enum]p5:
// If the underlying type is not fixed, the type of each enumerator
// is the type of its initializing value:
- // - If no initializer is specified for the first enumerator, the
+ // - If no initializer is specified for the first enumerator, the
// initializing value has an unspecified integral type.
//
- // GCC uses 'int' for its unspecified integral type, as does
+ // GCC uses 'int' for its unspecified integral type, as does
// C99 6.7.2.2p3.
if (Enum->isFixed()) {
EltTy = Enum->getIntegerType();
@@ -14007,12 +14423,12 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
// - Otherwise the type of the initializing value is the same as
// the type of the initializing value of the preceding enumerator
// unless the incremented value is not representable in that type,
- // in which case the type is an unspecified integral type
+ // in which case the type is an unspecified integral type
// sufficient to contain the incremented value. If no such type
// exists, the program is ill-formed.
QualType T = getNextLargerIntegralType(Context, EltTy);
if (T.isNull() || Enum->isFixed()) {
- // There is no integral type larger enough to represent this
+      // There is no integral type large enough to represent this
// value. Complain, then allow the value to wrap around.
EnumVal = LastEnumConst->getInitVal();
EnumVal = EnumVal.zext(EnumVal.getBitWidth() * 2);
@@ -14028,15 +14444,15 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
} else {
EltTy = T;
}
-
+
    // Retrieve the last enumerator's value, extend that type to the
// type that is supposed to be large enough to represent the incremented
// value, then increment.
EnumVal = LastEnumConst->getInitVal();
EnumVal.setIsSigned(EltTy->isSignedIntegerOrEnumerationType());
EnumVal = EnumVal.zextOrTrunc(Context.getIntWidth(EltTy));
- ++EnumVal;
-
+ ++EnumVal;
+
// If we're not in C++, diagnose the overflow of enumerator values,
// which in C99 means that the enumerator value is not representable in
// an int (C99 6.7.2.2p2). However, we support GCC's extension that
@@ -14054,12 +14470,12 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
}
if (!EltTy->isDependentType()) {
- // Make the enumerator value match the signedness and size of the
+ // Make the enumerator value match the signedness and size of the
// enumerator's type.
EnumVal = EnumVal.extOrTrunc(Context.getIntWidth(EltTy));
EnumVal.setIsSigned(EltTy->isSignedIntegerOrEnumerationType());
}
-
+
return EnumConstantDecl::Create(Context, Enum, IdLoc, Id, EltTy,
Val, EnumVal);
}
@@ -14114,14 +14530,14 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
}
// C++ [class.mem]p15:
- // If T is the name of a class, then each of the following shall have a name
+ // If T is the name of a class, then each of the following shall have a name
// different from T:
- // - every enumerator of every member of class T that is an unscoped
+ // - every enumerator of every member of class T that is an unscoped
// enumerated type
if (!TheEnumDecl->isScoped())
DiagnoseClassNameShadow(TheEnumDecl->getDeclContext(),
DeclarationNameInfo(Id, IdLoc));
-
+
EnumConstantDecl *New =
CheckEnumConstant(TheEnumDecl, LastEnumConst, IdLoc, Id, Val);
if (!New)
@@ -14360,8 +14776,8 @@ bool Sema::IsValueInFlagEnum(const EnumDecl *ED, const llvm::APInt &Val,
return !(FlagMask & Val) || (AllowMask && !(FlagMask & ~Val));
}
-void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
- SourceLocation RBraceLoc, Decl *EnumDeclX,
+void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceRange BraceRange,
+ Decl *EnumDeclX,
ArrayRef<Decl *> Elements,
Scope *S, AttributeList *Attr) {
EnumDecl *Enum = cast<EnumDecl>(EnumDeclX);
@@ -14430,7 +14846,7 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc,
// int, long long int, or unsigned long long int.
// C99 6.4.4.3p2:
// An identifier declared as an enumeration constant has type int.
- // The C99 rule is modified by a gcc extension
+ // The C99 rule is modified by a gcc extension
QualType BestPromotionType;
bool Packed = Enum->hasAttr<PackedAttr>();
@@ -14646,8 +15062,8 @@ void Sema::diagnoseMisplacedModuleImport(Module *M, SourceLocation ImportLoc) {
return checkModuleImportContext(*this, M, ImportLoc, CurContext);
}
-DeclResult Sema::ActOnModuleImport(SourceLocation AtLoc,
- SourceLocation ImportLoc,
+DeclResult Sema::ActOnModuleImport(SourceLocation AtLoc,
+ SourceLocation ImportLoc,
ModuleIdPath Path) {
Module *Mod =
getModuleLoader().loadModule(ImportLoc, Path, Module::AllVisible,
@@ -14663,11 +15079,10 @@ DeclResult Sema::ActOnModuleImport(SourceLocation AtLoc,
// of the same top-level module. Until we do, make it an error rather than
// silently ignoring the import.
if (Mod->getTopLevelModuleName() == getLangOpts().CurrentModule)
- Diag(ImportLoc, diag::err_module_self_import)
+ Diag(ImportLoc, getLangOpts().CompilingModule
+ ? diag::err_module_self_import
+ : diag::err_module_import_in_implementation)
<< Mod->getFullModuleName() << getLangOpts().CurrentModule;
- else if (Mod->getTopLevelModuleName() == getLangOpts().ImplementationOfModule)
- Diag(ImportLoc, diag::err_module_import_in_implementation)
- << Mod->getFullModuleName() << getLangOpts().ImplementationOfModule;
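A sketch of the rejected import (assuming an Objective-C implementation file built with -fmodule-name=Foo while not compiling the module itself):

    @import Foo;  // error: import of module 'Foo' appears within its own implementation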
SmallVector<SourceLocation, 2> IdentifierLocs;
Module *ModCheck = Mod;
@@ -14677,13 +15092,13 @@ DeclResult Sema::ActOnModuleImport(SourceLocation AtLoc,
if (!ModCheck)
break;
ModCheck = ModCheck->Parent;
-
+
IdentifierLocs.push_back(Path[I].second);
}
- ImportDecl *Import = ImportDecl::Create(Context,
+ ImportDecl *Import = ImportDecl::Create(Context,
Context.getTranslationUnitDecl(),
- AtLoc.isValid()? AtLoc : ImportLoc,
+ AtLoc.isValid()? AtLoc : ImportLoc,
Mod, IdentifierLocs);
Context.getTranslationUnitDecl()->addDecl(Import);
return Import;
@@ -14701,9 +15116,17 @@ void Sema::ActOnModuleInclude(SourceLocation DirectiveLoc, Module *Mod) {
TUKind == TU_Module &&
getSourceManager().isWrittenInMainFile(DirectiveLoc);
- // If this module import was due to an inclusion directive, create an
+ // Similarly, if we're in the implementation of a module, don't
+ // synthesize an illegal module import. FIXME: Why not?
+ bool ShouldAddImport =
+ !IsInModuleIncludes &&
+ (getLangOpts().CompilingModule ||
+ getLangOpts().CurrentModule.empty() ||
+ getLangOpts().CurrentModule != Mod->getTopLevelModuleName());
+
+ // If this module import was due to an inclusion directive, create an
// implicit import declaration to capture it in the AST.
- if (!IsInModuleIncludes) {
+ if (ShouldAddImport) {
TranslationUnitDecl *TU = getASTContext().getTranslationUnitDecl();
ImportDecl *ImportD = ImportDecl::CreateImplicit(getASTContext(), TU,
DirectiveLoc, Mod,
@@ -14711,7 +15134,7 @@ void Sema::ActOnModuleInclude(SourceLocation DirectiveLoc, Module *Mod) {
TU->addDecl(ImportD);
Consumer.HandleImplicitImportDecl(ImportD);
}
-
+
getModuleLoader().makeModuleVisible(Mod, Module::AllVisible, DirectiveLoc);
VisibleModules.setVisible(Mod, DirectiveLoc);
}
@@ -14731,6 +15154,9 @@ void Sema::ActOnModuleEnd(SourceLocation DirectiveLoc, Module *Mod) {
VisibleModules = std::move(VisibleModulesStack.back());
VisibleModulesStack.pop_back();
VisibleModules.setVisible(Mod, DirectiveLoc);
+ // Leaving a module hides namespace names, so our visible namespace cache
+ // is now out of date.
+ VisibleNamespaceCache.clear();
}
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp
index f94c822b90f5..a5780a7d71fb 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Sema/SemaInternal.h"
+#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclCXX.h"
@@ -27,10 +28,12 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/DelayedDiagnostic.h"
+#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Scope.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/MathExtras.h"
+
using namespace clang;
using namespace sema;
@@ -40,7 +43,7 @@ namespace AttributeLangSupport {
Cpp,
ObjC
};
-}
+} // end namespace AttributeLangSupport
//===----------------------------------------------------------------------===//
// Helper functions
@@ -52,6 +55,7 @@ namespace AttributeLangSupport {
static bool isFunctionOrMethod(const Decl *D) {
return (D->getFunctionType() != nullptr) || isa<ObjCMethodDecl>(D);
}
+
/// \brief Return true if the given decl has function type (function or
/// function-typed variable) or an Objective-C method or a block.
static bool isFunctionOrMethodOrBlock(const Decl *D) {
@@ -801,6 +805,8 @@ static void handleLocksExcludedAttr(Sema &S, Decl *D,
}
static void handleEnableIfAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ S.Diag(Attr.getLoc(), diag::ext_clang_enable_if);
+
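The extension being diagnosed is Clang's enable_if attribute, e.g.:

    void fetch(int n) __attribute__((enable_if(n > 0, "n must be positive")));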
Expr *Cond = Attr.getArgAsExpr(0);
if (!Cond->isTypeDependent()) {
ExprResult Converted = S.PerformContextuallyConvertToBool(Cond);
@@ -887,7 +893,6 @@ static void handleConsumableAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Attr.getAttributeSpellingListIndex()));
}
-
static bool checkForConsumableClass(Sema &S, const CXXMethodDecl *MD,
const AttributeList &Attr) {
ASTContext &CurrContext = S.getASTContext();
@@ -905,7 +910,6 @@ static bool checkForConsumableClass(Sema &S, const CXXMethodDecl *MD,
return true;
}
-
static void handleCallableWhenAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
@@ -944,7 +948,6 @@ static void handleCallableWhenAttr(Sema &S, Decl *D,
States.size(), Attr.getAttributeSpellingListIndex()));
}
-
static void handleParamTypestateAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
ParamTypestateAttr::ConsumedState ParamState;
@@ -982,7 +985,6 @@ static void handleParamTypestateAttr(Sema &S, Decl *D,
Attr.getAttributeSpellingListIndex()));
}
-
static void handleReturnTypestateAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
ReturnTypestateAttr::ConsumedState ReturnState;
@@ -1031,7 +1033,6 @@ static void handleReturnTypestateAttr(Sema &S, Decl *D,
Attr.getAttributeSpellingListIndex()));
}
-
static void handleSetTypestateAttr(Sema &S, Decl *D, const AttributeList &Attr) {
if (!checkForConsumableClass(S, cast<CXXMethodDecl>(D), Attr))
return;
@@ -1548,6 +1549,28 @@ static void handleWeakRefAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Attr.getAttributeSpellingListIndex()));
}
+static void handleIFuncAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ StringRef Str;
+ if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+ return;
+
+ // Like aliases, ifuncs should be on declarations, not definitions.
+ const auto *FD = cast<FunctionDecl>(D);
+ if (FD->isThisDeclarationADefinition()) {
+ S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << FD << 1;
+ return;
+ }
+ // FIXME: This should be handled as a target-specific attribute.
+ if (S.Context.getTargetInfo().getTriple().getObjectFormat() !=
+ llvm::Triple::ELF) {
+ S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName();
+ return;
+ }
+
+ D->addAttr(::new (S.Context) IFuncAttr(Attr.getRange(), S.Context, Str,
+ Attr.getAttributeSpellingListIndex()));
+}
+
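
For illustration, a minimal sketch of the spelling this new handler accepts (hypothetical names; per the check above, ifunc is only honored for ELF object formats):

    extern "C" void impl(void) {}                             // the real implementation
    extern "C" void (*resolver(void))(void) { return impl; }  // chosen at load time
    // The attribute must sit on a declaration; a definition here would
    // trigger err_alias_is_definition.
    extern "C" void entry(void) __attribute__((ifunc("resolver")));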
static void handleAliasAttr(Sema &S, Decl *D, const AttributeList &Attr) {
StringRef Str;
if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str))
@@ -1557,17 +1580,20 @@ static void handleAliasAttr(Sema &S, Decl *D, const AttributeList &Attr) {
S.Diag(Attr.getLoc(), diag::err_alias_not_supported_on_darwin);
return;
}
+ if (S.Context.getTargetInfo().getTriple().isNVPTX()) {
+ S.Diag(Attr.getLoc(), diag::err_alias_not_supported_on_nvptx);
+ }
// Aliases should be on declarations, not definitions.
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->isThisDeclarationADefinition()) {
- S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << FD;
+ S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << FD << 0;
return;
}
} else {
const auto *VD = cast<VarDecl>(D);
if (VD->isThisDeclarationADefinition() && VD->isExternallyVisible()) {
- S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << VD;
+ S.Diag(Attr.getLoc(), diag::err_alias_is_definition) << VD << 0;
return;
}
}
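
A minimal sketch of what the alias checks above accept and reject, with hypothetical names:

    extern "C" void real_fn(void) {}
    // OK: the attribute is on a declaration that names an existing definition.
    extern "C" void alias_fn(void) __attribute__((alias("real_fn")));
    // Rejected (err_alias_is_definition): the declaration carrying the
    // attribute is itself a definition.
    // extern "C" void bad_fn(void) __attribute__((alias("real_fn"))) {}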
@@ -1804,6 +1830,28 @@ static void handleUsedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Attr.getAttributeSpellingListIndex()));
}
+static void handleUnusedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ bool IsCXX1zAttr = Attr.isCXX11Attribute() && !Attr.getScopeName();
+
+ if (IsCXX1zAttr && isa<VarDecl>(D)) {
+ // The C++1z spelling of this attribute cannot be applied to a static data
+ // member per [dcl.attr.unused]p2.
+ if (cast<VarDecl>(D)->isStaticDataMember()) {
+ S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
+ << Attr.getName() << ExpectedForMaybeUnused;
+ return;
+ }
+ }
+
+ // If this is spelled as the standard C++1z attribute, but not in C++1z, warn
+ // about using it as an extension.
+ if (!S.getLangOpts().CPlusPlus1z && IsCXX1zAttr)
+ S.Diag(Attr.getLoc(), diag::ext_cxx1z_attr) << Attr.getName();
+
+ D->addAttr(::new (S.Context) UnusedAttr(
+ Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+}
+
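
A sketch of the C++1z spelling this handler now validates; per [dcl.attr.unused]p2 the standard form is rejected on static data members, and using it in pre-C++1z modes draws the ext_cxx1z_attr extension warning added above:

    struct S {
      [[maybe_unused]] int ok;            // fine on a non-static data member
      // [[maybe_unused]] static int bad; // rejected: static data member
    };
    [[maybe_unused]] static int helper() { return 0; }  // fine on a function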
static void handleConstructorAttr(Sema &S, Decl *D, const AttributeList &Attr) {
uint32_t priority = ConstructorAttr::DefaultPriority;
if (Attr.getNumArgs() &&
@@ -1910,11 +1958,14 @@ static bool versionsMatch(const VersionTuple &X, const VersionTuple &Y,
AvailabilityAttr *Sema::mergeAvailabilityAttr(NamedDecl *D, SourceRange Range,
IdentifierInfo *Platform,
+ bool Implicit,
VersionTuple Introduced,
VersionTuple Deprecated,
VersionTuple Obsoleted,
bool IsUnavailable,
StringRef Message,
+ bool IsStrict,
+ StringRef Replacement,
AvailabilityMergeKind AMK,
unsigned AttrSpellingListIndex) {
VersionTuple MergedIntroduced = Introduced;
@@ -1952,14 +2003,14 @@ AvailabilityAttr *Sema::mergeAvailabilityAttr(NamedDecl *D, SourceRange Range,
// If there is an existing availability attribute for this platform that
// is explicit and the new one is implicit use the explicit one and
// discard the new implicit attribute.
- if (OldAA->getRange().isValid() && Range.isInvalid()) {
+ if (!OldAA->isImplicit() && Implicit) {
return nullptr;
}
// If there is an existing attribute for this platform that is implicit
// and the new attribute is explicit then erase the old one and
// continue processing the attributes.
- if (Range.isValid() && OldAA->getRange().isInvalid()) {
+ if (!Implicit && OldAA->isImplicit()) {
Attrs.erase(Attrs.begin() + i);
--e;
continue;
@@ -2058,10 +2109,13 @@ AvailabilityAttr *Sema::mergeAvailabilityAttr(NamedDecl *D, SourceRange Range,
if (!checkAvailabilityAttr(*this, Range, Platform, MergedIntroduced,
MergedDeprecated, MergedObsoleted) &&
!OverrideOrImpl) {
- return ::new (Context) AvailabilityAttr(Range, Context, Platform,
+ auto *Avail = ::new (Context) AvailabilityAttr(Range, Context, Platform,
Introduced, Deprecated,
Obsoleted, IsUnavailable, Message,
+ IsStrict, Replacement,
AttrSpellingListIndex);
+ Avail->setImplicit(Implicit);
+ return Avail;
}
return nullptr;
}
@@ -2088,16 +2142,23 @@ static void handleAvailabilityAttr(Sema &S, Decl *D,
AvailabilityChange Deprecated = Attr.getAvailabilityDeprecated();
AvailabilityChange Obsoleted = Attr.getAvailabilityObsoleted();
bool IsUnavailable = Attr.getUnavailableLoc().isValid();
+ bool IsStrict = Attr.getStrictLoc().isValid();
StringRef Str;
if (const StringLiteral *SE =
dyn_cast_or_null<StringLiteral>(Attr.getMessageExpr()))
Str = SE->getString();
+ StringRef Replacement;
+ if (const StringLiteral *SE =
+ dyn_cast_or_null<StringLiteral>(Attr.getReplacementExpr()))
+ Replacement = SE->getString();
AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND, Attr.getRange(), II,
+ false/*Implicit*/,
Introduced.Version,
Deprecated.Version,
Obsoleted.Version,
IsUnavailable, Str,
+ IsStrict, Replacement,
Sema::AMK_None,
Index);
if (NewAttr)
@@ -2136,12 +2197,15 @@ static void handleAvailabilityAttr(Sema &S, Decl *D,
auto NewObsoleted = adjustWatchOSVersion(Obsoleted.Version);
AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND,
- SourceRange(),
+ Attr.getRange(),
NewII,
+ true/*Implicit*/,
NewIntroduced,
NewDeprecated,
NewObsoleted,
IsUnavailable, Str,
+ IsStrict,
+ Replacement,
Sema::AMK_None,
Index);
if (NewAttr)
@@ -2158,12 +2222,15 @@ static void handleAvailabilityAttr(Sema &S, Decl *D,
if (NewII) {
AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(ND,
- SourceRange(),
+ Attr.getRange(),
NewII,
+ true/*Implicit*/,
Introduced.Version,
Deprecated.Version,
Obsoleted.Version,
IsUnavailable, Str,
+ IsStrict,
+ Replacement,
Sema::AMK_None,
Index);
if (NewAttr)
@@ -2455,6 +2522,12 @@ static void handleWarnUnusedResult(Sema &S, Decl *D, const AttributeList &Attr)
return;
}
+ // If this is spelled as the standard C++1z attribute, but not in C++1z, warn
+ // about using it as an extension.
+ if (!S.getLangOpts().CPlusPlus1z && Attr.isCXX11Attribute() &&
+ !Attr.getScopeName())
+ S.Diag(Attr.getLoc(), diag::ext_cxx1z_attr) << Attr.getName();
+
D->addAttr(::new (S.Context)
WarnUnusedResultAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));
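
The standard C++1z spelling checked here is [[nodiscard]]; a sketch which, when compiled as C++14, also triggers the ext_cxx1z_attr extension warning added above:

    [[nodiscard]] int compute() { return 42; }
    int main() {
      compute();         // warning: ignoring return value of 'nodiscard' function
      return compute();  // OK: the result is used
    }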
@@ -2611,7 +2684,6 @@ static void handleTargetAttr(Sema &S, Decl *D, const AttributeList &Attr) {
D->addAttr(NewAttr);
}
-
static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &Attr) {
VarDecl *VD = cast<VarDecl>(D);
if (!VD->hasLocalStorage()) {
@@ -3069,7 +3141,6 @@ void Sema::AddAlignValueAttr(SourceRange AttrRange, Decl *D, Expr *E,
// Save dependent expressions in the AST to be instantiated.
D->addAttr(::new (Context) AlignValueAttr(TmpAttr));
- return;
}
static void handleAlignedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
@@ -3293,6 +3364,8 @@ bool Sema::checkMSInheritanceAttrOnDefinition(
/// attribute.
static void parseModeAttrArg(Sema &S, StringRef Str, unsigned &DestWidth,
bool &IntegerMode, bool &ComplexMode) {
+ IntegerMode = true;
+ ComplexMode = false;
switch (Str.size()) {
case 2:
switch (Str[0]) {
@@ -3328,7 +3401,7 @@ static void parseModeAttrArg(Sema &S, StringRef Str, unsigned &DestWidth,
// FIXME: glibc uses 'word' to define register_t; this is narrower than a
// pointer on PIC16 and other embedded platforms.
if (Str == "word")
- DestWidth = S.Context.getTargetInfo().getPointerWidth(0);
+ DestWidth = S.Context.getTargetInfo().getRegisterWidth();
else if (Str == "byte")
DestWidth = S.Context.getTargetInfo().getCharWidth();
break;
@@ -3359,9 +3432,15 @@ static void handleModeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
}
IdentifierInfo *Name = Attr.getArgAsIdent(0)->Ident;
- StringRef Str = Name->getName();
+ S.AddModeAttr(Attr.getRange(), D, Name, Attr.getAttributeSpellingListIndex());
+}
+
+void Sema::AddModeAttr(SourceRange AttrRange, Decl *D, IdentifierInfo *Name,
+ unsigned SpellingListIndex, bool InInstantiation) {
+ StringRef Str = Name->getName();
normalizeName(Str);
+ SourceLocation AttrLoc = AttrRange.getBegin();
unsigned DestWidth = 0;
bool IntegerMode = true;
@@ -3377,25 +3456,43 @@ static void handleModeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
if (VectorStringLength &&
!Str.substr(1, VectorStringLength).getAsInteger(10, VectorSize) &&
VectorSize.isPowerOf2()) {
- parseModeAttrArg(S, Str.substr(VectorStringLength + 1), DestWidth,
+ parseModeAttrArg(*this, Str.substr(VectorStringLength + 1), DestWidth,
IntegerMode, ComplexMode);
- S.Diag(Attr.getLoc(), diag::warn_vector_mode_deprecated);
+ // Avoid duplicate warning from template instantiation.
+ if (!InInstantiation)
+ Diag(AttrLoc, diag::warn_vector_mode_deprecated);
} else {
VectorSize = 0;
}
}
if (!VectorSize)
- parseModeAttrArg(S, Str, DestWidth, IntegerMode, ComplexMode);
+ parseModeAttrArg(*this, Str, DestWidth, IntegerMode, ComplexMode);
+
+ // FIXME: Sync this with InitializePredefinedMacros; we need to match int8_t
+ // and friends, at least with glibc.
+ // FIXME: Make sure floating-point mappings are accurate
+ // FIXME: Support XF and TF types
+ if (!DestWidth) {
+ Diag(AttrLoc, diag::err_machine_mode) << 0 /*Unknown*/ << Name;
+ return;
+ }
QualType OldTy;
if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D))
OldTy = TD->getUnderlyingType();
- else if (ValueDecl *VD = dyn_cast<ValueDecl>(D))
- OldTy = VD->getType();
- else {
- S.Diag(D->getLocation(), diag::err_attr_wrong_decl)
- << Attr.getName() << Attr.getRange();
+ else if (EnumDecl *ED = dyn_cast<EnumDecl>(D)) {
+ // Something like 'typedef enum { X } __attribute__((mode(XX))) T;'.
+ // Try to get the type from the enum declaration, defaulting to int.
+ OldTy = ED->getIntegerType();
+ if (OldTy.isNull())
+ OldTy = Context.IntTy;
+ } else
+ OldTy = cast<ValueDecl>(D)->getType();
+
+ if (OldTy->isDependentType()) {
+ D->addAttr(::new (Context)
+ ModeAttr(AttrRange, Context, Name, SpellingListIndex));
return;
}
@@ -3405,91 +3502,83 @@ static void handleModeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
if (const VectorType *VT = OldTy->getAs<VectorType>())
OldElemTy = VT->getElementType();
- if (!OldElemTy->getAs<BuiltinType>() && !OldElemTy->isComplexType())
- S.Diag(Attr.getLoc(), diag::err_mode_not_primitive);
+ // GCC allows the 'mode' attribute on enumeration types (even incomplete ones),
+ // except for vector modes. So 'enum X __attribute__((mode(QI)));' forms a
+ // complete type, while 'enum { A } __attribute__((mode(V4SI)))' is rejected.
+ if ((isa<EnumDecl>(D) || OldElemTy->getAs<EnumType>()) &&
+ VectorSize.getBoolValue()) {
+ Diag(AttrLoc, diag::err_enum_mode_vector_type) << Name << AttrRange;
+ return;
+ }
+ bool IntegralOrAnyEnumType =
+ OldElemTy->isIntegralOrEnumerationType() || OldElemTy->getAs<EnumType>();
+
+ if (!OldElemTy->getAs<BuiltinType>() && !OldElemTy->isComplexType() &&
+ !IntegralOrAnyEnumType)
+ Diag(AttrLoc, diag::err_mode_not_primitive);
else if (IntegerMode) {
- if (!OldElemTy->isIntegralOrEnumerationType())
- S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
+ if (!IntegralOrAnyEnumType)
+ Diag(AttrLoc, diag::err_mode_wrong_type);
} else if (ComplexMode) {
if (!OldElemTy->isComplexType())
- S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
+ Diag(AttrLoc, diag::err_mode_wrong_type);
} else {
if (!OldElemTy->isFloatingType())
- S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
- }
-
- // FIXME: Sync this with InitializePredefinedMacros; we need to match int8_t
- // and friends, at least with glibc.
- // FIXME: Make sure floating-point mappings are accurate
- // FIXME: Support XF and TF types
- if (!DestWidth) {
- S.Diag(Attr.getLoc(), diag::err_machine_mode) << 0 /*Unknown*/ << Name;
- return;
+ Diag(AttrLoc, diag::err_mode_wrong_type);
}
QualType NewElemTy;
if (IntegerMode)
- NewElemTy = S.Context.getIntTypeForBitwidth(
- DestWidth, OldElemTy->isSignedIntegerType());
+ NewElemTy = Context.getIntTypeForBitwidth(DestWidth,
+ OldElemTy->isSignedIntegerType());
else
- NewElemTy = S.Context.getRealTypeForBitwidth(DestWidth);
+ NewElemTy = Context.getRealTypeForBitwidth(DestWidth);
if (NewElemTy.isNull()) {
- S.Diag(Attr.getLoc(), diag::err_machine_mode) << 1 /*Unsupported*/ << Name;
+ Diag(AttrLoc, diag::err_machine_mode) << 1 /*Unsupported*/ << Name;
return;
}
if (ComplexMode) {
- NewElemTy = S.Context.getComplexType(NewElemTy);
+ NewElemTy = Context.getComplexType(NewElemTy);
}
QualType NewTy = NewElemTy;
if (VectorSize.getBoolValue()) {
- NewTy = S.Context.getVectorType(NewTy, VectorSize.getZExtValue(),
- VectorType::GenericVector);
+ NewTy = Context.getVectorType(NewTy, VectorSize.getZExtValue(),
+ VectorType::GenericVector);
} else if (const VectorType *OldVT = OldTy->getAs<VectorType>()) {
// Complex machine mode does not support base vector types.
if (ComplexMode) {
- S.Diag(Attr.getLoc(), diag::err_complex_mode_vector_type);
+ Diag(AttrLoc, diag::err_complex_mode_vector_type);
return;
}
- unsigned NumElements = S.Context.getTypeSize(OldElemTy) *
+ unsigned NumElements = Context.getTypeSize(OldElemTy) *
OldVT->getNumElements() /
- S.Context.getTypeSize(NewElemTy);
+ Context.getTypeSize(NewElemTy);
NewTy =
- S.Context.getVectorType(NewElemTy, NumElements, OldVT->getVectorKind());
+ Context.getVectorType(NewElemTy, NumElements, OldVT->getVectorKind());
}
if (NewTy.isNull()) {
- S.Diag(Attr.getLoc(), diag::err_mode_wrong_type);
+ Diag(AttrLoc, diag::err_mode_wrong_type);
return;
}
// Install the new type.
if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D))
TD->setModedTypeSourceInfo(TD->getTypeSourceInfo(), NewTy);
+ else if (EnumDecl *ED = dyn_cast<EnumDecl>(D))
+ ED->setIntegerType(NewTy);
else
cast<ValueDecl>(D)->setType(NewTy);
- D->addAttr(::new (S.Context)
- ModeAttr(Attr.getRange(), S.Context, Name,
- Attr.getAttributeSpellingListIndex()));
+ D->addAttr(::new (Context)
+ ModeAttr(AttrRange, Context, Name, SpellingListIndex));
}
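
A sketch of the spellings AddModeAttr now covers, mirroring the comments above (QI/HI/SI/DI select 8/16/32/64-bit integer modes):

    typedef int byte_int __attribute__((mode(QI)));        // 8-bit integer typedef
    typedef enum { X } __attribute__((mode(HI))) small_e;  // enum, 16-bit underlying type
    // enum { A } __attribute__((mode(V4SI))) bad;         // rejected: vector mode on an enum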
static void handleNoDebugAttr(Sema &S, Decl *D, const AttributeList &Attr) {
- if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
- if (!VD->hasGlobalStorage())
- S.Diag(Attr.getLoc(),
- diag::warn_attribute_requires_functions_or_static_globals)
- << Attr.getName();
- } else if (!isFunctionOrMethod(D)) {
- S.Diag(Attr.getLoc(),
- diag::warn_attribute_requires_functions_or_static_globals)
- << Attr.getName();
- return;
- }
-
D->addAttr(::new (S.Context)
NoDebugAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));
@@ -3622,11 +3711,21 @@ static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) {
: FixItHint());
return;
}
+ if (const auto *Method = dyn_cast<CXXMethodDecl>(FD)) {
+ if (Method->isInstance()) {
+ S.Diag(Method->getLocStart(), diag::err_kern_is_nonstatic_method)
+ << Method;
+ return;
+ }
+ S.Diag(Method->getLocStart(), diag::warn_kern_is_method) << Method;
+ }
+ // Only warn for "inline" when compiling for host, to cut down on noise.
+ if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
+ S.Diag(FD->getLocStart(), diag::warn_kern_is_inline) << FD;
D->addAttr(::new (S.Context)
CUDAGlobalAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));
-
}
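
A sketch of the new diagnostics, assuming CUDA compilation (where the __global__ macro expands to this attribute) and hypothetical names:

    struct Widget {
      __attribute__((global)) void bad();          // error: kernel as non-static method
      __attribute__((global)) static void meth();  // warning only: kernel is a method
    };
    __attribute__((global)) inline void noisy() {} // host compile warns: kernel marked inline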
static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
@@ -3682,6 +3781,11 @@ static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) {
PascalAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));
return;
+ case AttributeList::AT_SwiftCall:
+ D->addAttr(::new (S.Context)
+ SwiftCallAttr(Attr.getRange(), S.Context,
+ Attr.getAttributeSpellingListIndex()));
+ return;
case AttributeList::AT_VectorCall:
D->addAttr(::new (S.Context)
VectorCallAttr(Attr.getRange(), S.Context,
@@ -3720,7 +3824,14 @@ static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) {
IntelOclBiccAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));
return;
-
+ case AttributeList::AT_PreserveMost:
+ D->addAttr(::new (S.Context) PreserveMostAttr(
+ Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+ return;
+ case AttributeList::AT_PreserveAll:
+ D->addAttr(::new (S.Context) PreserveAllAttr(
+ Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+ return;
default:
llvm_unreachable("unexpected attribute kind");
}
@@ -3731,6 +3842,11 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC,
if (attr.isInvalid())
return true;
+ if (attr.hasProcessingCache()) {
+ CC = (CallingConv) attr.getProcessingCache();
+ return false;
+ }
+
unsigned ReqArgs = attr.getKind() == AttributeList::AT_Pcs ? 1 : 0;
if (!checkAttributeNumArgs(*this, attr, ReqArgs)) {
attr.setInvalid();
@@ -3744,6 +3860,7 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC,
case AttributeList::AT_StdCall: CC = CC_X86StdCall; break;
case AttributeList::AT_ThisCall: CC = CC_X86ThisCall; break;
case AttributeList::AT_Pascal: CC = CC_X86Pascal; break;
+ case AttributeList::AT_SwiftCall: CC = CC_Swift; break;
case AttributeList::AT_VectorCall: CC = CC_X86VectorCall; break;
case AttributeList::AT_MSABI:
CC = Context.getTargetInfo().getTriple().isOSWindows() ? CC_C :
@@ -3772,6 +3889,8 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC,
return true;
}
case AttributeList::AT_IntelOclBicc: CC = CC_IntelOclBicc; break;
+ case AttributeList::AT_PreserveMost: CC = CC_PreserveMost; break;
+ case AttributeList::AT_PreserveAll: CC = CC_PreserveAll; break;
default: llvm_unreachable("unexpected attribute kind");
}
@@ -3783,16 +3902,108 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC,
// This convention is not valid for the target. Use the default function or
// method calling convention.
- TargetInfo::CallingConvMethodType MT = TargetInfo::CCMT_Unknown;
- if (FD)
- MT = FD->isCXXInstanceMember() ? TargetInfo::CCMT_Member :
- TargetInfo::CCMT_NonMember;
- CC = TI.getDefaultCallingConv(MT);
+ bool IsCXXMethod = false, IsVariadic = false;
+ if (FD) {
+ IsCXXMethod = FD->isCXXInstanceMember();
+ IsVariadic = FD->isVariadic();
+ }
+ CC = Context.getDefaultCallingConvention(IsVariadic, IsCXXMethod);
}
+ attr.setProcessingCache((unsigned) CC);
return false;
}
+/// Pointer-like types in the default address space.
+static bool isValidSwiftContextType(QualType type) {
+ if (!type->hasPointerRepresentation())
+ return type->isDependentType();
+ return type->getPointeeType().getAddressSpace() == 0;
+}
+
+/// Pointers and references in the default address space.
+static bool isValidSwiftIndirectResultType(QualType type) {
+ if (auto ptrType = type->getAs<PointerType>()) {
+ type = ptrType->getPointeeType();
+ } else if (auto refType = type->getAs<ReferenceType>()) {
+ type = refType->getPointeeType();
+ } else {
+ return type->isDependentType();
+ }
+ return type.getAddressSpace() == 0;
+}
+
+/// Pointers and references to pointers in the default address space.
+static bool isValidSwiftErrorResultType(QualType type) {
+ if (auto ptrType = type->getAs<PointerType>()) {
+ type = ptrType->getPointeeType();
+ } else if (auto refType = type->getAs<ReferenceType>()) {
+ type = refType->getPointeeType();
+ } else {
+ return type->isDependentType();
+ }
+ if (!type.getQualifiers().empty())
+ return false;
+ return isValidSwiftContextType(type);
+}
+
+static void handleParameterABIAttr(Sema &S, Decl *D, const AttributeList &attr,
+ ParameterABI abi) {
+ S.AddParameterABIAttr(attr.getRange(), D, abi,
+ attr.getAttributeSpellingListIndex());
+}
+
+void Sema::AddParameterABIAttr(SourceRange range, Decl *D, ParameterABI abi,
+ unsigned spellingIndex) {
+
+ QualType type = cast<ParmVarDecl>(D)->getType();
+
+ if (auto existingAttr = D->getAttr<ParameterABIAttr>()) {
+ if (existingAttr->getABI() != abi) {
+ Diag(range.getBegin(), diag::err_attributes_are_not_compatible)
+ << getParameterABISpelling(abi) << existingAttr;
+ Diag(existingAttr->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ }
+
+ switch (abi) {
+ case ParameterABI::Ordinary:
+ llvm_unreachable("explicit attribute for ordinary parameter ABI?");
+
+ case ParameterABI::SwiftContext:
+ if (!isValidSwiftContextType(type)) {
+ Diag(range.getBegin(), diag::err_swift_abi_parameter_wrong_type)
+ << getParameterABISpelling(abi)
+ << /*pointer to pointer */ 0 << type;
+ }
+ D->addAttr(::new (Context)
+ SwiftContextAttr(range, Context, spellingIndex));
+ return;
+
+ case ParameterABI::SwiftErrorResult:
+ if (!isValidSwiftErrorResultType(type)) {
+ Diag(range.getBegin(), diag::err_swift_abi_parameter_wrong_type)
+ << getParameterABISpelling(abi)
+ << /*pointer to pointer */ 1 << type;
+ }
+ D->addAttr(::new (Context)
+ SwiftErrorResultAttr(range, Context, spellingIndex));
+ return;
+
+ case ParameterABI::SwiftIndirectResult:
+ if (!isValidSwiftIndirectResultType(type)) {
+ Diag(range.getBegin(), diag::err_swift_abi_parameter_wrong_type)
+ << getParameterABISpelling(abi)
+ << /*pointer*/ 0 << type;
+ }
+ D->addAttr(::new (Context)
+ SwiftIndirectResultAttr(range, Context, spellingIndex));
+ return;
+ }
+ llvm_unreachable("bad parameter ABI attribute");
+}
+
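
A sketch of the parameter spellings these checks validate, assuming a target that supports swiftcall; each parameter must be pointer-like in the default address space, and the error result must additionally be a pointer or reference to an unqualified pointer:

    extern "C" void __attribute__((swiftcall)) swift_fn(
        void *ctx __attribute__((swift_context)),
        long *out __attribute__((swift_indirect_result)),
        void **err __attribute__((swift_error_result)));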
/// Checks a regparm attribute, returning true if it is ill-formed and
/// otherwise setting numParams to the appropriate value.
bool Sema::CheckRegparmAttr(const AttributeList &Attr, unsigned &numParams) {
@@ -3829,49 +4040,60 @@ bool Sema::CheckRegparmAttr(const AttributeList &Attr, unsigned &numParams) {
return false;
}
-// Checks whether an argument of launch_bounds attribute is acceptable
-// May output an error.
-static bool checkLaunchBoundsArgument(Sema &S, Expr *E,
- const CUDALaunchBoundsAttr &Attr,
- const unsigned Idx) {
-
+// Checks whether an argument of the launch_bounds attribute is
+// acceptable, performs the implicit conversion to an rvalue, and returns
+// a non-null Expr result on success. Otherwise, it returns nullptr
+// and may emit an error.
+static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E,
+ const CUDALaunchBoundsAttr &Attr,
+ const unsigned Idx) {
if (S.DiagnoseUnexpandedParameterPack(E))
- return false;
+ return nullptr;
// Accept template arguments for now as they depend on something else.
// We'll get to check them when they eventually get instantiated.
if (E->isValueDependent())
- return true;
+ return E;
llvm::APSInt I(64);
if (!E->isIntegerConstantExpr(I, S.Context)) {
S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
<< &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
- return false;
+ return nullptr;
}
// Make sure we can fit it in 32 bits.
if (!I.isIntN(32)) {
S.Diag(E->getExprLoc(), diag::err_ice_too_large) << I.toString(10, false)
<< 32 << /* Unsigned */ 1;
- return false;
+ return nullptr;
}
if (I < 0)
S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
<< &Attr << Idx << E->getSourceRange();
- return true;
+ // We may need to perform implicit conversion of the argument.
+ InitializedEntity Entity = InitializedEntity::InitializeParameter(
+ S.Context, S.Context.getConstType(S.Context.IntTy), /*consume*/ false);
+ ExprResult ValArg = S.PerformCopyInitialization(Entity, SourceLocation(), E);
+ assert(!ValArg.isInvalid() &&
+ "Unexpected PerformCopyInitialization() failure.");
+
+ return ValArg.getAs<Expr>();
}
void Sema::AddLaunchBoundsAttr(SourceRange AttrRange, Decl *D, Expr *MaxThreads,
Expr *MinBlocks, unsigned SpellingListIndex) {
CUDALaunchBoundsAttr TmpAttr(AttrRange, Context, MaxThreads, MinBlocks,
SpellingListIndex);
-
- if (!checkLaunchBoundsArgument(*this, MaxThreads, TmpAttr, 0))
+ MaxThreads = makeLaunchBoundsArgExpr(*this, MaxThreads, TmpAttr, 0);
+ if (MaxThreads == nullptr)
return;
- if (MinBlocks && !checkLaunchBoundsArgument(*this, MinBlocks, TmpAttr, 1))
- return;
+ if (MinBlocks) {
+ MinBlocks = makeLaunchBoundsArgExpr(*this, MinBlocks, TmpAttr, 1);
+ if (MinBlocks == nullptr)
+ return;
+ }
D->addAttr(::new (Context) CUDALaunchBoundsAttr(
AttrRange, Context, MaxThreads, MinBlocks, SpellingListIndex));
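
A sketch of the attribute in CUDA source, assuming CUDA mode; both arguments must be 32-bit integer constant expressions and are now stored after copy-initialization to const int:

    __attribute__((global, launch_bounds(256, 4)))
    void kern(int *p) { *p = 0; }  // at most 256 threads/block, at least 4 blocks/SM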
@@ -3977,6 +4199,7 @@ static bool isValidSubjectOfNSAttribute(Sema &S, QualType type) {
type->isObjCObjectPointerType() ||
S.Context.isObjCNSObjectType(type);
}
+
static bool isValidSubjectOfCFAttribute(Sema &S, QualType type) {
return type->isDependentType() ||
type->isPointerType() ||
@@ -3984,36 +4207,49 @@ static bool isValidSubjectOfCFAttribute(Sema &S, QualType type) {
}
static void handleNSConsumedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ S.AddNSConsumedAttr(Attr.getRange(), D, Attr.getAttributeSpellingListIndex(),
+ Attr.getKind() == AttributeList::AT_NSConsumed,
+ /*template instantiation*/ false);
+}
+
+void Sema::AddNSConsumedAttr(SourceRange attrRange, Decl *D,
+ unsigned spellingIndex, bool isNSConsumed,
+ bool isTemplateInstantiation) {
ParmVarDecl *param = cast<ParmVarDecl>(D);
- bool typeOK, cf;
+ bool typeOK;
- if (Attr.getKind() == AttributeList::AT_NSConsumed) {
- typeOK = isValidSubjectOfNSAttribute(S, param->getType());
- cf = false;
+ if (isNSConsumed) {
+ typeOK = isValidSubjectOfNSAttribute(*this, param->getType());
} else {
- typeOK = isValidSubjectOfCFAttribute(S, param->getType());
- cf = true;
+ typeOK = isValidSubjectOfCFAttribute(*this, param->getType());
}
if (!typeOK) {
- S.Diag(D->getLocStart(), diag::warn_ns_attribute_wrong_parameter_type)
- << Attr.getRange() << Attr.getName() << cf;
- return;
- }
-
- if (cf)
- param->addAttr(::new (S.Context)
- CFConsumedAttr(Attr.getRange(), S.Context,
- Attr.getAttributeSpellingListIndex()));
+ // These attributes are normally just advisory, but in ARC, ns_consumed
+ // is significant. Allow non-dependent code to contain inappropriate
+ // attributes even in ARC, but require template instantiations to be
+ // set up correctly.
+ Diag(D->getLocStart(),
+ (isTemplateInstantiation && isNSConsumed &&
+ getLangOpts().ObjCAutoRefCount
+ ? diag::err_ns_attribute_wrong_parameter_type
+ : diag::warn_ns_attribute_wrong_parameter_type))
+ << attrRange
+ << (isNSConsumed ? "ns_consumed" : "cf_consumed")
+ << (isNSConsumed ? /*objc pointers*/ 0 : /*cf pointers*/ 1);
+ return;
+ }
+
+ if (isNSConsumed)
+ param->addAttr(::new (Context)
+ NSConsumedAttr(attrRange, Context, spellingIndex));
else
- param->addAttr(::new (S.Context)
- NSConsumedAttr(Attr.getRange(), S.Context,
- Attr.getAttributeSpellingListIndex()));
+ param->addAttr(::new (Context)
+ CFConsumedAttr(attrRange, Context, spellingIndex));
}
static void handleNSReturnsRetainedAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
-
QualType returnType;
if (ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D))
@@ -4287,10 +4523,9 @@ static void handleObjCRuntimeName(Sema &S, Decl *D,
Attr.getAttributeSpellingListIndex()));
}
-// when a user wants to use objc_boxable with a union or struct
-// but she doesn't have access to the declaration (legacy/third-party code)
-// then she can 'enable' this feature via trick with a typedef
-// e.g.:
+// When a user wants to use objc_boxable with a union or struct
+// but they don't have access to the declaration (legacy/third-party code)
+// then they can 'enable' this feature with a typedef:
// typedef struct __attribute((objc_boxable)) legacy_struct legacy_struct;
static void handleObjCBoxable(Sema &S, Decl *D, const AttributeList &Attr) {
bool notify = false;
@@ -4423,8 +4658,10 @@ static void handleMSInheritanceAttr(Sema &S, Decl *D, const AttributeList &Attr)
D, Attr.getRange(), /*BestCase=*/true,
Attr.getAttributeSpellingListIndex(),
(MSInheritanceAttr::Spelling)Attr.getSemanticSpelling());
- if (IA)
+ if (IA) {
D->addAttr(IA);
+ S.Consumer.AssignInheritanceModel(cast<CXXRecordDecl>(D));
+ }
}
static void handleDeclspecThreadAttr(Sema &S, Decl *D,
@@ -4446,6 +4683,38 @@ static void handleDeclspecThreadAttr(Sema &S, Decl *D,
Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
}
+static void handleAbiTagAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ SmallVector<StringRef, 4> Tags;
+ for (unsigned I = 0, E = Attr.getNumArgs(); I != E; ++I) {
+ StringRef Tag;
+ if (!S.checkStringLiteralArgumentAttr(Attr, I, Tag))
+ return;
+ Tags.push_back(Tag);
+ }
+
+ if (const auto *NS = dyn_cast<NamespaceDecl>(D)) {
+ if (!NS->isInline()) {
+ S.Diag(Attr.getLoc(), diag::warn_attr_abi_tag_namespace) << 0;
+ return;
+ }
+ if (NS->isAnonymousNamespace()) {
+ S.Diag(Attr.getLoc(), diag::warn_attr_abi_tag_namespace) << 1;
+ return;
+ }
+ if (Attr.getNumArgs() == 0)
+ Tags.push_back(NS->getName());
+ } else if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+ return;
+
+ // Store tags sorted and without duplicates.
+ std::sort(Tags.begin(), Tags.end());
+ Tags.erase(std::unique(Tags.begin(), Tags.end()), Tags.end());
+
+ D->addAttr(::new (S.Context)
+ AbiTagAttr(Attr.getRange(), S.Context, Tags.data(), Tags.size(),
+ Attr.getAttributeSpellingListIndex()));
+}
+
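
A sketch of the accepted abi_tag spellings; on an inline namespace the tag list may be empty, in which case the namespace's own name becomes the tag:

    struct __attribute__((abi_tag("cxx11", "v2"))) S {};  // tags stored sorted, deduped
    inline namespace v2 __attribute__((abi_tag)) {}       // tag defaults to "v2"
    // namespace plain __attribute__((abi_tag)) {}        // rejected: namespace not inline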
static void handleARMInterruptAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
// Check the attribute arguments.
@@ -4570,17 +4839,90 @@ static void handleMipsInterruptAttr(Sema &S, Decl *D,
Attr.getLoc(), S.Context, Kind, Attr.getAttributeSpellingListIndex()));
}
+static void handleAnyX86InterruptAttr(Sema &S, Decl *D,
+ const AttributeList &Attr) {
+ // Semantic checks for a function with the 'interrupt' attribute.
+ // a) Must be a function.
+ // b) Must have the 'void' return type.
+ // c) Must take 1 or 2 arguments.
+ // d) The 1st argument must be a pointer.
+ // e) The 2nd argument (if any) must be an unsigned integer.
+ if (!isFunctionOrMethod(D) || !hasFunctionProto(D) || isInstanceMethod(D) ||
+ CXXMethodDecl::isStaticOverloadedOperator(
+ cast<NamedDecl>(D)->getDeclName().getCXXOverloadedOperator())) {
+ S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type)
+ << Attr.getName() << ExpectedFunctionWithProtoType;
+ return;
+ }
+ // Interrupt handler must have void return type.
+ if (!getFunctionOrMethodResultType(D)->isVoidType()) {
+ S.Diag(getFunctionOrMethodResultSourceRange(D).getBegin(),
+ diag::err_anyx86_interrupt_attribute)
+ << (S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86
+ ? 0
+ : 1)
+ << 0;
+ return;
+ }
+ // Interrupt handler must have 1 or 2 parameters.
+ unsigned NumParams = getFunctionOrMethodNumParams(D);
+ if (NumParams < 1 || NumParams > 2) {
+ S.Diag(D->getLocStart(), diag::err_anyx86_interrupt_attribute)
+ << (S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86
+ ? 0
+ : 1)
+ << 1;
+ return;
+ }
+ // The first argument must be a pointer.
+ if (!getFunctionOrMethodParamType(D, 0)->isPointerType()) {
+ S.Diag(getFunctionOrMethodParamRange(D, 0).getBegin(),
+ diag::err_anyx86_interrupt_attribute)
+ << (S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86
+ ? 0
+ : 1)
+ << 2;
+ return;
+ }
+ // The second argument, if present, must be an unsigned integer.
+ unsigned TypeSize =
+ S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64
+ ? 64
+ : 32;
+ if (NumParams == 2 &&
+ (!getFunctionOrMethodParamType(D, 1)->isUnsignedIntegerType() ||
+ S.Context.getTypeSize(getFunctionOrMethodParamType(D, 1)) != TypeSize)) {
+ S.Diag(getFunctionOrMethodParamRange(D, 1).getBegin(),
+ diag::err_anyx86_interrupt_attribute)
+ << (S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86
+ ? 0
+ : 1)
+ << 3 << S.Context.getIntTypeForBitwidth(TypeSize, /*Signed=*/false);
+ return;
+ }
+ D->addAttr(::new (S.Context) AnyX86InterruptAttr(
+ Attr.getLoc(), S.Context, Attr.getAttributeSpellingListIndex()));
+ D->addAttr(UsedAttr::CreateImplicit(S.Context));
+}
+
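
A sketch of a handler satisfying rules (a)-(e) above, assuming an x86-64 target, where the optional error-code parameter must be a 64-bit unsigned integer:

    struct interrupt_frame;  // hypothetical frame type; only a pointer is required
    __attribute__((interrupt))
    void isr(struct interrupt_frame *frame, unsigned long long error_code) {
      (void)frame; (void)error_code;  // void return; two params: pointer + uint64
    }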
static void handleInterruptAttr(Sema &S, Decl *D, const AttributeList &Attr) {
// Dispatch the interrupt attribute based on the current target.
- if (S.Context.getTargetInfo().getTriple().getArch() == llvm::Triple::msp430)
+ switch (S.Context.getTargetInfo().getTriple().getArch()) {
+ case llvm::Triple::msp430:
handleMSP430InterruptAttr(S, D, Attr);
- else if (S.Context.getTargetInfo().getTriple().getArch() ==
- llvm::Triple::mipsel ||
- S.Context.getTargetInfo().getTriple().getArch() ==
- llvm::Triple::mips)
+ break;
+ case llvm::Triple::mipsel:
+ case llvm::Triple::mips:
handleMipsInterruptAttr(S, D, Attr);
- else
+ break;
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ handleAnyX86InterruptAttr(S, D, Attr);
+ break;
+ default:
handleARMInterruptAttr(S, D, Attr);
+ break;
+ }
}
static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
@@ -4634,6 +4976,24 @@ static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
Attr.getAttributeSpellingListIndex()));
}
+static void handleLayoutVersion(Sema &S, Decl *D, const AttributeList &Attr) {
+ uint32_t Version;
+ Expr *VersionExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+ if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), Version))
+ return;
+
+ // TODO: Investigate what happens with the next major version of MSVC.
+ if (Version != LangOptions::MSVC2015) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_bounds)
+ << Attr.getName() << Version << VersionExpr->getSourceRange();
+ return;
+ }
+
+ D->addAttr(::new (S.Context)
+ LayoutVersionAttr(Attr.getRange(), S.Context, Version,
+ Attr.getAttributeSpellingListIndex()));
+}
+
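
A sketch of the Microsoft spelling this handler targets, assuming MS extensions are enabled; only the MSVC 2015 value (19) is currently accepted:

    struct __declspec(layout_version(19)) S { int x; };  // OK: 19 == LangOptions::MSVC2015
    // struct __declspec(layout_version(18)) T {};       // rejected: argument out of bounds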
DLLImportAttr *Sema::mergeDLLImportAttr(Decl *D, SourceRange Range,
unsigned AttrSpellingListIndex) {
if (D->hasAttr<DLLExportAttr>()) {
@@ -4827,19 +5187,34 @@ static void handleDeprecatedAttr(Sema &S, Decl *D, const AttributeList &Attr) {
}
}
+ // Handle the cases where the attribute has a text message.
+ StringRef Str, Replacement;
+ if (Attr.isArgExpr(0) && Attr.getArgAsExpr(0) &&
+ !S.checkStringLiteralArgumentAttr(Attr, 0, Str))
+ return;
+
+ // Only support a single optional message for Declspec and CXX11.
+ if (Attr.isDeclspecAttribute() || Attr.isCXX11Attribute())
+ checkAttributeAtMostNumArgs(S, Attr, 1);
+ else if (Attr.isArgExpr(1) && Attr.getArgAsExpr(1) &&
+ !S.checkStringLiteralArgumentAttr(Attr, 1, Replacement))
+ return;
+
if (!S.getLangOpts().CPlusPlus14)
if (Attr.isCXX11Attribute() &&
!(Attr.hasScope() && Attr.getScopeName()->isStr("gnu")))
- S.Diag(Attr.getLoc(), diag::ext_deprecated_attr_is_a_cxx14_extension);
+ S.Diag(Attr.getLoc(), diag::ext_cxx14_attr) << Attr.getName();
- handleAttrWithMessage<DeprecatedAttr>(S, D, Attr);
+ D->addAttr(::new (S.Context) DeprecatedAttr(Attr.getRange(), S.Context, Str,
+ Replacement,
+ Attr.getAttributeSpellingListIndex()));
}
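
A sketch of the two-argument GNU spelling handled above (the declspec and C++11 spellings still allow at most the message); the replacement string feeds the fix-it emitted by DoEmitAvailabilityWarning below:

    void new_fn(void);
    __attribute__((deprecated("use new_fn instead", "new_fn")))
    void old_fn(void);
    void use(void) { old_fn(); }  // warning plus fix-it rewriting 'old_fn' to 'new_fn'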
static void handleNoSanitizeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
return;
- std::vector<std::string> Sanitizers;
+ std::vector<StringRef> Sanitizers;
for (unsigned I = 0, E = Attr.getNumArgs(); I != E; ++I) {
StringRef SanitizerName;
@@ -4863,8 +5238,8 @@ static void handleNoSanitizeSpecificAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
StringRef AttrName = Attr.getName()->getName();
normalizeName(AttrName);
- std::string SanitizerName =
- llvm::StringSwitch<std::string>(AttrName)
+ StringRef SanitizerName =
+ llvm::StringSwitch<StringRef>(AttrName)
.Case("no_address_safety_analysis", "address")
.Case("no_sanitize_address", "address")
.Case("no_sanitize_thread", "thread")
@@ -4882,6 +5257,15 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D,
D->addAttr(Internal);
}
+static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+ if (S.LangOpts.OpenCLVersion != 200)
+ S.Diag(Attr.getLoc(), diag::err_attribute_requires_opencl_version)
+ << Attr.getName() << "2.0" << 0;
+ else
+ S.Diag(Attr.getLoc(), diag::warn_opencl_attr_deprecated_ignored)
+ << Attr.getName() << "2.0";
+}
+
/// Handles semantic checking for features that are common to all attributes,
/// such as checking whether a parameter was properly specified, or the correct
/// number of arguments were passed, etc.
@@ -4923,6 +5307,40 @@ static bool handleCommonAttributeFeatures(Sema &S, Scope *scope, Decl *D,
return false;
}
+static void handleOpenCLAccessAttr(Sema &S, Decl *D,
+ const AttributeList &Attr) {
+ if (D->isInvalidDecl())
+ return;
+
+ // Check if there is only one access qualifier.
+ if (D->hasAttr<OpenCLAccessAttr>()) {
+ S.Diag(Attr.getLoc(), diag::err_opencl_multiple_access_qualifiers)
+ << D->getSourceRange();
+ D->setInvalidDecl(true);
+ return;
+ }
+
+ // OpenCL v2.0 s6.6 - read_write can be used for image types to specify that an
+ // image object can be read and written.
+ // OpenCL v2.0 s6.13.6 - A kernel cannot read from and write to the same pipe
+ // object. Using the read_write (or __read_write) qualifier with the pipe
+ // qualifier is a compilation error.
+ if (const ParmVarDecl *PDecl = dyn_cast<ParmVarDecl>(D)) {
+ const Type *DeclTy = PDecl->getType().getCanonicalType().getTypePtr();
+ if (Attr.getName()->getName().find("read_write") != StringRef::npos) {
+ if (S.getLangOpts().OpenCLVersion < 200 || DeclTy->isPipeType()) {
+ S.Diag(Attr.getLoc(), diag::err_opencl_invalid_read_write)
+ << Attr.getName() << PDecl->getType() << DeclTy->isImageType();
+ D->setInvalidDecl(true);
+ return;
+ }
+ }
+ }
+
+ D->addAttr(::new (S.Context) OpenCLAccessAttr(
+ Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex()));
+}
+
//===----------------------------------------------------------------------===//
// Top Level Sema Entry Points
//===----------------------------------------------------------------------===//
@@ -4958,8 +5376,13 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
switch (Attr.getKind()) {
default:
- // Type attributes are handled elsewhere; silently move on.
- assert(Attr.isTypeAttr() && "Non-type attribute not handled");
+ if (!Attr.isStmtAttr()) {
+ // Type attributes are handled elsewhere; silently move on.
+ assert(Attr.isTypeAttr() && "Non-type attribute not handled");
+ break;
+ }
+ S.Diag(Attr.getLoc(), diag::err_stmt_attribute_invalid_on_decl)
+ << Attr.getName() << D->getLocation();
break;
case AttributeList::AT_Interrupt:
handleInterruptAttr(S, D, Attr);
@@ -4993,6 +5416,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_IBOutletCollection:
handleIBOutletCollection(S, D, Attr);
break;
+ case AttributeList::AT_IFunc:
+ handleIFuncAttr(S, D, Attr);
+ break;
case AttributeList::AT_Alias:
handleAliasAttr(S, D, Attr);
break;
@@ -5141,53 +5567,45 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_VecReturn:
handleVecReturnAttr(S, D, Attr);
break;
-
case AttributeList::AT_ObjCOwnership:
handleObjCOwnershipAttr(S, D, Attr);
break;
case AttributeList::AT_ObjCPreciseLifetime:
handleObjCPreciseLifetimeAttr(S, D, Attr);
break;
-
case AttributeList::AT_ObjCReturnsInnerPointer:
handleObjCReturnsInnerPointerAttr(S, D, Attr);
break;
-
case AttributeList::AT_ObjCRequiresSuper:
handleObjCRequiresSuperAttr(S, D, Attr);
break;
-
case AttributeList::AT_ObjCBridge:
handleObjCBridgeAttr(S, scope, D, Attr);
break;
-
case AttributeList::AT_ObjCBridgeMutable:
handleObjCBridgeMutableAttr(S, scope, D, Attr);
break;
-
case AttributeList::AT_ObjCBridgeRelated:
handleObjCBridgeRelatedAttr(S, scope, D, Attr);
break;
-
case AttributeList::AT_ObjCDesignatedInitializer:
handleObjCDesignatedInitializer(S, D, Attr);
break;
-
case AttributeList::AT_ObjCRuntimeName:
handleObjCRuntimeName(S, D, Attr);
break;
-
+ case AttributeList::AT_ObjCRuntimeVisible:
+ handleSimpleAttribute<ObjCRuntimeVisibleAttr>(S, D, Attr);
+ break;
case AttributeList::AT_ObjCBoxable:
handleObjCBoxable(S, D, Attr);
break;
-
case AttributeList::AT_CFAuditedTransfer:
handleCFAuditedTransferAttr(S, D, Attr);
break;
case AttributeList::AT_CFUnknownTransfer:
handleCFUnknownTransferAttr(S, D, Attr);
break;
-
case AttributeList::AT_CFConsumed:
case AttributeList::AT_NSConsumed:
handleNSConsumedAttr(S, D, Attr);
@@ -5195,7 +5613,6 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_NSConsumesSelf:
handleSimpleAttribute<NSConsumesSelfAttr>(S, D, Attr);
break;
-
case AttributeList::AT_NSReturnsAutoreleased:
case AttributeList::AT_NSReturnsNotRetained:
case AttributeList::AT_CFReturnsNotRetained:
@@ -5212,11 +5629,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_VecTypeHint:
handleVecTypeHint(S, D, Attr);
break;
-
case AttributeList::AT_InitPriority:
handleInitPriorityAttr(S, D, Attr);
break;
-
case AttributeList::AT_Packed:
handlePackedAttr(S, D, Attr);
break;
@@ -5242,7 +5657,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
handleSimpleAttribute<ObjCRequiresPropertyDefsAttr>(S, D, Attr);
break;
case AttributeList::AT_Unused:
- handleSimpleAttribute<UnusedAttr>(S, D, Attr);
+ handleUnusedAttr(S, D, Attr);
break;
case AttributeList::AT_ReturnsTwice:
handleSimpleAttribute<ReturnsTwiceAttr>(S, D, Attr);
@@ -5324,24 +5739,48 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_FastCall:
case AttributeList::AT_ThisCall:
case AttributeList::AT_Pascal:
+ case AttributeList::AT_SwiftCall:
case AttributeList::AT_VectorCall:
case AttributeList::AT_MSABI:
case AttributeList::AT_SysVABI:
case AttributeList::AT_Pcs:
case AttributeList::AT_IntelOclBicc:
+ case AttributeList::AT_PreserveMost:
+ case AttributeList::AT_PreserveAll:
handleCallConvAttr(S, D, Attr);
break;
case AttributeList::AT_OpenCLKernel:
handleSimpleAttribute<OpenCLKernelAttr>(S, D, Attr);
break;
- case AttributeList::AT_OpenCLImageAccess:
- handleSimpleAttribute<OpenCLImageAccessAttr>(S, D, Attr);
+ case AttributeList::AT_OpenCLAccess:
+ handleOpenCLAccessAttr(S, D, Attr);
+ break;
+ case AttributeList::AT_OpenCLNoSVM:
+ handleOpenCLNoSVMAttr(S, D, Attr);
+ break;
+ case AttributeList::AT_SwiftContext:
+ handleParameterABIAttr(S, D, Attr, ParameterABI::SwiftContext);
+ break;
+ case AttributeList::AT_SwiftErrorResult:
+ handleParameterABIAttr(S, D, Attr, ParameterABI::SwiftErrorResult);
+ break;
+ case AttributeList::AT_SwiftIndirectResult:
+ handleParameterABIAttr(S, D, Attr, ParameterABI::SwiftIndirectResult);
break;
case AttributeList::AT_InternalLinkage:
handleInternalLinkageAttr(S, D, Attr);
break;
+ case AttributeList::AT_LTOVisibilityPublic:
+ handleSimpleAttribute<LTOVisibilityPublicAttr>(S, D, Attr);
+ break;
// Microsoft attributes:
+ case AttributeList::AT_EmptyBases:
+ handleSimpleAttribute<EmptyBasesAttr>(S, D, Attr);
+ break;
+ case AttributeList::AT_LayoutVersion:
+ handleLayoutVersion(S, D, Attr);
+ break;
case AttributeList::AT_MSNoVTable:
handleSimpleAttribute<MSNoVTableAttr>(S, D, Attr);
break;
@@ -5361,6 +5800,10 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
handleDeclspecThreadAttr(S, D, Attr);
break;
+ case AttributeList::AT_AbiTag:
+ handleAbiTagAttr(S, D, Attr);
+ break;
+
// Thread safety attributes:
case AttributeList::AT_AssertExclusiveLock:
handleAssertExclusiveLockAttr(S, D, Attr);
@@ -5466,6 +5909,13 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_TypeTagForDatatype:
handleTypeTagForDatatypeAttr(S, D, Attr);
break;
+ case AttributeList::AT_RenderScriptKernel:
+ handleSimpleAttribute<RenderScriptKernelAttr>(S, D, Attr);
+ break;
+ // XRay attributes.
+ case AttributeList::AT_XRayInstrument:
+ handleSimpleAttribute<XRayInstrumentAttr>(S, D, Attr);
+ break;
}
}
@@ -5744,7 +6194,6 @@ static void handleDelayedForbiddenType(Sema &S, DelayedDiagnostic &diag,
diag.Triggered = true;
}
-
static bool isDeclDeprecated(Decl *D) {
do {
if (D->isDeprecated())
@@ -5769,6 +6218,34 @@ static bool isDeclUnavailable(Decl *D) {
return false;
}
+static const AvailabilityAttr *getAttrForPlatform(ASTContext &Context,
+ const Decl *D) {
+ // Check each AvailabilityAttr to find the one for this platform.
+ for (const auto *A : D->attrs()) {
+ if (const auto *Avail = dyn_cast<AvailabilityAttr>(A)) {
+ // FIXME: this is copied from CheckAvailability. We should try to
+ // de-duplicate.
+
+ // Check if this is an App Extension "platform", and if so chop off
+ // the suffix for matching with the actual platform.
+ StringRef ActualPlatform = Avail->getPlatform()->getName();
+ StringRef RealizedPlatform = ActualPlatform;
+ if (Context.getLangOpts().AppExt) {
+ size_t suffix = RealizedPlatform.rfind("_app_extension");
+ if (suffix != StringRef::npos)
+ RealizedPlatform = RealizedPlatform.slice(0, suffix);
+ }
+
+ StringRef TargetPlatform = Context.getTargetInfo().getPlatformName();
+
+ // Match the platform name.
+ if (RealizedPlatform == TargetPlatform)
+ return Avail;
+ }
+ }
+ return nullptr;
+}
+
static void DoEmitAvailabilityWarning(Sema &S, Sema::AvailabilityDiagnostic K,
Decl *Ctx, const NamedDecl *D,
StringRef Message, SourceLocation Loc,
@@ -5850,7 +6327,6 @@ static void DoEmitAvailabilityWarning(Sema &S, Sema::AvailabilityDiagnostic K,
}
}
}
-
break;
case Sema::AD_Partial:
@@ -5862,23 +6338,61 @@ static void DoEmitAvailabilityWarning(Sema &S, Sema::AvailabilityDiagnostic K,
break;
}
+ CharSourceRange UseRange;
+ StringRef Replacement;
+ if (K == Sema::AD_Deprecation) {
+ if (auto attr = D->getAttr<DeprecatedAttr>())
+ Replacement = attr->getReplacement();
+ if (auto attr = getAttrForPlatform(S.Context, D))
+ Replacement = attr->getReplacement();
+
+ if (!Replacement.empty())
+ UseRange =
+ CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc));
+ }
+
if (!Message.empty()) {
- S.Diag(Loc, diag_message) << D << Message;
+ S.Diag(Loc, diag_message) << D << Message
+ << (UseRange.isValid() ?
+ FixItHint::CreateReplacement(UseRange, Replacement) : FixItHint());
if (ObjCProperty)
S.Diag(ObjCProperty->getLocation(), diag::note_property_attribute)
<< ObjCProperty->getDeclName() << property_note_select;
} else if (!UnknownObjCClass) {
- S.Diag(Loc, diag) << D;
+ S.Diag(Loc, diag) << D
+ << (UseRange.isValid() ?
+ FixItHint::CreateReplacement(UseRange, Replacement) : FixItHint());
if (ObjCProperty)
S.Diag(ObjCProperty->getLocation(), diag::note_property_attribute)
<< ObjCProperty->getDeclName() << property_note_select;
} else {
- S.Diag(Loc, diag_fwdclass_message) << D;
+ S.Diag(Loc, diag_fwdclass_message) << D
+ << (UseRange.isValid() ?
+ FixItHint::CreateReplacement(UseRange, Replacement) : FixItHint());
S.Diag(UnknownObjCClass->getLocation(), diag::note_forward_class);
}
- S.Diag(D->getLocation(), diag_available_here)
- << D << available_here_select_kind;
+ // The declaration can have multiple availability attributes; we are looking
+ // at one of them.
+ const AvailabilityAttr *A = getAttrForPlatform(S.Context, D);
+ if (A && A->isInherited()) {
+ for (const Decl *Redecl = D->getMostRecentDecl(); Redecl;
+ Redecl = Redecl->getPreviousDecl()) {
+ const AvailabilityAttr *AForRedecl = getAttrForPlatform(S.Context,
+ Redecl);
+ if (AForRedecl && !AForRedecl->isInherited()) {
+ // If D is a declaration with inherited attributes, the note should
+ // point to the declaration with actual attributes.
+ S.Diag(Redecl->getLocation(), diag_available_here) << D
+ << available_here_select_kind;
+ break;
+ }
+ }
+ }
+ else
+ S.Diag(D->getLocation(), diag_available_here)
+ << D << available_here_select_kind;
+
if (K == Sema::AD_Partial)
S.Diag(Loc, diag::note_partial_availability_silence) << D;
}
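
A sketch tying the replacement plumbing together, assuming a macosx target with a deployment version of at least 10.10 (hypothetical names):

    void new_api(void);
    __attribute__((availability(macosx, deprecated=10.10, replacement="new_api")))
    void old_api(void);
    void caller(void) { old_api(); }  // deprecation warning + fix-it to 'new_api'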
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
index 82d81a85fa90..e161c87f1739 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp
@@ -471,7 +471,7 @@ bool Sema::MergeCXXFunctionDecl(FunctionDecl *New, FunctionDecl *Old,
continue;
}
- // We found our guy.
+ // We found the right previous declaration.
break;
}
@@ -3356,34 +3356,7 @@ BuildImplicitBaseInitializer(Sema &SemaRef, CXXConstructorDecl *Constructor,
ExprResult BaseInit;
switch (ImplicitInitKind) {
- case IIK_Inherit: {
- const CXXRecordDecl *Inherited =
- Constructor->getInheritedConstructor()->getParent();
- const CXXRecordDecl *Base = BaseSpec->getType()->getAsCXXRecordDecl();
- if (Base && Inherited->getCanonicalDecl() == Base->getCanonicalDecl()) {
- // C++11 [class.inhctor]p8:
- // Each expression in the expression-list is of the form
- // static_cast<T&&>(p), where p is the name of the corresponding
- // constructor parameter and T is the declared type of p.
- SmallVector<Expr*, 16> Args;
- for (unsigned I = 0, E = Constructor->getNumParams(); I != E; ++I) {
- ParmVarDecl *PD = Constructor->getParamDecl(I);
- ExprResult ArgExpr =
- SemaRef.BuildDeclRefExpr(PD, PD->getType().getNonReferenceType(),
- VK_LValue, SourceLocation());
- if (ArgExpr.isInvalid())
- return true;
- Args.push_back(CastForMoving(SemaRef, ArgExpr.get(), PD->getType()));
- }
-
- InitializationKind InitKind = InitializationKind::CreateDirect(
- Constructor->getLocation(), SourceLocation(), SourceLocation());
- InitializationSequence InitSeq(SemaRef, InitEntity, InitKind, Args);
- BaseInit = InitSeq.Perform(SemaRef, InitEntity, InitKind, Args);
- break;
- }
- }
- // Fall through.
+ case IIK_Inherit:
case IIK_Default: {
InitializationKind InitKind
= InitializationKind::CreateDefault(Constructor->getLocation());
@@ -3694,12 +3667,12 @@ struct BaseAndFieldInfo {
BaseAndFieldInfo(Sema &S, CXXConstructorDecl *Ctor, bool ErrorsInInits)
: S(S), Ctor(Ctor), AnyErrorsInInits(ErrorsInInits) {
bool Generated = Ctor->isImplicit() || Ctor->isDefaulted();
- if (Generated && Ctor->isCopyConstructor())
+ if (Ctor->getInheritedConstructor())
+ IIK = IIK_Inherit;
+ else if (Generated && Ctor->isCopyConstructor())
IIK = IIK_Copy;
else if (Generated && Ctor->isMoveConstructor())
IIK = IIK_Move;
- else if (Ctor->getInheritedConstructor())
- IIK = IIK_Inherit;
else
IIK = IIK_Default;
}
@@ -4774,7 +4747,6 @@ void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) {
// The class is either imported or exported.
const bool ClassExported = ClassAttr->getKind() == attr::DLLExport;
- const bool ClassImported = !ClassExported;
TemplateSpecializationKind TSK = Class->getTemplateSpecializationKind();
@@ -4809,11 +4781,20 @@ void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) {
if (!Context.getTargetInfo().getCXXABI().isMicrosoft())
continue;
- // MSVC versions before 2015 don't export the move assignment operators,
- // so don't attempt to import them if we have a definition.
- if (ClassImported && MD->isMoveAssignmentOperator() &&
+ // MSVC versions before 2015 don't export the move assignment operators
+ // and the move constructor, so don't attempt to import/export them if
+ // we have a definition.
+ auto *Ctor = dyn_cast<CXXConstructorDecl>(MD);
+ if ((MD->isMoveAssignmentOperator() ||
+ (Ctor && Ctor->isMoveConstructor())) &&
!getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
continue;
+
+ // MSVC 2015 doesn't export trivial defaulted constructors or destructors,
+ // but the copy assignment operator is exported anyway.
+ if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
+ (Ctor || isa<CXXDestructorDecl>(MD)) && MD->isTrivial())
+ continue;
}
}
@@ -4887,6 +4868,33 @@ void Sema::propagateDLLAttrToBaseClassTemplate(
}
}
+static void DefineImplicitSpecialMember(Sema &S, CXXMethodDecl *MD,
+ SourceLocation DefaultLoc) {
+ switch (S.getSpecialMember(MD)) {
+ case Sema::CXXDefaultConstructor:
+ S.DefineImplicitDefaultConstructor(DefaultLoc,
+ cast<CXXConstructorDecl>(MD));
+ break;
+ case Sema::CXXCopyConstructor:
+ S.DefineImplicitCopyConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
+ break;
+ case Sema::CXXCopyAssignment:
+ S.DefineImplicitCopyAssignment(DefaultLoc, MD);
+ break;
+ case Sema::CXXDestructor:
+ S.DefineImplicitDestructor(DefaultLoc, cast<CXXDestructorDecl>(MD));
+ break;
+ case Sema::CXXMoveConstructor:
+ S.DefineImplicitMoveConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
+ break;
+ case Sema::CXXMoveAssignment:
+ S.DefineImplicitMoveAssignment(DefaultLoc, MD);
+ break;
+ case Sema::CXXInvalid:
+ llvm_unreachable("Invalid special member.");
+ }
+}
+
/// \brief Perform semantic checks on a class definition that has just been
/// completed, introducing implicitly-declared members, checking for
/// abstract types, etc.
@@ -4982,8 +4990,8 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
// For an explicitly defaulted or deleted special member, we defer
// determining triviality until the class is complete. That time is now!
+ CXXSpecialMember CSM = getSpecialMember(M);
if (!M->isImplicit() && !M->isUserProvided()) {
- CXXSpecialMember CSM = getSpecialMember(M);
if (CSM != CXXInvalid) {
M->setTrivial(SpecialMemberIsTrivial(M, CSM));
@@ -4991,6 +4999,20 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
Record->finishedDefaultedOrDeletedMember(M);
}
}
+
+ if (!M->isInvalidDecl() && M->isExplicitlyDefaulted() &&
+ M->hasAttr<DLLExportAttr>()) {
+ if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
+ M->isTrivial() &&
+ (CSM == CXXDefaultConstructor || CSM == CXXCopyConstructor ||
+ CSM == CXXDestructor))
+ M->dropAttr<DLLExportAttr>();
+
+ if (M->hasAttr<DLLExportAttr>()) {
+ DefineImplicitSpecialMember(*this, M, M->getLocation());
+ ActOnFinishInlineFunctionDef(M);
+ }
+ }
}
}
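
A sketch of the MSVC 2015 compatibility behavior above, assuming -fms-extensions with -fms-compatibility-version=19:

    struct __declspec(dllexport) S {
      S() = default;                      // trivial: the dllexport attribute is dropped
      ~S() = default;                     // likewise
      S &operator=(const S &) = default;  // still exported; defined eagerly here
    };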
@@ -5016,15 +5038,6 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
Diag(Record->getLocation(), diag::warn_cxx_ms_struct);
}
- // Declare inheriting constructors. We do this eagerly here because:
- // - The standard requires an eager diagnostic for conflicting inheriting
- // constructors from different classes.
- // - The lazy declaration of the other implicit constructors is so as to not
- // waste space and performance on classes that are not meant to be
- // instantiated (e.g. meta-functions). This doesn't apply to classes that
- // have inheriting constructors.
- DeclareInheritingConstructors(Record);
-
checkClassLevelDLLAttribute(Record);
}
@@ -5058,11 +5071,108 @@ static Sema::SpecialMemberOverloadResult *lookupCallFromSpecialMember(
LHSQuals & Qualifiers::Volatile);
}
+class Sema::InheritedConstructorInfo {
+ Sema &S;
+ SourceLocation UseLoc;
+
+ /// A mapping from the base classes through which the constructor was
+ /// inherited to the using shadow declaration in that base class (or a null
+ /// pointer if the constructor was declared in that base class).
+ llvm::DenseMap<CXXRecordDecl *, ConstructorUsingShadowDecl *>
+ InheritedFromBases;
+
+public:
+ InheritedConstructorInfo(Sema &S, SourceLocation UseLoc,
+ ConstructorUsingShadowDecl *Shadow)
+ : S(S), UseLoc(UseLoc) {
+ bool DiagnosedMultipleConstructedBases = false;
+ CXXRecordDecl *ConstructedBase = nullptr;
+ UsingDecl *ConstructedBaseUsing = nullptr;
+
+ // Find the set of such base class subobjects and check that there's a
+ // unique constructed subobject.
+ for (auto *D : Shadow->redecls()) {
+ auto *DShadow = cast<ConstructorUsingShadowDecl>(D);
+ auto *DNominatedBase = DShadow->getNominatedBaseClass();
+ auto *DConstructedBase = DShadow->getConstructedBaseClass();
+
+ InheritedFromBases.insert(
+ std::make_pair(DNominatedBase->getCanonicalDecl(),
+ DShadow->getNominatedBaseClassShadowDecl()));
+ if (DShadow->constructsVirtualBase())
+ InheritedFromBases.insert(
+ std::make_pair(DConstructedBase->getCanonicalDecl(),
+ DShadow->getConstructedBaseClassShadowDecl()));
+ else
+ assert(DNominatedBase == DConstructedBase);
+
+ // [class.inhctor.init]p2:
+ // If the constructor was inherited from multiple base class subobjects
+ // of type B, the program is ill-formed.
+ if (!ConstructedBase) {
+ ConstructedBase = DConstructedBase;
+ ConstructedBaseUsing = D->getUsingDecl();
+ } else if (ConstructedBase != DConstructedBase &&
+ !Shadow->isInvalidDecl()) {
+ if (!DiagnosedMultipleConstructedBases) {
+ S.Diag(UseLoc, diag::err_ambiguous_inherited_constructor)
+ << Shadow->getTargetDecl();
+ S.Diag(ConstructedBaseUsing->getLocation(),
+ diag::note_ambiguous_inherited_constructor_using)
+ << ConstructedBase;
+ DiagnosedMultipleConstructedBases = true;
+ }
+ S.Diag(D->getUsingDecl()->getLocation(),
+ diag::note_ambiguous_inherited_constructor_using)
+ << DConstructedBase;
+ }
+ }
+
+ if (DiagnosedMultipleConstructedBases)
+ Shadow->setInvalidDecl();
+ }
+
+ /// Find the constructor to use for inherited construction of a base class,
+ /// and whether that base class constructor inherits the constructor from a
+ /// virtual base class (in which case it won't actually invoke it).
+ std::pair<CXXConstructorDecl *, bool>
+ findConstructorForBase(CXXRecordDecl *Base, CXXConstructorDecl *Ctor) const {
+ auto It = InheritedFromBases.find(Base->getCanonicalDecl());
+ if (It == InheritedFromBases.end())
+ return std::make_pair(nullptr, false);
+
+ // This is an intermediary class.
+ if (It->second)
+ return std::make_pair(
+ S.findInheritingConstructor(UseLoc, Ctor, It->second),
+ It->second->constructsVirtualBase());
+
+ // This is the base class from which the constructor was inherited.
+ return std::make_pair(Ctor, false);
+ }
+};
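// Illustrative sketch (not part of this patch; class names hypothetical):
// the [class.inhctor.init]p2 ambiguity this class detects. A(int) reaches D
// through two distinct B subobjects, so using the inherited constructor is
// ill-formed:
//
//   struct A  { A(int); };
//   struct B  : A { using A::A; };
//   struct C1 : B { using B::B; };
//   struct C2 : B { using B::B; };
//   struct D  : C1, C2 { using C1::C1; using C2::C2; };
//   D d(0);  // error: ambiguous inherited constructor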
+
/// Is the special member function which would be selected to perform the
/// specified operation on the specified class type a constexpr constructor?
-static bool specialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
- Sema::CXXSpecialMember CSM,
- unsigned Quals, bool ConstRHS) {
+static bool
+specialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
+ Sema::CXXSpecialMember CSM, unsigned Quals,
+ bool ConstRHS,
+ CXXConstructorDecl *InheritedCtor = nullptr,
+ Sema::InheritedConstructorInfo *Inherited = nullptr) {
+ // If we're inheriting a constructor, see if we need to call it for this base
+ // class.
+ if (InheritedCtor) {
+ assert(CSM == Sema::CXXDefaultConstructor);
+ auto BaseCtor =
+ Inherited->findConstructorForBase(ClassDecl, InheritedCtor).first;
+ if (BaseCtor)
+ return BaseCtor->isConstexpr();
+ }
+
+ if (CSM == Sema::CXXDefaultConstructor)
+ return ClassDecl->hasConstexprDefaultConstructor();
+
Sema::SpecialMemberOverloadResult *SMOR =
lookupCallFromSpecialMember(S, ClassDecl, CSM, Quals, ConstRHS);
if (!SMOR || !SMOR->getMethod())
@@ -5074,9 +5184,10 @@ static bool specialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
/// Determine whether the specified special member function would be constexpr
/// if it were implicitly defined.
-static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
- Sema::CXXSpecialMember CSM,
- bool ConstArg) {
+static bool defaultedSpecialMemberIsConstexpr(
+ Sema &S, CXXRecordDecl *ClassDecl, Sema::CXXSpecialMember CSM,
+ bool ConstArg, CXXConstructorDecl *InheritedCtor = nullptr,
+ Sema::InheritedConstructorInfo *Inherited = nullptr) {
if (!S.getLangOpts().CPlusPlus11)
return false;
@@ -5085,6 +5196,8 @@ static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
bool Ctor = true;
switch (CSM) {
case Sema::CXXDefaultConstructor:
+ if (Inherited)
+ break;
// Since default constructor lookup is essentially trivial (and cannot
// involve, for instance, template instantiation), we compute whether a
// defaulted default constructor is constexpr directly within CXXRecordDecl.
@@ -5119,7 +5232,10 @@ static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
// will be initialized (if the constructor isn't deleted), we just don't know
// which one.
if (Ctor && ClassDecl->isUnion())
- return true;
+ return CSM == Sema::CXXDefaultConstructor
+ ? ClassDecl->hasInClassInitializer() ||
+ !ClassDecl->hasVariantMembers()
+ : true;
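// Illustrative sketch (not part of this patch): with this change, a union's
// defaulted default constructor counts as constexpr only when it actually
// initializes a variant member, or has no variant members to initialize:
//
//   union U1 { int a; };      // implicit default ctor is not constexpr:
//                             // it leaves 'a' uninitialized
//   union U2 { int a = 0; };  // in-class initializer: constexpr default ctor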
// -- the class shall not have any virtual base classes;
if (Ctor && ClassDecl->getNumVBases())
@@ -5139,7 +5255,8 @@ static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
if (!BaseType) continue;
CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
- if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg))
+ if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, 0, ConstArg,
+ InheritedCtor, Inherited))
return false;
}
@@ -5153,6 +5270,8 @@ static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
for (const auto *F : ClassDecl->fields()) {
if (F->isInvalidDecl())
continue;
+ if (CSM == Sema::CXXDefaultConstructor && F->hasInClassInitializer())
+ continue;
QualType BaseType = S.Context.getBaseElementType(F->getType());
if (const RecordType *RecordTy = BaseType->getAs<RecordType>()) {
CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
@@ -5160,6 +5279,8 @@ static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
BaseType.getCVRQualifiers(),
ConstArg && !F->isMutable()))
return false;
+ } else if (CSM == Sema::CXXDefaultConstructor) {
+ return false;
}
}
@@ -5187,7 +5308,8 @@ computeImplicitExceptionSpec(Sema &S, SourceLocation Loc, CXXMethodDecl *MD) {
}
assert(cast<CXXConstructorDecl>(MD)->getInheritedConstructor() &&
"only special members have implicit exception specs");
- return S.ComputeInheritingCtorExceptionSpec(cast<CXXConstructorDecl>(MD));
+ return S.ComputeInheritingCtorExceptionSpec(Loc,
+ cast<CXXConstructorDecl>(MD));
}
static FunctionProtoType::ExtProtoInfo getImplicitMethodEPI(Sema &S,
@@ -5384,7 +5506,7 @@ void Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD) {
// [For a] user-provided explicitly-defaulted function [...] if such a
// function is implicitly defined as deleted, the program is ill-formed.
Diag(MD->getLocation(), diag::err_out_of_line_default_deletes) << CSM;
- ShouldDeleteSpecialMember(MD, CSM, /*Diagnose*/true);
+ ShouldDeleteSpecialMember(MD, CSM, nullptr, /*Diagnose*/true);
HadError = true;
}
}
@@ -5445,6 +5567,7 @@ struct SpecialMemberDeletionInfo {
Sema &S;
CXXMethodDecl *MD;
Sema::CXXSpecialMember CSM;
+ Sema::InheritedConstructorInfo *ICI;
bool Diagnose;
// Properties of the special member, computed for convenience.
@@ -5454,11 +5577,11 @@ struct SpecialMemberDeletionInfo {
bool AllFieldsAreConst;
SpecialMemberDeletionInfo(Sema &S, CXXMethodDecl *MD,
- Sema::CXXSpecialMember CSM, bool Diagnose)
- : S(S), MD(MD), CSM(CSM), Diagnose(Diagnose),
- IsConstructor(false), IsAssignment(false), IsMove(false),
- ConstArg(false), Loc(MD->getLocation()),
- AllFieldsAreConst(true) {
+ Sema::CXXSpecialMember CSM,
+ Sema::InheritedConstructorInfo *ICI, bool Diagnose)
+ : S(S), MD(MD), CSM(CSM), ICI(ICI), Diagnose(Diagnose),
+ IsConstructor(false), IsAssignment(false), IsMove(false),
+ ConstArg(false), Loc(MD->getLocation()), AllFieldsAreConst(true) {
switch (CSM) {
case Sema::CXXDefaultConstructor:
case Sema::CXXCopyConstructor:
@@ -5490,6 +5613,10 @@ struct SpecialMemberDeletionInfo {
bool inUnion() const { return MD->getParent()->isUnion(); }
+ Sema::CXXSpecialMember getEffectiveCSM() {
+ return ICI ? Sema::CXXInvalid : CSM;
+ }
+
/// Look up the corresponding special member in the given class.
Sema::SpecialMemberOverloadResult *lookupIn(CXXRecordDecl *Class,
unsigned Quals, bool IsMutable) {
@@ -5566,13 +5693,13 @@ bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
if (Field) {
S.Diag(Field->getLocation(),
diag::note_deleted_special_member_class_subobject)
- << CSM << MD->getParent() << /*IsField*/true
+ << getEffectiveCSM() << MD->getParent() << /*IsField*/true
<< Field << DiagKind << IsDtorCallInCtor;
} else {
CXXBaseSpecifier *Base = Subobj.get<CXXBaseSpecifier*>();
S.Diag(Base->getLocStart(),
diag::note_deleted_special_member_class_subobject)
- << CSM << MD->getParent() << /*IsField*/false
+ << getEffectiveCSM() << MD->getParent() << /*IsField*/false
<< Base->getType() << DiagKind << IsDtorCallInCtor;
}
@@ -5631,7 +5758,29 @@ bool SpecialMemberDeletionInfo::shouldDeleteForBase(CXXBaseSpecifier *Base) {
CXXRecordDecl *BaseClass = Base->getType()->getAsCXXRecordDecl();
// If the program is correct, BaseClass cannot be null, but if it is, the
// error must be reported elsewhere.
- return BaseClass && shouldDeleteForClassSubobject(BaseClass, Base, 0);
+ if (!BaseClass)
+ return false;
+ // If we have an inheriting constructor, check whether we're calling an
+ // inherited constructor instead of a default constructor.
+ if (ICI) {
+ assert(CSM == Sema::CXXDefaultConstructor);
+ auto *BaseCtor =
+ ICI->findConstructorForBase(BaseClass, cast<CXXConstructorDecl>(MD)
+ ->getInheritedConstructor()
+ .getConstructor())
+ .first;
+ if (BaseCtor) {
+ if (BaseCtor->isDeleted() && Diagnose) {
+ S.Diag(Base->getLocStart(),
+ diag::note_deleted_special_member_class_subobject)
+ << getEffectiveCSM() << MD->getParent() << /*IsField*/false
+ << Base->getType() << /*Deleted*/1 << /*IsDtorCallInCtor*/false;
+ S.NoteDeletedFunction(BaseCtor);
+ }
+ return BaseCtor->isDeleted();
+ }
+ }
+ return shouldDeleteForClassSubobject(BaseClass, Base, 0);
}
/// Check whether we should delete a special member function due to the class
@@ -5646,7 +5795,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
if (FieldType->isReferenceType() && !FD->hasInClassInitializer()) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
- << MD->getParent() << FD << FieldType << /*Reference*/0;
+ << !!ICI << MD->getParent() << FD << FieldType << /*Reference*/0;
return true;
}
// C++11 [class.ctor]p5: any non-variant non-static data member of
@@ -5658,7 +5807,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
(!FieldRecord || !FieldRecord->hasUserProvidedDefaultConstructor())) {
if (Diagnose)
S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
- << MD->getParent() << FD << FD->getType() << /*Const*/1;
+ << !!ICI << MD->getParent() << FD << FD->getType() << /*Const*/1;
return true;
}
@@ -5717,7 +5866,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
if (Diagnose)
S.Diag(FieldRecord->getLocation(),
diag::note_deleted_default_ctor_all_const)
- << MD->getParent() << /*anonymous union*/1;
+ << !!ICI << MD->getParent() << /*anonymous union*/1;
return true;
}
@@ -5745,7 +5894,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForAllConstMembers() {
if (Diagnose)
S.Diag(MD->getParent()->getLocation(),
diag::note_deleted_default_ctor_all_const)
- << MD->getParent() << /*not anonymous union*/0;
+ << !!ICI << MD->getParent() << /*not anonymous union*/0;
return true;
}
return false;
@@ -5755,6 +5904,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForAllConstMembers() {
/// deleted, as specified in C++11 [class.ctor]p5, C++11 [class.copy]p11,
/// C++11 [class.copy]p23, and C++11 [class.dtor]p5.
bool Sema::ShouldDeleteSpecialMember(CXXMethodDecl *MD, CXXSpecialMember CSM,
+ InheritedConstructorInfo *ICI,
bool Diagnose) {
if (MD->isInvalidDecl())
return false;
@@ -5844,7 +5994,7 @@ bool Sema::ShouldDeleteSpecialMember(CXXMethodDecl *MD, CXXSpecialMember CSM,
}
}
- SpecialMemberDeletionInfo SMI(*this, MD, CSM, Diagnose);
+ SpecialMemberDeletionInfo SMI(*this, MD, CSM, ICI, Diagnose);
for (auto &BI : RD->bases())
if (!BI.isVirtual() &&
@@ -6452,27 +6602,33 @@ void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
/// [special]p1). This routine can only be executed just before the
/// definition of the class is complete.
void Sema::AddImplicitlyDeclaredMembersToClass(CXXRecordDecl *ClassDecl) {
- if (!ClassDecl->hasUserDeclaredConstructor())
+ if (ClassDecl->needsImplicitDefaultConstructor()) {
++ASTContext::NumImplicitDefaultConstructors;
- if (!ClassDecl->hasUserDeclaredCopyConstructor()) {
+ if (ClassDecl->hasInheritedConstructor())
+ DeclareImplicitDefaultConstructor(ClassDecl);
+ }
+
+ if (ClassDecl->needsImplicitCopyConstructor()) {
++ASTContext::NumImplicitCopyConstructors;
// If the properties or semantics of the copy constructor couldn't be
// determined while the class was being declared, force a declaration
// of it now.
- if (ClassDecl->needsOverloadResolutionForCopyConstructor())
+ if (ClassDecl->needsOverloadResolutionForCopyConstructor() ||
+ ClassDecl->hasInheritedConstructor())
DeclareImplicitCopyConstructor(ClassDecl);
}
if (getLangOpts().CPlusPlus11 && ClassDecl->needsImplicitMoveConstructor()) {
++ASTContext::NumImplicitMoveConstructors;
- if (ClassDecl->needsOverloadResolutionForMoveConstructor())
+ if (ClassDecl->needsOverloadResolutionForMoveConstructor() ||
+ ClassDecl->hasInheritedConstructor())
DeclareImplicitMoveConstructor(ClassDecl);
}
- if (!ClassDecl->hasUserDeclaredCopyAssignment()) {
+ if (ClassDecl->needsImplicitCopyAssignment()) {
++ASTContext::NumImplicitCopyAssignmentOperators;
// If we have a dynamic class, then the copy assignment operator may be
@@ -6480,7 +6636,8 @@ void Sema::AddImplicitlyDeclaredMembersToClass(CXXRecordDecl *ClassDecl) {
// it shows up in the right place in the vtable and that we diagnose
// problems with the implicit exception specification.
if (ClassDecl->isDynamicClass() ||
- ClassDecl->needsOverloadResolutionForCopyAssignment())
+ ClassDecl->needsOverloadResolutionForCopyAssignment() ||
+ ClassDecl->hasInheritedAssignment())
DeclareImplicitCopyAssignment(ClassDecl);
}
@@ -6489,11 +6646,12 @@ void Sema::AddImplicitlyDeclaredMembersToClass(CXXRecordDecl *ClassDecl) {
// Likewise for the move assignment operator.
if (ClassDecl->isDynamicClass() ||
- ClassDecl->needsOverloadResolutionForMoveAssignment())
+ ClassDecl->needsOverloadResolutionForMoveAssignment() ||
+ ClassDecl->hasInheritedAssignment())
DeclareImplicitMoveAssignment(ClassDecl);
}
- if (!ClassDecl->hasUserDeclaredDestructor()) {
+ if (ClassDecl->needsImplicitDestructor()) {
++ASTContext::NumImplicitDestructors;
// If we have a dynamic class, then the destructor may be virtual, so we
@@ -7738,7 +7896,7 @@ bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig,
// function will silently decide not to build a shadow decl, which
// will pre-empt further diagnostics.
//
- // We don't need to do this in C++0x because we do the check once on
+ // We don't need to do this in C++11 because we do the check once on
// the qualifier.
//
// FIXME: diagnose the following if we care enough:
@@ -7796,6 +7954,12 @@ bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig,
for (LookupResult::iterator I = Previous.begin(), E = Previous.end();
I != E; ++I) {
NamedDecl *D = (*I)->getUnderlyingDecl();
+ // We can have UsingDecls in our Previous results because we use the same
+ // LookupResult for checking whether the UsingDecl itself is a valid
+ // redeclaration.
+ if (isa<UsingDecl>(D))
+ continue;
+
if (IsEquivalentForUsingDecl(Context, D, Target)) {
if (UsingShadowDecl *Shadow = dyn_cast<UsingShadowDecl>(*I))
PrevShadow = Shadow;
@@ -7863,12 +8027,21 @@ bool Sema::CheckUsingShadowDecl(UsingDecl *Using, NamedDecl *Orig,
return true;
}
+/// Determine whether a direct base class is a virtual base class.
+static bool isVirtualDirectBase(CXXRecordDecl *Derived, CXXRecordDecl *Base) {
+ if (!Derived->getNumVBases())
+ return false;
+ for (auto &B : Derived->bases())
+ if (B.getType()->getAsCXXRecordDecl() == Base)
+ return B.isVirtual();
+ llvm_unreachable("not a direct base class");
+}
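// Illustrative sketch (not part of this patch): what isVirtualDirectBase
// reports for a direct base; the getNumVBases() early-out covers the common
// case of hierarchies with no virtual bases at all.
//
//   struct A {};
//   struct B : virtual A {};  // isVirtualDirectBase(B, A) == true
//   struct C : A {};          // isVirtualDirectBase(C, A) == false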
+
/// Builds a shadow declaration corresponding to a 'using' declaration.
UsingShadowDecl *Sema::BuildUsingShadowDecl(Scope *S,
UsingDecl *UD,
NamedDecl *Orig,
UsingShadowDecl *PrevDecl) {
-
// If we resolved to another shadow declaration, just coalesce them.
NamedDecl *Target = Orig;
if (isa<UsingShadowDecl>(Target)) {
@@ -7876,9 +8049,21 @@ UsingShadowDecl *Sema::BuildUsingShadowDecl(Scope *S,
assert(!isa<UsingShadowDecl>(Target) && "nested shadow declaration");
}
- UsingShadowDecl *Shadow
- = UsingShadowDecl::Create(Context, CurContext,
- UD->getLocation(), UD, Target);
+ NamedDecl *NonTemplateTarget = Target;
+ if (auto *TargetTD = dyn_cast<TemplateDecl>(Target))
+ NonTemplateTarget = TargetTD->getTemplatedDecl();
+
+ UsingShadowDecl *Shadow;
+ if (isa<CXXConstructorDecl>(NonTemplateTarget)) {
+ bool IsVirtualBase =
+ isVirtualDirectBase(cast<CXXRecordDecl>(CurContext),
+ UD->getQualifier()->getAsRecordDecl());
+ Shadow = ConstructorUsingShadowDecl::Create(
+ Context, CurContext, UD->getLocation(), UD, Orig, IsVirtualBase);
+ } else {
+ Shadow = UsingShadowDecl::Create(Context, CurContext, UD->getLocation(), UD,
+ Target);
+ }
UD->addShadowDecl(Shadow);
Shadow->setAccess(UD->getAccess());
@@ -7980,6 +8165,9 @@ public:
if (Candidate.WillReplaceSpecifier() && !Candidate.getCorrectionSpecifier())
return false;
+ // FIXME: Don't correct to a name that CheckUsingDeclRedeclaration would
+ // reject.
+
if (RequireMemberOf) {
auto *FoundRecord = dyn_cast<CXXRecordDecl>(ND);
if (FoundRecord && FoundRecord->isInjectedClassName()) {
@@ -8060,8 +8248,17 @@ NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
return nullptr;
}
+ // For an inheriting constructor declaration, the name of the using
+ // declaration is the name of a constructor in this class, not in the
+ // base class.
+ DeclarationNameInfo UsingName = NameInfo;
+ if (UsingName.getName().getNameKind() == DeclarationName::CXXConstructorName)
+ if (auto *RD = dyn_cast<CXXRecordDecl>(CurContext))
+ UsingName.setName(Context.DeclarationNames.getCXXConstructorName(
+ Context.getCanonicalType(Context.getRecordType(RD))));
+
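// Illustrative sketch (not part of this patch): the renaming performed
// above. An inheriting-constructor using-declaration introduces the
// *derived* class's constructor name, which is the name the redeclaration
// lookup below must search for:
//
//   struct B { B(int); };
//   struct D : B {
//     using B::B;  // declares the name D::D in D, not B::B
//   };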
// Do the redeclaration lookup in the current scope.
- LookupResult Previous(*this, NameInfo, LookupUsingDeclName,
+ LookupResult Previous(*this, UsingName, LookupUsingDeclName,
ForRedeclaration);
Previous.setHideTags(false);
if (S) {
@@ -8118,8 +8315,8 @@ NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
auto Build = [&](bool Invalid) {
UsingDecl *UD =
- UsingDecl::Create(Context, CurContext, UsingLoc, QualifierLoc, NameInfo,
- HasTypenameKeyword);
+ UsingDecl::Create(Context, CurContext, UsingLoc, QualifierLoc,
+ UsingName, HasTypenameKeyword);
UD->setAccess(AS);
CurContext->addDecl(UD);
UD->setInvalidDecl(Invalid);
@@ -8174,6 +8371,9 @@ NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
// If we corrected to an inheriting constructor, handle it as one.
auto *RD = dyn_cast<CXXRecordDecl>(ND);
if (RD && RD->isInjectedClassName()) {
+ // The parent of the injected class name is the class itself.
+ RD = cast<CXXRecordDecl>(RD->getParent());
+
// Fix up the information we'll use to build the using declaration.
if (Corrected.WillReplaceSpecifier()) {
NestedNameSpecifierLocBuilder Builder;
@@ -8182,13 +8382,19 @@ NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
QualifierLoc = Builder.getWithLocInContext(Context);
}
- NameInfo.setName(Context.DeclarationNames.getCXXConstructorName(
- Context.getCanonicalType(Context.getRecordType(RD))));
- NameInfo.setNamedTypeInfo(nullptr);
+ // In this case, the name we introduce is the name of a derived class
+ // constructor.
+ auto *CurClass = cast<CXXRecordDecl>(CurContext);
+ UsingName.setName(Context.DeclarationNames.getCXXConstructorName(
+ Context.getCanonicalType(Context.getRecordType(CurClass))));
+ UsingName.setNamedTypeInfo(nullptr);
for (auto *Ctor : LookupConstructors(RD))
R.addDecl(Ctor);
+ R.resolveKind();
} else {
- // FIXME: Pick up all the declarations if we found an overloaded function.
+ // FIXME: Pick up all the declarations if we found an overloaded
+ // function.
+ UsingName.setName(ND->getDeclName());
R.addDecl(ND);
}
} else {
@@ -8221,7 +8427,7 @@ NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
}
}
- // C++0x N2914 [namespace.udecl]p6:
+ // C++14 [namespace.udecl]p6:
// A using-declaration shall not name a namespace.
if (R.getAsSingle<NamespaceDecl>()) {
Diag(IdentLoc, diag::err_using_decl_can_not_refer_to_namespace)
@@ -8229,19 +8435,28 @@ NamedDecl *Sema::BuildUsingDeclaration(Scope *S, AccessSpecifier AS,
return BuildInvalid();
}
+ // C++14 [namespace.udecl]p7:
+ // A using-declaration shall not name a scoped enumerator.
+ if (auto *ED = R.getAsSingle<EnumConstantDecl>()) {
+ if (cast<EnumDecl>(ED->getDeclContext())->isScoped()) {
+ Diag(IdentLoc, diag::err_using_decl_can_not_refer_to_scoped_enum)
+ << SS.getRange();
+ return BuildInvalid();
+ }
+ }
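// Illustrative sketch (not part of this patch): the case rejected above.
//
//   enum class E { e };
//   using E::e;  // error: a using-declaration shall not name a scoped
//                // enumerator (C++14 [namespace.udecl]p7)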
+
UsingDecl *UD = BuildValid();
- // The normal rules do not apply to inheriting constructor declarations.
- if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName) {
+ // Some additional rules apply to inheriting constructors.
+ if (UsingName.getName().getNameKind() ==
+ DeclarationName::CXXConstructorName) {
// Suppress access diagnostics; the access check is instead performed at the
// point of use for an inheriting constructor.
R.suppressDiagnostics();
- CheckInheritingConstructorUsingDecl(UD);
- return UD;
+ if (CheckInheritingConstructorUsingDecl(UD))
+ return UD;
}
- // Otherwise, look up the target name.
-
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I) {
UsingShadowDecl *PrevDecl = nullptr;
if (!CheckUsingShadowDecl(UD, *I, Previous, PrevDecl))
@@ -8353,8 +8568,10 @@ bool Sema::CheckUsingDeclQualifier(SourceLocation UsingLoc,
// If we weren't able to compute a valid scope, it must be a
// dependent class scope.
- if (!NamedContext || NamedContext->isRecord()) {
- auto *RD = dyn_cast_or_null<CXXRecordDecl>(NamedContext);
+ if (!NamedContext || NamedContext->getRedeclContext()->isRecord()) {
+ auto *RD = NamedContext
+ ? cast<CXXRecordDecl>(NamedContext->getRedeclContext())
+ : nullptr;
if (RD && RequireCompleteDeclContext(const_cast<CXXScopeSpec&>(SS), RD))
RD = nullptr;
@@ -8403,6 +8620,20 @@ bool Sema::CheckUsingDeclQualifier(SourceLocation UsingLoc,
Diag(UsingLoc, diag::note_using_decl_class_member_workaround)
<< 2 // reference declaration
<< FixIt;
+ } else if (R.getAsSingle<EnumConstantDecl>()) {
+ // Don't provide a fixit outside C++11 mode; we don't want to suggest
+ // repeating the type of the enumeration here, and we can't do so if
+ // the type is anonymous.
+ FixItHint FixIt;
+ if (getLangOpts().CPlusPlus11) {
+        // Convert 'using X::Y;' to 'constexpr auto Y = X::Y;'.
+ FixIt = FixItHint::CreateReplacement(
+ UsingLoc, "constexpr auto " + NameInfo.getName().getAsString() + " = ");
+ }
+
+ Diag(UsingLoc, diag::note_using_decl_class_member_workaround)
+ << (getLangOpts().CPlusPlus11 ? 4 : 3) // const[expr] variable
+ << FixIt;
}
return true;
}
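// Illustrative sketch (not part of this patch; names hypothetical): the
// workaround the new fix-it points at. A member using-declaration cannot
// name an enumerator of an unrelated class, but a constexpr variable can
// capture its value (at class scope it must also be 'static'):
//
//   struct X { enum { Y = 1 }; };
//   struct S {
//     // using X::Y;                   // rejected: X is not a base of S
//     static constexpr auto Y = X::Y;  // suggested C++11 replacement
//   };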
@@ -8438,7 +8669,7 @@ bool Sema::CheckUsingDeclQualifier(SourceLocation UsingLoc,
return true;
if (getLangOpts().CPlusPlus11) {
- // C++0x [namespace.udecl]p3:
+ // C++11 [namespace.udecl]p3:
// In a using-declaration used as a member-declaration, the
// nested-name-specifier shall name a base class of the class
// being defined.
@@ -8579,6 +8810,10 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S,
}
TemplateParameterList *TemplateParams = TemplateParamLists[0];
+ // Check that we can declare a template here.
+ if (CheckTemplateDeclScope(S, TemplateParams))
+ return nullptr;
+
// Only consider previous declarations in the same scope.
FilterLookupForScope(Previous, CurContext, S, /*ConsiderLinkage*/false,
/*ExplicitInstantiationOrSpecialization*/false);
@@ -8650,9 +8885,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S,
NewND = NewTD;
}
- if (!Redeclaration)
- PushOnScopeChains(NewND, S);
-
+ PushOnScopeChains(NewND, S);
ActOnDocumentableDecl(NewND);
return NewND;
}
@@ -8796,7 +9029,8 @@ Sema::ComputeDefaultedDefaultCtorExceptionSpec(SourceLocation Loc,
}
Sema::ImplicitExceptionSpecification
-Sema::ComputeInheritingCtorExceptionSpec(CXXConstructorDecl *CD) {
+Sema::ComputeInheritingCtorExceptionSpec(SourceLocation Loc,
+ CXXConstructorDecl *CD) {
CXXRecordDecl *ClassDecl = CD->getParent();
// C++ [except.spec]p14:
@@ -8805,36 +9039,26 @@ Sema::ComputeInheritingCtorExceptionSpec(CXXConstructorDecl *CD) {
if (ClassDecl->isInvalidDecl())
return ExceptSpec;
- // Inherited constructor.
- const CXXConstructorDecl *InheritedCD = CD->getInheritedConstructor();
- const CXXRecordDecl *InheritedDecl = InheritedCD->getParent();
- // FIXME: Copying or moving the parameters could add extra exceptions to the
- // set, as could the default arguments for the inherited constructor. This
- // will be addressed when we implement the resolution of core issue 1351.
- ExceptSpec.CalledDecl(CD->getLocStart(), InheritedCD);
+ auto Inherited = CD->getInheritedConstructor();
+ InheritedConstructorInfo ICI(*this, Loc, Inherited.getShadowDecl());
- // Direct base-class constructors.
- for (const auto &B : ClassDecl->bases()) {
- if (B.isVirtual()) // Handled below.
- continue;
-
- if (const RecordType *BaseType = B.getType()->getAs<RecordType>()) {
- CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
- if (BaseClassDecl == InheritedDecl)
+ // Direct and virtual base-class constructors.
+ for (bool VBase : {false, true}) {
+ for (CXXBaseSpecifier &B :
+ VBase ? ClassDecl->vbases() : ClassDecl->bases()) {
+ // Don't visit direct vbases twice.
+ if (B.isVirtual() != VBase)
continue;
- CXXConstructorDecl *Constructor = LookupDefaultConstructor(BaseClassDecl);
- if (Constructor)
- ExceptSpec.CalledDecl(B.getLocStart(), Constructor);
- }
- }
- // Virtual base-class constructors.
- for (const auto &B : ClassDecl->vbases()) {
- if (const RecordType *BaseType = B.getType()->getAs<RecordType>()) {
- CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
- if (BaseClassDecl == InheritedDecl)
+ CXXRecordDecl *BaseClass = B.getType()->getAsCXXRecordDecl();
+ if (!BaseClass)
continue;
- CXXConstructorDecl *Constructor = LookupDefaultConstructor(BaseClassDecl);
+
+ CXXConstructorDecl *Constructor =
+ ICI.findConstructorForBase(BaseClass, Inherited.getConstructor())
+ .first;
+ if (!Constructor)
+ Constructor = LookupDefaultConstructor(BaseClass);
if (Constructor)
ExceptSpec.CalledDecl(B.getLocStart(), Constructor);
}
@@ -8862,10 +9086,11 @@ namespace {
struct DeclaringSpecialMember {
Sema &S;
Sema::SpecialMemberDecl D;
+ Sema::ContextRAII SavedContext;
bool WasAlreadyBeingDeclared;
DeclaringSpecialMember(Sema &S, CXXRecordDecl *RD, Sema::CXXSpecialMember CSM)
- : S(S), D(RD, CSM) {
+ : S(S), D(RD, CSM), SavedContext(S, RD) {
WasAlreadyBeingDeclared = !S.SpecialMembersBeingDeclared.insert(D).second;
if (WasAlreadyBeingDeclared)
// This almost never happens, but if it does, ensure that our cache
@@ -8887,6 +9112,21 @@ struct DeclaringSpecialMember {
};
}
+void Sema::CheckImplicitSpecialMemberDeclaration(Scope *S, FunctionDecl *FD) {
+ // Look up any existing declarations, but don't trigger declaration of all
+ // implicit special members with this name.
+ DeclarationName Name = FD->getDeclName();
+ LookupResult R(*this, Name, SourceLocation(), LookupOrdinaryName,
+ ForRedeclaration);
+ for (auto *D : FD->getParent()->lookup(Name))
+ if (auto *Acceptable = R.getAcceptableDecl(D))
+ R.addDecl(Acceptable);
+ R.resolveKind();
+ R.suppressDiagnostics();
+
+ CheckFunctionDeclaration(S, FD, R, /*IsExplicitSpecialization*/false);
+}
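// Illustrative sketch (not part of this patch): why implicit special members
// now go through redeclaration checking. With inherited constructors, an
// implicit member can share its signature with a member already visible in
// the class, and the two declarations must be checked against each other:
//
//   struct B { B(); };
//   struct D : B {
//     using B::B;  // inherited B::B() and D's implicit default constructor
//   };             // have the same signature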
+
CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
CXXRecordDecl *ClassDecl) {
// C++ [class.ctor]p5:
@@ -8935,13 +9175,16 @@ CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
// constructors is easy to compute.
DefaultCon->setTrivial(ClassDecl->hasTrivialDefaultConstructor());
- if (ShouldDeleteSpecialMember(DefaultCon, CXXDefaultConstructor))
- SetDeclDeleted(DefaultCon, ClassLoc);
-
// Note that we have declared this constructor.
++ASTContext::NumImplicitDefaultConstructorsDeclared;
- if (Scope *S = getScopeForContext(ClassDecl))
+ Scope *S = getScopeForContext(ClassDecl);
+ CheckImplicitSpecialMemberDeclaration(S, DefaultCon);
+
+ if (ShouldDeleteSpecialMember(DefaultCon, CXXDefaultConstructor))
+ SetDeclDeleted(DefaultCon, ClassLoc);
+
+ if (S)
PushOnScopeChains(DefaultCon, S, false);
ClassDecl->addDecl(DefaultCon);
@@ -8993,325 +9236,167 @@ void Sema::ActOnFinishDelayedMemberInitializers(Decl *D) {
CheckDelayedMemberExceptionSpecs();
}
-namespace {
-/// Information on inheriting constructors to declare.
-class InheritingConstructorInfo {
-public:
- InheritingConstructorInfo(Sema &SemaRef, CXXRecordDecl *Derived)
- : SemaRef(SemaRef), Derived(Derived) {
- // Mark the constructors that we already have in the derived class.
- //
- // C++11 [class.inhctor]p3: [...] a constructor is implicitly declared [...]
- // unless there is a user-declared constructor with the same signature in
- // the class where the using-declaration appears.
- visitAll(Derived, &InheritingConstructorInfo::noteDeclaredInDerived);
- }
+/// Find or create the fake constructor we synthesize to model constructing an
+/// object of a derived class via a constructor of a base class.
+CXXConstructorDecl *
+Sema::findInheritingConstructor(SourceLocation Loc,
+ CXXConstructorDecl *BaseCtor,
+ ConstructorUsingShadowDecl *Shadow) {
+ CXXRecordDecl *Derived = Shadow->getParent();
+ SourceLocation UsingLoc = Shadow->getLocation();
+
+ // FIXME: Add a new kind of DeclarationName for an inherited constructor.
+ // For now we use the name of the base class constructor as a member of the
+ // derived class to indicate a (fake) inherited constructor name.
+ DeclarationName Name = BaseCtor->getDeclName();
+
+ // Check to see if we already have a fake constructor for this inherited
+ // constructor call.
+ for (NamedDecl *Ctor : Derived->lookup(Name))
+ if (declaresSameEntity(cast<CXXConstructorDecl>(Ctor)
+ ->getInheritedConstructor()
+ .getConstructor(),
+ BaseCtor))
+ return cast<CXXConstructorDecl>(Ctor);
+
+ DeclarationNameInfo NameInfo(Name, UsingLoc);
+ TypeSourceInfo *TInfo =
+ Context.getTrivialTypeSourceInfo(BaseCtor->getType(), UsingLoc);
+ FunctionProtoTypeLoc ProtoLoc =
+ TInfo->getTypeLoc().IgnoreParens().castAs<FunctionProtoTypeLoc>();
+
+ // Check the inherited constructor is valid and find the list of base classes
+ // from which it was inherited.
+ InheritedConstructorInfo ICI(*this, Loc, Shadow);
+
+ bool Constexpr =
+ BaseCtor->isConstexpr() &&
+ defaultedSpecialMemberIsConstexpr(*this, Derived, CXXDefaultConstructor,
+ false, BaseCtor, &ICI);
+
+ CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create(
+ Context, Derived, UsingLoc, NameInfo, TInfo->getType(), TInfo,
+ BaseCtor->isExplicit(), /*Inline=*/true,
+ /*ImplicitlyDeclared=*/true, Constexpr,
+ InheritedConstructor(Shadow, BaseCtor));
+ if (Shadow->isInvalidDecl())
+ DerivedCtor->setInvalidDecl();
+
+ // Build an unevaluated exception specification for this fake constructor.
+ const FunctionProtoType *FPT = TInfo->getType()->castAs<FunctionProtoType>();
+ FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
+ EPI.ExceptionSpec.Type = EST_Unevaluated;
+ EPI.ExceptionSpec.SourceDecl = DerivedCtor;
+ DerivedCtor->setType(Context.getFunctionType(FPT->getReturnType(),
+ FPT->getParamTypes(), EPI));
- void inheritAll(CXXRecordDecl *RD) {
- visitAll(RD, &InheritingConstructorInfo::inherit);
+ // Build the parameter declarations.
+ SmallVector<ParmVarDecl *, 16> ParamDecls;
+ for (unsigned I = 0, N = FPT->getNumParams(); I != N; ++I) {
+ TypeSourceInfo *TInfo =
+ Context.getTrivialTypeSourceInfo(FPT->getParamType(I), UsingLoc);
+ ParmVarDecl *PD = ParmVarDecl::Create(
+ Context, DerivedCtor, UsingLoc, UsingLoc, /*IdentifierInfo=*/nullptr,
+ FPT->getParamType(I), TInfo, SC_None, /*DefaultArg=*/nullptr);
+ PD->setScopeInfo(0, I);
+ PD->setImplicit();
+ // Ensure attributes are propagated onto parameters (this matters for
+ // format, pass_object_size, ...).
+ mergeDeclAttributes(PD, BaseCtor->getParamDecl(I));
+ ParamDecls.push_back(PD);
+ ProtoLoc.setParam(I, PD);
}
-private:
- /// Information about an inheriting constructor.
- struct InheritingConstructor {
- InheritingConstructor()
- : DeclaredInDerived(false), BaseCtor(nullptr), DerivedCtor(nullptr) {}
-
- /// If \c true, a constructor with this signature is already declared
- /// in the derived class.
- bool DeclaredInDerived;
-
- /// The constructor which is inherited.
- const CXXConstructorDecl *BaseCtor;
-
- /// The derived constructor we declared.
- CXXConstructorDecl *DerivedCtor;
- };
-
- /// Inheriting constructors with a given canonical type. There can be at
- /// most one such non-template constructor, and any number of templated
- /// constructors.
- struct InheritingConstructorsForType {
- InheritingConstructor NonTemplate;
- SmallVector<std::pair<TemplateParameterList *, InheritingConstructor>, 4>
- Templates;
-
- InheritingConstructor &getEntry(Sema &S, const CXXConstructorDecl *Ctor) {
- if (FunctionTemplateDecl *FTD = Ctor->getDescribedFunctionTemplate()) {
- TemplateParameterList *ParamList = FTD->getTemplateParameters();
- for (unsigned I = 0, N = Templates.size(); I != N; ++I)
- if (S.TemplateParameterListsAreEqual(ParamList, Templates[I].first,
- false, S.TPL_TemplateMatch))
- return Templates[I].second;
- Templates.push_back(std::make_pair(ParamList, InheritingConstructor()));
- return Templates.back().second;
- }
+ // Set up the new constructor.
+ assert(!BaseCtor->isDeleted() && "should not use deleted constructor");
+ DerivedCtor->setAccess(BaseCtor->getAccess());
+ DerivedCtor->setParams(ParamDecls);
+ Derived->addDecl(DerivedCtor);
- return NonTemplate;
- }
- };
+ if (ShouldDeleteSpecialMember(DerivedCtor, CXXDefaultConstructor, &ICI))
+ SetDeclDeleted(DerivedCtor, UsingLoc);
- /// Get or create the inheriting constructor record for a constructor.
- InheritingConstructor &getEntry(const CXXConstructorDecl *Ctor,
- QualType CtorType) {
- return Map[CtorType.getCanonicalType()->castAs<FunctionProtoType>()]
- .getEntry(SemaRef, Ctor);
- }
+ return DerivedCtor;
+}
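// Illustrative sketch (not part of this patch): what findInheritingConstructor
// synthesizes, in source terms. For
//
//   struct B { B(int, const char *); };
//   struct D : B { using B::B; };
//
// constructing D(0, "") creates (or reuses, on repeated calls) a fake member
// D::D(int, const char *) whose InheritedConstructor records both the shadow
// declaration for 'using B::B;' and B's constructor.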
- typedef void (InheritingConstructorInfo::*VisitFn)(const CXXConstructorDecl*);
+void Sema::NoteDeletedInheritingConstructor(CXXConstructorDecl *Ctor) {
+ InheritedConstructorInfo ICI(*this, Ctor->getLocation(),
+ Ctor->getInheritedConstructor().getShadowDecl());
+ ShouldDeleteSpecialMember(Ctor, CXXDefaultConstructor, &ICI,
+ /*Diagnose*/true);
+}
- /// Process all constructors for a class.
- void visitAll(const CXXRecordDecl *RD, VisitFn Callback) {
- for (const auto *Ctor : RD->ctors())
- (this->*Callback)(Ctor);
- for (CXXRecordDecl::specific_decl_iterator<FunctionTemplateDecl>
- I(RD->decls_begin()), E(RD->decls_end());
- I != E; ++I) {
- const FunctionDecl *FD = (*I)->getTemplatedDecl();
- if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD))
- (this->*Callback)(CD);
- }
- }
+void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
+ CXXConstructorDecl *Constructor) {
+ CXXRecordDecl *ClassDecl = Constructor->getParent();
+ assert(Constructor->getInheritedConstructor() &&
+ !Constructor->doesThisDeclarationHaveABody() &&
+ !Constructor->isDeleted());
+ if (Constructor->isInvalidDecl())
+ return;
- /// Note that a constructor (or constructor template) was declared in Derived.
- void noteDeclaredInDerived(const CXXConstructorDecl *Ctor) {
- getEntry(Ctor, Ctor->getType()).DeclaredInDerived = true;
- }
+ ConstructorUsingShadowDecl *Shadow =
+ Constructor->getInheritedConstructor().getShadowDecl();
+ CXXConstructorDecl *InheritedCtor =
+ Constructor->getInheritedConstructor().getConstructor();
- /// Inherit a single constructor.
- void inherit(const CXXConstructorDecl *Ctor) {
- const FunctionProtoType *CtorType =
- Ctor->getType()->castAs<FunctionProtoType>();
- ArrayRef<QualType> ArgTypes = CtorType->getParamTypes();
- FunctionProtoType::ExtProtoInfo EPI = CtorType->getExtProtoInfo();
+ // [class.inhctor.init]p1:
+ // initialization proceeds as if a defaulted default constructor is used to
+ // initialize the D object and each base class subobject from which the
+ // constructor was inherited
- SourceLocation UsingLoc = getUsingLoc(Ctor->getParent());
+ InheritedConstructorInfo ICI(*this, CurrentLocation, Shadow);
+ CXXRecordDecl *RD = Shadow->getParent();
+ SourceLocation InitLoc = Shadow->getLocation();
- // Core issue (no number yet): the ellipsis is always discarded.
- if (EPI.Variadic) {
- SemaRef.Diag(UsingLoc, diag::warn_using_decl_constructor_ellipsis);
- SemaRef.Diag(Ctor->getLocation(),
- diag::note_using_decl_constructor_ellipsis);
- EPI.Variadic = false;
- }
+ // Initializations are performed "as if by a defaulted default constructor",
+ // so enter the appropriate scope.
+ SynthesizedFunctionScope Scope(*this, Constructor);
+ DiagnosticErrorTrap Trap(Diags);
- // Declare a constructor for each number of parameters.
- //
- // C++11 [class.inhctor]p1:
- // The candidate set of inherited constructors from the class X named in
- // the using-declaration consists of [... modulo defects ...] for each
- // constructor or constructor template of X, the set of constructors or
- // constructor templates that results from omitting any ellipsis parameter
- // specification and successively omitting parameters with a default
- // argument from the end of the parameter-type-list
- unsigned MinParams = minParamsToInherit(Ctor);
- unsigned Params = Ctor->getNumParams();
- if (Params >= MinParams) {
- do
- declareCtor(UsingLoc, Ctor,
- SemaRef.Context.getFunctionType(
- Ctor->getReturnType(), ArgTypes.slice(0, Params), EPI));
- while (Params > MinParams &&
- Ctor->getParamDecl(--Params)->hasDefaultArg());
- }
- }
-
- /// Find the using-declaration which specified that we should inherit the
- /// constructors of \p Base.
- SourceLocation getUsingLoc(const CXXRecordDecl *Base) {
- // No fancy lookup required; just look for the base constructor name
- // directly within the derived class.
- ASTContext &Context = SemaRef.Context;
- DeclarationName Name = Context.DeclarationNames.getCXXConstructorName(
- Context.getCanonicalType(Context.getRecordType(Base)));
- DeclContext::lookup_result Decls = Derived->lookup(Name);
- return Decls.empty() ? Derived->getLocation() : Decls[0]->getLocation();
- }
-
- unsigned minParamsToInherit(const CXXConstructorDecl *Ctor) {
- // C++11 [class.inhctor]p3:
- // [F]or each constructor template in the candidate set of inherited
- // constructors, a constructor template is implicitly declared
- if (Ctor->getDescribedFunctionTemplate())
- return 0;
-
- // For each non-template constructor in the candidate set of inherited
- // constructors other than a constructor having no parameters or a
- // copy/move constructor having a single parameter, a constructor is
- // implicitly declared [...]
- if (Ctor->getNumParams() == 0)
- return 1;
- if (Ctor->isCopyOrMoveConstructor())
- return 2;
-
- // Per discussion on core reflector, never inherit a constructor which
- // would become a default, copy, or move constructor of Derived either.
- const ParmVarDecl *PD = Ctor->getParamDecl(0);
- const ReferenceType *RT = PD->getType()->getAs<ReferenceType>();
- return (RT && RT->getPointeeCXXRecordDecl() == Derived) ? 2 : 1;
- }
-
- /// Declare a single inheriting constructor, inheriting the specified
- /// constructor, with the given type.
- void declareCtor(SourceLocation UsingLoc, const CXXConstructorDecl *BaseCtor,
- QualType DerivedType) {
- InheritingConstructor &Entry = getEntry(BaseCtor, DerivedType);
-
- // C++11 [class.inhctor]p3:
- // ... a constructor is implicitly declared with the same constructor
- // characteristics unless there is a user-declared constructor with
- // the same signature in the class where the using-declaration appears
- if (Entry.DeclaredInDerived)
- return;
+ // Build explicit initializers for all base classes from which the
+ // constructor was inherited.
+ SmallVector<CXXCtorInitializer*, 8> Inits;
+ for (bool VBase : {false, true}) {
+ for (CXXBaseSpecifier &B : VBase ? RD->vbases() : RD->bases()) {
+ if (B.isVirtual() != VBase)
+ continue;
- // C++11 [class.inhctor]p7:
- // If two using-declarations declare inheriting constructors with the
- // same signature, the program is ill-formed
- if (Entry.DerivedCtor) {
- if (BaseCtor->getParent() != Entry.BaseCtor->getParent()) {
- // Only diagnose this once per constructor.
- if (Entry.DerivedCtor->isInvalidDecl())
- return;
- Entry.DerivedCtor->setInvalidDecl();
-
- SemaRef.Diag(UsingLoc, diag::err_using_decl_constructor_conflict);
- SemaRef.Diag(BaseCtor->getLocation(),
- diag::note_using_decl_constructor_conflict_current_ctor);
- SemaRef.Diag(Entry.BaseCtor->getLocation(),
- diag::note_using_decl_constructor_conflict_previous_ctor);
- SemaRef.Diag(Entry.DerivedCtor->getLocation(),
- diag::note_using_decl_constructor_conflict_previous_using);
- } else {
- // Core issue (no number): if the same inheriting constructor is
- // produced by multiple base class constructors from the same base
- // class, the inheriting constructor is defined as deleted.
- SemaRef.SetDeclDeleted(Entry.DerivedCtor, UsingLoc);
- }
+ auto *BaseRD = B.getType()->getAsCXXRecordDecl();
+ if (!BaseRD)
+ continue;
- return;
- }
+ auto BaseCtor = ICI.findConstructorForBase(BaseRD, InheritedCtor);
+ if (!BaseCtor.first)
+ continue;
- ASTContext &Context = SemaRef.Context;
- DeclarationName Name = Context.DeclarationNames.getCXXConstructorName(
- Context.getCanonicalType(Context.getRecordType(Derived)));
- DeclarationNameInfo NameInfo(Name, UsingLoc);
+ MarkFunctionReferenced(CurrentLocation, BaseCtor.first);
+ ExprResult Init = new (Context) CXXInheritedCtorInitExpr(
+ InitLoc, B.getType(), BaseCtor.first, VBase, BaseCtor.second);
- TemplateParameterList *TemplateParams = nullptr;
- if (const FunctionTemplateDecl *FTD =
- BaseCtor->getDescribedFunctionTemplate()) {
- TemplateParams = FTD->getTemplateParameters();
- // We're reusing template parameters from a different DeclContext. This
- // is questionable at best, but works out because the template depth in
- // both places is guaranteed to be 0.
- // FIXME: Rebuild the template parameters in the new context, and
- // transform the function type to refer to them.
+ auto *TInfo = Context.getTrivialTypeSourceInfo(B.getType(), InitLoc);
+ Inits.push_back(new (Context) CXXCtorInitializer(
+ Context, TInfo, VBase, InitLoc, Init.get(), InitLoc,
+ SourceLocation()));
}
-
- // Build type source info pointing at the using-declaration. This is
- // required by template instantiation.
- TypeSourceInfo *TInfo =
- Context.getTrivialTypeSourceInfo(DerivedType, UsingLoc);
- FunctionProtoTypeLoc ProtoLoc =
- TInfo->getTypeLoc().IgnoreParens().castAs<FunctionProtoTypeLoc>();
-
- CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create(
- Context, Derived, UsingLoc, NameInfo, DerivedType,
- TInfo, BaseCtor->isExplicit(), /*Inline=*/true,
- /*ImplicitlyDeclared=*/true, /*Constexpr=*/BaseCtor->isConstexpr());
-
- // Build an unevaluated exception specification for this constructor.
- const FunctionProtoType *FPT = DerivedType->castAs<FunctionProtoType>();
- FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
- EPI.ExceptionSpec.Type = EST_Unevaluated;
- EPI.ExceptionSpec.SourceDecl = DerivedCtor;
- DerivedCtor->setType(Context.getFunctionType(FPT->getReturnType(),
- FPT->getParamTypes(), EPI));
-
- // Build the parameter declarations.
- SmallVector<ParmVarDecl *, 16> ParamDecls;
- for (unsigned I = 0, N = FPT->getNumParams(); I != N; ++I) {
- TypeSourceInfo *TInfo =
- Context.getTrivialTypeSourceInfo(FPT->getParamType(I), UsingLoc);
- ParmVarDecl *PD = ParmVarDecl::Create(
- Context, DerivedCtor, UsingLoc, UsingLoc, /*IdentifierInfo=*/nullptr,
- FPT->getParamType(I), TInfo, SC_None, /*DefaultArg=*/nullptr);
- PD->setScopeInfo(0, I);
- PD->setImplicit();
- ParamDecls.push_back(PD);
- ProtoLoc.setParam(I, PD);
- }
-
- // Set up the new constructor.
- DerivedCtor->setAccess(BaseCtor->getAccess());
- DerivedCtor->setParams(ParamDecls);
- DerivedCtor->setInheritedConstructor(BaseCtor);
- if (BaseCtor->isDeleted())
- SemaRef.SetDeclDeleted(DerivedCtor, UsingLoc);
-
- // If this is a constructor template, build the template declaration.
- if (TemplateParams) {
- FunctionTemplateDecl *DerivedTemplate =
- FunctionTemplateDecl::Create(SemaRef.Context, Derived, UsingLoc, Name,
- TemplateParams, DerivedCtor);
- DerivedTemplate->setAccess(BaseCtor->getAccess());
- DerivedCtor->setDescribedFunctionTemplate(DerivedTemplate);
- Derived->addDecl(DerivedTemplate);
- } else {
- Derived->addDecl(DerivedCtor);
- }
-
- Entry.BaseCtor = BaseCtor;
- Entry.DerivedCtor = DerivedCtor;
}
- Sema &SemaRef;
- CXXRecordDecl *Derived;
- typedef llvm::DenseMap<const Type *, InheritingConstructorsForType> MapType;
- MapType Map;
-};
-}
-
-void Sema::DeclareInheritingConstructors(CXXRecordDecl *ClassDecl) {
- // Defer declaring the inheriting constructors until the class is
- // instantiated.
- if (ClassDecl->isDependentContext())
- return;
-
- // Find base classes from which we might inherit constructors.
- SmallVector<CXXRecordDecl*, 4> InheritedBases;
- for (const auto &BaseIt : ClassDecl->bases())
- if (BaseIt.getInheritConstructors())
- InheritedBases.push_back(BaseIt.getType()->getAsCXXRecordDecl());
-
- // Go no further if we're not inheriting any constructors.
- if (InheritedBases.empty())
- return;
-
- // Declare the inherited constructors.
- InheritingConstructorInfo ICI(*this, ClassDecl);
- for (unsigned I = 0, N = InheritedBases.size(); I != N; ++I)
- ICI.inheritAll(InheritedBases[I]);
-}
+ // We now proceed as if for a defaulted default constructor, with the relevant
+ // initializers replaced.
-void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
- CXXConstructorDecl *Constructor) {
- CXXRecordDecl *ClassDecl = Constructor->getParent();
- assert(Constructor->getInheritedConstructor() &&
- !Constructor->doesThisDeclarationHaveABody() &&
- !Constructor->isDeleted());
-
- SynthesizedFunctionScope Scope(*this, Constructor);
- DiagnosticErrorTrap Trap(Diags);
- if (SetCtorInitializers(Constructor, /*AnyErrors=*/false) ||
- Trap.hasErrorOccurred()) {
- Diag(CurrentLocation, diag::note_inhctor_synthesized_at)
- << Context.getTagDeclType(ClassDecl);
+ bool HadError = SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits);
+ if (HadError || Trap.hasErrorOccurred()) {
+ Diag(CurrentLocation, diag::note_inhctor_synthesized_at) << RD;
Constructor->setInvalidDecl();
return;
}
- SourceLocation Loc = Constructor->getLocation();
- Constructor->setBody(new (Context) CompoundStmt(Loc));
+ // The exception specification is needed because we are defining the
+ // function.
+ ResolveExceptionSpec(CurrentLocation,
+ Constructor->getType()->castAs<FunctionProtoType>());
+
+ Constructor->setBody(new (Context) CompoundStmt(InitLoc));
Constructor->markUsed(Context);
MarkVTableUsed(CurrentLocation, ClassDecl);
@@ -9319,8 +9404,9 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation,
if (ASTMutationListener *L = getASTMutationListener()) {
L->CompletedImplicitDefinition(Constructor);
}
-}
+ DiagnoseUninitializedFields(*this, Constructor);
+}
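// Illustrative sketch (not part of this patch): the "as if" expansion above.
// For
//
//   struct B { B(int); };
//   struct D : B { int x = 42; using B::B; };
//
// defining the inherited D::D(int) proceeds like a defaulted default
// constructor whose B initializer is replaced by a forwarding
// CXXInheritedCtorInitExpr, roughly: D::D(int a) : B(a), x(42) {}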
Sema::ImplicitExceptionSpecification
Sema::ComputeDefaultedDtorExceptionSpec(CXXMethodDecl *MD) {
@@ -9397,20 +9483,21 @@ CXXDestructorDecl *Sema::DeclareImplicitDestructor(CXXRecordDecl *ClassDecl) {
FunctionProtoType::ExtProtoInfo EPI = getImplicitMethodEPI(*this, Destructor);
Destructor->setType(Context.getFunctionType(Context.VoidTy, None, EPI));
- AddOverriddenMethods(ClassDecl, Destructor);
-
// We don't need to use SpecialMemberIsTrivial here; triviality for
// destructors is easy to compute.
Destructor->setTrivial(ClassDecl->hasTrivialDestructor());
- if (ShouldDeleteSpecialMember(Destructor, CXXDestructor))
- SetDeclDeleted(Destructor, ClassLoc);
-
// Note that we have declared this destructor.
++ASTContext::NumImplicitDestructorsDeclared;
+ Scope *S = getScopeForContext(ClassDecl);
+ CheckImplicitSpecialMemberDeclaration(S, Destructor);
+
+ if (ShouldDeleteSpecialMember(Destructor, CXXDestructor))
+ SetDeclDeleted(Destructor, ClassLoc);
+
// Introduce this destructor into its scope.
- if (Scope *S = getScopeForContext(ClassDecl))
+ if (S)
PushOnScopeChains(Destructor, S, false);
ClassDecl->addDecl(Destructor);
@@ -9533,6 +9620,10 @@ void Sema::ActOnFinishCXXNonNestedClass(Decl *D) {
if (RD && Context.getTargetInfo().getCXXABI().isMicrosoft())
getDefaultArgExprsForConstructors(*this, RD);
+ referenceDLLExportedClassMethods();
+}
+
+void Sema::referenceDLLExportedClassMethods() {
if (!DelayedDllExportClasses.empty()) {
// Calling ReferenceDllExportedMethods might cause the current function to
// be called again, so use a local copy of DelayedDllExportClasses.
@@ -9969,10 +10060,10 @@ buildSingleCopyAssignRecursively(Sema &S, SourceLocation Loc, QualType T,
SizeType, VK_LValue, OK_Ordinary, Loc);
// Construct the loop that copies all elements of this array.
- return S.ActOnForStmt(Loc, Loc, InitStmt,
- S.MakeFullExpr(Comparison),
- nullptr, S.MakeFullDiscardedValueExpr(Increment),
- Loc, Copy.get());
+ return S.ActOnForStmt(
+ Loc, Loc, InitStmt,
+ S.ActOnCondition(nullptr, Loc, Comparison, Sema::ConditionKind::Boolean),
+ S.MakeFullDiscardedValueExpr(Increment), Loc, Copy.get());
}
static StmtResult
@@ -10107,20 +10198,21 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
nullptr);
CopyAssignment->setParams(FromParam);
- AddOverriddenMethods(ClassDecl, CopyAssignment);
-
CopyAssignment->setTrivial(
ClassDecl->needsOverloadResolutionForCopyAssignment()
? SpecialMemberIsTrivial(CopyAssignment, CXXCopyAssignment)
: ClassDecl->hasTrivialCopyAssignment());
- if (ShouldDeleteSpecialMember(CopyAssignment, CXXCopyAssignment))
- SetDeclDeleted(CopyAssignment, ClassLoc);
-
// Note that we have added this copy-assignment operator.
++ASTContext::NumImplicitCopyAssignmentOperatorsDeclared;
- if (Scope *S = getScopeForContext(ClassDecl))
+ Scope *S = getScopeForContext(ClassDecl);
+ CheckImplicitSpecialMemberDeclaration(S, CopyAssignment);
+
+ if (ShouldDeleteSpecialMember(CopyAssignment, CXXCopyAssignment))
+ SetDeclDeleted(CopyAssignment, ClassLoc);
+
+ if (S)
PushOnScopeChains(CopyAssignment, S, false);
ClassDecl->addDecl(CopyAssignment);
@@ -10498,22 +10590,23 @@ CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) {
nullptr);
MoveAssignment->setParams(FromParam);
- AddOverriddenMethods(ClassDecl, MoveAssignment);
-
MoveAssignment->setTrivial(
ClassDecl->needsOverloadResolutionForMoveAssignment()
? SpecialMemberIsTrivial(MoveAssignment, CXXMoveAssignment)
: ClassDecl->hasTrivialMoveAssignment());
+  // Note that we have added this move-assignment operator.
+ ++ASTContext::NumImplicitMoveAssignmentOperatorsDeclared;
+
+ Scope *S = getScopeForContext(ClassDecl);
+ CheckImplicitSpecialMemberDeclaration(S, MoveAssignment);
+
if (ShouldDeleteSpecialMember(MoveAssignment, CXXMoveAssignment)) {
ClassDecl->setImplicitMoveAssignmentIsDeleted();
SetDeclDeleted(MoveAssignment, ClassLoc);
}
- // Note that we have added this copy-assignment operator.
- ++ASTContext::NumImplicitMoveAssignmentOperatorsDeclared;
-
- if (Scope *S = getScopeForContext(ClassDecl))
+ if (S)
PushOnScopeChains(MoveAssignment, S, false);
ClassDecl->addDecl(MoveAssignment);
@@ -10939,13 +11032,16 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
? SpecialMemberIsTrivial(CopyConstructor, CXXCopyConstructor)
: ClassDecl->hasTrivialCopyConstructor());
- if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor))
- SetDeclDeleted(CopyConstructor, ClassLoc);
-
// Note that we have declared this constructor.
++ASTContext::NumImplicitCopyConstructorsDeclared;
- if (Scope *S = getScopeForContext(ClassDecl))
+ Scope *S = getScopeForContext(ClassDecl);
+ CheckImplicitSpecialMemberDeclaration(S, CopyConstructor);
+
+ if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor))
+ SetDeclDeleted(CopyConstructor, ClassLoc);
+
+ if (S)
PushOnScopeChains(CopyConstructor, S, false);
ClassDecl->addDecl(CopyConstructor);
@@ -11116,15 +11212,18 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
? SpecialMemberIsTrivial(MoveConstructor, CXXMoveConstructor)
: ClassDecl->hasTrivialMoveConstructor());
+ // Note that we have declared this constructor.
+ ++ASTContext::NumImplicitMoveConstructorsDeclared;
+
+ Scope *S = getScopeForContext(ClassDecl);
+ CheckImplicitSpecialMemberDeclaration(S, MoveConstructor);
+
if (ShouldDeleteSpecialMember(MoveConstructor, CXXMoveConstructor)) {
ClassDecl->setImplicitMoveConstructorIsDeleted();
SetDeclDeleted(MoveConstructor, ClassLoc);
}
- // Note that we have declared this constructor.
- ++ASTContext::NumImplicitMoveConstructorsDeclared;
-
- if (Scope *S = getScopeForContext(ClassDecl))
+ if (S)
PushOnScopeChains(MoveConstructor, S, false);
ClassDecl->addDecl(MoveConstructor);
@@ -11329,6 +11428,7 @@ static bool hasOneRealArgument(MultiExprArg Args) {
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
+ NamedDecl *FoundDecl,
CXXConstructorDecl *Constructor,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
@@ -11349,24 +11449,51 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
// with the same cv-unqualified type, the copy/move operation
// can be omitted by constructing the temporary object
// directly into the target of the omitted copy/move
- if (ConstructKind == CXXConstructExpr::CK_Complete &&
+ if (ConstructKind == CXXConstructExpr::CK_Complete && Constructor &&
Constructor->isCopyOrMoveConstructor() && hasOneRealArgument(ExprArgs)) {
Expr *SubExpr = ExprArgs[0];
- Elidable = SubExpr->isTemporaryObject(Context, Constructor->getParent());
+ Elidable = SubExpr->isTemporaryObject(
+ Context, cast<CXXRecordDecl>(FoundDecl->getDeclContext()));
}
- return BuildCXXConstructExpr(ConstructLoc, DeclInitType, Constructor,
+ return BuildCXXConstructExpr(ConstructLoc, DeclInitType,
+ FoundDecl, Constructor,
Elidable, ExprArgs, HadMultipleCandidates,
IsListInitialization,
IsStdInitListInitialization, RequiresZeroInit,
ConstructKind, ParenRange);
}
+ExprResult
+Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
+ NamedDecl *FoundDecl,
+ CXXConstructorDecl *Constructor,
+ bool Elidable,
+ MultiExprArg ExprArgs,
+ bool HadMultipleCandidates,
+ bool IsListInitialization,
+ bool IsStdInitListInitialization,
+ bool RequiresZeroInit,
+ unsigned ConstructKind,
+ SourceRange ParenRange) {
+ if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(FoundDecl)) {
+ Constructor = findInheritingConstructor(ConstructLoc, Constructor, Shadow);
+ if (DiagnoseUseOfDecl(Constructor, ConstructLoc))
+ return ExprError();
+ }
+
+ return BuildCXXConstructExpr(
+ ConstructLoc, DeclInitType, Constructor, Elidable, ExprArgs,
+ HadMultipleCandidates, IsListInitialization, IsStdInitListInitialization,
+ RequiresZeroInit, ConstructKind, ParenRange);
+}
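// Illustrative sketch (not part of this patch): why the overload taking
// FoundDecl matters. When the constructor was found through a 'using' shadow
// declaration, the expression must call the derived class's synthesized
// inheriting constructor rather than the base constructor directly:
//
//   struct B { B(int); };
//   struct D : B { using B::B; };
//   D d(1);  // FoundDecl is the shadow for 'using B::B;'; the built
//            // CXXConstructExpr calls the synthesized D::D(int)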
+
/// BuildCXXConstructExpr - Creates a complete call to a constructor,
/// including handling of its default argument expressions.
ExprResult
Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
- CXXConstructorDecl *Constructor, bool Elidable,
+ CXXConstructorDecl *Constructor,
+ bool Elidable,
MultiExprArg ExprArgs,
bool HadMultipleCandidates,
bool IsListInitialization,
@@ -11374,11 +11501,16 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
bool RequiresZeroInit,
unsigned ConstructKind,
SourceRange ParenRange) {
+ assert(declaresSameEntity(
+ Constructor->getParent(),
+ DeclInitType->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) &&
+ "given constructor for wrong type");
MarkFunctionReferenced(ConstructLoc, Constructor);
+
return CXXConstructExpr::Create(
- Context, DeclInitType, ConstructLoc, Constructor, Elidable, ExprArgs,
- HadMultipleCandidates, IsListInitialization, IsStdInitListInitialization,
- RequiresZeroInit,
+ Context, DeclInitType, ConstructLoc, Constructor, Elidable,
+ ExprArgs, HadMultipleCandidates, IsListInitialization,
+ IsStdInitListInitialization, RequiresZeroInit,
static_cast<CXXConstructExpr::ConstructionKind>(ConstructKind),
ParenRange);
}
@@ -11398,8 +11530,19 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern();
DeclContext::lookup_result Lookup =
ClassPattern->lookup(Field->getDeclName());
- assert(Lookup.size() == 1);
- FieldDecl *Pattern = cast<FieldDecl>(Lookup[0]);
+
+ // Lookup can return at most two results: the pattern for the field, or the
+ // injected class name of the parent record. No other member can have the
+ // same name as the field.
+ assert(!Lookup.empty() && Lookup.size() <= 2 &&
+ "more than two lookup results for field name");
+ FieldDecl *Pattern = dyn_cast<FieldDecl>(Lookup[0]);
+ if (!Pattern) {
+ assert(isa<CXXRecordDecl>(Lookup[0]) &&
+ "cannot have other non-field member with same name");
+ Pattern = cast<FieldDecl>(Lookup[1]);
+ }
+
if (InstantiateInClassInitializer(Loc, Field, Pattern,
getTemplateInstantiationArgs(Field)))
return ExprError();
@@ -11660,7 +11803,7 @@ bool Sema::CheckOverloadedOperatorDeclaration(FunctionDecl *FnDecl) {
diag::err_operator_overload_static) << FnDecl->getDeclName();
} else {
bool ClassOrEnumParam = false;
- for (auto Param : FnDecl->params()) {
+ for (auto Param : FnDecl->parameters()) {
QualType ParamType = Param->getType().getNonReferenceType();
if (ParamType->isDependentType() || ParamType->isRecordType() ||
ParamType->isEnumeralType()) {
@@ -11682,7 +11825,7 @@ bool Sema::CheckOverloadedOperatorDeclaration(FunctionDecl *FnDecl) {
// Only the function-call operator allows default arguments
// (C++ [over.call]p1).
if (Op != OO_Call) {
- for (auto Param : FnDecl->params()) {
+ for (auto Param : FnDecl->parameters()) {
if (Param->hasDefaultArg())
return Diag(Param->getLocation(),
diag::err_operator_overload_default_arg)
@@ -11765,6 +11908,49 @@ bool Sema::CheckOverloadedOperatorDeclaration(FunctionDecl *FnDecl) {
return false;
}
+static bool
+checkLiteralOperatorTemplateParameterList(Sema &SemaRef,
+ FunctionTemplateDecl *TpDecl) {
+ TemplateParameterList *TemplateParams = TpDecl->getTemplateParameters();
+
+ // Must have one or two template parameters.
+ if (TemplateParams->size() == 1) {
+ NonTypeTemplateParmDecl *PmDecl =
+ dyn_cast<NonTypeTemplateParmDecl>(TemplateParams->getParam(0));
+
+ // The template parameter must be a char parameter pack.
+ if (PmDecl && PmDecl->isTemplateParameterPack() &&
+ SemaRef.Context.hasSameType(PmDecl->getType(), SemaRef.Context.CharTy))
+ return false;
+
+ } else if (TemplateParams->size() == 2) {
+ TemplateTypeParmDecl *PmType =
+ dyn_cast<TemplateTypeParmDecl>(TemplateParams->getParam(0));
+ NonTypeTemplateParmDecl *PmArgs =
+ dyn_cast<NonTypeTemplateParmDecl>(TemplateParams->getParam(1));
+
+ // The second template parameter must be a parameter pack with the
+ // first template parameter as its type.
+ if (PmType && PmArgs && !PmType->isTemplateParameterPack() &&
+ PmArgs->isTemplateParameterPack()) {
+ const TemplateTypeParmType *TArgs =
+ PmArgs->getType()->getAs<TemplateTypeParmType>();
+ if (TArgs && TArgs->getDepth() == PmType->getDepth() &&
+ TArgs->getIndex() == PmType->getIndex()) {
+ if (SemaRef.ActiveTemplateInstantiations.empty())
+ SemaRef.Diag(TpDecl->getLocation(),
+ diag::ext_string_literal_operator_template);
+ return false;
+ }
+ }
+ }
+
+ SemaRef.Diag(TpDecl->getTemplateParameters()->getSourceRange().getBegin(),
+ diag::err_literal_operator_template)
+ << TpDecl->getTemplateParameters()->getSourceRange();
+ return true;
+}
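The two template-parameter-list shapes the new helper accepts, sketched with illustrative names; the second form is what triggers the ext_string_literal_operator_template extension diagnostic:

    template <char...> int operator"" _a();        // OK: char parameter pack
    template <class T, T...> int operator"" _b();  // accepted as an extension
    // template <int...> int operator"" _c();      // error: wrong pack type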
+
/// CheckLiteralOperatorDeclaration - Check whether the declaration
/// of this literal operator function is well-formed. If so, returns
/// false; otherwise, emits appropriate diagnostics and returns true.
@@ -11780,10 +11966,9 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
return true;
}
- bool Valid = false;
-
// This might be the definition of a literal operator template.
FunctionTemplateDecl *TpDecl = FnDecl->getDescribedFunctionTemplate();
+
// This might be a specialization of a literal operator template.
if (!TpDecl)
TpDecl = FnDecl->getPrimaryTemplate();
@@ -11792,104 +11977,120 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) {
// template <class T, T...> type operator "" name() are the only valid
// template signatures, and the only valid signatures with no parameters.
if (TpDecl) {
- if (FnDecl->param_size() == 0) {
- // Must have one or two template parameters
- TemplateParameterList *Params = TpDecl->getTemplateParameters();
- if (Params->size() == 1) {
- NonTypeTemplateParmDecl *PmDecl =
- dyn_cast<NonTypeTemplateParmDecl>(Params->getParam(0));
-
- // The template parameter must be a char parameter pack.
- if (PmDecl && PmDecl->isTemplateParameterPack() &&
- Context.hasSameType(PmDecl->getType(), Context.CharTy))
- Valid = true;
- } else if (Params->size() == 2) {
- TemplateTypeParmDecl *PmType =
- dyn_cast<TemplateTypeParmDecl>(Params->getParam(0));
- NonTypeTemplateParmDecl *PmArgs =
- dyn_cast<NonTypeTemplateParmDecl>(Params->getParam(1));
-
- // The second template parameter must be a parameter pack with the
- // first template parameter as its type.
- if (PmType && PmArgs &&
- !PmType->isTemplateParameterPack() &&
- PmArgs->isTemplateParameterPack()) {
- const TemplateTypeParmType *TArgs =
- PmArgs->getType()->getAs<TemplateTypeParmType>();
- if (TArgs && TArgs->getDepth() == PmType->getDepth() &&
- TArgs->getIndex() == PmType->getIndex()) {
- Valid = true;
- if (ActiveTemplateInstantiations.empty())
- Diag(FnDecl->getLocation(),
- diag::ext_string_literal_operator_template);
- }
- }
+ if (FnDecl->param_size() != 0) {
+ Diag(FnDecl->getLocation(),
+ diag::err_literal_operator_template_with_params);
+ return true;
+ }
+
+ if (checkLiteralOperatorTemplateParameterList(*this, TpDecl))
+ return true;
+
+ } else if (FnDecl->param_size() == 1) {
+ const ParmVarDecl *Param = FnDecl->getParamDecl(0);
+
+ QualType ParamType = Param->getType().getUnqualifiedType();
+
+    // Only unsigned long long int, long double, any character type, and
+    // const char * are allowed as the parameter of a one-parameter literal
+    // operator.
+ if (ParamType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
+ ParamType->isSpecificBuiltinType(BuiltinType::LongDouble) ||
+ Context.hasSameType(ParamType, Context.CharTy) ||
+ Context.hasSameType(ParamType, Context.WideCharTy) ||
+ Context.hasSameType(ParamType, Context.Char16Ty) ||
+ Context.hasSameType(ParamType, Context.Char32Ty)) {
+ } else if (const PointerType *Ptr = ParamType->getAs<PointerType>()) {
+ QualType InnerType = Ptr->getPointeeType();
+
+ // Pointer parameter must be a const char *.
+ if (!(Context.hasSameType(InnerType.getUnqualifiedType(),
+ Context.CharTy) &&
+ InnerType.isConstQualified() && !InnerType.isVolatileQualified())) {
+ Diag(Param->getSourceRange().getBegin(),
+ diag::err_literal_operator_param)
+ << ParamType << "'const char *'" << Param->getSourceRange();
+ return true;
}
+
+ } else if (ParamType->isRealFloatingType()) {
+ Diag(Param->getSourceRange().getBegin(), diag::err_literal_operator_param)
+ << ParamType << Context.LongDoubleTy << Param->getSourceRange();
+ return true;
+
+ } else if (ParamType->isIntegerType()) {
+ Diag(Param->getSourceRange().getBegin(), diag::err_literal_operator_param)
+ << ParamType << Context.UnsignedLongLongTy << Param->getSourceRange();
+ return true;
+
+ } else {
+ Diag(Param->getSourceRange().getBegin(),
+ diag::err_literal_operator_invalid_param)
+ << ParamType << Param->getSourceRange();
+ return true;
}
- } else if (FnDecl->param_size()) {
- // Check the first parameter
+
+ } else if (FnDecl->param_size() == 2) {
FunctionDecl::param_iterator Param = FnDecl->param_begin();
- QualType T = (*Param)->getType().getUnqualifiedType();
-
- // unsigned long long int, long double, and any character type are allowed
- // as the only parameters.
- if (Context.hasSameType(T, Context.UnsignedLongLongTy) ||
- Context.hasSameType(T, Context.LongDoubleTy) ||
- Context.hasSameType(T, Context.CharTy) ||
- Context.hasSameType(T, Context.WideCharTy) ||
- Context.hasSameType(T, Context.Char16Ty) ||
- Context.hasSameType(T, Context.Char32Ty)) {
- if (++Param == FnDecl->param_end())
- Valid = true;
- goto FinishedParams;
- }
-
- // Otherwise it must be a pointer to const; let's strip those qualifiers.
- const PointerType *PT = T->getAs<PointerType>();
- if (!PT)
- goto FinishedParams;
- T = PT->getPointeeType();
- if (!T.isConstQualified() || T.isVolatileQualified())
- goto FinishedParams;
- T = T.getUnqualifiedType();
-
- // Move on to the second parameter;
- ++Param;
+ // First, verify that the first parameter is correct.
- // If there is no second parameter, the first must be a const char *
- if (Param == FnDecl->param_end()) {
- if (Context.hasSameType(T, Context.CharTy))
- Valid = true;
- goto FinishedParams;
+ QualType FirstParamType = (*Param)->getType().getUnqualifiedType();
+
+    // A two-parameter function must have a pointer to const as its
+    // first parameter; let's strip those qualifiers.
+ const PointerType *PT = FirstParamType->getAs<PointerType>();
+
+ if (!PT) {
+ Diag((*Param)->getSourceRange().getBegin(),
+ diag::err_literal_operator_param)
+ << FirstParamType << "'const char *'" << (*Param)->getSourceRange();
+ return true;
+ }
+
+ QualType PointeeType = PT->getPointeeType();
+    // The pointee of the first parameter must be const.
+ if (!PointeeType.isConstQualified() || PointeeType.isVolatileQualified()) {
+ Diag((*Param)->getSourceRange().getBegin(),
+ diag::err_literal_operator_param)
+ << FirstParamType << "'const char *'" << (*Param)->getSourceRange();
+ return true;
}
- // const char *, const wchar_t*, const char16_t*, and const char32_t*
+ QualType InnerType = PointeeType.getUnqualifiedType();
+ // Only const char *, const wchar_t*, const char16_t*, and const char32_t*
// are allowed as the first parameter to a two-parameter function
- if (!(Context.hasSameType(T, Context.CharTy) ||
- Context.hasSameType(T, Context.WideCharTy) ||
- Context.hasSameType(T, Context.Char16Ty) ||
- Context.hasSameType(T, Context.Char32Ty)))
- goto FinishedParams;
-
- // The second and final parameter must be an std::size_t
- T = (*Param)->getType().getUnqualifiedType();
- if (Context.hasSameType(T, Context.getSizeType()) &&
- ++Param == FnDecl->param_end())
- Valid = true;
- }
-
- // FIXME: This diagnostic is absolutely terrible.
-FinishedParams:
- if (!Valid) {
- Diag(FnDecl->getLocation(), diag::err_literal_operator_params)
- << FnDecl->getDeclName();
+ if (!(Context.hasSameType(InnerType, Context.CharTy) ||
+ Context.hasSameType(InnerType, Context.WideCharTy) ||
+ Context.hasSameType(InnerType, Context.Char16Ty) ||
+ Context.hasSameType(InnerType, Context.Char32Ty))) {
+ Diag((*Param)->getSourceRange().getBegin(),
+ diag::err_literal_operator_param)
+ << FirstParamType << "'const char *'" << (*Param)->getSourceRange();
+ return true;
+ }
+
+ // Move on to the second and final parameter.
+ ++Param;
+
+ // The second parameter must be a std::size_t.
+ QualType SecondParamType = (*Param)->getType().getUnqualifiedType();
+ if (!Context.hasSameType(SecondParamType, Context.getSizeType())) {
+ Diag((*Param)->getSourceRange().getBegin(),
+ diag::err_literal_operator_param)
+ << SecondParamType << Context.getSizeType()
+ << (*Param)->getSourceRange();
+ return true;
+ }
+ } else {
+ Diag(FnDecl->getLocation(), diag::err_literal_operator_bad_param_count);
return true;
}
+ // Parameters are good.
+
// A parameter-declaration-clause containing a default argument is not
// equivalent to any of the permitted forms.
- for (auto Param : FnDecl->params()) {
+ for (auto Param : FnDecl->parameters()) {
if (Param->hasDefaultArg()) {
Diag(Param->getDefaultArgRange().getBegin(),
diag::err_literal_operator_default_argument)
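Summarizing the signatures the rewritten checks accept, as a hedged sketch (only the const char * / std::size_t pair is shown for the string form; the wchar_t, char16_t, and char32_t variants are analogous):

    #include <cstddef>
    int operator"" _i(unsigned long long);          // integer literals
    int operator"" _f(long double);                 // floating-point literals
    int operator"" _c(char);                        // character literals
    int operator"" _r(const char *);                // raw literals
    int operator"" _s(const char *, std::size_t);   // string literals
    // int operator"" _x(int);  // error: parameter must be unsigned long long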
@@ -12003,6 +12204,11 @@ VarDecl *Sema::BuildExceptionDeclaration(Scope *S,
Invalid = true;
}
+ if (ExDeclType->isVariablyModifiedType()) {
+ Diag(Loc, diag::err_catch_variably_modified) << ExDeclType;
+ Invalid = true;
+ }
+
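A sketch of what the new err_catch_variably_modified check rejects, assuming variably modified types are available as a GNU extension in C++:

    void f(int n) {
      try {
      } catch (int (*)[n]) {  // error: cannot catch a variably modified type
      }
    }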
QualType BaseType = ExDeclType;
int Mode = 0; // 0 for direct type, 1 for pointer, 2 for reference
unsigned DK = diag::err_catch_incomplete;
@@ -12468,10 +12674,9 @@ Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS,
// friend a member of an arbitrary specialization of your template).
Decl *D;
- if (unsigned NumTempParamLists = TempParams.size())
+ if (!TempParams.empty())
D = FriendTemplateDecl::Create(Context, CurContext, Loc,
- NumTempParamLists,
- TempParams.data(),
+ TempParams,
TSI,
DS.getFriendSpecLoc());
else
@@ -12894,44 +13099,20 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) {
// the record is complete.
const FunctionDecl *Primary = MD;
if (const FunctionDecl *Pattern = MD->getTemplateInstantiationPattern())
- // Find the uninstantiated declaration that actually had the '= default'
- // on it.
- Pattern->isDefined(Primary);
+ // Ask the template instantiation pattern that actually had the
+ // '= default' on it.
+ Primary = Pattern;
// If the method was defaulted on its first declaration, we will have
// already performed the checking in CheckCompletedCXXClass. Such a
// declaration doesn't trigger an implicit definition.
- if (Primary == Primary->getCanonicalDecl())
+ if (Primary->getCanonicalDecl()->isDefaulted())
return;
CheckExplicitlyDefaultedSpecialMember(MD);
- if (MD->isInvalidDecl())
- return;
-
- switch (Member) {
- case CXXDefaultConstructor:
- DefineImplicitDefaultConstructor(DefaultLoc,
- cast<CXXConstructorDecl>(MD));
- break;
- case CXXCopyConstructor:
- DefineImplicitCopyConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
- break;
- case CXXCopyAssignment:
- DefineImplicitCopyAssignment(DefaultLoc, MD);
- break;
- case CXXDestructor:
- DefineImplicitDestructor(DefaultLoc, cast<CXXDestructorDecl>(MD));
- break;
- case CXXMoveConstructor:
- DefineImplicitMoveConstructor(DefaultLoc, cast<CXXConstructorDecl>(MD));
- break;
- case CXXMoveAssignment:
- DefineImplicitMoveAssignment(DefaultLoc, MD);
- break;
- case CXXInvalid:
- llvm_unreachable("Invalid special member.");
- }
+ if (!MD->isInvalidDecl())
+ DefineImplicitSpecialMember(*this, MD, DefaultLoc);
} else {
Diag(DefaultLoc, diag::err_default_special_members);
}
@@ -13020,19 +13201,20 @@ bool Sema::CheckOverridingFunctionReturnType(const CXXMethodDecl *New,
return true;
}
- // C++ [class.virtual]p6:
- // If the return type of D::f differs from the return type of B::f, the
- // class type in the return type of D::f shall be complete at the point of
- // declaration of D::f or shall be the class type D.
- if (const RecordType *RT = NewClassTy->getAs<RecordType>()) {
- if (!RT->isBeingDefined() &&
- RequireCompleteType(New->getLocation(), NewClassTy,
- diag::err_covariant_return_incomplete,
- New->getDeclName()))
- return true;
- }
-
if (!Context.hasSameUnqualifiedType(NewClassTy, OldClassTy)) {
+ // C++14 [class.virtual]p8:
+ // If the class type in the covariant return type of D::f differs from
+ // that of B::f, the class type in the return type of D::f shall be
+ // complete at the point of declaration of D::f or shall be the class
+ // type D.
+ if (const RecordType *RT = NewClassTy->getAs<RecordType>()) {
+ if (!RT->isBeingDefined() &&
+ RequireCompleteType(New->getLocation(), NewClassTy,
+ diag::err_covariant_return_incomplete,
+ New->getDeclName()))
+ return true;
+ }
+
// Check if the new class derives from the old class.
if (!IsDerivedFrom(New->getLocation(), NewClassTy, OldClassTy)) {
Diag(New->getLocation(), diag::err_covariant_return_not_derived)
@@ -13069,7 +13251,7 @@ bool Sema::CheckOverridingFunctionReturnType(const CXXMethodDecl *New,
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
- };
+ }
// The new class type must have the same or less qualifiers as the old type.
@@ -13081,7 +13263,7 @@ bool Sema::CheckOverridingFunctionReturnType(const CXXMethodDecl *New,
Diag(Old->getLocation(), diag::note_overridden_virtual_function)
<< Old->getReturnTypeSourceRange();
return true;
- };
+ }
return false;
}
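A hedged illustration of the narrowed C++14 [class.virtual]p8 check (types are made up): the completeness requirement now applies only when the return class types actually differ:

    struct B { virtual B *f(); };
    struct D : B {
      D *f() override;        // OK: the class type may be D itself, even
    };                        // though D is still being defined here
    struct E;                 // declared but never defined
    struct C : B {
      // E *f() override;     // error: covariant return type is incomplete
    };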
@@ -13240,14 +13422,19 @@ void Sema::MarkVTableUsed(SourceLocation Loc, CXXRecordDecl *Class,
// checks (i.e. operator delete() lookup) when the vtable is marked used, as
// the deleting destructor is emitted with the vtable, not with the
// destructor definition as in the Itanium ABI.
- // If it has a definition, we do the check at that point instead.
- if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
- Class->hasUserDeclaredDestructor() &&
- !Class->getDestructor()->isDefined() &&
- !Class->getDestructor()->isDeleted()) {
+ if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
CXXDestructorDecl *DD = Class->getDestructor();
- ContextRAII SavedContext(*this, DD);
- CheckDestructor(DD);
+ if (DD && DD->isVirtual() && !DD->isDeleted()) {
+ if (Class->hasUserDeclaredDestructor() && !DD->isDefined()) {
+ // If this is an out-of-line declaration, marking it referenced will
+ // not do anything. Manually call CheckDestructor to look up operator
+ // delete().
+ ContextRAII SavedContext(*this, DD);
+ CheckDestructor(DD);
+ } else {
+ MarkFunctionReferenced(Loc, Class->getDestructor());
+ }
+ }
}
}
@@ -13785,6 +13972,9 @@ MSPropertyDecl *Sema::HandleMSProperty(Scope *S, RecordDecl *Record,
DiagnoseFunctionSpecifiers(D.getDeclSpec());
+ if (D.getDeclSpec().isInlineSpecified())
+ Diag(D.getDeclSpec().getInlineSpecLoc(), diag::err_inline_non_function)
+ << getLangOpts().CPlusPlus1z;
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
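A sketch of the new err_inline_non_function path, assuming -fms-extensions for the property syntax; the streamed boolean selects the C++17 wording, since inline variables exist there:

    struct S {
      __declspec(property(get = GetX)) inline int x;  // error: 'inline' can
      int GetX();                                     // only appear on
    };                                                // functions (and, in
                                                      // C++17, variables)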
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp
index a2f41a7cc30a..738de77cecb7 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclObjC.cpp
@@ -21,7 +21,6 @@
#include "clang/AST/ExprObjC.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Sema/DeclSpec.h"
-#include "clang/Sema/ExternalSemaSource.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
@@ -320,11 +319,11 @@ void Sema::ActOnStartOfObjCMethodDef(Scope *FnBodyScope, Decl *D) {
PushOnScopeChains(MDecl->getCmdDecl(), FnBodyScope);
// The ObjC parser requires parameter names so there's no need to check.
- CheckParmsForFunctionDef(MDecl->param_begin(), MDecl->param_end(),
+ CheckParmsForFunctionDef(MDecl->parameters(),
/*CheckParameterNames=*/false);
// Introduce all of the other parameters into this scope.
- for (auto *Param : MDecl->params()) {
+ for (auto *Param : MDecl->parameters()) {
if (!Param->isInvalidDecl() &&
getLangOpts().ObjCAutoRefCount &&
!HasExplicitOwnershipAttr(*this, Param))
@@ -1303,6 +1302,16 @@ class ObjCTypeArgOrProtocolValidatorCCC : public CorrectionCandidateCallback {
};
} // end anonymous namespace
+void Sema::DiagnoseTypeArgsAndProtocols(IdentifierInfo *ProtocolId,
+ SourceLocation ProtocolLoc,
+ IdentifierInfo *TypeArgId,
+ SourceLocation TypeArgLoc,
+ bool SelectProtocolFirst) {
+ Diag(TypeArgLoc, diag::err_objc_type_args_and_protocols)
+ << SelectProtocolFirst << TypeArgId << ProtocolId
+ << SourceRange(ProtocolLoc);
+}
+
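A hedged Objective-C sketch of the conflict this helper diagnoses (declarations are illustrative): angle brackets that mix a type argument with a protocol name:

    @protocol Rendering;
    @class NSString;
    @interface Box<ObjectType> : NSObject    // hypothetical generic class
    @end
    // Box<NSString, Rendering> *b;  // error: type arguments and protocol
    //                               // qualifiers cannot be mixed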
void Sema::actOnObjCTypeArgsOrProtocolQualifiers(
Scope *S,
ParsedType baseType,
@@ -1493,6 +1502,7 @@ void Sema::actOnObjCTypeArgsOrProtocolQualifiers(
SourceLocation(),
SourceLocation(),
SourceLocation(),
+ SourceLocation(),
SourceLocation()),
parsedAttrs,
starLoc);
@@ -1570,11 +1580,9 @@ void Sema::actOnObjCTypeArgsOrProtocolQualifiers(
// We have a conflict: some names refer to protocols and others
// refer to types.
- Diag(identifierLocs[i], diag::err_objc_type_args_and_protocols)
- << (protocols[i] != nullptr)
- << identifiers[i]
- << identifiers[0]
- << SourceRange(identifierLocs[0]);
+ DiagnoseTypeArgsAndProtocols(identifiers[0], identifierLocs[0],
+ identifiers[i], identifierLocs[i],
+ protocols[i] != nullptr);
protocols.clear();
typeArgs.clear();
@@ -1831,6 +1839,13 @@ Decl *Sema::ActOnStartCategoryImplementation(
if (IDecl)
DiagnoseUseOfDecl(IDecl, ClassLoc);
+ // If the interface has the objc_runtime_visible attribute, we
+ // cannot implement a category for it.
+ if (IDecl && IDecl->hasAttr<ObjCRuntimeVisibleAttr>()) {
+ Diag(ClassLoc, diag::err_objc_runtime_visible_category)
+ << IDecl->getDeclName();
+ }
+
/// Check that CatName, category name, is not used in another implementation.
if (CatIDecl) {
if (CatIDecl->getImplementation()) {
@@ -1968,6 +1983,16 @@ Decl *Sema::ActOnStartClassImplementation(
dyn_cast<NamedDecl>(IDecl),
IMPDecl->getLocation(), 1);
}
+
+ // If the superclass has the objc_runtime_visible attribute, we
+ // cannot implement a subclass of it.
+ if (IDecl->getSuperClass() &&
+ IDecl->getSuperClass()->hasAttr<ObjCRuntimeVisibleAttr>()) {
+ Diag(ClassLoc, diag::err_objc_runtime_visible_subclass)
+ << IDecl->getDeclName()
+ << IDecl->getSuperClass()->getDeclName();
+ }
+
return ActOnObjCContainerStartDefinition(IMPDecl);
}
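A combined sketch of the two new objc_runtime_visible restrictions (class names invented): neither categories on, nor subclasses of, a runtime-visible class can be implemented:

    __attribute__((objc_runtime_visible))
    @interface RemoteThing : NSObject
    @end

    @implementation RemoteThing (Extras)  // error: cannot implement a category
    @end                                  // for a runtime-visible class

    @interface LocalThing : RemoteThing
    @end
    @implementation LocalThing            // error: cannot implement a subclass
    @end                                  // of a runtime-visible class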
@@ -2734,7 +2759,8 @@ void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap,
for (auto *I : CDecl->class_methods()) {
if (!ClsMapSeen.insert(I->getSelector()).second)
continue;
- if (!ClsMap.count(I->getSelector())) {
+ if (!I->isPropertyAccessor() &&
+ !ClsMap.count(I->getSelector())) {
if (ImmediateClass)
WarnUndefinedMethod(*this, IMPDecl->getLocation(), I, IncompleteImpl,
diag::warn_undef_method_impl);
@@ -2743,12 +2769,14 @@ void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap,
IMPDecl->getClassMethod(I->getSelector());
assert(CDecl->getClassMethod(I->getSelector()) &&
"Expected to find the method through lookup as well");
- if (!WarnCategoryMethodImpl)
- WarnConflictingTypedMethods(ImpMethodDecl, I,
- isa<ObjCProtocolDecl>(CDecl));
- else
- WarnExactTypedMethods(ImpMethodDecl, I,
- isa<ObjCProtocolDecl>(CDecl));
+ // ImpMethodDecl may be null as in a @dynamic property.
+ if (ImpMethodDecl) {
+ if (!WarnCategoryMethodImpl)
+ WarnConflictingTypedMethods(ImpMethodDecl, I,
+ isa<ObjCProtocolDecl>(CDecl));
+ else if (!I->isPropertyAccessor())
+ WarnExactTypedMethods(ImpMethodDecl, I, isa<ObjCProtocolDecl>(CDecl));
+ }
}
}
@@ -3147,6 +3175,26 @@ bool Sema::MatchTwoMethodDeclarations(const ObjCMethodDecl *left,
return true;
}
+static bool isMethodContextSameForKindofLookup(ObjCMethodDecl *Method,
+ ObjCMethodDecl *MethodInList) {
+ auto *MethodProtocol = dyn_cast<ObjCProtocolDecl>(Method->getDeclContext());
+ auto *MethodInListProtocol =
+ dyn_cast<ObjCProtocolDecl>(MethodInList->getDeclContext());
+  // If this method belongs to a protocol but the method in the list does
+  // not, or vice versa, we say the contexts are not the same.
+ if ((MethodProtocol && !MethodInListProtocol) ||
+ (!MethodProtocol && MethodInListProtocol))
+ return false;
+
+ if (MethodProtocol && MethodInListProtocol)
+ return true;
+
+ ObjCInterfaceDecl *MethodInterface = Method->getClassInterface();
+ ObjCInterfaceDecl *MethodInListInterface =
+ MethodInList->getClassInterface();
+ return MethodInterface == MethodInListInterface;
+}
+
void Sema::addMethodToGlobalList(ObjCMethodList *List,
ObjCMethodDecl *Method) {
// Record at the head of the list whether there were 0, 1, or >= 2 methods
@@ -3166,17 +3214,42 @@ void Sema::addMethodToGlobalList(ObjCMethodList *List,
// We've seen a method with this name, see if we have already seen this type
// signature.
ObjCMethodList *Previous = List;
+ ObjCMethodList *ListWithSameDeclaration = nullptr;
for (; List; Previous = List, List = List->getNext()) {
// If we are building a module, keep all of the methods.
- if (getLangOpts().Modules && !getLangOpts().CurrentModule.empty())
+ if (getLangOpts().CompilingModule)
continue;
- if (!MatchTwoMethodDeclarations(Method, List->getMethod())) {
+ bool SameDeclaration = MatchTwoMethodDeclarations(Method,
+ List->getMethod());
+    // Looking up a method with a type bound requires that the correct
+    // context exist, so we need to insert a method into the list if its
+    // context differs. If the method's declaration matches an entry already
+    // in the list:
+    // a> the method belongs to a different context: we need to insert it, so
+    //    that we can prioritize by availability among the methods with the
+    //    same declaration when emitting availability diagnostics.
+    // b> the method belongs to the same context: there is no need to insert
+    //    a new entry.
+    // If the method's declaration does not match the list, we insert it at
+    // the end.
+ if (!SameDeclaration ||
+ !isMethodContextSameForKindofLookup(Method, List->getMethod())) {
// Even if two method types do not match, we would like to say
// there is more than one declaration so unavailability/deprecated
// warning is not too noisy.
if (!Method->isDefined())
List->setHasMoreThanOneDecl(true);
+
+ // For methods with the same declaration, the one that is deprecated
+ // should be put in the front for better diagnostics.
+ if (Method->isDeprecated() && SameDeclaration &&
+ !ListWithSameDeclaration && !List->getMethod()->isDeprecated())
+ ListWithSameDeclaration = List;
+
+ if (Method->isUnavailable() && SameDeclaration &&
+ !ListWithSameDeclaration &&
+ List->getMethod()->getAvailability() < AR_Deprecated)
+ ListWithSameDeclaration = List;
continue;
}
@@ -3212,6 +3285,16 @@ void Sema::addMethodToGlobalList(ObjCMethodList *List,
// We have a new signature for an existing method - add it.
// This is extremely rare. Only 1% of Cocoa selectors are "overloaded".
ObjCMethodList *Mem = BumpAlloc.Allocate<ObjCMethodList>();
+
+ // We insert it right before ListWithSameDeclaration.
+ if (ListWithSameDeclaration) {
+ auto *List = new (Mem) ObjCMethodList(*ListWithSameDeclaration);
+ // FIXME: should we clear the other bits in ListWithSameDeclaration?
+ ListWithSameDeclaration->setMethod(Method);
+ ListWithSameDeclaration->setNext(List);
+ return;
+ }
+
Previous->setNext(new (Mem) ObjCMethodList(Method));
}
@@ -3222,6 +3305,12 @@ void Sema::ReadMethodPool(Selector Sel) {
ExternalSource->ReadMethodPool(Sel);
}
+void Sema::updateOutOfDateSelector(Selector Sel) {
+ if (!ExternalSource)
+ return;
+ ExternalSource->updateOutOfDateSelector(Sel);
+}
+
void Sema::AddMethodToGlobalPool(ObjCMethodDecl *Method, bool impl,
bool instance) {
// Ignore methods of invalid containers.
@@ -3261,25 +3350,95 @@ static bool isAcceptableMethodMismatch(ObjCMethodDecl *chosen,
return (chosen->getReturnType()->isIntegerType());
}
+/// Return true if the given method is within the type bound.
+static bool FilterMethodsByTypeBound(ObjCMethodDecl *Method,
+ const ObjCObjectType *TypeBound) {
+ if (!TypeBound)
+ return true;
+
+ if (TypeBound->isObjCId())
+ // FIXME: should we handle the case of bounding to id<A, B> differently?
+ return true;
+
+ auto *BoundInterface = TypeBound->getInterface();
+ assert(BoundInterface && "unexpected object type!");
+
+ // Check if the Method belongs to a protocol. We should allow any method
+ // defined in any protocol, because any subclass could adopt the protocol.
+ auto *MethodProtocol = dyn_cast<ObjCProtocolDecl>(Method->getDeclContext());
+ if (MethodProtocol) {
+ return true;
+ }
+
+ // If the Method belongs to a class, check if it belongs to the class
+ // hierarchy of the class bound.
+ if (ObjCInterfaceDecl *MethodInterface = Method->getClassInterface()) {
+ // We allow methods declared within classes that are part of the hierarchy
+ // of the class bound (superclass of, subclass of, or the same as the class
+ // bound).
+ return MethodInterface == BoundInterface ||
+ MethodInterface->isSuperClassOf(BoundInterface) ||
+ BoundInterface->isSuperClassOf(MethodInterface);
+ }
+  llvm_unreachable("unknown method context");
+}
+
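Roughly, the filtering this enables for __kindof receivers, with invented names: protocol methods always survive, while class methods survive only within the bound's class hierarchy:

    __kindof NSView *v = lookupView();  // 'lookupView' is hypothetical
    [v invalidate];  // candidate 'invalidate' declarations from protocols, or
                     // from NSView's superclasses/subclasses, are kept; those
                     // on unrelated classes (e.g. NSString) are filtered out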
+/// We first select the kind of method to look for (instance or factory),
+/// then collect all methods of that kind.
bool Sema::CollectMultipleMethodsInGlobalPool(
- Selector Sel, SmallVectorImpl<ObjCMethodDecl *> &Methods, bool instance) {
+ Selector Sel, SmallVectorImpl<ObjCMethodDecl *> &Methods,
+ bool InstanceFirst, bool CheckTheOther,
+ const ObjCObjectType *TypeBound) {
if (ExternalSource)
ReadMethodPool(Sel);
GlobalMethodPool::iterator Pos = MethodPool.find(Sel);
if (Pos == MethodPool.end())
return false;
+
// Gather the non-hidden methods.
- ObjCMethodList &MethList = instance ? Pos->second.first : Pos->second.second;
+ ObjCMethodList &MethList = InstanceFirst ? Pos->second.first :
+ Pos->second.second;
for (ObjCMethodList *M = &MethList; M; M = M->getNext())
- if (M->getMethod() && !M->getMethod()->isHidden())
- Methods.push_back(M->getMethod());
+ if (M->getMethod() && !M->getMethod()->isHidden()) {
+ if (FilterMethodsByTypeBound(M->getMethod(), TypeBound))
+ Methods.push_back(M->getMethod());
+ }
+
+  // Return early if we found any methods of the desired kind.
+ if (!Methods.empty())
+ return Methods.size() > 1;
+
+ if (!CheckTheOther)
+ return false;
+
+ // Gather the other kind.
+ ObjCMethodList &MethList2 = InstanceFirst ? Pos->second.second :
+ Pos->second.first;
+ for (ObjCMethodList *M = &MethList2; M; M = M->getNext())
+ if (M->getMethod() && !M->getMethod()->isHidden()) {
+ if (FilterMethodsByTypeBound(M->getMethod(), TypeBound))
+ Methods.push_back(M->getMethod());
+ }
+
return Methods.size() > 1;
}
-bool Sema::AreMultipleMethodsInGlobalPool(Selector Sel, ObjCMethodDecl *BestMethod,
- SourceRange R,
- bool receiverIdOrClass) {
+bool Sema::AreMultipleMethodsInGlobalPool(
+ Selector Sel, ObjCMethodDecl *BestMethod, SourceRange R,
+ bool receiverIdOrClass, SmallVectorImpl<ObjCMethodDecl *> &Methods) {
+ // Diagnose finding more than one method in global pool.
+ SmallVector<ObjCMethodDecl *, 4> FilteredMethods;
+ FilteredMethods.push_back(BestMethod);
+
+ for (auto *M : Methods)
+ if (M != BestMethod && !M->hasAttr<UnavailableAttr>())
+ FilteredMethods.push_back(M);
+
+ if (FilteredMethods.size() > 1)
+ DiagnoseMultipleMethodInGlobalPool(FilteredMethods, Sel, R,
+ receiverIdOrClass);
+
GlobalMethodPool::iterator Pos = MethodPool.find(Sel);
// Test for no method in the pool which should not trigger any warning by
// caller.
@@ -3287,17 +3446,6 @@ bool Sema::AreMultipleMethodsInGlobalPool(Selector Sel, ObjCMethodDecl *BestMeth
return true;
ObjCMethodList &MethList =
BestMethod->isInstanceMethod() ? Pos->second.first : Pos->second.second;
-
- // Diagnose finding more than one method in global pool
- SmallVector<ObjCMethodDecl *, 4> Methods;
- Methods.push_back(BestMethod);
- for (ObjCMethodList *ML = &MethList; ML; ML = ML->getNext())
- if (ObjCMethodDecl *M = ML->getMethod())
- if (!M->isHidden() && M != BestMethod && !M->hasAttr<UnavailableAttr>())
- Methods.push_back(M);
- if (Methods.size() > 1)
- DiagnoseMultipleMethodInGlobalPool(Methods, Sel, R, receiverIdOrClass);
-
return MethList.hasMoreThanOneDecl();
}
@@ -3650,10 +3798,11 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, ArrayRef<Decl *> allMethods,
// property will be synthesized when property with same name is
// seen in the @implementation.
for (const auto *Ext : IDecl->visible_extensions()) {
- for (const auto *Property : Ext->properties()) {
+ for (const auto *Property : Ext->instance_properties()) {
// Skip over properties declared @dynamic
if (const ObjCPropertyImplDecl *PIDecl
- = IC->FindPropertyImplDecl(Property->getIdentifier()))
+ = IC->FindPropertyImplDecl(Property->getIdentifier(),
+ Property->getQueryKind()))
if (PIDecl->getPropertyImplementation()
== ObjCPropertyImplDecl::Dynamic)
continue;
@@ -3839,7 +3988,7 @@ public:
}
}
- typedef llvm::SmallPtrSet<ObjCMethodDecl*, 128>::iterator iterator;
+ typedef llvm::SmallPtrSetImpl<ObjCMethodDecl*>::iterator iterator;
iterator begin() const { return Overridden.begin(); }
iterator end() const { return Overridden.end(); }
@@ -4463,6 +4612,9 @@ Decl *Sema::ActOnObjCExceptionDecl(Scope *S, Declarator &D) {
Diag(DS.getStorageClassSpecLoc(), diag::err_storage_spec_on_catch_parm)
<< DeclSpec::getSpecifierName(SCS);
}
+ if (DS.isInlineSpecified())
+ Diag(DS.getInlineSpecLoc(), diag::err_inline_non_function)
+ << getLangOpts().CPlusPlus1z;
if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec())
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_invalid_thread)
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp
index f12bf2415dba..4a21eb308fe5 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -110,11 +110,17 @@ bool Sema::CheckSpecifiedExceptionType(QualType &T, SourceRange Range) {
// A type denoted in an exception-specification shall not denote a
// pointer or reference to an incomplete type, other than (cv) void* or a
// pointer or reference to a class currently being defined.
+ // In Microsoft mode, downgrade this to a warning.
+ unsigned DiagID = diag::err_incomplete_in_exception_spec;
+ bool ReturnValueOnError = true;
+ if (getLangOpts().MicrosoftExt) {
+ DiagID = diag::ext_incomplete_in_exception_spec;
+ ReturnValueOnError = false;
+ }
if (!(PointeeT->isRecordType() &&
PointeeT->getAs<RecordType>()->isBeingDefined()) &&
- RequireCompleteType(Range.getBegin(), PointeeT,
- diag::err_incomplete_in_exception_spec, Kind, Range))
- return true;
+ RequireCompleteType(Range.getBegin(), PointeeT, DiagID, Kind, Range))
+ return ReturnValueOnError;
return false;
}
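A sketch of the relaxed Microsoft behavior (type name invented):

    struct Incomplete;
    void f() throw(Incomplete *);  // error by default; under -fms-extensions
                                   // only a warning, and checking continues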
@@ -995,6 +1001,10 @@ CanThrowResult Sema::canThrow(const Expr *E) {
return mergeCanThrow(CT, canSubExprsThrow(*this, E));
}
+ case Expr::CXXInheritedCtorInitExprClass:
+ return canCalleeThrow(*this, E,
+ cast<CXXInheritedCtorInitExpr>(E)->getConstructor());
+
case Expr::LambdaExprClass: {
const LambdaExpr *Lambda = cast<LambdaExpr>(E);
CanThrowResult CT = CT_Cannot;
@@ -1136,6 +1146,7 @@ CanThrowResult Sema::canThrow(const Expr *E) {
case Expr::ObjCIndirectCopyRestoreExprClass:
case Expr::ObjCProtocolExprClass:
case Expr::ObjCSelectorExprClass:
+ case Expr::ObjCAvailabilityCheckExprClass:
case Expr::OffsetOfExprClass:
case Expr::PackExpansionExprClass:
case Expr::PseudoObjectExprClass:
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp
index 5a2eb6060ee9..719e1e3502ca 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp
@@ -49,7 +49,7 @@ using namespace sema;
/// \brief Determine whether the use of this declaration is valid, without
/// emitting diagnostics.
-bool Sema::CanUseDecl(NamedDecl *D) {
+bool Sema::CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid) {
// See if this is an auto-typed variable whose initializer we are parsing.
if (ParsingInitForAutoVars.count(D))
return false;
@@ -67,7 +67,7 @@ bool Sema::CanUseDecl(NamedDecl *D) {
}
// See if this function is unavailable.
- if (D->getAvailability() == AR_Unavailable &&
+ if (TreatUnavailableAsInvalid && D->getAvailability() == AR_Unavailable &&
cast<Decl>(CurContext)->getAvailability() != AR_Unavailable)
return false;
@@ -76,10 +76,14 @@ bool Sema::CanUseDecl(NamedDecl *D) {
static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) {
// Warn if this is used but marked unused.
- if (D->hasAttr<UnusedAttr>()) {
- const Decl *DC = cast_or_null<Decl>(S.getCurObjCLexicalContext());
- if (DC && !DC->hasAttr<UnusedAttr>())
- S.Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName();
+ if (const auto *A = D->getAttr<UnusedAttr>()) {
+ // [[maybe_unused]] should not diagnose uses, but __attribute__((unused))
+ // should diagnose them.
+ if (A->getSemanticSpelling() != UnusedAttr::CXX11_maybe_unused) {
+ const Decl *DC = cast_or_null<Decl>(S.getCurObjCLexicalContext());
+ if (DC && !DC->hasAttr<UnusedAttr>())
+ S.Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName();
+ }
}
}
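A hedged sketch of the distinction now drawn between the two "unused" spellings ('compute' is a stand-in):

    int compute();
    [[maybe_unused]] int a = compute();        // uses of 'a' are not flagged
    __attribute__((unused)) int b = compute();
    int c = a + b;   // 'b' still triggers -Wused-but-marked-unused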
@@ -137,7 +141,7 @@ DiagnoseAvailabilityOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc,
const ObjCPropertyDecl *ObjCPDecl = nullptr;
if (Result == AR_Deprecated || Result == AR_Unavailable ||
- AR_NotYetIntroduced) {
+ Result == AR_NotYetIntroduced) {
if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(D)) {
if (const ObjCPropertyDecl *PD = MD->findPropertyDecl()) {
AvailabilityResult PDeclResult = PD->getAvailability(nullptr);
@@ -212,25 +216,14 @@ void Sema::NoteDeletedFunction(FunctionDecl *Decl) {
// deleted. This might fail, if that reason no longer applies.
CXXSpecialMember CSM = getSpecialMember(Method);
if (CSM != CXXInvalid)
- ShouldDeleteSpecialMember(Method, CSM, /*Diagnose=*/true);
+ ShouldDeleteSpecialMember(Method, CSM, nullptr, /*Diagnose=*/true);
return;
}
- if (CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(Decl)) {
- if (CXXConstructorDecl *BaseCD =
- const_cast<CXXConstructorDecl*>(CD->getInheritedConstructor())) {
- Diag(Decl->getLocation(), diag::note_inherited_deleted_here);
- if (BaseCD->isDeleted()) {
- NoteDeletedFunction(BaseCD);
- } else {
- // FIXME: An explanation of why exactly it can't be inherited
- // would be nice.
- Diag(BaseCD->getLocation(), diag::note_cannot_inherit);
- }
- return;
- }
- }
+ auto *Ctor = dyn_cast<CXXConstructorDecl>(Decl);
+ if (Ctor && Ctor->isInheritingConstructor())
+ return NoteDeletedInheritingConstructor(Ctor);
Diag(Decl->getLocation(), diag::note_availability_specified_here)
<< Decl << true;
@@ -357,7 +350,13 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
// See if this is a deleted function.
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->isDeleted()) {
- Diag(Loc, diag::err_deleted_function_use);
+ auto *Ctor = dyn_cast<CXXConstructorDecl>(FD);
+ if (Ctor && Ctor->isInheritingConstructor())
+ Diag(Loc, diag::err_deleted_inherited_ctor_use)
+ << Ctor->getParent()
+ << Ctor->getInheritedConstructor().getConstructor()->getParent();
+ else
+ Diag(Loc, diag::err_deleted_function_use);
NoteDeletedFunction(FD);
return true;
}
@@ -368,6 +367,19 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
DeduceReturnType(FD, Loc))
return true;
}
+
+ // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions
+ // Only the variables omp_in and omp_out are allowed in the combiner.
+ // Only the variables omp_priv and omp_orig are allowed in the
+ // initializer-clause.
+ auto *DRD = dyn_cast<OMPDeclareReductionDecl>(CurContext);
+ if (LangOpts.OpenMP && DRD && !CurContext->containsDecl(D) &&
+ isa<VarDecl>(D)) {
+ Diag(Loc, diag::err_omp_wrong_var_in_declare_reduction)
+ << getCurFunction()->HasOMPDeclareReductionCombiner;
+ Diag(D->getLocation(), diag::note_entity_declared_at) << D;
+ return true;
+ }
DiagnoseAvailabilityOfDecl(*this, D, Loc, UnknownObjCClass,
ObjCPropertyAccess);
@@ -695,7 +707,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) {
// balance that.
if (getLangOpts().ObjCAutoRefCount &&
E->getType().getObjCLifetime() == Qualifiers::OCL_Weak)
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
ExprResult Res = ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, E,
nullptr, VK_RValue);
@@ -1138,6 +1150,48 @@ static QualType handleFloatConversion(Sema &S, ExprResult &LHS,
/*convertFloat=*/!IsCompAssign);
}
+/// \brief Diagnose attempts to convert between __float128 and long double if
+/// there is no support for such conversion. Helper function of
+/// UsualArithmeticConversions().
+static bool unsupportedTypeConversion(const Sema &S, QualType LHSType,
+ QualType RHSType) {
+ /* No issue converting if at least one of the types is not a floating point
+ type or the two types have the same rank.
+ */
+ if (!LHSType->isFloatingType() || !RHSType->isFloatingType() ||
+ S.Context.getFloatingTypeOrder(LHSType, RHSType) == 0)
+ return false;
+
+ assert(LHSType->isFloatingType() && RHSType->isFloatingType() &&
+ "The remaining types must be floating point types.");
+
+ auto *LHSComplex = LHSType->getAs<ComplexType>();
+ auto *RHSComplex = RHSType->getAs<ComplexType>();
+
+ QualType LHSElemType = LHSComplex ?
+ LHSComplex->getElementType() : LHSType;
+ QualType RHSElemType = RHSComplex ?
+ RHSComplex->getElementType() : RHSType;
+
+ // No issue if the two types have the same representation
+ if (&S.Context.getFloatTypeSemantics(LHSElemType) ==
+ &S.Context.getFloatTypeSemantics(RHSElemType))
+ return false;
+
+ bool Float128AndLongDouble = (LHSElemType == S.Context.Float128Ty &&
+ RHSElemType == S.Context.LongDoubleTy);
+ Float128AndLongDouble |= (LHSElemType == S.Context.LongDoubleTy &&
+ RHSElemType == S.Context.Float128Ty);
+
+ /* We've handled the situation where __float128 and long double have the same
+ representation. The only other allowable conversion is if long double is
+ really just double.
+ */
+ return Float128AndLongDouble &&
+ (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) !=
+ &llvm::APFloat::IEEEdouble);
+}
+
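Illustrating what unsupportedTypeConversion rejects, under the assumption that long double is x87 80-bit (as on x86-64 Linux) rather than plain IEEE double:

    void mix(__float128 q, long double ld) {
      // no common type is representable, so this would now be rejected:
      // auto sum = q + ld;
    }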
typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType);
namespace {
@@ -1301,6 +1355,11 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
// At this point, we have two different arithmetic types.
+ // Diagnose attempts to convert between __float128 and long double where
+ // such conversions currently can't be handled.
+ if (unsupportedTypeConversion(*this, LHSType, RHSType))
+ return QualType();
+
// Handle complex types first (C99 6.3.1.8p1).
if (LHSType->isComplexType() || RHSType->isComplexType())
return handleComplexFloatConversion(*this, LHS, RHS, LHSType, RHSType,
@@ -1719,10 +1778,12 @@ Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getLocStart()))
recordUseOfEvaluatedWeak(E);
- // Just in case we're building an illegal pointer-to-member.
- FieldDecl *FD = dyn_cast<FieldDecl>(D);
- if (FD && FD->isBitField())
- E->setObjectKind(OK_BitField);
+ if (FieldDecl *FD = dyn_cast<FieldDecl>(D)) {
+ UnusedPrivateFields.remove(FD);
+ // Just in case we're building an illegal pointer-to-member.
+ if (FD->isBitField())
+ E->setObjectKind(OK_BitField);
+ }
return E;
}
@@ -2840,6 +2901,7 @@ ExprResult Sema::BuildDeclarationNameExpr(
// Unresolved using declarations are dependent.
case Decl::EnumConstant:
case Decl::UnresolvedUsingValue:
+ case Decl::OMPDeclareReduction:
valueKind = VK_RValue;
break;
@@ -2877,6 +2939,7 @@ ExprResult Sema::BuildDeclarationNameExpr(
case Decl::Var:
case Decl::VarTemplateSpecialization:
case Decl::VarTemplatePartialSpecialization:
+ case Decl::OMPCapturedExpr:
// In C, "extern void blah;" is valid and is an r-value.
if (!getLangOpts().CPlusPlus &&
!type.hasQualifiers() &&
@@ -3297,12 +3360,21 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
if (Literal.isFloatingLiteral()) {
QualType Ty;
- if (Literal.isFloat)
+ if (Literal.isHalf){
+ if (getOpenCLOptions().cl_khr_fp16)
+ Ty = Context.HalfTy;
+ else {
+ Diag(Tok.getLocation(), diag::err_half_const_requires_fp16);
+ return ExprError();
+ }
+ } else if (Literal.isFloat)
Ty = Context.FloatTy;
- else if (!Literal.isLong)
- Ty = Context.DoubleTy;
- else
+ else if (Literal.isLong)
Ty = Context.LongDoubleTy;
+ else if (Literal.isFloat128)
+ Ty = Context.Float128Ty;
+ else
+ Ty = Context.DoubleTy;
Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation());
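An OpenCL sketch of the new 'h' suffix handling (function body is illustrative):

    #pragma OPENCL EXTENSION cl_khr_fp16 : enable
    half scale(half h) {
      return h * 2.0h;  // without the pragma above, Sema emits
    }                   // err_half_const_requires_fp16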
@@ -3890,14 +3962,24 @@ Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo,
if (T->isVariablyModifiedType() && FunctionScopes.size() > 1) {
if (auto *TT = T->getAs<TypedefType>()) {
- if (auto *CSI = dyn_cast<CapturingScopeInfo>(FunctionScopes.back())) {
+ for (auto I = FunctionScopes.rbegin(),
+ E = std::prev(FunctionScopes.rend());
+ I != E; ++I) {
+ auto *CSI = dyn_cast<CapturingScopeInfo>(*I);
+ if (CSI == nullptr)
+ break;
DeclContext *DC = nullptr;
- if (auto LSI = dyn_cast<LambdaScopeInfo>(CSI))
+ if (auto *LSI = dyn_cast<LambdaScopeInfo>(CSI))
DC = LSI->CallOperator;
- else if (auto CRSI = dyn_cast<CapturedRegionScopeInfo>(CSI))
+ else if (auto *CRSI = dyn_cast<CapturedRegionScopeInfo>(CSI))
DC = CRSI->TheCapturedDecl;
- if (DC && TT->getDecl()->getDeclContext() != DC)
+ else if (auto *BSI = dyn_cast<BlockScopeInfo>(CSI))
+ DC = BSI->TheDecl;
+ if (DC) {
+ if (DC->containsDecl(TT->getDecl()))
+ break;
captureVariablyModifiedType(Context, T, CSI);
+ }
}
}
}
@@ -4141,12 +4223,18 @@ ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc,
ExprResult Result = CheckPlaceholderExpr(LowerBound);
if (Result.isInvalid())
return ExprError();
+ Result = DefaultLvalueConversion(Result.get());
+ if (Result.isInvalid())
+ return ExprError();
LowerBound = Result.get();
}
if (Length && Length->getType()->isNonOverloadPlaceholderType()) {
ExprResult Result = CheckPlaceholderExpr(Length);
if (Result.isInvalid())
return ExprError();
+ Result = DefaultLvalueConversion(Result.get());
+ if (Result.isInvalid())
+ return ExprError();
Length = Result.get();
}
@@ -4253,6 +4341,13 @@ ExprResult Sema::ActOnOMPArraySectionExpr(Expr *Base, SourceLocation LBLoc,
return ExprError();
}
+ if (!Base->getType()->isSpecificPlaceholderType(
+ BuiltinType::OMPArraySection)) {
+ ExprResult Result = DefaultFunctionArrayLvalueConversion(Base);
+ if (Result.isInvalid())
+ return ExprError();
+ Base = Result.get();
+ }
return new (Context)
OMPArraySectionExpr(Base, LowerBound, Length, Context.OMPArraySectionTy,
VK_LValue, OK_Ordinary, ColonLoc, RBLoc);
@@ -4427,6 +4522,11 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
MutiLevelArgList.getInnermost());
if (Inst.isInvalid())
return ExprError();
+ if (Inst.isAlreadyInstantiating()) {
+ Diag(Param->getLocStart(), diag::err_recursive_default_argument) << FD;
+ Param->setInvalidDecl();
+ return ExprError();
+ }
ExprResult Result;
{
@@ -4466,6 +4566,13 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
}
}
+ // If the default argument expression is not set yet, we are building it now.
+ if (!Param->hasInit()) {
+ Diag(Param->getLocStart(), diag::err_recursive_default_argument) << FD;
+ Param->setInvalidDecl();
+ return ExprError();
+ }
+
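The classic case both new guards (isAlreadyInstantiating and the missing-init check) catch, as a one-line sketch:

    int f(int a = f());  // error: recursive evaluation of default argument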
// If the default expression creates temporaries, we need to
// push them to the current stack of expression temporaries so they'll
// be properly destroyed.
@@ -4473,15 +4580,15 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
// bound temporaries; see the comment in PR5810.
// We don't need to do that with block decls, though, because
// blocks in default argument expression can never capture anything.
- if (isa<ExprWithCleanups>(Param->getInit())) {
+ if (auto Init = dyn_cast<ExprWithCleanups>(Param->getInit())) {
// Set the "needs cleanups" bit regardless of whether there are
// any explicit objects.
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(Init->cleanupsHaveSideEffects());
// Append all the objects to the cleanup list. Right now, this
// should always be a no-op, because blocks in default argument
// expressions should never be able to capture anything.
- assert(!cast<ExprWithCleanups>(Param->getInit())->getNumObjects() &&
+ assert(!Init->getNumObjects() &&
"default argument expression has capturing blocks?");
}
@@ -4866,6 +4973,9 @@ static bool isPlaceholderToRemoveAsArg(QualType type) {
switch (placeholder->getKind()) {
// Ignore all the non-placeholder types.
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
#define PLACEHOLDER_TYPE(ID, SINGLETON_ID)
#define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID:
#include "clang/AST/BuiltinTypes.def"
@@ -4995,6 +5105,14 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
return OverloadDecl;
}
+static bool isNumberOfArgsValidForCall(Sema &S, const FunctionDecl *Callee,
+ std::size_t NumArgs) {
+ if (S.TooManyArguments(Callee->getNumParams(), NumArgs,
+ /*PartialOverloading=*/false))
+ return Callee->isVariadic();
+ return Callee->getMinRequiredArguments() <= NumArgs;
+}
+
/// ActOnCallExpr - Handle a call to Fn with the specified array of arguments.
/// This provides the location of the left/right parens and a list of comma
/// locations.
@@ -5032,8 +5150,6 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
// Determine whether this is a dependent call inside a C++ template,
// in which case we won't do any semantic analysis now.
- // FIXME: Will need to cache the results of name lookup (including ADL) in
- // Fn.
bool Dependent = false;
if (Fn->isTypeDependent())
Dependent = true;
@@ -5126,7 +5242,14 @@ Sema::ActOnCallExpr(Scope *S, Expr *Fn, SourceLocation LParenLoc,
Fn->getLocStart()))
return ExprError();
- if (FD->hasAttr<EnableIfAttr>()) {
+  // CheckEnableIf assumes that we're passing in a sane number of args for
+ // FD, but that doesn't always hold true here. This is because, in some
+ // cases, we'll emit a diag about an ill-formed function call, but then
+ // we'll continue on as if the function call wasn't ill-formed. So, if the
+ // number of args looks incorrect, don't do enable_if checks; we should've
+ // already emitted an error about the bad call.
+ if (FD->hasAttr<EnableIfAttr>() &&
+ isNumberOfArgsValidForCall(*this, FD, ArgExprs.size())) {
if (const EnableIfAttr *Attr = CheckEnableIf(FD, ArgExprs, true)) {
Diag(Fn->getLocStart(),
isa<CXXMethodDecl>(FD) ?
@@ -5192,6 +5315,12 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
FunctionDecl *FDecl = dyn_cast_or_null<FunctionDecl>(NDecl);
unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0);
+ // Functions with 'interrupt' attribute cannot be called directly.
+ if (FDecl && FDecl->hasAttr<AnyX86InterruptAttr>()) {
+ Diag(Fn->getExprLoc(), diag::err_anyx86_interrupt_called);
+ return ExprError();
+ }
+
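A sketch of the new restriction, with an invented frame type; x86 interrupt handlers may only be invoked by the CPU:

    struct frame;
    __attribute__((interrupt)) void isr(struct frame *f);
    void test(struct frame *f) {
      isr(f);  // error: interrupt service routine cannot be called directly
    }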
// Promote the function operand.
// We special-case function promotion here because we only allow promoting
// builtin functions to function pointers in the callee of a call.
@@ -5474,7 +5603,7 @@ void Sema::maybeExtendBlockObject(ExprResult &E) {
E = ImplicitCastExpr::Create(Context, E.get()->getType(),
CK_ARCExtendBlockObject, E.get(),
/*base path*/ nullptr, VK_RValue);
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
}
/// Prepare a conversion of the given expression to an ObjC object
@@ -6122,30 +6251,87 @@ static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS,
lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual);
rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual);
+ // For OpenCL:
+ // 1. If LHS and RHS types match exactly and:
+ // (a) AS match => use standard C rules, no bitcast or addrspacecast
+ // (b) AS overlap => generate addrspacecast
+ // (c) AS don't overlap => give an error
+ // 2. if LHS and RHS types don't match:
+ // (a) AS match => use standard C rules, generate bitcast
+ // (b) AS overlap => generate addrspacecast instead of bitcast
+ // (c) AS don't overlap => give an error
+
+ // For OpenCL, non-null composite type is returned only for cases 1a and 1b.
QualType CompositeTy = S.Context.mergeTypes(lhptee, rhptee);
+ // OpenCL cases 1c, 2a, 2b, and 2c.
if (CompositeTy.isNull()) {
- S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers)
- << LHSTy << RHSTy << LHS.get()->getSourceRange()
- << RHS.get()->getSourceRange();
// In this situation, we assume void* type. No especially good
// reason, but this is what gcc does, and we do have to pick
// to get a consistent AST.
- QualType incompatTy = S.Context.getPointerType(S.Context.VoidTy);
- LHS = S.ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast);
- RHS = S.ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast);
+ QualType incompatTy;
+ if (S.getLangOpts().OpenCL) {
+ // OpenCL v1.1 s6.5 - Conversion between pointers to distinct address
+ // spaces is disallowed.
+ unsigned ResultAddrSpace;
+ if (lhQual.isAddressSpaceSupersetOf(rhQual)) {
+ // Cases 2a and 2b.
+ ResultAddrSpace = lhQual.getAddressSpace();
+ } else if (rhQual.isAddressSpaceSupersetOf(lhQual)) {
+ // Cases 2a and 2b.
+ ResultAddrSpace = rhQual.getAddressSpace();
+ } else {
+ // Cases 1c and 2c.
+ S.Diag(Loc,
+ diag::err_typecheck_op_on_nonoverlapping_address_space_pointers)
+ << LHSTy << RHSTy << 2 << LHS.get()->getSourceRange()
+ << RHS.get()->getSourceRange();
+ return QualType();
+ }
+
+ // Continue handling cases 2a and 2b.
+ incompatTy = S.Context.getPointerType(
+ S.Context.getAddrSpaceQualType(S.Context.VoidTy, ResultAddrSpace));
+ LHS = S.ImpCastExprToType(LHS.get(), incompatTy,
+ (lhQual.getAddressSpace() != ResultAddrSpace)
+ ? CK_AddressSpaceConversion /* 2b */
+ : CK_BitCast /* 2a */);
+ RHS = S.ImpCastExprToType(RHS.get(), incompatTy,
+ (rhQual.getAddressSpace() != ResultAddrSpace)
+ ? CK_AddressSpaceConversion /* 2b */
+ : CK_BitCast /* 2a */);
+ } else {
+ S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers)
+ << LHSTy << RHSTy << LHS.get()->getSourceRange()
+ << RHS.get()->getSourceRange();
+ incompatTy = S.Context.getPointerType(S.Context.VoidTy);
+ LHS = S.ImpCastExprToType(LHS.get(), incompatTy, CK_BitCast);
+ RHS = S.ImpCastExprToType(RHS.get(), incompatTy, CK_BitCast);
+ }
return incompatTy;
}
// The pointer types are compatible.
QualType ResultTy = CompositeTy.withCVRQualifiers(MergedCVRQual);
+ auto LHSCastKind = CK_BitCast, RHSCastKind = CK_BitCast;
if (IsBlockPointer)
ResultTy = S.Context.getBlockPointerType(ResultTy);
- else
+ else {
+ // Cases 1a and 1b for OpenCL.
+ auto ResultAddrSpace = ResultTy.getQualifiers().getAddressSpace();
+ LHSCastKind = lhQual.getAddressSpace() == ResultAddrSpace
+ ? CK_BitCast /* 1a */
+ : CK_AddressSpaceConversion /* 1b */;
+ RHSCastKind = rhQual.getAddressSpace() == ResultAddrSpace
+ ? CK_BitCast /* 1a */
+ : CK_AddressSpaceConversion /* 1b */;
ResultTy = S.Context.getPointerType(ResultTy);
+ }
- LHS = S.ImpCastExprToType(LHS.get(), ResultTy, CK_BitCast);
- RHS = S.ImpCastExprToType(RHS.get(), ResultTy, CK_BitCast);
+ // For case 1a of OpenCL, S.ImpCastExprToType will not insert bitcast
+ // if the target type does not change.
+ LHS = S.ImpCastExprToType(LHS.get(), ResultTy, LHSCastKind);
+ RHS = S.ImpCastExprToType(RHS.get(), ResultTy, RHSCastKind);
return ResultTy;
}
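A rough OpenCL 2.0 sketch of the numbered cases above (names invented):

    kernel void k(global int *g, local int *l, generic int *p, int cond) {
      // case 1b: same pointee, overlapping address spaces -> addrspacecast
      generic int *r1 = cond ? g : p;
      // case 1c: disjoint address spaces -> error
      // int *r2 = cond ? g : l;
    }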
@@ -6413,6 +6599,18 @@ OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond,
return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc);
}
+/// \brief Return true, and emit a diagnostic, if the Expr is a call through
+/// a block pointer; OpenCL disallows blocks in the ternary operator.
+static bool checkBlockType(Sema &S, const Expr *E) {
+ if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
+ QualType Ty = CE->getCallee()->getType();
+ if (Ty->isBlockPointerType()) {
+ S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block);
+ return true;
+ }
+ }
+ return false;
+}
+
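A sketch of the OpenCL 2.0 restriction checkBlockType enforces (block body invented):

    int (^blk)(void) = ^{ return 1; };
    // int x = cond ? blk() : 0;  // error: blocks cannot be used as
    //                            // expressions of the ternary operator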
/// Note that LHS is not null here, even if this is the gnu "x ?: y" extension.
/// In that case, LHS = cond.
/// C99 6.5.15
@@ -6462,6 +6660,22 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
+ // Diagnose attempts to convert between __float128 and long double where
+ // such conversions currently can't be handled.
+ if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) {
+ Diag(QuestionLoc,
+ diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy
+ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
+ return QualType();
+ }
+
+ // OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary
+ // selection operator (?:).
+ if (getLangOpts().OpenCL &&
+ (checkBlockType(*this, LHS.get()) | checkBlockType(*this, RHS.get()))) {
+ return QualType();
+ }
+
// If both operands have arithmetic type, do the usual arithmetic conversions
// to find a common type: C99 6.5.15p3,5.
if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) {
@@ -6804,8 +7018,23 @@ ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr);
- if (!CondResult.isUsable()) return ExprError();
+ ExprResult LHSResult = CorrectDelayedTyposInExpr(LHSExpr);
+ ExprResult RHSResult = CorrectDelayedTyposInExpr(RHSExpr);
+
+ if (!CondResult.isUsable())
+ return ExprError();
+
+ if (LHSExpr) {
+ if (!LHSResult.isUsable())
+ return ExprError();
+ }
+
+ if (!RHSResult.isUsable())
+ return ExprError();
+
CondExpr = CondResult.get();
+ LHSExpr = LHSResult.get();
+ RHSExpr = RHSResult.get();
}
// If this is the gnu "x ?: y" extension, analyze the types as though the LHS
@@ -6918,7 +7147,7 @@ checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) {
else if (lhq.getObjCLifetime() != rhq.getObjCLifetime())
ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;
- // For GCC compatibility, other qualifier mismatches are treated
+ // For GCC/MS compatibility, other qualifier mismatches are treated
// as still compatible in C.
else ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
}
@@ -7170,9 +7399,30 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
return IncompatibleVectors;
}
}
+
+  // When the RHS comes from another lax conversion (e.g. binops between
+  // scalars and vectors), the result is canonicalized as a vector. When the
+  // LHS is also a vector, the lax conversion is allowed by the condition
+  // above. Handle the case where the LHS is a scalar.
+ if (LHSType->isScalarType()) {
+ const VectorType *VecType = RHSType->getAs<VectorType>();
+ if (VecType && VecType->getNumElements() == 1 &&
+ isLaxVectorConversion(RHSType, LHSType)) {
+ ExprResult *VecExpr = &RHS;
+ *VecExpr = ImpCastExprToType(VecExpr->get(), LHSType, CK_BitCast);
+ Kind = CK_BitCast;
+ return Compatible;
+ }
+ }
+
return Incompatible;
}
+ // Diagnose attempts to convert between __float128 and long double where
+ // such conversions currently can't be handled.
+ if (unsupportedTypeConversion(*this, LHSType, RHSType))
+ return Incompatible;
+
// Arithmetic conversions.
if (LHSType->isArithmeticType() && RHSType->isArithmeticType() &&
!(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) {
@@ -7539,13 +7789,24 @@ Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS,
if (result != Incompatible && RHS.get()->getType() != LHSType) {
QualType Ty = LHSType.getNonLValueExprType(Context);
Expr *E = RHS.get();
- if (getLangOpts().ObjCAutoRefCount)
- CheckObjCARCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion,
- Diagnose, DiagnoseCFAudited);
+
+ // Check for various Objective-C errors. If we are not reporting
+ // diagnostics and just checking for errors, e.g., during overload
+ // resolution, return Incompatible to indicate the failure.
+ if (getLangOpts().ObjCAutoRefCount &&
+ CheckObjCARCConversion(SourceRange(), Ty, E, CCK_ImplicitConversion,
+ Diagnose, DiagnoseCFAudited) != ACR_okay) {
+ if (!Diagnose)
+ return Incompatible;
+ }
if (getLangOpts().ObjC1 &&
(CheckObjCBridgeRelatedConversions(E->getLocStart(), LHSType,
E->getType(), E, Diagnose) ||
ConversionToObjCStringLiteralCheck(LHSType, E, Diagnose))) {
+ if (!Diagnose)
+ return Incompatible;
+ // Replace the expression with a corrected version and continue so we
+ // can find further errors.
RHS = E;
return Compatible;
}
@@ -7693,14 +7954,16 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
return RHSType;
}
- // If we're allowing lax vector conversions, only the total (data) size
- // needs to be the same.
- // FIXME: Should we really be allowing this?
- // FIXME: We really just pick the LHS type arbitrarily?
- if (isLaxVectorConversion(RHSType, LHSType)) {
- QualType resultType = LHSType;
- RHS = ImpCastExprToType(RHS.get(), resultType, CK_BitCast);
- return resultType;
+ // If we're allowing lax vector conversions, only the total (data) size needs
+ // to be the same. If one of the types is scalar, the result is always the
+ // vector type. Don't allow this if the scalar operand is an lvalue.
+ QualType VecType = LHSVecType ? LHSType : RHSType;
+ QualType ScalarType = LHSVecType ? RHSType : LHSType;
+ ExprResult *ScalarExpr = LHSVecType ? &RHS : &LHS;
+ if (isLaxVectorConversion(ScalarType, VecType) &&
+ !ScalarExpr->get()->isLValue()) {
+ *ScalarExpr = ImpCastExprToType(ScalarExpr->get(), VecType, CK_BitCast);
+ return VecType;
}
// Okay, the expression is invalid.
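A sketch of the reworked lax rule in CheckVectorOperands: the result is always the vector type, and the scalar side is only bitcast when it is an rvalue. Assumes lax vector conversions are enabled:

    typedef int int1 __attribute__((ext_vector_type(1)));

    int1 f(int1 v) {
      return v + 1;   // rvalue scalar of matching size: bitcast to int1,
                      // and the result type is the vector type, never int
    }
    // An lvalue scalar operand is no longer eligible for this bitcast path.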
@@ -8309,7 +8572,7 @@ static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS,
// If the LHS has a signed type and a negative value,
// the behavior is undefined. Warn about it.
- if (Left.isNegative()) {
+ if (Left.isNegative() && !S.getLangOpts().isSignedOverflowDefined()) {
S.DiagRuntimeBehavior(Loc, LHS.get(),
S.PDiag(diag::warn_shift_lhs_negative)
<< LHS.get()->getSourceRange());
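What the tweaked guard means in practice: -fwrapv makes signed overflow defined, so the negative-LHS shift warning is now suppressed there (the -Wshift-negative-value spelling is assumed, not shown in this hunk):

    int f(void) {
      return -1 << 3;   // warns by default: shifting a negative value is
                        // undefined behavior; under -fwrapv the warning is
                        // skipped, since signed overflow is then defined
    }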
@@ -9244,7 +9507,7 @@ QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS,
}
// Return a signed type for the vector.
- return GetSignedVectorType(LHSType);
+ return GetSignedVectorType(vType);
}
QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
@@ -9411,7 +9674,16 @@ static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) {
// Decide whether the first capture was for a block or a lambda.
DeclContext *DC = S.CurContext, *Prev = nullptr;
- while (DC != var->getDeclContext()) {
+ while (DC) {
+ // For init-capture, it is possible that the variable belongs to the
+ // template pattern of the current context.
+ if (auto *FD = dyn_cast<FunctionDecl>(DC))
+ if (var->isInitCapture() &&
+ FD->getTemplateInstantiationPattern() == var->getDeclContext())
+ break;
+ if (DC == var->getDeclContext())
+ break;
Prev = DC;
DC = DC->getParent();
}
@@ -9558,6 +9830,9 @@ static void DiagnoseConstAssignment(Sema &S, const Expr *E,
/// emit an error and return true. If so, return false.
static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) {
assert(!E->hasPlaceholderType(BuiltinType::PseudoObject));
+
+ S.CheckShadowingDeclModification(E, Loc);
+
SourceLocation OrigLoc = Loc;
Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context,
&Loc);
@@ -9798,6 +10073,67 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS,
? LHSType : LHSType.getUnqualifiedType());
}
+// Only ignore explicit casts to void.
+static bool IgnoreCommaOperand(const Expr *E) {
+ E = E->IgnoreParens();
+
+ if (const CastExpr *CE = dyn_cast<CastExpr>(E)) {
+ if (CE->getCastKind() == CK_ToVoid) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Look for instances where it is likely the comma operator is confused with
+// another operator. There is a whitelist of acceptable expressions for the
+// left-hand side of the comma operator; otherwise, emit a warning.
+void Sema::DiagnoseCommaOperator(const Expr *LHS, SourceLocation Loc) {
+ // No warnings in macros
+ if (Loc.isMacroID())
+ return;
+
+ // Don't warn in template instantiations.
+ if (!ActiveTemplateInstantiations.empty())
+ return;
+
+ // Scope isn't fine-grained enough to whitelist the specific cases, so
+ // instead, skip more than needed, then call back into here with the
+ // CommaVisitor in SemaStmt.cpp.
+ // The whitelisted locations are the initialization and increment portions
+ // of a for loop. The additional checks are on the condition of
+ // if statements, do/while loops, and for loops.
+ const unsigned ForIncrementFlags =
+ Scope::ControlScope | Scope::ContinueScope | Scope::BreakScope;
+ const unsigned ForInitFlags = Scope::ControlScope | Scope::DeclScope;
+ const unsigned ScopeFlags = getCurScope()->getFlags();
+ if ((ScopeFlags & ForIncrementFlags) == ForIncrementFlags ||
+ (ScopeFlags & ForInitFlags) == ForInitFlags)
+ return;
+
+  // If there are multiple comma operators used together, get the RHS of
+  // the comma operator as the LHS.
+ while (const BinaryOperator *BO = dyn_cast<BinaryOperator>(LHS)) {
+ if (BO->getOpcode() != BO_Comma)
+ break;
+ LHS = BO->getRHS();
+ }
+
+ // Only allow some expressions on LHS to not warn.
+ if (IgnoreCommaOperand(LHS))
+ return;
+
+ Diag(Loc, diag::warn_comma_operator);
+ Diag(LHS->getLocStart(), diag::note_cast_to_void)
+ << LHS->getSourceRange()
+ << FixItHint::CreateInsertion(LHS->getLocStart(),
+ LangOpts.CPlusPlus ? "static_cast<void>("
+ : "(void)(")
+ << FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getLocEnd()),
+ ")");
+}
+
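A hedged sketch of what DiagnoseCommaOperator flags and what it lets through; the -Wcomma spelling is assumed from warn_comma_operator:

    void g(int);

    void f(int x, int y) {
      g((x, y));   // warning: possible misuse of comma operator; the fixit
                   // wraps the LHS, suggesting "static_cast<void>(x)" in
                   // C++ or "(void)(x)" in C
      for (x = 0, y = 0; x < 10; ++x, ++y)
        ;          // no warning: the for-init and for-increment scopes are
                   // whitelisted above
    }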
// C99 6.5.17
static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc) {
@@ -9827,6 +10163,9 @@ static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
diag::err_incomplete_type);
}
+ if (!S.getDiagnostics().isIgnored(diag::warn_comma_operator, Loc))
+ S.DiagnoseCommaOperator(LHS.get(), Loc);
+
return RHS.get()->getType();
}
@@ -10075,8 +10414,8 @@ QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) {
if (sfinae)
return QualType();
// Materialize the temporary as an lvalue so that we can take its address.
- OrigOp = op = new (Context)
- MaterializeTemporaryExpr(op->getType(), OrigOp.get(), true);
+ OrigOp = op =
+ CreateMaterializeTemporaryExpr(op->getType(), OrigOp.get(), true);
} else if (isa<ObjCSelectorExpr>(op)) {
return Context.getPointerType(op->getType());
} else if (lval == Expr::LV_MemberFunction) {
@@ -10199,6 +10538,7 @@ QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) {
// If the operand has type "type", the result has type "pointer to type".
if (op->getType()->isObjCObjectType())
return Context.getObjCObjectPointerType(op->getType());
+
return Context.getPointerType(op->getType());
}
@@ -10240,7 +10580,9 @@ static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK,
}
if (const PointerType *PT = OpTy->getAs<PointerType>())
+ {
Result = PT->getPointeeType();
+ }
else if (const ObjCObjectPointerType *OPT =
OpTy->getAs<ObjCObjectPointerType>())
Result = OPT->getPointeeType();
@@ -10478,10 +10820,11 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
}
if (getLangOpts().OpenCL) {
+ QualType LHSTy = LHSExpr->getType();
+ QualType RHSTy = RHSExpr->getType();
// OpenCLC v2.0 s6.13.11.1 allows atomic variables to be initialized by
// the ATOMIC_VAR_INIT macro.
- if (LHSExpr->getType()->isAtomicType() ||
- RHSExpr->getType()->isAtomicType()) {
+ if (LHSTy->isAtomicType() || RHSTy->isAtomicType()) {
SourceRange SR(LHSExpr->getLocStart(), RHSExpr->getLocEnd());
if (BO_Assign == Opc)
Diag(OpLoc, diag::err_atomic_init_constant) << SR;
@@ -10489,6 +10832,16 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
ResultTy = InvalidOperands(OpLoc, LHS, RHS);
return ExprError();
}
+
+  // OpenCL special types - image, sampler, pipe, and blocks are to be used
+  // only with builtin functions and therefore should be disallowed here.
+ if (LHSTy->isImageType() || RHSTy->isImageType() ||
+ LHSTy->isSamplerT() || RHSTy->isSamplerT() ||
+ LHSTy->isPipeType() || RHSTy->isPipeType() ||
+ LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) {
+ ResultTy = InvalidOperands(OpLoc, LHS, RHS);
+ return ExprError();
+ }
}
switch (Opc) {
@@ -10959,8 +11312,13 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
ExprObjectKind OK = OK_Ordinary;
QualType resultType;
if (getLangOpts().OpenCL) {
+ QualType Ty = InputExpr->getType();
// The only legal unary operation for atomics is '&'.
- if (Opc != UO_AddrOf && InputExpr->getType()->isAtomicType()) {
+ if ((Opc != UO_AddrOf && Ty->isAtomicType()) ||
+      // OpenCL special types - image, sampler, pipe, and blocks are to be used
+      // only with builtin functions and therefore should be disallowed here.
+ (Ty->isImageType() || Ty->isSamplerT() || Ty->isPipeType()
+ || Ty->isBlockPointerType())) {
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< InputExpr->getType()
<< Input.get()->getSourceRange());
@@ -11273,7 +11631,8 @@ Sema::ActOnStmtExpr(SourceLocation LPLoc, Stmt *SubStmt,
if (hasAnyUnrecoverableErrorsInThisFunction())
DiscardCleanupsInEvaluationContext();
- assert(!ExprNeedsCleanups && "cleanups within StmtExpr not correctly bound!");
+ assert(!Cleanup.exprNeedsCleanups() &&
+ "cleanups within StmtExpr not correctly bound!");
PopExpressionEvaluationContext();
// FIXME: there are a variety of strange constraints to enforce here, for
@@ -11697,8 +12056,7 @@ void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo,
// Set the parameters on the block decl.
if (!Params.empty()) {
CurBlock->TheDecl->setParams(Params);
- CheckParmsForFunctionDef(CurBlock->TheDecl->param_begin(),
- CurBlock->TheDecl->param_end(),
+ CheckParmsForFunctionDef(CurBlock->TheDecl->parameters(),
/*CheckParameterNames=*/false);
}
@@ -11706,7 +12064,7 @@ void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo,
ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo);
// Put the parameter variables in scope.
- for (auto AI : CurBlock->TheDecl->params()) {
+ for (auto AI : CurBlock->TheDecl->parameters()) {
AI->setOwningFunction(CurBlock->TheDecl);
// If this has an identifier, add it to the scope stack.
@@ -11736,12 +12094,13 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
Stmt *Body, Scope *CurScope) {
// If blocks are disabled, emit an error.
if (!LangOpts.Blocks)
- Diag(CaretLoc, diag::err_blocks_disable);
+ Diag(CaretLoc, diag::err_blocks_disable) << LangOpts.OpenCL;
// Leave the expression-evaluation context.
if (hasAnyUnrecoverableErrorsInThisFunction())
DiscardCleanupsInEvaluationContext();
- assert(!ExprNeedsCleanups && "cleanups within block not correctly bound!");
+ assert(!Cleanup.exprNeedsCleanups() &&
+ "cleanups within block not correctly bound!");
PopExpressionEvaluationContext();
BlockScopeInfo *BSI = cast<BlockScopeInfo>(FunctionScopes.back());
@@ -11805,8 +12164,7 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
BlockTy = Context.getFunctionType(RetTy, None, EPI);
}
- DiagnoseUnusedParameters(BSI->TheDecl->param_begin(),
- BSI->TheDecl->param_end());
+ DiagnoseUnusedParameters(BSI->TheDecl->parameters());
BlockTy = Context.getBlockPointerType(BlockTy);
// If needed, diagnose invalid gotos and switches in the block.
@@ -11832,7 +12190,7 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
if (Result->getBlockDecl()->hasCaptures()) {
// First, this expression has a new cleanup object.
ExprCleanupObjects.push_back(Result->getBlockDecl());
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
// It also gets a branch-protected scope if any of the captured
// variables needs destruction.
@@ -11848,9 +12206,8 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
return Result;
}
-ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc,
- Expr *E, ParsedType Ty,
- SourceLocation RPLoc) {
+ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty,
+ SourceLocation RPLoc) {
TypeSourceInfo *TInfo;
GetTypeFromParser(Ty, &TInfo);
return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc);
@@ -11862,6 +12219,15 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
Expr *OrigExpr = E;
bool IsMS = false;
+ // CUDA device code does not support varargs.
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) {
+ CUDAFunctionTarget T = IdentifyCUDATarget(F);
+ if (T == CFT_Global || T == CFT_Device || T == CFT_HostDevice)
+ return ExprError(Diag(E->getLocStart(), diag::err_va_arg_in_device));
+ }
+ }
+
// It might be a __builtin_ms_va_list. (But don't ever mark a va_arg()
// as Microsoft ABI on an actual Microsoft platform, where
// __builtin_ms_va_list and __builtin_va_list are the same.)
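A hedged CUDA sketch of the new device-side rejection; the error fires on each va_arg expression built in device code:

    #include <stdarg.h>

    __device__ int sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, int);   // error: va_arg is not supported in
                                // __global__/__device__/__host__ __device__
      va_end(ap);
      return s;
    }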
@@ -12000,10 +12366,11 @@ bool Sema::ConversionToObjCStringLiteralCheck(QualType DstType, Expr *&Exp,
StringLiteral *SL = dyn_cast<StringLiteral>(SrcExpr);
if (!SL || !SL->isAscii())
return false;
- if (Diagnose)
+ if (Diagnose) {
Diag(SL->getLocStart(), diag::err_missing_atsign_prefix)
<< FixItHint::CreateInsertion(SL->getLocStart(), "@");
- Exp = BuildObjCStringLiteral(SL->getLocStart(), SL).get();
+ Exp = BuildObjCStringLiteral(SL->getLocStart(), SL).get();
+ }
return true;
}
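The Objective-C pattern this handles, sketched; the change ensures the literal is only rewritten when diagnostics are actually being emitted:

    // Objective-C
    NSString *s = "hello";   // error: string literal must be prefixed by '@'
                             // fixit inserts it: @"hello"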
@@ -12460,10 +12827,9 @@ void
Sema::PushExpressionEvaluationContext(ExpressionEvaluationContext NewContext,
Decl *LambdaContextDecl,
bool IsDecltype) {
- ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(),
- ExprNeedsCleanups, LambdaContextDecl,
- IsDecltype);
- ExprNeedsCleanups = false;
+ ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup,
+ LambdaContextDecl, IsDecltype);
+ Cleanup.reset();
if (!MaybeODRUseExprs.empty())
std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs);
}
@@ -12514,12 +12880,12 @@ void Sema::PopExpressionEvaluationContext() {
if (Rec.isUnevaluated() || Rec.Context == ConstantEvaluated) {
ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects,
ExprCleanupObjects.end());
- ExprNeedsCleanups = Rec.ParentNeedsCleanups;
+ Cleanup = Rec.ParentCleanup;
CleanupVarDeclMarking();
std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs);
// Otherwise, merge the contexts together.
} else {
- ExprNeedsCleanups |= Rec.ParentNeedsCleanups;
+ Cleanup.mergeFrom(Rec.ParentCleanup);
MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(),
Rec.SavedMaybeODRUseExprs.end());
}
@@ -12538,7 +12904,7 @@ void Sema::DiscardCleanupsInEvaluationContext() {
ExprCleanupObjects.erase(
ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects,
ExprCleanupObjects.end());
- ExprNeedsCleanups = false;
+ Cleanup.reset();
MaybeODRUseExprs.clear();
}
@@ -12563,6 +12929,11 @@ static bool IsPotentiallyEvaluatedContext(Sema &SemaRef) {
// definition of a null pointer constant is completely crazy.)
return false;
+ case Sema::DiscardedStatement:
+  // These are technically potentially evaluated contexts, but they have
+  // the effect of suppressing use marking.
+ return false;
+
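DiscardedStatement plausibly corresponds to the discarded branch of C++17 `if constexpr`, which landed in this same period; a hedged sketch of the use-marking suppression:

    void g();   // never defined

    template <typename T> void f() {
      if constexpr (sizeof(T) == 0)
        g();    // discarded statement: g is not marked used here, so no
                // definition of g is required
    }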
case Sema::ConstantEvaluated:
case Sema::PotentiallyEvaluated:
// We are in a potentially evaluated expression (or a constant-expression
@@ -12581,7 +12952,7 @@ static bool IsPotentiallyEvaluatedContext(Sema &SemaRef) {
/// \brief Mark a function referenced, and check whether it is odr-used
/// (C++ [basic.def.odr]p2, C99 6.9p3)
void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
- bool OdrUse) {
+ bool MightBeOdrUse) {
assert(Func && "No function?");
Func->setReferenced();
@@ -12592,39 +12963,53 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
// set of overloaded functions [...].
//
// We (incorrectly) mark overload resolution as an unevaluated context, so we
- // can just check that here. Skip the rest of this function if we've already
- // marked the function as used.
- if (Func->isUsed(/*CheckUsedAttr=*/false) ||
- !IsPotentiallyEvaluatedContext(*this)) {
- // C++11 [temp.inst]p3:
- // Unless a function template specialization has been explicitly
- // instantiated or explicitly specialized, the function template
- // specialization is implicitly instantiated when the specialization is
- // referenced in a context that requires a function definition to exist.
- //
- // We consider constexpr function templates to be referenced in a context
- // that requires a definition to exist whenever they are referenced.
- //
- // FIXME: This instantiates constexpr functions too frequently. If this is
- // really an unevaluated context (and we're not just in the definition of a
- // function template or overload resolution or other cases which we
- // incorrectly consider to be unevaluated contexts), and we're not in a
- // subexpression which we actually need to evaluate (for instance, a
- // template argument, array bound or an expression in a braced-init-list),
- // we are not permitted to instantiate this constexpr function definition.
- //
- // FIXME: This also implicitly defines special members too frequently. They
- // are only supposed to be implicitly defined if they are odr-used, but they
- // are not odr-used from constant expressions in unevaluated contexts.
- // However, they cannot be referenced if they are deleted, and they are
- // deleted whenever the implicit definition of the special member would
- // fail.
- if (!Func->isConstexpr() || Func->getBody())
- return;
- CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Func);
- if (!Func->isImplicitlyInstantiable() && (!MD || MD->isUserProvided()))
- return;
- }
+ // can just check that here.
+ bool OdrUse = MightBeOdrUse && IsPotentiallyEvaluatedContext(*this);
+
+ // Determine whether we require a function definition to exist, per
+ // C++11 [temp.inst]p3:
+ // Unless a function template specialization has been explicitly
+ // instantiated or explicitly specialized, the function template
+ // specialization is implicitly instantiated when the specialization is
+ // referenced in a context that requires a function definition to exist.
+ //
+ // We consider constexpr function templates to be referenced in a context
+ // that requires a definition to exist whenever they are referenced.
+ //
+ // FIXME: This instantiates constexpr functions too frequently. If this is
+ // really an unevaluated context (and we're not just in the definition of a
+ // function template or overload resolution or other cases which we
+ // incorrectly consider to be unevaluated contexts), and we're not in a
+ // subexpression which we actually need to evaluate (for instance, a
+ // template argument, array bound or an expression in a braced-init-list),
+ // we are not permitted to instantiate this constexpr function definition.
+ //
+ // FIXME: This also implicitly defines special members too frequently. They
+ // are only supposed to be implicitly defined if they are odr-used, but they
+ // are not odr-used from constant expressions in unevaluated contexts.
+ // However, they cannot be referenced if they are deleted, and they are
+ // deleted whenever the implicit definition of the special member would
+ // fail (with very few exceptions).
+ CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Func);
+ bool NeedDefinition =
+ OdrUse || (Func->isConstexpr() && (Func->isImplicitlyInstantiable() ||
+ (MD && !MD->isUserProvided())));
+
+ // C++14 [temp.expl.spec]p6:
+ // If a template [...] is explicitly specialized then that specialization
+ // shall be declared before the first use of that specialization that would
+ // cause an implicit instantiation to take place, in every translation unit
+ // in which such a use occurs
+ if (NeedDefinition &&
+ (Func->getTemplateSpecializationKind() != TSK_Undeclared ||
+ Func->getMemberSpecializationInfo()))
+ checkSpecializationVisibility(Loc, Func);
+
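The C++14 [temp.expl.spec]p6 situation the new visibility check is concerned with, sketched:

    template <typename T> void f(T) {}

    void use() { f(42); }        // implicitly instantiates f<int>

    template <> void f(int) {}   // ill-formed: the explicit specialization
                                 // is declared after a use that already
                                 // caused the implicit instantiation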
+ // If we don't need to mark the function as used, and we don't need to
+ // try to provide a definition, there's nothing more to do.
+ if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) &&
+ (!NeedDefinition || Func->getBody()))
+ return;
// Note that this declaration has been used.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Func)) {
@@ -12659,7 +13044,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) {
if (MethodDecl->isCopyAssignmentOperator())
DefineImplicitCopyAssignment(Loc, MethodDecl);
- else
+ else if (MethodDecl->isMoveAssignmentOperator())
DefineImplicitMoveAssignment(Loc, MethodDecl);
}
} else if (isa<CXXConversionDecl>(MethodDecl) &&
@@ -12684,8 +13069,6 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType()))
ResolveExceptionSpec(Loc, FPT);
- if (!OdrUse) return;
-
// Implicit instantiation of function templates and member functions of
// class templates.
if (Func->isImplicitlyInstantiable()) {
@@ -12733,10 +13116,12 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
// Walk redefinitions, as some of them may be instantiable.
for (auto i : Func->redecls()) {
if (!i->isUsed(false) && i->isImplicitlyInstantiable())
- MarkFunctionReferenced(Loc, i);
+ MarkFunctionReferenced(Loc, i, OdrUse);
}
}
+ if (!OdrUse) return;
+
// Keep track of used but undefined functions.
if (!Func->isDefined()) {
if (mightHaveNonExternalLinkage(Func))
@@ -12747,17 +13132,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
}
- // Normally the most current decl is marked used while processing the use and
- // any subsequent decls are marked used by decl merging. This fails with
- // template instantiation since marking can happen at the end of the file
- // and, because of the two phase lookup, this function is called with at
- // decl in the middle of a decl chain. We loop to maintain the invariant
- // that once a decl is used, all decls after it are also used.
- for (FunctionDecl *F = Func->getMostRecentDecl();; F = F->getPreviousDecl()) {
- F->markUsed(Context);
- if (F == Func)
- break;
- }
+ Func->markUsed(Context);
}
static void
@@ -12945,7 +13320,8 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
return false;
}
const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>();
- if (HasBlocksAttr || CaptureType->isReferenceType()) {
+ if (HasBlocksAttr || CaptureType->isReferenceType() ||
+ (S.getLangOpts().OpenMP && S.IsOpenMPCapturedDecl(Var))) {
// Block capture by reference does not change the capture or
// declaration reference types.
ByRef = true;
@@ -13013,14 +13389,13 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
QualType &DeclRefType,
const bool RefersToCapturedVariable,
Sema &S) {
-
// By default, capture variables by reference.
bool ByRef = true;
// Using an LValue reference type is consistent with Lambdas (see below).
- if (S.getLangOpts().OpenMP) {
- ByRef = S.IsOpenMPCapturedByRef(Var, RSI);
- if (S.IsOpenMPCapturedVar(Var))
+ if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
+ if (S.IsOpenMPCapturedDecl(Var))
DeclRefType = DeclRefType.getUnqualifiedType();
+ ByRef = S.IsOpenMPCapturedByRef(Var, RSI->OpenMPLevel);
}
if (ByRef)
@@ -13060,7 +13435,7 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
/// \brief Create a field within the lambda class for the variable
/// being captured.
-static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI, VarDecl *Var,
+static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI,
QualType FieldType, QualType DeclRefType,
SourceLocation Loc,
bool RefersToCapturedVariable) {
@@ -13154,7 +13529,7 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
// Capture this variable in the lambda.
if (BuildAndDiagnose)
- addAsFieldToClosureType(S, LSI, Var, CaptureType, DeclRefType, Loc,
+ addAsFieldToClosureType(S, LSI, CaptureType, DeclRefType, Loc,
RefersToCapturedVariable);
// Compute the type of a reference to this captured variable.
@@ -13210,7 +13585,7 @@ bool Sema::tryCaptureVariable(
// Capture global variables if it is required to use private copy of this
// variable.
bool IsGlobal = !Var->hasLocalStorage();
- if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedVar(Var)))
+ if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var)))
return true;
// Walk up the stack to determine whether we can capture the variable,
@@ -13226,7 +13601,6 @@ bool Sema::tryCaptureVariable(
bool Nested = false;
bool Explicit = (Kind != TryCapture_Implicit);
unsigned FunctionScopesIndex = MaxFunctionScopesIndex;
- unsigned OpenMPLevel = 0;
do {
// Only block literals, captured statements, and lambda expressions can
// capture; other scopes don't work.
@@ -13292,20 +13666,19 @@ bool Sema::tryCaptureVariable(
// just break here. Similarly, global variables that are captured in a
// target region should not be captured outside the scope of the region.
if (RSI->CapRegionKind == CR_OpenMP) {
- auto isTargetCap = isOpenMPTargetCapturedVar(Var, OpenMPLevel);
+ auto IsTargetCap = isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel);
// When we detect target captures we are looking from inside the
// target region, so we need to propagate the capture from the
// enclosing region; the capture is therefore not initially nested.
- if (isTargetCap)
+ if (IsTargetCap)
FunctionScopesIndex--;
- if (isTargetCap || isOpenMPPrivateVar(Var, OpenMPLevel)) {
- Nested = !isTargetCap;
+ if (IsTargetCap || isOpenMPPrivateDecl(Var, RSI->OpenMPLevel)) {
+ Nested = !IsTargetCap;
DeclRefType = DeclRefType.getUnqualifiedType();
CaptureType = Context.getLValueReferenceType(DeclRefType);
break;
}
- ++OpenMPLevel;
}
}
}
@@ -13316,8 +13689,9 @@ bool Sema::tryCaptureVariable(
Diag(ExprLoc, diag::err_lambda_impcap) << Var->getDeclName();
Diag(Var->getLocation(), diag::note_previous_decl)
<< Var->getDeclName();
- Diag(cast<LambdaScopeInfo>(CSI)->Lambda->getLocStart(),
- diag::note_lambda_decl);
+ if (cast<LambdaScopeInfo>(CSI)->Lambda)
+ Diag(cast<LambdaScopeInfo>(CSI)->Lambda->getLocStart(),
+ diag::note_lambda_decl);
// FIXME: If we error out because an outer lambda can not implicitly
// capture a variable that an inner lambda explicitly captures, we
// should have the inner lambda do the explicit capture - because
@@ -13511,7 +13885,8 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
(SemaRef.CurContext != Var->getDeclContext() &&
Var->getDeclContext()->isFunctionOrMethod() && Var->hasLocalStorage());
if (RefersToEnclosingScope) {
- if (LambdaScopeInfo *const LSI = SemaRef.getCurLambda()) {
+ if (LambdaScopeInfo *const LSI =
+ SemaRef.getCurLambda(/*IgnoreCapturedRegions=*/true)) {
// If a variable could potentially be odr-used, defer marking it so
// until we finish analyzing the full expression for any
// lvalue-to-rvalue
@@ -13539,6 +13914,12 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
"Can't instantiate a partial template specialization.");
+  // If this might be a member specialization of a static data member, check
+  // that the specialization is visible. We already did the checks for
+  // variable template specializations when we created them.
+ if (TSK != TSK_Undeclared && !isa<VarTemplateSpecializationDecl>(Var))
+ SemaRef.checkSpecializationVisibility(Loc, Var);
+
// Perform implicit instantiation of static data members, static data member
// templates of class templates, and variable template specializations. Delay
// instantiations of variable templates, except for those that could be used
@@ -13582,7 +13963,8 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
}
}
- if(!MarkODRUsed) return;
+ if (!MarkODRUsed)
+ return;
// Per C++11 [basic.def.odr], a variable is odr-used "unless it satisfies
// the requirements for appearing in a constant expression (5.19) and, if
@@ -13610,13 +13992,16 @@ void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) {
}
static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc,
- Decl *D, Expr *E, bool OdrUse) {
+ Decl *D, Expr *E, bool MightBeOdrUse) {
+ if (SemaRef.isInOpenMPDeclareTargetContext())
+ SemaRef.checkDeclIsAllowedInOpenMPTarget(E, D);
+
if (VarDecl *Var = dyn_cast<VarDecl>(D)) {
DoMarkVarDeclReferenced(SemaRef, Loc, Var, E);
return;
}
- SemaRef.MarkAnyDeclReferenced(Loc, D, OdrUse);
+ SemaRef.MarkAnyDeclReferenced(Loc, D, MightBeOdrUse);
// If this is a call to a method via a cast, also mark the method in the
// derived class used in case codegen can devirtualize the call.
@@ -13638,7 +14023,7 @@ static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc,
CXXMethodDecl *DM = MD->getCorrespondingMethodInClass(MostDerivedClassDecl);
if (!DM || DM->isPure())
return;
- SemaRef.MarkAnyDeclReferenced(Loc, DM, OdrUse);
+ SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse);
}
/// \brief Perform reference-marking and odr-use handling for a DeclRefExpr.
@@ -13661,30 +14046,31 @@ void Sema::MarkMemberReferenced(MemberExpr *E) {
// overload resolution when referred to from a potentially-evaluated
// expression, is odr-used, unless it is a pure virtual function and its
// name is not explicitly qualified.
- bool OdrUse = true;
+ bool MightBeOdrUse = true;
if (E->performsVirtualDispatch(getLangOpts())) {
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getMemberDecl()))
if (Method->isPure())
- OdrUse = false;
+ MightBeOdrUse = false;
}
SourceLocation Loc = E->getMemberLoc().isValid() ?
E->getMemberLoc() : E->getLocStart();
- MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, OdrUse);
+ MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse);
}
/// \brief Perform marking for a reference to an arbitrary declaration. It
/// marks the declaration referenced, and performs odr-use checking for
/// functions and variables. This method should not be used when building a
/// normal expression which refers to a variable.
-void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D, bool OdrUse) {
- if (OdrUse) {
+void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D,
+ bool MightBeOdrUse) {
+ if (MightBeOdrUse) {
if (auto *VD = dyn_cast<VarDecl>(D)) {
MarkVariableReferenced(Loc, VD);
return;
}
}
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
- MarkFunctionReferenced(Loc, FD, OdrUse);
+ MarkFunctionReferenced(Loc, FD, MightBeOdrUse);
return;
}
D->setReferenced();
@@ -13838,6 +14224,7 @@ bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement,
switch (ExprEvalContexts.back().Context) {
case Unevaluated:
case UnevaluatedAbstract:
+ case DiscardedStatement:
// The argument will never be evaluated, so don't complain.
break;
@@ -13987,7 +14374,8 @@ void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) {
}
}
-ExprResult Sema::CheckBooleanCondition(Expr *E, SourceLocation Loc) {
+ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E,
+ bool IsConstexpr) {
DiagnoseAssignmentAsCondition(E);
if (ParenExpr *parenE = dyn_cast<ParenExpr>(E))
DiagnoseEqualityWithExtraParens(parenE);
@@ -13998,7 +14386,7 @@ ExprResult Sema::CheckBooleanCondition(Expr *E, SourceLocation Loc) {
if (!E->isTypeDependent()) {
if (getLangOpts().CPlusPlus)
- return CheckCXXBooleanCondition(E); // C++ 6.4p4
+ return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4
ExprResult ERes = DefaultFunctionArrayLvalueConversion(E);
if (ERes.isInvalid())
@@ -14017,12 +14405,36 @@ ExprResult Sema::CheckBooleanCondition(Expr *E, SourceLocation Loc) {
return E;
}
-ExprResult Sema::ActOnBooleanCondition(Scope *S, SourceLocation Loc,
- Expr *SubExpr) {
+Sema::ConditionResult Sema::ActOnCondition(Scope *S, SourceLocation Loc,
+ Expr *SubExpr, ConditionKind CK) {
+ // Empty conditions are valid in for-statements.
if (!SubExpr)
- return ExprError();
+ return ConditionResult();
+
+ ExprResult Cond;
+ switch (CK) {
+ case ConditionKind::Boolean:
+ Cond = CheckBooleanCondition(Loc, SubExpr);
+ break;
+
+ case ConditionKind::ConstexprIf:
+ Cond = CheckBooleanCondition(Loc, SubExpr, true);
+ break;
+
+ case ConditionKind::Switch:
+ Cond = CheckSwitchCondition(Loc, SubExpr);
+ break;
+ }
+ if (Cond.isInvalid())
+ return ConditionError();
- return CheckBooleanCondition(SubExpr, Loc);
+ // FIXME: FullExprArg doesn't have an invalid bit, so check nullness instead.
+ FullExprArg FullExpr = MakeFullExpr(Cond.get(), Loc);
+ if (!FullExpr.get())
+ return ConditionError();
+
+ return ConditionResult(*this, nullptr, FullExpr,
+ CK == ConditionKind::ConstexprIf);
}
namespace {
@@ -14457,6 +14869,12 @@ ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) {
ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType,
Expr *CastExpr, CastKind &CastKind,
ExprValueKind &VK, CXXCastPath &Path) {
+ // The type we're casting to must be either void or complete.
+ if (!CastType->isVoidType() &&
+ RequireCompleteType(TypeRange.getBegin(), CastType,
+ diag::err_typecheck_cast_to_incomplete))
+ return ExprError();
+
// Rewrite the casted expression from scratch.
ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr);
if (!result.isUsable()) return ExprError();
@@ -14559,16 +14977,20 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
case BuiltinType::Overload: {
// Try to resolve a single function template specialization.
// This is obligatory.
- ExprResult result = E;
- if (ResolveAndFixSingleFunctionTemplateSpecialization(result, false)) {
- return result;
+ ExprResult Result = E;
+ if (ResolveAndFixSingleFunctionTemplateSpecialization(Result, false))
+ return Result;
+
+ // No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
+ // leaves Result unchanged on failure.
+ Result = E;
+ if (resolveAndFixAddressOfOnlyViableOverloadCandidate(Result))
+ return Result;
// If that failed, try to recover with a call.
- } else {
- tryToRecoverWithCall(result, PDiag(diag::err_ovl_unresolvable),
- /*complain*/ true);
- return result;
- }
+ tryToRecoverWithCall(Result, PDiag(diag::err_ovl_unresolvable),
+ /*complain*/ true);
+ return Result;
}
// Bound member functions.
@@ -14627,8 +15049,10 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
return ExprError();
// Everything else should be impossible.
-#define BUILTIN_TYPE(Id, SingletonId) \
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
+#include "clang/Basic/OpenCLImageTypes.def"
+#define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id:
#define PLACEHOLDER_TYPE(Id, SingletonId)
#include "clang/AST/BuiltinTypes.def"
break;
@@ -14665,3 +15089,27 @@ Sema::ActOnObjCBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) {
return new (Context)
ObjCBoolLiteralExpr(Kind == tok::kw___objc_yes, BoolT, OpLoc);
}
+
+ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
+ llvm::ArrayRef<AvailabilitySpec> AvailSpecs, SourceLocation AtLoc,
+ SourceLocation RParen) {
+
+ StringRef Platform = getASTContext().getTargetInfo().getPlatformName();
+
+ auto Spec = std::find_if(AvailSpecs.begin(), AvailSpecs.end(),
+ [&](const AvailabilitySpec &Spec) {
+ return Spec.getPlatform() == Platform;
+ });
+
+ VersionTuple Version;
+ if (Spec != AvailSpecs.end())
+ Version = Spec->getVersion();
+ else
+ // This is the '*' case in @available. We should diagnose this; the
+ // programmer should explicitly account for this case if they target this
+ // platform.
+ Diag(AtLoc, diag::warn_available_using_star_case) << RParen << Platform;
+
+ return new (Context)
+ ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy);
+}
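A hedged sketch of the Objective-C source form this builds, including the '*' clause the new warning is about:

    if (@available(macOS 10.12, iOS 10, *)) {
      // safe to use APIs introduced in those versions
    }
    // When compiling for a platform not named in the list, the '*' clause is
    // what applies, and the patch warns so the programmer accounts for that
    // platform explicitly.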
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
index 38fbea18d790..dfdd36752bf6 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp
@@ -113,7 +113,7 @@ ParsedType Sema::getDestructorName(SourceLocation TildeLoc,
bool LookInScope = false;
if (SS.isInvalid())
- return ParsedType();
+ return nullptr;
// If we have an object type, it's because we are in a
// pseudo-destructor-expression or a member access expression, and
@@ -198,7 +198,7 @@ ParsedType Sema::getDestructorName(SourceLocation TildeLoc,
// FIXME: Should we be suppressing ambiguities here?
if (Found.isAmbiguous())
- return ParsedType();
+ return nullptr;
if (TypeDecl *Type = Found.getAsSingle<TypeDecl>()) {
QualType T = Context.getTypeDeclType(Type);
@@ -320,12 +320,12 @@ ParsedType Sema::getDestructorName(SourceLocation TildeLoc,
}
}
- return ParsedType();
+ return nullptr;
}
ParsedType Sema::getDestructorType(const DeclSpec& DS, ParsedType ObjectType) {
if (DS.getTypeSpecType() == DeclSpec::TST_error || !ObjectType)
- return ParsedType();
+ return nullptr;
assert(DS.getTypeSpecType() == DeclSpec::TST_decltype
&& "only get destructor types from declspecs");
QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
@@ -336,7 +336,7 @@ ParsedType Sema::getDestructorType(const DeclSpec& DS, ParsedType ObjectType) {
Diag(DS.getTypeSpecTypeLoc(), diag::err_destructor_expr_type_mismatch)
<< T << SearchType;
- return ParsedType();
+ return nullptr;
}
bool Sema::checkLiteralOperatorId(const CXXScopeSpec &SS,
@@ -508,23 +508,60 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc,
return BuildCXXTypeId(TypeInfoType, OpLoc, (Expr*)TyOrExpr, RParenLoc);
}
+/// Grabs __declspec(uuid()) off a type, accumulating any GUIDs found into
+/// \p UuidAttrs; the set is left empty if we cannot resolve a single GUID.
+static void
+getUuidAttrOfType(Sema &SemaRef, QualType QT,
+ llvm::SmallSetVector<const UuidAttr *, 1> &UuidAttrs) {
+ // Optionally remove one level of pointer, reference or array indirection.
+ const Type *Ty = QT.getTypePtr();
+ if (QT->isPointerType() || QT->isReferenceType())
+ Ty = QT->getPointeeType().getTypePtr();
+ else if (QT->isArrayType())
+ Ty = Ty->getBaseElementTypeUnsafe();
+
+ const auto *RD = Ty->getAsCXXRecordDecl();
+ if (!RD)
+ return;
+
+ if (const auto *Uuid = RD->getMostRecentDecl()->getAttr<UuidAttr>()) {
+ UuidAttrs.insert(Uuid);
+ return;
+ }
+
+ // __uuidof can grab UUIDs from template arguments.
+ if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
+ const TemplateArgumentList &TAL = CTSD->getTemplateArgs();
+ for (const TemplateArgument &TA : TAL.asArray()) {
+ const UuidAttr *UuidForTA = nullptr;
+ if (TA.getKind() == TemplateArgument::Type)
+ getUuidAttrOfType(SemaRef, TA.getAsType(), UuidAttrs);
+ else if (TA.getKind() == TemplateArgument::Declaration)
+ getUuidAttrOfType(SemaRef, TA.getAsDecl()->getType(), UuidAttrs);
+
+ if (UuidForTA)
+ UuidAttrs.insert(UuidForTA);
+ }
+ }
+}
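An illustrative use of the MS extension this rewires (compile with -fms-extensions; the _GUID layout below is the conventional guiddef.h one, declared here only to keep the sketch self-contained):

    struct _GUID { unsigned long a; unsigned short b, c; unsigned char d[8]; };

    struct __declspec(uuid("01234567-89ab-cdef-0123-456789abcdef")) S {};

    const _GUID &g = __uuidof(S);   // one GUID found: OK, now carried as a
                                    // string on the CXXUuidofExpr
    const _GUID &z = __uuidof(0);   // null pointer constant: yields the
                                    // all-zero GUID per the change below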
+
/// \brief Build a Microsoft __uuidof expression with a type operand.
ExprResult Sema::BuildCXXUuidof(QualType TypeInfoType,
SourceLocation TypeidLoc,
TypeSourceInfo *Operand,
SourceLocation RParenLoc) {
+ StringRef UuidStr;
if (!Operand->getType()->isDependentType()) {
- bool HasMultipleGUIDs = false;
- if (!CXXUuidofExpr::GetUuidAttrOfType(Operand->getType(),
- &HasMultipleGUIDs)) {
- if (HasMultipleGUIDs)
- return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
- else
- return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid));
- }
+ llvm::SmallSetVector<const UuidAttr *, 1> UuidAttrs;
+ getUuidAttrOfType(*this, Operand->getType(), UuidAttrs);
+ if (UuidAttrs.empty())
+ return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid));
+ if (UuidAttrs.size() > 1)
+ return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
+ UuidStr = UuidAttrs.back()->getGuid();
}
- return new (Context) CXXUuidofExpr(TypeInfoType.withConst(), Operand,
+ return new (Context) CXXUuidofExpr(TypeInfoType.withConst(), Operand, UuidStr,
SourceRange(TypeidLoc, RParenLoc));
}
@@ -533,18 +570,22 @@ ExprResult Sema::BuildCXXUuidof(QualType TypeInfoType,
SourceLocation TypeidLoc,
Expr *E,
SourceLocation RParenLoc) {
+ StringRef UuidStr;
if (!E->getType()->isDependentType()) {
- bool HasMultipleGUIDs = false;
- if (!CXXUuidofExpr::GetUuidAttrOfType(E->getType(), &HasMultipleGUIDs) &&
- !E->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
- if (HasMultipleGUIDs)
- return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
- else
+ if (E->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) {
+ UuidStr = "00000000-0000-0000-0000-000000000000";
+ } else {
+ llvm::SmallSetVector<const UuidAttr *, 1> UuidAttrs;
+ getUuidAttrOfType(*this, E->getType(), UuidAttrs);
+ if (UuidAttrs.empty())
return ExprError(Diag(TypeidLoc, diag::err_uuidof_without_guid));
+ if (UuidAttrs.size() > 1)
+ return ExprError(Diag(TypeidLoc, diag::err_uuidof_with_multiple_guids));
+ UuidStr = UuidAttrs.back()->getGuid();
}
}
- return new (Context) CXXUuidofExpr(TypeInfoType.withConst(), E,
+ return new (Context) CXXUuidofExpr(TypeInfoType.withConst(), E, UuidStr,
SourceRange(TypeidLoc, RParenLoc));
}
@@ -831,27 +872,123 @@ bool Sema::CheckCXXThrowOperand(SourceLocation ThrowLoc,
return false;
}
+static QualType adjustCVQualifiersForCXXThisWithinLambda(
+ ArrayRef<FunctionScopeInfo *> FunctionScopes, QualType ThisTy,
+ DeclContext *CurSemaContext, ASTContext &ASTCtx) {
+
+ QualType ClassType = ThisTy->getPointeeType();
+ LambdaScopeInfo *CurLSI = nullptr;
+ DeclContext *CurDC = CurSemaContext;
+
+ // Iterate through the stack of lambdas starting from the innermost lambda to
+ // the outermost lambda, checking if '*this' is ever captured by copy - since
+ // that could change the cv-qualifiers of the '*this' object.
+ // The object referred to by '*this' starts out with the cv-qualifiers of its
+ // member function. We then start with the innermost lambda and iterate
+ // outward checking to see if any lambda performs a by-copy capture of '*this'
+ // - and if so, any nested lambda must respect the 'constness' of that
+  // capturing lambda's call operator.
+ //
+
+ // The issue is that we cannot rely entirely on the FunctionScopeInfo stack
+ // since ScopeInfos are pushed on during parsing and treetransforming. But
+ // since a generic lambda's call operator can be instantiated anywhere (even
+ // end of the TU) we need to be able to examine its enclosing lambdas and so
+ // we use the DeclContext to get a hold of the closure-class and query it for
+ // capture information. The reason we don't just resort to always using the
+ // DeclContext chain is that it is only mature for lambda expressions
+ // enclosing generic lambda's call operators that are being instantiated.
+
+ for (int I = FunctionScopes.size();
+ I-- && isa<LambdaScopeInfo>(FunctionScopes[I]);
+ CurDC = getLambdaAwareParentOfDeclContext(CurDC)) {
+ CurLSI = cast<LambdaScopeInfo>(FunctionScopes[I]);
+
+ if (!CurLSI->isCXXThisCaptured())
+ continue;
+
+ auto C = CurLSI->getCXXThisCapture();
+
+ if (C.isCopyCapture()) {
+ ClassType.removeLocalCVRQualifiers(Qualifiers::CVRMask);
+ if (CurLSI->CallOperator->isConst())
+ ClassType.addConst();
+ return ASTCtx.getPointerType(ClassType);
+ }
+ }
+  // We've run out of ScopeInfos, but check if CurDC is a lambda (which can
+  // happen during instantiation of generic lambdas).
+ if (isLambdaCallOperator(CurDC)) {
+ assert(CurLSI);
+ assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator));
+ assert(CurDC == getLambdaAwareParentOfDeclContext(CurLSI->CallOperator));
+
+ auto IsThisCaptured =
+ [](CXXRecordDecl *Closure, bool &IsByCopy, bool &IsConst) {
+ IsConst = false;
+ IsByCopy = false;
+ for (auto &&C : Closure->captures()) {
+ if (C.capturesThis()) {
+ if (C.getCaptureKind() == LCK_StarThis)
+ IsByCopy = true;
+ if (Closure->getLambdaCallOperator()->isConst())
+ IsConst = true;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ bool IsByCopyCapture = false;
+ bool IsConstCapture = false;
+ CXXRecordDecl *Closure = cast<CXXRecordDecl>(CurDC->getParent());
+ while (Closure &&
+ IsThisCaptured(Closure, IsByCopyCapture, IsConstCapture)) {
+ if (IsByCopyCapture) {
+ ClassType.removeLocalCVRQualifiers(Qualifiers::CVRMask);
+ if (IsConstCapture)
+ ClassType.addConst();
+ return ASTCtx.getPointerType(ClassType);
+ }
+ Closure = isLambdaCallOperator(Closure->getParent())
+ ? cast<CXXRecordDecl>(Closure->getParent()->getParent())
+ : nullptr;
+ }
+ }
+ return ASTCtx.getPointerType(ClassType);
+}
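The C++17 capture-of-*this*-by-copy behavior the helper models, sketched:

    struct S {
      int i = 0;
      void m() {
        auto f = [*this] { return i; };         // non-mutable: the call
                                                // operator is const, so
                                                // 'this' inside is const S*
        auto g = [*this]() mutable { i = 1; };  // mutable: the closure's
                                                // copy of *this is modifiable
      }
    };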
+
QualType Sema::getCurrentThisType() {
DeclContext *DC = getFunctionLevelDeclContext();
QualType ThisTy = CXXThisTypeOverride;
+
if (CXXMethodDecl *method = dyn_cast<CXXMethodDecl>(DC)) {
if (method && method->isInstance())
ThisTy = method->getThisType(Context);
}
- if (ThisTy.isNull()) {
- if (isGenericLambdaCallOperatorSpecialization(CurContext) &&
- CurContext->getParent()->getParent()->isRecord()) {
- // This is a generic lambda call operator that is being instantiated
- // within a default initializer - so use the enclosing class as 'this'.
- // There is no enclosing member function to retrieve the 'this' pointer
- // from.
- QualType ClassTy = Context.getTypeDeclType(
- cast<CXXRecordDecl>(CurContext->getParent()->getParent()));
- // There are no cv-qualifiers for 'this' within default initializers,
- // per [expr.prim.general]p4.
- return Context.getPointerType(ClassTy);
- }
+
+ if (ThisTy.isNull() && isLambdaCallOperator(CurContext) &&
+ !ActiveTemplateInstantiations.empty()) {
+
+ assert(isa<CXXRecordDecl>(DC) &&
+ "Trying to get 'this' type from static method?");
+
+ // This is a lambda call operator that is being instantiated as a default
+ // initializer. DC must point to the enclosing class type, so we can recover
+ // the 'this' type from it.
+
+ QualType ClassTy = Context.getTypeDeclType(cast<CXXRecordDecl>(DC));
+ // There are no cv-qualifiers for 'this' within default initializers,
+ // per [expr.prim.general]p4.
+ ThisTy = Context.getPointerType(ClassTy);
}
+
+ // If we are within a lambda's call operator, the cv-qualifiers of 'this'
+ // might need to be adjusted if the lambda or any of its enclosing lambda's
+ // captures '*this' by copy.
+ if (!ThisTy.isNull() && isLambdaCallOperator(CurContext))
+ return adjustCVQualifiersForCXXThisWithinLambda(FunctionScopes, ThisTy,
+ CurContext, Context);
return ThisTy;
}
@@ -870,6 +1007,8 @@ Sema::CXXThisScopeRAII::CXXThisScopeRAII(Sema &S,
else
Record = cast<CXXRecordDecl>(ContextDecl);
+ // We care only for CVR qualifiers here, so cut everything else.
+ CXXThisTypeQuals &= Qualifiers::FastMask;
S.CXXThisTypeOverride
= S.Context.getPointerType(
S.Context.getRecordType(Record).withCVRQualifiers(CXXThisTypeQuals));
@@ -884,28 +1023,84 @@ Sema::CXXThisScopeRAII::~CXXThisScopeRAII() {
}
}
-static Expr *captureThis(ASTContext &Context, RecordDecl *RD,
- QualType ThisTy, SourceLocation Loc) {
- FieldDecl *Field
- = FieldDecl::Create(Context, RD, Loc, Loc, nullptr, ThisTy,
- Context.getTrivialTypeSourceInfo(ThisTy, Loc),
- nullptr, false, ICIS_NoInit);
+static Expr *captureThis(Sema &S, ASTContext &Context, RecordDecl *RD,
+ QualType ThisTy, SourceLocation Loc,
+ const bool ByCopy) {
+
+ QualType AdjustedThisTy = ThisTy;
+ // The type of the corresponding data member (not a 'this' pointer if 'by
+ // copy').
+ QualType CaptureThisFieldTy = ThisTy;
+ if (ByCopy) {
+    // If we are capturing the object referred to by '*this' by copy, ignore
+    // any cv qualifiers inherited from the type of the member function when
+    // forming the type of the closure-type's corresponding data member and
+    // of any use of 'this'.
+ CaptureThisFieldTy = ThisTy->getPointeeType();
+ CaptureThisFieldTy.removeLocalCVRQualifiers(Qualifiers::CVRMask);
+ AdjustedThisTy = Context.getPointerType(CaptureThisFieldTy);
+ }
+
+ FieldDecl *Field = FieldDecl::Create(
+ Context, RD, Loc, Loc, nullptr, CaptureThisFieldTy,
+ Context.getTrivialTypeSourceInfo(CaptureThisFieldTy, Loc), nullptr, false,
+ ICIS_NoInit);
+
Field->setImplicit(true);
Field->setAccess(AS_private);
RD->addDecl(Field);
- return new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/true);
+ Expr *This =
+ new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/ true);
+ if (ByCopy) {
+ Expr *StarThis = S.CreateBuiltinUnaryOp(Loc,
+ UO_Deref,
+ This).get();
+ InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture(
+ nullptr, CaptureThisFieldTy, Loc);
+ InitializationKind InitKind = InitializationKind::CreateDirect(Loc, Loc, Loc);
+ InitializationSequence Init(S, Entity, InitKind, StarThis);
+ ExprResult ER = Init.Perform(S, Entity, InitKind, StarThis);
+ if (ER.isInvalid()) return nullptr;
+ return ER.get();
+ }
+ return This;
}
-bool Sema::CheckCXXThisCapture(SourceLocation Loc, bool Explicit,
- bool BuildAndDiagnose, const unsigned *const FunctionScopeIndexToStopAt) {
+bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit,
+ bool BuildAndDiagnose, const unsigned *const FunctionScopeIndexToStopAt,
+ const bool ByCopy) {
// We don't need to capture this in an unevaluated context.
if (isUnevaluatedContext() && !Explicit)
return true;
+
+ assert((!ByCopy || Explicit) && "cannot implicitly capture *this by value");
const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt ?
- *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1;
- // Otherwise, check that we can capture 'this'.
- unsigned NumClosures = 0;
+ *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1;
+
+ // Check that we can capture the *enclosing object* (referred to by '*this')
+ // by the capturing-entity/closure (lambda/block/etc) at
+ // MaxFunctionScopesIndex-deep on the FunctionScopes stack.
+
+ // Note: The *enclosing object* can only be captured by-value by a
+ // closure that is a lambda, using the explicit notation:
+ // [*this] { ... }.
+ // Every other capture of the *enclosing object* results in its by-reference
+ // capture.
+
+ // For a closure 'L' (at MaxFunctionScopesIndex in the FunctionScopes
+ // stack), we can capture the *enclosing object* only if:
+ // - 'L' has an explicit byref or byval capture of the *enclosing object*
+ // - or, 'L' has an implicit capture.
+ // AND
+ // -- there is no enclosing closure
+ // -- or, there is some enclosing closure 'E' that has already captured the
+ // *enclosing object*, and every intervening closure (if any) between 'E'
+ // and 'L' can implicitly capture the *enclosing object*.
+ // -- or, every enclosing closure can implicitly capture the
+ // *enclosing object*
+
+
+ unsigned NumCapturingClosures = 0;
for (unsigned idx = MaxFunctionScopesIndex; idx != 0; idx--) {
if (CapturingScopeInfo *CSI =
dyn_cast<CapturingScopeInfo>(FunctionScopes[idx])) {
@@ -917,44 +1112,69 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, bool Explicit,
if (LSI && isGenericLambdaCallOperatorSpecialization(LSI->CallOperator)) {
// This context can't implicitly capture 'this'; fail out.
if (BuildAndDiagnose)
- Diag(Loc, diag::err_this_capture) << Explicit;
+ Diag(Loc, diag::err_this_capture)
+ << (Explicit && idx == MaxFunctionScopesIndex);
return true;
}
if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_LambdaByref ||
CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_LambdaByval ||
CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_Block ||
CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_CapturedRegion ||
- Explicit) {
+ (Explicit && idx == MaxFunctionScopesIndex)) {
+ // Regarding (Explicit && idx == MaxFunctionScopesIndex): only the first
+ // iteration through can be an explicit capture, all enclosing closures,
+ // if any, must perform implicit captures.
+
// This closure can capture 'this'; continue looking upwards.
- NumClosures++;
- Explicit = false;
+ NumCapturingClosures++;
continue;
}
// This context can't implicitly capture 'this'; fail out.
if (BuildAndDiagnose)
- Diag(Loc, diag::err_this_capture) << Explicit;
+ Diag(Loc, diag::err_this_capture)
+ << (Explicit && idx == MaxFunctionScopesIndex);
return true;
}
break;
}
if (!BuildAndDiagnose) return false;
- // Mark that we're implicitly capturing 'this' in all the scopes we skipped.
+
+ // If we got here, then the closure at MaxFunctionScopesIndex on the
+  // FunctionScopes stack can capture the *enclosing object*, so capture it
+ // (including implicit by-reference captures in any enclosing closures).
+
+ // In the loop below, respect the ByCopy flag only for the closure requesting
+ // the capture (i.e. first iteration through the loop below). Ignore it for
+  // all enclosing closures up to NumCapturingClosures (since they must be
+ // implicitly capturing the *enclosing object* by reference (see loop
+ // above)).
+ assert((!ByCopy ||
+ dyn_cast<LambdaScopeInfo>(FunctionScopes[MaxFunctionScopesIndex])) &&
+ "Only a lambda can capture the enclosing object (referred to by "
+ "*this) by copy");
// FIXME: We need to delay this marking in PotentiallyPotentiallyEvaluated
// contexts.
- for (unsigned idx = MaxFunctionScopesIndex; NumClosures;
- --idx, --NumClosures) {
+ QualType ThisTy = getCurrentThisType();
+ for (unsigned idx = MaxFunctionScopesIndex; NumCapturingClosures;
+ --idx, --NumCapturingClosures) {
CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[idx]);
Expr *ThisExpr = nullptr;
- QualType ThisTy = getCurrentThisType();
- if (LambdaScopeInfo *LSI = dyn_cast<LambdaScopeInfo>(CSI))
- // For lambda expressions, build a field and an initializing expression.
- ThisExpr = captureThis(Context, LSI->Lambda, ThisTy, Loc);
- else if (CapturedRegionScopeInfo *RSI
+
+ if (LambdaScopeInfo *LSI = dyn_cast<LambdaScopeInfo>(CSI)) {
+ // For lambda expressions, build a field and an initializing expression,
+ // and capture the *enclosing object* by copy only if this is the first
+ // iteration.
+ ThisExpr = captureThis(*this, Context, LSI->Lambda, ThisTy, Loc,
+ ByCopy && idx == MaxFunctionScopesIndex);
+
+ } else if (CapturedRegionScopeInfo *RSI
= dyn_cast<CapturedRegionScopeInfo>(FunctionScopes[idx]))
- ThisExpr = captureThis(Context, RSI->TheRecordDecl, ThisTy, Loc);
+ ThisExpr =
+ captureThis(*this, Context, RSI->TheRecordDecl, ThisTy, Loc,
+ false/*ByCopy*/);
- bool isNested = NumClosures > 1;
- CSI->addThisCapture(isNested, Loc, ThisTy, ThisExpr);
+ bool isNested = NumCapturingClosures > 1;
+ CSI->addThisCapture(isNested, Loc, ThisExpr, ByCopy);
}
return false;
}
@@ -996,7 +1216,14 @@ Sema::ActOnCXXTypeConstructExpr(ParsedType TypeRep,
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(Ty, SourceLocation());
- return BuildCXXTypeConstructExpr(TInfo, LParenLoc, exprs, RParenLoc);
+ auto Result = BuildCXXTypeConstructExpr(TInfo, LParenLoc, exprs, RParenLoc);
+ // Avoid creating a non-type-dependent expression that contains typos.
+ // Non-type-dependent expressions are liable to be discarded without
+ // checking for embedded typos.
+ if (!Result.isInvalid() && Result.get()->isInstantiationDependent() &&
+ !Result.get()->isTypeDependent())
+ Result = CorrectDelayedTyposInExpr(Result.get());
+ return Result;
}
/// ActOnCXXTypeConstructExpr - Parse construction of a specified type.
@@ -1551,7 +1778,8 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
// new.
if (PlacementArgs.empty() && OperatorNew &&
(OperatorNew->isImplicit() ||
- getSourceManager().isInSystemHeader(OperatorNew->getLocStart()))) {
+ (OperatorNew->getLocStart().isValid() &&
+ getSourceManager().isInSystemHeader(OperatorNew->getLocStart())))) {
if (unsigned Align = Context.getPreferredTypeAlign(AllocType.getTypePtr())){
unsigned SuitableAlign = Context.getTargetInfo().getSuitableAlign();
if (Align > SuitableAlign)
@@ -2113,14 +2341,13 @@ void Sema::DeclareGlobalNewDelete() {
QualType VoidPtr = Context.getPointerType(Context.VoidTy);
QualType SizeT = Context.getSizeType();
- bool AssumeSaneOperatorNew = getLangOpts().AssumeSaneOperatorNew;
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_New),
- VoidPtr, SizeT, QualType(), AssumeSaneOperatorNew);
+ VoidPtr, SizeT, QualType());
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Array_New),
- VoidPtr, SizeT, QualType(), AssumeSaneOperatorNew);
+ VoidPtr, SizeT, QualType());
DeclareGlobalAllocationFunction(
Context.DeclarationNames.getCXXOperatorName(OO_Delete),
Context.VoidTy, VoidPtr);
@@ -2141,8 +2368,7 @@ void Sema::DeclareGlobalNewDelete() {
/// allocation function if it doesn't already exist.
void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
QualType Return,
- QualType Param1, QualType Param2,
- bool AddRestrictAttr) {
+ QualType Param1, QualType Param2) {
DeclContext *GlobalCtx = Context.getTranslationUnitDecl();
unsigned NumParams = Param2.isNull() ? 1 : 2;
@@ -2165,9 +2391,6 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
// FIXME: Do we need to check for default arguments here?
if (InitialParam1Type == Param1 &&
(NumParams == 1 || InitialParam2Type == Param2)) {
- if (AddRestrictAttr && !Func->hasAttr<RestrictAttr>())
- Func->addAttr(RestrictAttr::CreateImplicit(
- Context, RestrictAttr::GNU_malloc));
// Make the function visible to name lookup, even if we found it in
// an unimported module. It either is an implicitly-declared global
// allocation function, or is suppressing that function.
@@ -2210,10 +2433,6 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
Alloc->addAttr(VisibilityAttr::CreateImplicit(Context,
VisibilityAttr::Default));
- if (AddRestrictAttr)
- Alloc->addAttr(
- RestrictAttr::CreateImplicit(Context, RestrictAttr::GNU_malloc));
-
ParmVarDecl *ParamDecls[2];
for (unsigned I = 0; I != NumParams; ++I) {
ParamDecls[I] = ParmVarDecl::Create(Context, Alloc, SourceLocation(),
@@ -2265,7 +2484,7 @@ FunctionDecl *Sema::FindUsualDeallocationFunction(SourceLocation StartLoc,
"found an unexpected usual deallocation function");
}
- if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+ if (getLangOpts().CUDA)
EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
assert(Matches.size() == 1 &&
@@ -2299,7 +2518,7 @@ bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,
Matches.push_back(F.getPair());
}
- if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
+ if (getLangOpts().CUDA)
EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);
// There's exactly one suitable operator; pick it.
@@ -2765,30 +2984,10 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal,
return ExprError();
}
- // C++ [expr.delete]p3:
- // In the first alternative (delete object), if the static type of the
- // object to be deleted is different from its dynamic type, the static
- // type shall be a base class of the dynamic type of the object to be
- // deleted and the static type shall have a virtual destructor or the
- // behavior is undefined.
- //
- // Note: a final class cannot be derived from, no issue there
- if (PointeeRD->isPolymorphic() && !PointeeRD->hasAttr<FinalAttr>()) {
- CXXDestructorDecl *dtor = PointeeRD->getDestructor();
- if (dtor && !dtor->isVirtual()) {
- if (PointeeRD->isAbstract()) {
- // If the class is abstract, we warn by default, because we're
- // sure the code has undefined behavior.
- Diag(StartLoc, diag::warn_delete_abstract_non_virtual_dtor)
- << PointeeElem;
- } else if (!ArrayForm) {
- // Otherwise, if this is not an array delete, it's a bit suspect,
- // but not necessarily wrong.
- Diag(StartLoc, diag::warn_delete_non_virtual_dtor) << PointeeElem;
- }
- }
- }
-
+ CheckVirtualDtorCall(PointeeRD->getDestructor(), StartLoc,
+ /*IsDelete=*/true, /*CallCanBeVirtual=*/true,
+ /*WarnOnNonAbstractTypes=*/!ArrayForm,
+ SourceLocation());
}
if (!OperatorDelete)
@@ -2817,11 +3016,61 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal,
return Result;
}
+void Sema::CheckVirtualDtorCall(CXXDestructorDecl *dtor, SourceLocation Loc,
+ bool IsDelete, bool CallCanBeVirtual,
+ bool WarnOnNonAbstractTypes,
+ SourceLocation DtorLoc) {
+ if (!dtor || dtor->isVirtual() || !CallCanBeVirtual)
+ return;
+
+ // C++ [expr.delete]p3:
+ // In the first alternative (delete object), if the static type of the
+ // object to be deleted is different from its dynamic type, the static
+ // type shall be a base class of the dynamic type of the object to be
+ // deleted and the static type shall have a virtual destructor or the
+ // behavior is undefined.
+ //
+ const CXXRecordDecl *PointeeRD = dtor->getParent();
+ // Note: a final class cannot be derived from, no issue there
+ if (!PointeeRD->isPolymorphic() || PointeeRD->hasAttr<FinalAttr>())
+ return;
+
+ QualType ClassType = dtor->getThisType(Context)->getPointeeType();
+ if (PointeeRD->isAbstract()) {
+ // If the class is abstract, we warn by default, because we're
+ // sure the code has undefined behavior.
+ Diag(Loc, diag::warn_delete_abstract_non_virtual_dtor) << (IsDelete ? 0 : 1)
+ << ClassType;
+ } else if (WarnOnNonAbstractTypes) {
+ // Otherwise, if this is not an array delete, it's a bit suspect,
+ // but not necessarily wrong.
+ Diag(Loc, diag::warn_delete_non_virtual_dtor) << (IsDelete ? 0 : 1)
+ << ClassType;
+ }
+ if (!IsDelete) {
+ std::string TypeStr;
+ ClassType.getAsStringInternal(TypeStr, getPrintingPolicy());
+ Diag(DtorLoc, diag::note_delete_non_virtual)
+ << FixItHint::CreateInsertion(DtorLoc, TypeStr + "::");
+ }
+}
+
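For illustration only (names are hypothetical, not part of the patch), a minimal C++ sketch of the case the refactored CheckVirtualDtorCall diagnoses:

    struct Shape {               // polymorphic, but the destructor is not virtual
      virtual void draw();
      ~Shape();
    };
    struct Circle : Shape {};
    void destroy(Shape *s) {
      delete s;                  // warn_delete_non_virtual_dtor: deleting a
    }                            // derived object through Shape* is undefined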
+Sema::ConditionResult Sema::ActOnConditionVariable(Decl *ConditionVar,
+ SourceLocation StmtLoc,
+ ConditionKind CK) {
+ ExprResult E =
+ CheckConditionVariable(cast<VarDecl>(ConditionVar), StmtLoc, CK);
+ if (E.isInvalid())
+ return ConditionError();
+ return ConditionResult(*this, ConditionVar, MakeFullExpr(E.get(), StmtLoc),
+ CK == ConditionKind::ConstexprIf);
+}
+
/// \brief Check the use of the given variable as a C++ condition in an if,
/// while, do-while, or switch statement.
ExprResult Sema::CheckConditionVariable(VarDecl *ConditionVar,
SourceLocation StmtLoc,
- bool ConvertToBoolean) {
+ ConditionKind CK) {
if (ConditionVar->isInvalidDecl())
return ExprError();
@@ -2845,17 +3094,22 @@ ExprResult Sema::CheckConditionVariable(VarDecl *ConditionVar,
MarkDeclRefReferenced(cast<DeclRefExpr>(Condition.get()));
- if (ConvertToBoolean) {
- Condition = CheckBooleanCondition(Condition.get(), StmtLoc);
- if (Condition.isInvalid())
- return ExprError();
+ switch (CK) {
+ case ConditionKind::Boolean:
+ return CheckBooleanCondition(StmtLoc, Condition.get());
+
+ case ConditionKind::ConstexprIf:
+ return CheckBooleanCondition(StmtLoc, Condition.get(), true);
+
+ case ConditionKind::Switch:
+ return CheckSwitchCondition(StmtLoc, Condition.get());
}
- return Condition;
+ llvm_unreachable("unexpected condition kind");
}
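The three ConditionKind values correspond to distinct source forms; a sketch with hypothetical helper functions:

    int *lookup();
    int category();
    void g() {
      if (int *p = lookup()) {}        // ConditionKind::Boolean
      switch (int c = category()) {    // ConditionKind::Switch
      case 0: break;
      }
    }
    template <typename T> void h() {
      if constexpr (sizeof(T) > 4) {}  // ConditionKind::ConstexprIf (C++17)
    }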
/// CheckCXXBooleanCondition - Returns true if a conversion to bool is invalid.
-ExprResult Sema::CheckCXXBooleanCondition(Expr *CondExpr) {
+ExprResult Sema::CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr) {
// C++ 6.4p4:
// The value of a condition that is an initialized declaration in a statement
// other than a switch statement is the value of the declared variable
@@ -2864,7 +3118,12 @@ ExprResult Sema::CheckCXXBooleanCondition(Expr *CondExpr) {
// The value of a condition that is an expression is the value of the
// expression, implicitly converted to bool.
//
- return PerformContextuallyConvertToBool(CondExpr);
+ // FIXME: Return this value to the caller so they don't need to recompute it.
+ llvm::APSInt Value(/*BitWidth*/1);
+ return (IsConstexpr && !CondExpr->isValueDependent())
+ ? CheckConvertedConstantExpression(CondExpr, Context.BoolTy, Value,
+ CCEK_ConstexprIf)
+ : PerformContextuallyConvertToBool(CondExpr);
}
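The IsConstexpr path corresponds to C++17 if constexpr: the condition must be a converted constant expression of type bool, not merely contextually convertible. A sketch, assuming a hypothetical non-constexpr helper:

    int runtime();
    template <typename T> int pick() {
      if constexpr (sizeof(T) > 4)  // accepted: a constant expression
        return 1;
      // if constexpr (runtime())   // rejected by the CCEK_ConstexprIf check:
      //   return 2;                // not a constant expression
      return 0;
    }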
/// Helper function to determine whether this is the (deprecated) C++
@@ -2898,7 +3157,8 @@ Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) {
return (ToPointeeType->getKind() == BuiltinType::Char_U ||
ToPointeeType->getKind() == BuiltinType::Char_S);
case StringLiteral::Wide:
- return ToPointeeType->isWideCharType();
+ return Context.typesAreCompatible(Context.getWideCharType(),
+ QualType(ToPointeeType, 0));
}
}
}
@@ -2927,14 +3187,13 @@ static ExprResult BuildCXXCastArgument(Sema &S,
if (S.CompleteConstructorCall(Constructor, From, CastLoc, ConstructorArgs))
return ExprError();
- S.CheckConstructorAccess(CastLoc, Constructor,
- InitializedEntity::InitializeTemporary(Ty),
- Constructor->getAccess());
+ S.CheckConstructorAccess(CastLoc, Constructor, FoundDecl,
+ InitializedEntity::InitializeTemporary(Ty));
if (S.DiagnoseUseOfDecl(Method, CastLoc))
return ExprError();
ExprResult Result = S.BuildCXXConstructExpr(
- CastLoc, Ty, cast<CXXConstructorDecl>(Method),
+ CastLoc, Ty, FoundDecl, cast<CXXConstructorDecl>(Method),
ConstructorArgs, HadMultipleCandidates,
/*ListInit*/ false, /*StdInitListInit*/ false, /*ZeroInit*/ false,
CXXConstructExpr::CK_Complete, SourceRange());
@@ -3085,13 +3344,15 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
ConstructorArgs))
return ExprError();
return BuildCXXConstructExpr(
- /*FIXME:ConstructLoc*/ SourceLocation(), ToType, SCS.CopyConstructor,
+ /*FIXME:ConstructLoc*/ SourceLocation(), ToType,
+ SCS.FoundCopyConstructor, SCS.CopyConstructor,
ConstructorArgs, /*HadMultipleCandidates*/ false,
/*ListInit*/ false, /*StdInitListInit*/ false, /*ZeroInit*/ false,
CXXConstructExpr::CK_Complete, SourceRange());
}
return BuildCXXConstructExpr(
- /*FIXME:ConstructLoc*/ SourceLocation(), ToType, SCS.CopyConstructor,
+ /*FIXME:ConstructLoc*/ SourceLocation(), ToType,
+ SCS.FoundCopyConstructor, SCS.CopyConstructor,
From, /*HadMultipleCandidates*/ false,
/*ListInit*/ false, /*StdInitListInit*/ false, /*ZeroInit*/ false,
CXXConstructExpr::CK_Complete, SourceRange());
@@ -3960,9 +4221,12 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
// A template constructor is never a copy constructor.
// FIXME: However, it may actually be selected at the actual overload
// resolution point.
- if (isa<FunctionTemplateDecl>(ND))
+ if (isa<FunctionTemplateDecl>(ND->getUnderlyingDecl()))
continue;
- const CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(ND);
+ // UsingDecl itself is not a constructor
+ if (isa<UsingDecl>(ND))
+ continue;
+ auto *Constructor = cast<CXXConstructorDecl>(ND->getUnderlyingDecl());
if (Constructor->isCopyConstructor(FoundTQs)) {
FoundConstructor = true;
const FunctionProtoType *CPT
@@ -3996,9 +4260,12 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
bool FoundConstructor = false;
for (const auto *ND : Self.LookupConstructors(RD)) {
// FIXME: In C++0x, a constructor template can be a default constructor.
- if (isa<FunctionTemplateDecl>(ND))
+ if (isa<FunctionTemplateDecl>(ND->getUnderlyingDecl()))
+ continue;
+ // UsingDecl itself is not a constructor
+ if (isa<UsingDecl>(ND))
continue;
- const CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(ND);
+ auto *Constructor = cast<CXXConstructorDecl>(ND->getUnderlyingDecl());
if (Constructor->isDefaultConstructor()) {
FoundConstructor = true;
const FunctionProtoType *CPT
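Both trait loops now skip using-declarations because constructor lookup on a class with inherited constructors also returns the UsingDecl itself; a hypothetical trigger:

    struct A {
      A();
      A(const A &) throw();
    };
    struct B : A {
      using A::A;  // lookup of B's "constructors" also yields a UsingDecl
    };
    // Evaluating these traits must skip the UsingDecl rather than cast it
    // to CXXConstructorDecl:
    bool HasNothrowCopy = __has_nothrow_copy(B);
    bool HasNothrowCtor = __has_nothrow_constructor(B);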
@@ -4314,6 +4581,7 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT,
return !Result.isInvalid() && !SFINAE.hasErrorOccurred();
}
+ case BTT_IsAssignable:
case BTT_IsNothrowAssignable:
case BTT_IsTriviallyAssignable: {
// C++11 [meta.unary.prop]p3:
@@ -4361,6 +4629,9 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT,
if (Result.isInvalid() || SFINAE.hasErrorOccurred())
return false;
+ if (BTT == BTT_IsAssignable)
+ return true;
+
if (BTT == BTT_IsNothrowAssignable)
return Self.canThrow(Result.get()) == CT_Cannot;
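BTT_IsAssignable backs the __is_assignable trait: it answers as soon as the assignment expression is well-formed, before the nothrow check the other two traits go on to perform. For example:

    // __is_assignable(T, U): is declval<T>() = declval<U>() well-formed?
    static_assert(__is_assignable(int &, double), "");
    static_assert(!__is_assignable(const int &, int), "");
    // the nothrow variant additionally requires that the assignment not throw
    static_assert(__is_nothrow_assignable(int &, int), "");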
@@ -4652,7 +4923,7 @@ QualType Sema::CheckPointerToMemberOperands(ExprResult &LHS, ExprResult &RHS,
return Result;
}
-/// \brief Try to convert a type to another according to C++0x 5.16p3.
+/// \brief Try to convert a type to another according to C++11 5.16p3.
///
/// This is part of the parameter validation for the ? operator. If either
/// value operand is a class type, the two operands are attempted to be
@@ -4668,17 +4939,21 @@ static bool TryClassUnification(Sema &Self, Expr *From, Expr *To,
InitializationKind Kind = InitializationKind::CreateCopy(To->getLocStart(),
SourceLocation());
- // C++0x 5.16p3
+ // C++11 5.16p3
// The process for determining whether an operand expression E1 of type T1
// can be converted to match an operand expression E2 of type T2 is defined
// as follows:
- // -- If E2 is an lvalue:
- bool ToIsLvalue = To->isLValue();
- if (ToIsLvalue) {
- // E1 can be converted to match E2 if E1 can be implicitly converted to
- // type "lvalue reference to T2", subject to the constraint that in the
- // conversion the reference must bind directly to E1.
- QualType T = Self.Context.getLValueReferenceType(ToType);
+ // -- If E2 is an lvalue: E1 can be converted to match E2 if E1 can be
+ // implicitly converted to type "lvalue reference to T2", subject to the
+ // constraint that in the conversion the reference must bind directly to
+ // an lvalue.
+ // -- If E2 is an xvalue: E1 can be converted to match E2 if E1 can be
+ // implicitly converted to the type "rvalue reference to T2", subject to
+ // the constraint that the reference must bind directly.
+ if (To->isLValue() || To->isXValue()) {
+ QualType T = To->isLValue() ? Self.Context.getLValueReferenceType(ToType)
+ : Self.Context.getRValueReferenceType(ToType);
+
InitializedEntity Entity = InitializedEntity::InitializeTemporary(T);
InitializationSequence InitSeq(Self, Entity, Kind, From);
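The added xvalue bullet lets the conditional operator bind an rvalue reference directly when both operands are xvalues; a sketch:

    #include <utility>
    int pick(bool c) {
      int a = 1, b = 2;
      // Both arms are xvalues of type int, so unification binds an rvalue
      // reference directly instead of materializing a temporary.
      int &&r = c ? std::move(a) : std::move(b);
      return r;
    }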
@@ -5029,6 +5304,12 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
QualType ResTy = UsualArithmeticConversions(LHS, RHS);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
+ if (ResTy.isNull()) {
+ Diag(QuestionLoc,
+ diag::err_typecheck_cond_incompatible_operands) << LTy << RTy
+ << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
+ return QualType();
+ }
LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy));
RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy));
@@ -5390,7 +5671,7 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) {
if (!ReturnsRetained && E->getType()->isObjCARCImplicitlyUnretainedType())
return E;
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
CastKind ck = (ReturnsRetained ? CK_ARCConsumeObject
: CK_ARCReclaimReturnedObject);
@@ -5443,7 +5724,7 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) {
return E;
// We need a cleanup, but we don't need to remember the temporary.
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
}
CXXTemporary *Temp = CXXTemporary::Create(Context, Destructor);
@@ -5470,14 +5751,16 @@ Expr *Sema::MaybeCreateExprWithCleanups(Expr *SubExpr) {
unsigned FirstCleanup = ExprEvalContexts.back().NumCleanupObjects;
assert(ExprCleanupObjects.size() >= FirstCleanup);
- assert(ExprNeedsCleanups || ExprCleanupObjects.size() == FirstCleanup);
- if (!ExprNeedsCleanups)
+ assert(Cleanup.exprNeedsCleanups() ||
+ ExprCleanupObjects.size() == FirstCleanup);
+ if (!Cleanup.exprNeedsCleanups())
return SubExpr;
auto Cleanups = llvm::makeArrayRef(ExprCleanupObjects.begin() + FirstCleanup,
ExprCleanupObjects.size() - FirstCleanup);
- Expr *E = ExprWithCleanups::Create(Context, SubExpr, Cleanups);
+ auto *E = ExprWithCleanups::Create(
+ Context, SubExpr, Cleanup.cleanupsHaveSideEffects(), Cleanups);
DiscardCleanupsInEvaluationContext();
return E;
@@ -5488,7 +5771,7 @@ Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) {
CleanupVarDeclMarking();
- if (!ExprNeedsCleanups)
+ if (!Cleanup.exprNeedsCleanups())
return SubStmt;
// FIXME: In order to attach the temporaries, wrap the statement into
@@ -5594,7 +5877,7 @@ ExprResult Sema::ActOnDecltypeExpression(Expr *E) {
return ExprError();
// We need a cleanup, but we don't need to remember the temporary.
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
}
// Possibly strip off the top CXXBindTemporaryExpr.
@@ -5746,7 +6029,7 @@ ExprResult Sema::ActOnStartCXXMemberReference(Scope *S, Expr *Base,
MayBePseudoDestructor = true;
return Base;
} else if (!BaseType->isRecordType()) {
- ObjectType = ParsedType();
+ ObjectType = nullptr;
MayBePseudoDestructor = true;
return Base;
}
@@ -5789,7 +6072,7 @@ static bool CheckArrow(Sema& S, QualType& ObjectType, Expr *&Base,
if (const PointerType *Ptr = ObjectType->getAs<PointerType>()) {
ObjectType = Ptr->getPointeeType();
} else if (!Base->isTypeDependent()) {
- // The user wrote "p->" when she probably meant "p."; fix it.
+ // The user wrote "p->" when they probably meant "p."; fix it.
S.Diag(OpLoc, diag::err_typecheck_member_reference_suggestion)
<< ObjectType << true
<< FixItHint::CreateReplacement(OpLoc, ".");
@@ -6082,9 +6365,12 @@ ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
// follows the normal lifetime rules for block literals instead of being
// autoreleased.
DiagnosticErrorTrap Trap(Diags);
+ PushExpressionEvaluationContext(PotentiallyEvaluated);
ExprResult Exp = BuildBlockForLambdaConversion(E->getExprLoc(),
E->getExprLoc(),
Method, E);
+ PopExpressionEvaluationContext();
+
if (Exp.isInvalid())
Diag(E->getExprLoc(), diag::note_lambda_to_block_conv);
return Exp;
@@ -6302,10 +6588,16 @@ static inline bool VariableCanNeverBeAConstantExpression(VarDecl *Var,
static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
Expr *const FE, LambdaScopeInfo *const CurrentLSI, Sema &S) {
- assert(!S.isUnevaluatedContext());
- assert(S.CurContext->isDependentContext());
- assert(CurrentLSI->CallOperator == S.CurContext &&
+ assert(!S.isUnevaluatedContext());
+ assert(S.CurContext->isDependentContext());
+#ifndef NDEBUG
+ DeclContext *DC = S.CurContext;
+ while (DC && isa<CapturedDecl>(DC))
+ DC = DC->getParent();
+ assert(
+ CurrentLSI->CallOperator == DC &&
"The current call operator must be synchronized with Sema's CurContext");
+#endif // NDEBUG
const bool IsFullExprInstantiationDependent = FE->isInstantiationDependent();
@@ -6390,7 +6682,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
static ExprResult attemptRecovery(Sema &SemaRef,
const TypoCorrectionConsumer &Consumer,
- TypoCorrection TC) {
+ const TypoCorrection &TC) {
LookupResult R(SemaRef, Consumer.getLookupResult().getLookupNameInfo(),
Consumer.getLookupResult().getLookupKind());
const CXXScopeSpec *SS = Consumer.getSS();
@@ -6567,6 +6859,14 @@ public:
ExprResult TransformBlockExpr(BlockExpr *E) { return Owned(E); }
+ ExprResult TransformObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
+ return Owned(E);
+ }
+
+ ExprResult TransformObjCIvarRefExpr(ObjCIvarRefExpr *E) {
+ return Owned(E);
+ }
+
ExprResult Transform(Expr *E) {
ExprResult Res;
while (true) {
@@ -6763,7 +7063,8 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC,
// and then the full-expression +n + ({ 0; }); ends, but it's too late
// for us to see that we need to capture n after all.
- LambdaScopeInfo *const CurrentLSI = getCurLambda();
+ LambdaScopeInfo *const CurrentLSI =
+ getCurLambda(/*IgnoreCapturedRegions=*/true);
// FIXME: PR 17877 showed that getCurLambda() can return a valid pointer
// even if CurContext is not a lambda call operator. Refer to that Bug Report
// for an example of the code that might cause this asynchrony.
@@ -6778,7 +7079,10 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC,
// constructor/destructor.
// - Teach the handful of places that iterate over FunctionScopes to
// stop at the outermost enclosing lexical scope."
- const bool IsInLambdaDeclContext = isLambdaCallOperator(CurContext);
+ DeclContext *DC = CurContext;
+ while (DC && isa<CapturedDecl>(DC))
+ DC = DC->getParent();
+ const bool IsInLambdaDeclContext = isLambdaCallOperator(DC);
if (IsInLambdaDeclContext && CurrentLSI &&
CurrentLSI->hasPotentialCaptures() && !FullExpr.isInvalid())
CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(FE, CurrentLSI,
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprMember.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprMember.cpp
index 9c345f8a69a3..283621889f80 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprMember.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprMember.cpp
@@ -142,6 +142,7 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef,
AbstractInstanceResult = IMA_Abstract;
break;
+ case Sema::DiscardedStatement:
case Sema::ConstantEvaluated:
case Sema::PotentiallyEvaluated:
case Sema::PotentiallyEvaluatedIfUsed:
@@ -380,7 +381,8 @@ static Decl *FindGetterSetterNameDeclFromProtocolList(const ObjCProtocolDecl*PDe
const Selector &Sel,
ASTContext &Context) {
if (Member)
- if (ObjCPropertyDecl *PD = PDecl->FindPropertyDeclaration(Member))
+ if (ObjCPropertyDecl *PD = PDecl->FindPropertyDeclaration(
+ Member, ObjCPropertyQueryKind::OBJC_PR_query_instance))
return PD;
if (ObjCMethodDecl *OMD = PDecl->getInstanceMethod(Sel))
return OMD;
@@ -401,7 +403,8 @@ static Decl *FindGetterSetterNameDecl(const ObjCObjectPointerType *QIdTy,
Decl *GDecl = nullptr;
for (const auto *I : QIdTy->quals()) {
if (Member)
- if (ObjCPropertyDecl *PD = I->FindPropertyDeclaration(Member)) {
+ if (ObjCPropertyDecl *PD = I->FindPropertyDeclaration(
+ Member, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
GDecl = PD;
break;
}
@@ -900,6 +903,32 @@ static bool IsInFnTryBlockHandler(const Scope *S) {
return false;
}
+static VarDecl *
+getVarTemplateSpecialization(Sema &S, VarTemplateDecl *VarTempl,
+ const TemplateArgumentListInfo *TemplateArgs,
+ const DeclarationNameInfo &MemberNameInfo,
+ SourceLocation TemplateKWLoc) {
+
+ if (!TemplateArgs) {
+ S.Diag(MemberNameInfo.getBeginLoc(), diag::err_template_decl_ref)
+ << /*Variable template*/ 1 << MemberNameInfo.getName()
+ << MemberNameInfo.getSourceRange();
+
+ S.Diag(VarTempl->getLocation(), diag::note_template_decl_here);
+
+ return nullptr;
+ }
+ DeclResult VDecl = S.CheckVarTemplateId(
+ VarTempl, TemplateKWLoc, MemberNameInfo.getLoc(), *TemplateArgs);
+ if (VDecl.isInvalid())
+ return nullptr;
+ VarDecl *Var = cast<VarDecl>(VDecl.get());
+ if (!Var->getTemplateSpecializationKind())
+ Var->setTemplateSpecializationKind(TSK_ImplicitInstantiation,
+ MemberNameInfo.getLoc());
+ return Var;
+}
+
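getVarTemplateSpecialization lets member access syntax name a C++14 static member variable template; a hypothetical use:

    struct Limits {
      template <typename T>
      static constexpr T min = T(0);   // static member variable template
    };
    Limits l;
    double d = l.min<double>;          // instantiates and references min<double>
    double e = Limits().min<double>;   // also valid on a prvalue object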
ExprResult
Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
SourceLocation OpLoc, bool IsArrow,
@@ -1067,9 +1096,23 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
// Handle the implicit-member-access case.
if (!BaseExpr) {
// If this is not an instance member, convert to a non-member access.
- if (!MemberDecl->isCXXInstanceMember())
- return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), MemberDecl);
-
+ if (!MemberDecl->isCXXInstanceMember()) {
+ // If this is a variable template, get the instantiated variable
+ // declaration corresponding to the supplied template arguments
+ // (while emitting diagnostics as necessary) that will be referenced
+ // by this expression.
+ assert((!TemplateArgs || isa<VarTemplateDecl>(MemberDecl)) &&
+ "How did we get template arguments here sans a variable template");
+ if (isa<VarTemplateDecl>(MemberDecl)) {
+ MemberDecl = getVarTemplateSpecialization(
+ *this, cast<VarTemplateDecl>(MemberDecl), TemplateArgs,
+ R.getLookupNameInfo(), TemplateKWLoc);
+ if (!MemberDecl)
+ return ExprError();
+ }
+ return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), MemberDecl,
+ FoundDecl, TemplateArgs);
+ }
SourceLocation Loc = R.getNameLoc();
if (SS.getRange().isValid())
Loc = SS.getRange().getBegin();
@@ -1125,6 +1168,15 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
TemplateKWLoc, Enum, FoundDecl, MemberNameInfo,
Enum->getType(), VK_RValue, OK_Ordinary);
}
+ if (VarTemplateDecl *VarTempl = dyn_cast<VarTemplateDecl>(MemberDecl)) {
+ if (VarDecl *Var = getVarTemplateSpecialization(
+ *this, VarTempl, TemplateArgs, MemberNameInfo, TemplateKWLoc))
+ return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
+ TemplateKWLoc, Var, FoundDecl, MemberNameInfo,
+ Var->getType().getNonReferenceType(), VK_LValue,
+ OK_Ordinary);
+ return ExprError();
+ }
// We found something that we didn't expect. Complain.
if (isa<TypeDecl>(MemberDecl))
@@ -1324,7 +1376,9 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R,
D = CAT->getClassInterface();
ClassDeclared = cast<ObjCInterfaceDecl>(D);
} else {
- if (IsArrow && IDecl->FindPropertyDeclaration(Member)) {
+ if (IsArrow &&
+ IDecl->FindPropertyDeclaration(
+ Member, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
S.Diag(MemberLoc, diag::err_property_found_suggest)
<< Member << BaseExpr.get()->getType()
<< FixItHint::CreateReplacement(OpLoc, ".");
@@ -1731,9 +1785,20 @@ BuildFieldReferenceExpr(Sema &S, Expr *BaseExpr, bool IsArrow,
FoundDecl, Field);
if (Base.isInvalid())
return ExprError();
- return BuildMemberExpr(S, S.Context, Base.get(), IsArrow, OpLoc, SS,
- /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
- MemberNameInfo, MemberType, VK, OK);
+ MemberExpr *ME =
+ BuildMemberExpr(S, S.Context, Base.get(), IsArrow, OpLoc, SS,
+ /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
+ MemberNameInfo, MemberType, VK, OK);
+
+ // Build a reference to a private copy for non-static data members in
+ // non-static member functions, privatized by OpenMP constructs.
+ if (S.getLangOpts().OpenMP && IsArrow &&
+ !S.CurContext->isDependentContext() &&
+ isa<CXXThisExpr>(Base.get()->IgnoreParenImpCasts())) {
+ if (auto *PrivateCopy = S.IsOpenMPCapturedDecl(Field))
+ return S.getOpenMPCapturedExpr(PrivateCopy, VK, OK, OpLoc);
+ }
+ return ME;
}
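The OpenMP branch redirects implicit this->field references to the privatized copy; a sketch of the OpenMP 4.5 pattern it targets (hypothetical names):

    class Accum {
      int total;
      void run() {
    #pragma omp parallel firstprivate(total)
        {
          total += 1;  // uses the thread-private copy, not this->total
        }
      }
    };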
/// Builds an implicit member access expression. The current context
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp
index c1fb906a5b19..8f0d4ff69576 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprObjC.cpp
@@ -1035,7 +1035,6 @@ ExprResult Sema::BuildObjCDictionaryLiteral(SourceRange SR,
HasPackExpansions = true;
}
-
QualType Ty
= Context.getObjCObjectPointerType(
@@ -1778,7 +1777,8 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
MemberName, BaseRange))
return ExprError();
- if (ObjCPropertyDecl *PD = IFace->FindPropertyDeclaration(Member)) {
+ if (ObjCPropertyDecl *PD = IFace->FindPropertyDeclaration(
+ Member, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
// Check whether we can reference this property.
if (DiagnoseUseOfDecl(PD, MemberLoc))
return ExprError();
@@ -1793,7 +1793,8 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
}
// Check protocols on qualified interfaces.
for (const auto *I : OPT->quals())
- if (ObjCPropertyDecl *PD = I->FindPropertyDeclaration(Member)) {
+ if (ObjCPropertyDecl *PD = I->FindPropertyDeclaration(
+ Member, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
// Check whether we can reference this property.
if (DiagnoseUseOfDecl(PD, MemberLoc))
return ExprError();
@@ -1816,7 +1817,7 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
Selector Sel = PP.getSelectorTable().getNullarySelector(Member);
ObjCMethodDecl *Getter = IFace->lookupInstanceMethod(Sel);
- // May be founf in property's qualified list.
+ // May be found in property's qualified list.
if (!Getter)
Getter = LookupMethodInQualifiedType(Sel, OPT, true);
@@ -1836,7 +1837,7 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
PP.getSelectorTable(), Member);
ObjCMethodDecl *Setter = IFace->lookupInstanceMethod(SetterSel);
- // May be founf in property's qualified list.
+ // May be found in property's qualified list.
if (!Setter)
Setter = LookupMethodInQualifiedType(SetterSel, OPT, true);
@@ -1852,8 +1853,9 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
// Special warning if member name used in a property-dot for a setter accessor
// does not use a property with same name; e.g. obj.X = ... for a property with
// name 'x'.
- if (Setter && Setter->isImplicit() && Setter->isPropertyAccessor()
- && !IFace->FindPropertyDeclaration(Member)) {
+ if (Setter && Setter->isImplicit() && Setter->isPropertyAccessor() &&
+ !IFace->FindPropertyDeclaration(
+ Member, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
if (const ObjCPropertyDecl *PDecl = Setter->findPropertyDecl()) {
// Do not warn if user is using property-dot syntax to make call to
// user named setter.
@@ -1883,12 +1885,29 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
LookupOrdinaryName, nullptr, nullptr,
llvm::make_unique<DeclFilterCCC<ObjCPropertyDecl>>(),
CTK_ErrorRecovery, IFace, false, OPT)) {
- diagnoseTypo(Corrected, PDiag(diag::err_property_not_found_suggest)
- << MemberName << QualType(OPT, 0));
DeclarationName TypoResult = Corrected.getCorrection();
- return HandleExprPropertyRefExpr(OPT, BaseExpr, OpLoc,
- TypoResult, MemberLoc,
- SuperLoc, SuperType, Super);
+ if (TypoResult.isIdentifier() &&
+ TypoResult.getAsIdentifierInfo() == Member) {
+ // There is no need to try the correction if it is the same.
+ NamedDecl *ChosenDecl =
+ Corrected.isKeyword() ? nullptr : Corrected.getFoundDecl();
+ if (ChosenDecl && isa<ObjCPropertyDecl>(ChosenDecl))
+ if (cast<ObjCPropertyDecl>(ChosenDecl)->isClassProperty()) {
+ // This is a class property; we should not use the instance to
+ // access it.
+ Diag(MemberLoc, diag::err_class_property_found) << MemberName
+ << OPT->getInterfaceDecl()->getName()
+ << FixItHint::CreateReplacement(BaseExpr->getSourceRange(),
+ OPT->getInterfaceDecl()->getName());
+ return ExprError();
+ }
+ } else {
+ diagnoseTypo(Corrected, PDiag(diag::err_property_not_found_suggest)
+ << MemberName << QualType(OPT, 0));
+ return HandleExprPropertyRefExpr(OPT, BaseExpr, OpLoc,
+ TypoResult, MemberLoc,
+ SuperLoc, SuperType, Super);
+ }
}
ObjCInterfaceDecl *ClassDeclared;
if (ObjCIvarDecl *Ivar =
@@ -1916,8 +1935,6 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
return ExprError();
}
-
-
ExprResult Sema::
ActOnClassPropertyRefExpr(IdentifierInfo &receiverName,
IdentifierInfo &propertyName,
@@ -2032,7 +2049,7 @@ class ObjCInterfaceOrSuperCCC : public CorrectionCandidateCallback {
}
};
-}
+} // end anonymous namespace
Sema::ObjCMessageKind Sema::getObjCMessageKind(Scope *S,
IdentifierInfo *Name,
@@ -2040,7 +2057,7 @@ Sema::ObjCMessageKind Sema::getObjCMessageKind(Scope *S,
bool IsSuper,
bool HasTrailingDot,
ParsedType &ReceiverType) {
- ReceiverType = ParsedType();
+ ReceiverType = nullptr;
// If the identifier is "super" and there is no trailing dot, we're
// messaging super. If the identifier is "super" and there is a
@@ -2183,7 +2200,6 @@ ExprResult Sema::ActOnSuperMessage(Scope *S,
LBracLoc, SelectorLocs, RBracLoc, Args);
}
-
ExprResult Sema::BuildClassMessageImplicit(QualType ReceiverType,
bool isSuperReceiver,
SourceLocation Loc,
@@ -2198,7 +2214,6 @@ ExprResult Sema::BuildClassMessageImplicit(QualType ReceiverType,
/*SuperLoc=*/isSuperReceiver ? Loc : SourceLocation(),
Sel, Method, Loc, Loc, Loc, Args,
/*isImplicit=*/true);
-
}
static void applyCocoaAPICheck(Sema &S, const ObjCMessageExpr *Msg,
@@ -2465,7 +2480,6 @@ ExprResult Sema::ActOnClassMessage(Scope *S,
if (ReceiverType.isNull())
return ExprError();
-
if (!ReceiverTypeInfo)
ReceiverTypeInfo = Context.getTrivialTypeSourceInfo(ReceiverType, LBracLoc);
@@ -2621,29 +2635,28 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver,
if (!Method) {
// Handle messages to id and __kindof types (where we use the
// global method pool).
- // FIXME: The type bound is currently ignored by lookup in the
- // global pool.
const ObjCObjectType *typeBound = nullptr;
bool receiverIsIdLike = ReceiverType->isObjCIdOrObjectKindOfType(Context,
typeBound);
if (receiverIsIdLike || ReceiverType->isBlockPointerType() ||
(Receiver && Context.isObjCNSObjectType(Receiver->getType()))) {
- Method = LookupInstanceMethodInGlobalPool(Sel,
- SourceRange(LBracLoc, RBracLoc),
- receiverIsIdLike);
- if (!Method)
- Method = LookupFactoryMethodInGlobalPool(Sel,
- SourceRange(LBracLoc,RBracLoc),
- receiverIsIdLike);
- if (Method) {
+ SmallVector<ObjCMethodDecl*, 4> Methods;
+ // If we have a type bound, further filter the methods.
+ CollectMultipleMethodsInGlobalPool(Sel, Methods, true/*InstanceFirst*/,
+ true/*CheckTheOther*/, typeBound);
+ if (!Methods.empty()) {
+ // We choose the first method as the initial candidate, then try to
+ // select a better one.
+ Method = Methods[0];
+
if (ObjCMethodDecl *BestMethod =
- SelectBestMethod(Sel, ArgsIn, Method->isInstanceMethod()))
+ SelectBestMethod(Sel, ArgsIn, Method->isInstanceMethod(), Methods))
Method = BestMethod;
+
if (!AreMultipleMethodsInGlobalPool(Sel, Method,
SourceRange(LBracLoc, RBracLoc),
- receiverIsIdLike)) {
- DiagnoseUseOfDecl(Method, SelLoc);
- }
+ receiverIsIdLike, Methods))
+ DiagnoseUseOfDecl(Method, SelLoc);
}
} else if (ReceiverType->isObjCClassOrClassKindOfType() ||
ReceiverType->isObjCQualifiedClassType()) {
@@ -2681,25 +2694,32 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver,
if (!Method) {
// If not messaging 'self', look for any factory method named 'Sel'.
if (!Receiver || !isSelfExpr(Receiver)) {
- Method = LookupFactoryMethodInGlobalPool(Sel,
- SourceRange(LBracLoc, RBracLoc));
- if (!Method) {
- // If no class (factory) method was found, check if an _instance_
- // method of the same name exists in the root class only.
- Method = LookupInstanceMethodInGlobalPool(Sel,
- SourceRange(LBracLoc, RBracLoc));
- if (Method)
- if (const ObjCInterfaceDecl *ID =
- dyn_cast<ObjCInterfaceDecl>(Method->getDeclContext())) {
- if (ID->getSuperClass())
- Diag(SelLoc, diag::warn_root_inst_method_not_found)
- << Sel << SourceRange(LBracLoc, RBracLoc);
- }
+ // If no class (factory) method was found, check if an _instance_
+ // method of the same name exists in the root class only.
+ SmallVector<ObjCMethodDecl*, 4> Methods;
+ CollectMultipleMethodsInGlobalPool(Sel, Methods,
+ false/*InstanceFirst*/,
+ true/*CheckTheOther*/);
+ if (!Methods.empty()) {
+ // We choose the first method as the initial candidate, then try
+ // to select a better one.
+ Method = Methods[0];
+
+ // If we find an instance method, emit a warning.
+ if (Method->isInstanceMethod()) {
+ if (const ObjCInterfaceDecl *ID =
+ dyn_cast<ObjCInterfaceDecl>(Method->getDeclContext())) {
+ if (ID->getSuperClass())
+ Diag(SelLoc, diag::warn_root_inst_method_not_found)
+ << Sel << SourceRange(LBracLoc, RBracLoc);
+ }
+ }
+
+ if (ObjCMethodDecl *BestMethod =
+ SelectBestMethod(Sel, ArgsIn, Method->isInstanceMethod(),
+ Methods))
+ Method = BestMethod;
}
- if (Method)
- if (ObjCMethodDecl *BestMethod =
- SelectBestMethod(Sel, ArgsIn, Method->isInstanceMethod()))
- Method = BestMethod;
}
}
}
@@ -2764,15 +2784,24 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver,
// behavior isn't very desirable, however we need it for GCC
// compatibility. FIXME: should we deviate??
if (OCIType->qual_empty()) {
- Method = LookupInstanceMethodInGlobalPool(Sel,
- SourceRange(LBracLoc, RBracLoc));
- if (Method) {
- if (auto BestMethod =
- SelectBestMethod(Sel, ArgsIn, Method->isInstanceMethod()))
+ SmallVector<ObjCMethodDecl*, 4> Methods;
+ CollectMultipleMethodsInGlobalPool(Sel, Methods,
+ true/*InstanceFirst*/,
+ false/*CheckTheOther*/);
+ if (!Methods.empty()) {
+ // We choose the first method as the initial candidate, then try
+ // to select a better one.
+ Method = Methods[0];
+
+ if (ObjCMethodDecl *BestMethod =
+ SelectBestMethod(Sel, ArgsIn, Method->isInstanceMethod(),
+ Methods))
Method = BestMethod;
+
AreMultipleMethodsInGlobalPool(Sel, Method,
SourceRange(LBracLoc, RBracLoc),
- true);
+ true/*receiverIdOrClass*/,
+ Methods);
}
if (Method && !forwardClass)
Diag(SelLoc, diag::warn_maynot_respond)
@@ -3052,11 +3081,13 @@ enum ARCConversionTypeClass {
/// struct A*
ACTC_coreFoundation
};
+
static bool isAnyRetainable(ARCConversionTypeClass ACTC) {
return (ACTC == ACTC_retainable ||
ACTC == ACTC_coreFoundation ||
ACTC == ACTC_voidPtr);
}
+
static bool isAnyCLike(ARCConversionTypeClass ACTC) {
return ACTC == ACTC_none ||
ACTC == ACTC_voidPtr ||
@@ -3328,7 +3359,7 @@ namespace {
}
}
};
-}
+} // end anonymous namespace
bool Sema::isKnownName(StringRef name) {
if (name.empty())
@@ -3475,6 +3506,8 @@ diagnoseObjCARCConversion(Sema &S, SourceRange castRange,
return;
QualType castExprType = castExpr->getType();
+ // Defer emitting a diagnostic for bridge-related casts; that will be
+ // handled by CheckObjCBridgeRelatedConversions.
TypedefNameDecl *TDNDecl = nullptr;
if ((castACTC == ACTC_coreFoundation && exprACTC == ACTC_retainable &&
ObjCBridgeRelatedAttrFromType(castType, TDNDecl)) ||
@@ -3780,7 +3813,6 @@ void Sema::CheckObjCBridgeRelatedCast(QualType castType, Expr *castExpr) {
else if (PRE->isImplicitProperty()) {
if (ObjCMethodDecl *Getter = PRE->getImplicitPropertyGetter())
SrcType = Getter->getReturnType();
-
}
}
@@ -3790,7 +3822,6 @@ void Sema::CheckObjCBridgeRelatedCast(QualType castType, Expr *castExpr) {
return;
CheckObjCBridgeRelatedConversions(castExpr->getLocStart(),
castType, SrcType, castExpr);
- return;
}
bool Sema::CheckTollFreeBridgeStaticCast(QualType castType, Expr *castExpr,
@@ -3919,16 +3950,16 @@ Sema::CheckObjCBridgeRelatedConversions(SourceLocation Loc,
<< FixItHint::CreateInsertion(SrcExprEndLoc, "]");
Diag(RelatedClass->getLocStart(), diag::note_declared_at);
Diag(TDNDecl->getLocStart(), diag::note_declared_at);
- }
- QualType receiverType = Context.getObjCInterfaceType(RelatedClass);
- // Argument.
- Expr *args[] = { SrcExpr };
- ExprResult msg = BuildClassMessageImplicit(receiverType, false,
+ QualType receiverType = Context.getObjCInterfaceType(RelatedClass);
+ // Argument.
+ Expr *args[] = { SrcExpr };
+ ExprResult msg = BuildClassMessageImplicit(receiverType, false,
ClassMethod->getLocation(),
ClassMethod->getSelector(), ClassMethod,
MultiExprArg(args, 1));
- SrcExpr = msg.get();
+ SrcExpr = msg.get();
+ }
return true;
}
}
@@ -3962,14 +3993,14 @@ Sema::CheckObjCBridgeRelatedConversions(SourceLocation Loc,
}
Diag(RelatedClass->getLocStart(), diag::note_declared_at);
Diag(TDNDecl->getLocStart(), diag::note_declared_at);
- }
- ExprResult msg =
- BuildInstanceMessageImplicit(SrcExpr, SrcType,
- InstanceMethod->getLocation(),
- InstanceMethod->getSelector(),
- InstanceMethod, None);
- SrcExpr = msg.get();
+ ExprResult msg =
+ BuildInstanceMessageImplicit(SrcExpr, SrcType,
+ InstanceMethod->getLocation(),
+ InstanceMethod->getSelector(),
+ InstanceMethod, None);
+ SrcExpr = msg.get();
+ }
return true;
}
}
@@ -3993,9 +4024,9 @@ Sema::CheckObjCARCConversion(SourceRange castRange, QualType castType,
ARCConversionTypeClass exprACTC = classifyTypeForARCConversion(castExprType);
ARCConversionTypeClass castACTC = classifyTypeForARCConversion(effCastType);
if (exprACTC == castACTC) {
- // check for viablity and report error if casting an rvalue to a
+ // Check for viability and report error if casting an rvalue to a
// life-time qualifier.
- if (Diagnose && castACTC == ACTC_retainable &&
+ if (castACTC == ACTC_retainable &&
(CCK == CCK_CStyleCast || CCK == CCK_OtherCast) &&
castType != castExprType) {
const Type *DT = castType.getTypePtr();
@@ -4011,10 +4042,12 @@ Sema::CheckObjCARCConversion(SourceRange castRange, QualType castType,
QDT = AT->desugar();
if (QDT != castType &&
QDT.getObjCLifetime() != Qualifiers::OCL_None) {
- SourceLocation loc =
- (castRange.isValid() ? castRange.getBegin()
- : castExpr->getExprLoc());
- Diag(loc, diag::err_arc_nolifetime_behavior);
+ if (Diagnose) {
+ SourceLocation loc = (castRange.isValid() ? castRange.getBegin()
+ : castExpr->getExprLoc());
+ Diag(loc, diag::err_arc_nolifetime_behavior);
+ }
+ return ACR_error;
}
}
return ACR_okay;
@@ -4051,7 +4084,7 @@ Sema::CheckObjCARCConversion(SourceRange castRange, QualType castType,
castExpr = ImplicitCastExpr::Create(Context, castExpr->getType(),
CK_ARCConsumeObject, castExpr,
nullptr, VK_RValue);
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
return ACR_okay;
}
@@ -4062,24 +4095,26 @@ Sema::CheckObjCARCConversion(SourceRange castRange, QualType castType,
CCK != CCK_ImplicitConversion)
return ACR_unbridged;
- // Do not issue bridge cast" diagnostic when implicit casting a cstring
- // to 'NSString *'. Let caller issue a normal mismatched diagnostic with
- // suitable fix-it.
+ // Issue a diagnostic about a missing @-sign when implicitly casting a C string
+ // to 'NSString *', instead of falling through to report a "bridge cast"
+ // diagnostic.
if (castACTC == ACTC_retainable && exprACTC == ACTC_none &&
ConversionToObjCStringLiteralCheck(castType, castExpr, Diagnose))
- return ACR_okay;
+ return ACR_error;
// Do not issue "bridge cast" diagnostic when implicit casting
// a retainable object to a CF type parameter belonging to an audited
// CF API function. Let caller issue a normal type mismatched diagnostic
// instead.
- if (Diagnose &&
- (!DiagnoseCFAudited || exprACTC != ACTC_retainable ||
- castACTC != ACTC_coreFoundation))
- if (!(exprACTC == ACTC_voidPtr && castACTC == ACTC_retainable &&
- (Opc == BO_NE || Opc == BO_EQ)))
+ if ((!DiagnoseCFAudited || exprACTC != ACTC_retainable ||
+ castACTC != ACTC_coreFoundation) &&
+ !(exprACTC == ACTC_voidPtr && castACTC == ACTC_retainable &&
+ (Opc == BO_NE || Opc == BO_EQ))) {
+ if (Diagnose)
diagnoseObjCARCConversion(*this, castRange, castType, castACTC, castExpr,
castExpr, exprACTC, CCK);
+ return ACR_error;
+ }
return ACR_okay;
}
@@ -4292,7 +4327,7 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc,
TSInfo, SubExpr);
if (MustConsume) {
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
Result = ImplicitCastExpr::Create(Context, T, CK_ARCConsumeObject, Result,
nullptr, VK_RValue);
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
index c3a89463dc69..060ee3eef212 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
+
using namespace clang;
//===----------------------------------------------------------------------===//
@@ -204,6 +205,8 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
// Semantic checking for initializer lists.
//===----------------------------------------------------------------------===//
+namespace {
+
/// @brief Semantic checking for initializer lists.
///
/// The InitListChecker class contains a set of routines that each
@@ -231,11 +234,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
/// point. CheckDesignatedInitializer() recursively steps into the
/// designated subobject and manages backing out the recursion to
/// initialize the subobjects after the one designated.
-namespace {
class InitListChecker {
Sema &SemaRef;
bool hadError;
bool VerifyOnly; // no diagnostics, no structure building
+ bool TreatUnavailableAsInvalid; // Used only in VerifyOnly mode.
llvm::DenseMap<InitListExpr *, InitListExpr *> SyntacticToSemantic;
InitListExpr *FullyStructuredList;
@@ -280,6 +283,7 @@ class InitListChecker {
unsigned &StructuredIndex);
void CheckStructUnionTypes(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
+ CXXRecordDecl::base_class_range Bases,
RecordDecl::field_iterator Field,
bool SubobjectIsDesignatorContext, unsigned &Index,
InitListExpr *StructuredList,
@@ -317,7 +321,8 @@ class InitListChecker {
static ExprResult PerformEmptyInit(Sema &SemaRef,
SourceLocation Loc,
const InitializedEntity &Entity,
- bool VerifyOnly);
+ bool VerifyOnly,
+ bool TreatUnavailableAsInvalid);
// Explanation on the "FillWithNoInit" mode:
//
@@ -338,6 +343,10 @@ class InitListChecker {
// in the InitListExpr, the "holes" in Case#1 are filled not with empty
// initializers but with special "NoInitExpr" placeholders, which tell the
// CodeGen not to generate any initializers for these parts.
+ void FillInEmptyInitForBase(unsigned Init, const CXXBaseSpecifier &Base,
+ const InitializedEntity &ParentEntity,
+ InitListExpr *ILE, bool &RequiresSecondPass,
+ bool FillWithNoInit);
void FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
const InitializedEntity &ParentEntity,
InitListExpr *ILE, bool &RequiresSecondPass,
@@ -353,19 +362,22 @@ class InitListChecker {
public:
InitListChecker(Sema &S, const InitializedEntity &Entity,
- InitListExpr *IL, QualType &T, bool VerifyOnly);
+ InitListExpr *IL, QualType &T, bool VerifyOnly,
+ bool TreatUnavailableAsInvalid);
bool HadError() { return hadError; }
// @brief Retrieves the fully-structured initializer list used for
// semantic analysis and code generation.
InitListExpr *getFullyStructuredList() const { return FullyStructuredList; }
};
+
} // end anonymous namespace
ExprResult InitListChecker::PerformEmptyInit(Sema &SemaRef,
SourceLocation Loc,
const InitializedEntity &Entity,
- bool VerifyOnly) {
+ bool VerifyOnly,
+ bool TreatUnavailableAsInvalid) {
InitializationKind Kind = InitializationKind::CreateValue(Loc, Loc, Loc,
true);
MultiExprArg SubInit;
@@ -419,8 +431,6 @@ ExprResult InitListChecker::PerformEmptyInit(Sema &SemaRef,
if (CtorDecl->getMinRequiredArguments() == 0 &&
CtorDecl->isExplicit() && R->getDeclName() &&
SemaRef.SourceMgr.isInSystemHeader(CtorDecl->getLocation())) {
-
-
bool IsInStd = false;
for (NamespaceDecl *ND = dyn_cast<NamespaceDecl>(R->getDeclContext());
ND && !IsInStd; ND = dyn_cast<NamespaceDecl>(ND->getParent())) {
@@ -437,7 +447,8 @@ ExprResult InitListChecker::PerformEmptyInit(Sema &SemaRef,
InitSeq.InitializeFrom(
SemaRef, Entity,
InitializationKind::CreateValue(Loc, Loc, Loc, true),
- MultiExprArg(), /*TopLevelOfInitList=*/false);
+ MultiExprArg(), /*TopLevelOfInitList=*/false,
+ TreatUnavailableAsInvalid);
// Emit a warning for this. System header warnings aren't shown
// by default, but people working on system headers should see it.
if (!VerifyOnly) {
@@ -474,10 +485,43 @@ void InitListChecker::CheckEmptyInitializable(const InitializedEntity &Entity,
SourceLocation Loc) {
assert(VerifyOnly &&
"CheckEmptyInitializable is only inteded for verification mode.");
- if (PerformEmptyInit(SemaRef, Loc, Entity, /*VerifyOnly*/true).isInvalid())
+ if (PerformEmptyInit(SemaRef, Loc, Entity, /*VerifyOnly*/true,
+ TreatUnavailableAsInvalid).isInvalid())
hadError = true;
}
+void InitListChecker::FillInEmptyInitForBase(
+ unsigned Init, const CXXBaseSpecifier &Base,
+ const InitializedEntity &ParentEntity, InitListExpr *ILE,
+ bool &RequiresSecondPass, bool FillWithNoInit) {
+ assert(Init < ILE->getNumInits() && "should have been expanded");
+
+ InitializedEntity BaseEntity = InitializedEntity::InitializeBase(
+ SemaRef.Context, &Base, false, &ParentEntity);
+
+ if (!ILE->getInit(Init)) {
+ ExprResult BaseInit =
+ FillWithNoInit ? new (SemaRef.Context) NoInitExpr(Base.getType())
+ : PerformEmptyInit(SemaRef, ILE->getLocEnd(), BaseEntity,
+ /*VerifyOnly*/ false,
+ TreatUnavailableAsInvalid);
+ if (BaseInit.isInvalid()) {
+ hadError = true;
+ return;
+ }
+
+ ILE->setInit(Init, BaseInit.getAs<Expr>());
+ } else if (InitListExpr *InnerILE =
+ dyn_cast<InitListExpr>(ILE->getInit(Init))) {
+ FillInEmptyInitializations(BaseEntity, InnerILE,
+ RequiresSecondPass, FillWithNoInit);
+ } else if (DesignatedInitUpdateExpr *InnerDIUE =
+ dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init))) {
+ FillInEmptyInitializations(BaseEntity, InnerDIUE->getUpdater(),
+ RequiresSecondPass, /*FillWithNoInit =*/true);
+ }
+}
+
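FillInEmptyInitForBase handles C++17 aggregates with base classes, where a base lacking an explicit initializer is empty-initialized like a trailing field; for example:

    struct A { int i; };
    struct B : A { int j; };
    B b1 = { {1}, 2 };  // base A initialized from {1}, j from 2
    B b2 = {};          // the A base and j both receive empty initializers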
void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
const InitializedEntity &ParentEntity,
InitListExpr *ILE,
@@ -535,7 +579,8 @@ void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
}
ExprResult MemberInit = PerformEmptyInit(SemaRef, Loc, MemberEntity,
- /*VerifyOnly*/false);
+ /*VerifyOnly*/false,
+ TreatUnavailableAsInvalid);
if (MemberInit.isInvalid()) {
hadError = true;
return;
@@ -592,14 +637,25 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
// The fields beyond ILE->getNumInits() are default initialized, so in
// order to leave them uninitialized, the ILE is expanded and the extra
// fields are then filled with NoInitExpr.
- unsigned NumFields = 0;
- for (auto *Field : RDecl->fields())
- if (!Field->isUnnamedBitfield())
- ++NumFields;
- if (ILE->getNumInits() < NumFields)
- ILE->resizeInits(SemaRef.Context, NumFields);
+ unsigned NumElems = numStructUnionElements(ILE->getType());
+ if (RDecl->hasFlexibleArrayMember())
+ ++NumElems;
+ if (ILE->getNumInits() < NumElems)
+ ILE->resizeInits(SemaRef.Context, NumElems);
unsigned Init = 0;
+
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RDecl)) {
+ for (auto &Base : CXXRD->bases()) {
+ if (hadError)
+ return;
+
+ FillInEmptyInitForBase(Init, Base, Entity, ILE, RequiresSecondPass,
+ FillWithNoInit);
+ ++Init;
+ }
+ }
+
for (auto *Field : RDecl->fields()) {
if (Field->isUnnamedBitfield())
continue;
@@ -661,7 +717,8 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
else {
ExprResult ElementInit = PerformEmptyInit(SemaRef, ILE->getLocEnd(),
ElementEntity,
- /*VerifyOnly*/false);
+ /*VerifyOnly*/false,
+ TreatUnavailableAsInvalid);
if (ElementInit.isInvalid()) {
hadError = true;
return;
@@ -707,11 +764,12 @@ InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
}
}
-
InitListChecker::InitListChecker(Sema &S, const InitializedEntity &Entity,
InitListExpr *IL, QualType &T,
- bool VerifyOnly)
- : SemaRef(S), VerifyOnly(VerifyOnly) {
+ bool VerifyOnly,
+ bool TreatUnavailableAsInvalid)
+ : SemaRef(S), VerifyOnly(VerifyOnly),
+ TreatUnavailableAsInvalid(TreatUnavailableAsInvalid) {
// FIXME: Check that IL isn't already the semantic form of some other
// InitListExpr. If it is, we'd create a broken AST.
@@ -744,6 +802,8 @@ int InitListChecker::numArrayElements(QualType DeclType) {
int InitListChecker::numStructUnionElements(QualType DeclType) {
RecordDecl *structDecl = DeclType->getAs<RecordType>()->getDecl();
int InitializableMembers = 0;
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(structDecl))
+ InitializableMembers += CXXRD->getNumBases();
for (const auto *Field : structDecl->fields())
if (!Field->isUnnamedBitfield())
++InitializableMembers;
@@ -888,7 +948,6 @@ static void warnBracedScalarInit(Sema &S, const InitializedEntity &Entity,
}
}
-
/// Check whether the initializer \p IList (that was written with explicit
/// braces) can be used to initialize an object of type \p T.
///
@@ -992,10 +1051,14 @@ void InitListChecker::CheckListElementTypes(const InitializedEntity &Entity,
assert(DeclType->isAggregateType() &&
"non-aggregate records should be handed in CheckSubElementType");
RecordDecl *RD = DeclType->getAs<RecordType>()->getDecl();
- CheckStructUnionTypes(Entity, IList, DeclType, RD->field_begin(),
- SubobjectIsDesignatorContext, Index,
- StructuredList, StructuredIndex,
- TopLevelObject);
+ auto Bases =
+ CXXRecordDecl::base_class_range(CXXRecordDecl::base_class_iterator(),
+ CXXRecordDecl::base_class_iterator());
+ if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+ Bases = CXXRD->bases();
+ CheckStructUnionTypes(Entity, IList, DeclType, Bases, RD->field_begin(),
+ SubobjectIsDesignatorContext, Index, StructuredList,
+ StructuredIndex, TopLevelObject);
} else if (DeclType->isArrayType()) {
llvm::APSInt Zero(
SemaRef.Context.getTypeSize(SemaRef.Context.getSizeType()),
@@ -1130,8 +1193,8 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
// Fall through for subaggregate initialization.
} else {
- assert((ElemType->isRecordType() || ElemType->isVectorType()) &&
- "Unexpected type");
+ assert((ElemType->isRecordType() || ElemType->isVectorType() ||
+ ElemType->isClkEventT()) && "Unexpected type");
// C99 6.7.8p13:
//
@@ -1220,7 +1283,6 @@ void InitListChecker::CheckComplexType(const InitializedEntity &Entity,
}
}
-
void InitListChecker::CheckScalarType(const InitializedEntity &Entity,
InitListExpr *IList, QualType DeclType,
unsigned &Index,
@@ -1672,16 +1734,13 @@ bool InitListChecker::CheckFlexibleArrayInit(const InitializedEntity &Entity,
return FlexArrayDiag != diag::ext_flexible_array_init;
}
-void InitListChecker::CheckStructUnionTypes(const InitializedEntity &Entity,
- InitListExpr *IList,
- QualType DeclType,
- RecordDecl::field_iterator Field,
- bool SubobjectIsDesignatorContext,
- unsigned &Index,
- InitListExpr *StructuredList,
- unsigned &StructuredIndex,
- bool TopLevelObject) {
- RecordDecl* structDecl = DeclType->getAs<RecordType>()->getDecl();
+void InitListChecker::CheckStructUnionTypes(
+ const InitializedEntity &Entity, InitListExpr *IList, QualType DeclType,
+ CXXRecordDecl::base_class_range Bases, RecordDecl::field_iterator Field,
+ bool SubobjectIsDesignatorContext, unsigned &Index,
+ InitListExpr *StructuredList, unsigned &StructuredIndex,
+ bool TopLevelObject) {
+ RecordDecl *structDecl = DeclType->getAs<RecordType>()->getDecl();
// If the record is invalid, some of its members are invalid. To avoid
// confusion, we forgo checking the initializer for the entire record.
@@ -1726,13 +1785,35 @@ void InitListChecker::CheckStructUnionTypes(const InitializedEntity &Entity,
return;
}
+ bool InitializedSomething = false;
+
+ // If we have any base classes, they are initialized prior to the fields.
+ for (auto &Base : Bases) {
+ Expr *Init = Index < IList->getNumInits() ? IList->getInit(Index) : nullptr;
+ SourceLocation InitLoc = Init ? Init->getLocStart() : IList->getLocEnd();
+
+ // Designated inits always initialize fields, so if we see one, all
+ // remaining base classes have no explicit initializer.
+ if (Init && isa<DesignatedInitExpr>(Init))
+ Init = nullptr;
+
+ InitializedEntity BaseEntity = InitializedEntity::InitializeBase(
+ SemaRef.Context, &Base, false, &Entity);
+ if (Init) {
+ CheckSubElementType(BaseEntity, IList, Base.getType(), Index,
+ StructuredList, StructuredIndex);
+ InitializedSomething = true;
+ } else if (VerifyOnly) {
+ CheckEmptyInitializable(BaseEntity, InitLoc);
+ }
+ }
+
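Under the same C++17 aggregate rules, braced initializers are consumed by the bases in declaration order before any field is considered; for example:

    struct X { int a; };
    struct Y { int b; };
    struct Z : X, Y { int c; };
    Z z = {1, 2, 3};  // 1 -> X::a, 2 -> Y::b, 3 -> c: bases first, then fields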
// If structDecl is a forward declaration, this loop won't do
// anything except look at designated initializers; that's okay,
// because an error should get printed out elsewhere. It might be
// worthwhile to skip over the rest of the initializer, though.
RecordDecl *RD = DeclType->getAs<RecordType>()->getDecl();
RecordDecl::field_iterator FieldEnd = RD->field_end();
- bool InitializedSomething = false;
bool CheckForMissingFields = true;
while (Index < IList->getNumInits()) {
Expr *Init = IList->getInit(Index);
@@ -1782,7 +1863,7 @@ void InitListChecker::CheckStructUnionTypes(const InitializedEntity &Entity,
// Make sure we can use this declaration.
bool InvalidUse;
if (VerifyOnly)
- InvalidUse = !SemaRef.CanUseDecl(*Field);
+ InvalidUse = !SemaRef.CanUseDecl(*Field, TreatUnavailableAsInvalid);
else
InvalidUse = SemaRef.DiagnoseUseOfDecl(*Field,
IList->getInit(Index)->getLocStart());
@@ -1895,8 +1976,8 @@ static DesignatedInitExpr *CloneDesignatedInitExpr(Sema &SemaRef,
SmallVector<Expr*, 4> IndexExprs(NumIndexExprs);
for (unsigned I = 0; I < NumIndexExprs; ++I)
IndexExprs[I] = DIE->getSubExpr(I + 1);
- return DesignatedInitExpr::Create(SemaRef.Context, DIE->designators_begin(),
- DIE->size(), IndexExprs,
+ return DesignatedInitExpr::Create(SemaRef.Context, DIE->designators(),
+ IndexExprs,
DIE->getEqualOrColonLoc(),
DIE->usesGNUSyntax(), DIE->getInit());
}
@@ -1919,7 +2000,7 @@ class FieldInitializerValidatorCCC : public CorrectionCandidateCallback {
RecordDecl *Record;
};
-}
+} // end anonymous namespace
/// @brief Check the well-formedness of a C99 designated initializer.
///
@@ -2146,8 +2227,10 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
for (auto *FI : RT->getDecl()->fields()) {
if (FI->isUnnamedBitfield())
continue;
- if (KnownField == FI)
+ if (declaresSameEntity(KnownField, FI)) {
+ KnownField = FI;
break;
+ }
++FieldIndex;
}
@@ -2160,11 +2243,11 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
FieldIndex = 0;
if (!VerifyOnly) {
FieldDecl *CurrentField = StructuredList->getInitializedFieldInUnion();
- if (CurrentField && CurrentField != *Field) {
+ if (CurrentField && !declaresSameEntity(CurrentField, *Field)) {
assert(StructuredList->getNumInits() == 1
&& "A union should never have more than one initializer!");
- // we're about to throw away an initializer, emit warning
+ // We're about to throw away an initializer, emit warning.
SemaRef.Diag(D->getFieldLoc(),
diag::warn_initializer_overrides)
<< D->getSourceRange();
@@ -2186,7 +2269,7 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
// Make sure we can use this declaration.
bool InvalidUse;
if (VerifyOnly)
- InvalidUse = !SemaRef.CanUseDecl(*Field);
+ InvalidUse = !SemaRef.CanUseDecl(*Field, TreatUnavailableAsInvalid);
else
InvalidUse = SemaRef.DiagnoseUseOfDecl(*Field, D->getFieldLoc());
if (InvalidUse) {
@@ -2276,7 +2359,7 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
if (CheckDesignatedInitializer(MemberEntity, IList, DIE, DesigIdx + 1,
FieldType, nullptr, nullptr, Index,
StructuredList, newStructuredIndex,
- true, false))
+ FinishSubobjectInit, false))
return true;
}
@@ -2304,8 +2387,11 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
// Check the remaining fields within this class/struct/union subobject.
bool prevHadError = hadError;
- CheckStructUnionTypes(Entity, IList, CurrentObjectType, Field, false, Index,
- StructuredList, FieldIndex);
+ auto NoBases =
+ CXXRecordDecl::base_class_range(CXXRecordDecl::base_class_iterator(),
+ CXXRecordDecl::base_class_iterator());
+ CheckStructUnionTypes(Entity, IList, CurrentObjectType, NoBases, Field,
+ false, Index, StructuredList, FieldIndex);
return hadError && !prevHadError;
}
@@ -2467,11 +2553,11 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
Index = OldIndex;
ElementEntity.setElementIndex(ElementIndex);
- if (CheckDesignatedInitializer(ElementEntity, IList, DIE, DesigIdx + 1,
- ElementType, nullptr, nullptr, Index,
- StructuredList, ElementIndex,
- (DesignatedStartIndex == DesignatedEndIndex),
- false))
+ if (CheckDesignatedInitializer(
+ ElementEntity, IList, DIE, DesigIdx + 1, ElementType, nullptr,
+ nullptr, Index, StructuredList, ElementIndex,
+ FinishSubobjectInit && (DesignatedStartIndex == DesignatedEndIndex),
+ false))
return true;
// Move to the next index in the array that we'll be initializing.
@@ -2751,7 +2837,7 @@ ExprResult Sema::ActOnDesignatedInitializer(Designation &Desig,
DesignatedInitExpr *DIE
= DesignatedInitExpr::Create(Context,
- Designators.data(), Designators.size(),
+ Designators,
InitExpressions, Loc, GNUSyntax,
Init.getAs<Expr>());
@@ -2787,10 +2873,11 @@ InitializedEntity::InitializedEntity(ASTContext &Context, unsigned Index,
InitializedEntity
InitializedEntity::InitializeBase(ASTContext &Context,
const CXXBaseSpecifier *Base,
- bool IsInheritedVirtualBase) {
+ bool IsInheritedVirtualBase,
+ const InitializedEntity *Parent) {
InitializedEntity Result;
Result.Kind = EK_Base;
- Result.Parent = nullptr;
+ Result.Parent = Parent;
Result.Base = reinterpret_cast<uintptr_t>(Base);
if (IsInheritedVirtualBase)
Result.Base |= 0x01;
@@ -2928,7 +3015,7 @@ unsigned InitializedEntity::dumpImpl(raw_ostream &OS) const {
return Depth + 1;
}
-void InitializedEntity::dump() const {
+LLVM_DUMP_METHOD void InitializedEntity::dump() const {
dumpImpl(llvm::errs());
}
@@ -3137,13 +3224,9 @@ void InitializationSequence::AddListInitializationStep(QualType T) {
Steps.push_back(S);
}
-void
-InitializationSequence
-::AddConstructorInitializationStep(CXXConstructorDecl *Constructor,
- AccessSpecifier Access,
- QualType T,
- bool HadMultipleCandidates,
- bool FromInitList, bool AsInitList) {
+void InitializationSequence::AddConstructorInitializationStep(
+ DeclAccessPair FoundDecl, CXXConstructorDecl *Constructor, QualType T,
+ bool HadMultipleCandidates, bool FromInitList, bool AsInitList) {
Step S;
S.Kind = FromInitList ? AsInitList ? SK_StdInitializerListConstructorCall
: SK_ConstructorInitializationFromList
@@ -3151,7 +3234,7 @@ InitializationSequence
S.Type = T;
S.Function.HadMultipleCandidates = HadMultipleCandidates;
S.Function.Function = Constructor;
- S.Function.FoundDecl = DeclAccessPair::make(Constructor, Access);
+ S.Function.FoundDecl = FoundDecl;
Steps.push_back(S);
}
@@ -3313,7 +3396,8 @@ static void TryListInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitListExpr *InitList,
- InitializationSequence &Sequence);
+ InitializationSequence &Sequence,
+ bool TreatUnavailableAsInvalid);
/// \brief When initializing from init list via constructor, handle
/// initialization of an object of type std::initializer_list<T>.
@@ -3323,7 +3407,8 @@ static void TryListInitialization(Sema &S,
static bool TryInitializerListConstruction(Sema &S,
InitListExpr *List,
QualType DestType,
- InitializationSequence &Sequence) {
+ InitializationSequence &Sequence,
+ bool TreatUnavailableAsInvalid) {
QualType E;
if (!S.isStdInitializerList(DestType, &E))
return false;
@@ -3342,7 +3427,8 @@ static bool TryInitializerListConstruction(Sema &S,
InitializedEntity::InitializeTemporary(ArrayType);
InitializationKind Kind =
InitializationKind::CreateDirectList(List->getExprLoc());
- TryListInitialization(S, HiddenArray, Kind, List, Sequence);
+ TryListInitialization(S, HiddenArray, Kind, List, Sequence,
+ TreatUnavailableAsInvalid);
if (Sequence)
Sequence.AddStdInitializerListConstructionStep(DestType);
return true;
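The TryInitializerListConstruction path above models the braced list as a hidden constant array that the std::initializer_list object points into. A minimal sketch of the user-visible pattern (the __backing name is invented for illustration):

    #include <initializer_list>

    std::initializer_list<int> il = {1, 2, 3};
    // Conceptually (not real syntax), the compiler materializes:
    //   const int __backing[3] = {1, 2, 3};   // the 'HiddenArray' temporary
    // and makes 'il' refer to that array and its length.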
@@ -3359,18 +3445,13 @@ ResolveConstructorOverload(Sema &S, SourceLocation DeclLoc,
CandidateSet.clear();
for (NamedDecl *D : Ctors) {
- DeclAccessPair FoundDecl = DeclAccessPair::make(D, D->getAccess());
- bool SuppressUserConversions = false;
+ auto Info = getConstructorInfo(D);
+ if (!Info.Constructor)
+ continue;
- // Find the constructor (which may be a template).
- CXXConstructorDecl *Constructor = nullptr;
- FunctionTemplateDecl *ConstructorTmpl = dyn_cast<FunctionTemplateDecl>(D);
- if (ConstructorTmpl)
- Constructor = cast<CXXConstructorDecl>(
- ConstructorTmpl->getTemplatedDecl());
- else {
- Constructor = cast<CXXConstructorDecl>(D);
+ bool SuppressUserConversions = false;
+ if (!Info.ConstructorTmpl) {
// C++11 [over.best.ics]p4:
// ... and the constructor or user-defined conversion function is a
// candidate by
@@ -3387,15 +3468,15 @@ ResolveConstructorOverload(Sema &S, SourceLocation DeclLoc,
// parameter of a constructor of X.
if ((CopyInitializing ||
(IsListInit && Args.size() == 1 && isa<InitListExpr>(Args[0]))) &&
- Constructor->isCopyOrMoveConstructor())
+ Info.Constructor->isCopyOrMoveConstructor())
SuppressUserConversions = true;
}
- if (!Constructor->isInvalidDecl() &&
- (AllowExplicit || !Constructor->isExplicit()) &&
- (!OnlyListConstructors || S.isInitListConstructor(Constructor))) {
- if (ConstructorTmpl)
- S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl,
+ if (!Info.Constructor->isInvalidDecl() &&
+ (AllowExplicit || !Info.Constructor->isExplicit()) &&
+ (!OnlyListConstructors || S.isInitListConstructor(Info.Constructor))) {
+ if (Info.ConstructorTmpl)
+ S.AddTemplateOverloadCandidate(Info.ConstructorTmpl, Info.FoundDecl,
/*ExplicitArgs*/ nullptr, Args,
CandidateSet, SuppressUserConversions);
else {
@@ -3407,9 +3488,9 @@ ResolveConstructorOverload(Sema &S, SourceLocation DeclLoc,
// are also considered.
bool AllowExplicitConv = AllowExplicit && !CopyInitializing &&
Args.size() == 1 &&
- Constructor->isCopyOrMoveConstructor();
- S.AddOverloadCandidate(Constructor, FoundDecl, Args, CandidateSet,
- SuppressUserConversions,
+ Info.Constructor->isCopyOrMoveConstructor();
+ S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl, Args,
+ CandidateSet, SuppressUserConversions,
/*PartialOverloading=*/false,
/*AllowExplicit=*/AllowExplicitConv);
}
@@ -3517,18 +3598,23 @@ static void TryConstructorInitialization(Sema &S,
// If a program calls for the default initialization of an object
// of a const-qualified type T, T shall be a class type with a
// user-provided default constructor.
+ // C++ core issue 253 proposal:
+ // If the implicit default constructor initializes all subobjects, no
+ // initializer should be required.
+ // The 253 proposal is needed, for example, to process libstdc++ 5.x headers.
+ CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
if (Kind.getKind() == InitializationKind::IK_Default &&
- Entity.getType().isConstQualified() &&
- !cast<CXXConstructorDecl>(Best->Function)->isUserProvided()) {
- if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity))
- Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
- return;
+ Entity.getType().isConstQualified()) {
+ if (!CtorDecl->getParent()->allowConstDefaultInit()) {
+ if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity))
+ Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
+ return;
+ }
}
// C++11 [over.match.list]p1:
// In copy-list-initialization, if an explicit constructor is chosen, the
// initializer is ill-formed.
- CXXConstructorDecl *CtorDecl = cast<CXXConstructorDecl>(Best->Function);
if (IsListInit && !Kind.AllowExplicit() && CtorDecl->isExplicit()) {
Sequence.SetFailed(InitializationSequence::FK_ExplicitConstructor);
return;
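A hedged sketch of what the core-issue-253 branch above starts accepting; 'Counter' is an invented type:

    struct Counter {
      int value = 0;   // NSDMI: the implicit default constructor
    };                 // initializes every subobject

    const Counter c;   // now OK: allowConstDefaultInit() holds; previously an
                       // error (const object, no user-provided default ctor)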
@@ -3538,7 +3624,7 @@ static void TryConstructorInitialization(Sema &S,
// subsumed by the initialization.
bool HadMultipleCandidates = (CandidateSet.size() > 1);
Sequence.AddConstructorInitializationStep(
- CtorDecl, Best->FoundDecl.getAccess(), DestType, HadMultipleCandidates,
+ Best->FoundDecl, CtorDecl, DestType, HadMultipleCandidates,
IsListInit | IsInitListCopy, AsInitializerList);
}
@@ -3591,7 +3677,8 @@ static void TryReferenceListInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitListExpr *InitList,
- InitializationSequence &Sequence) {
+ InitializationSequence &Sequence,
+ bool TreatUnavailableAsInvalid) {
// First, catch C++03 where this isn't possible.
if (!S.getLangOpts().CPlusPlus11) {
Sequence.SetFailed(InitializationSequence::FK_ReferenceBindingToInitList);
@@ -3647,7 +3734,8 @@ static void TryReferenceListInitialization(Sema &S,
// Not reference-related. Create a temporary and bind to that.
InitializedEntity TempEntity = InitializedEntity::InitializeTemporary(cv1T1);
- TryListInitialization(S, TempEntity, Kind, InitList, Sequence);
+ TryListInitialization(S, TempEntity, Kind, InitList, Sequence,
+ TreatUnavailableAsInvalid);
if (Sequence) {
if (DestType->isRValueReferenceType() ||
(T1Quals.hasConst() && !T1Quals.hasVolatile()))
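The rvalue-reference/const-lvalue test above decides which references may bind to the temporary created for the list; for instance:

    int &&a = {1};        // OK: rvalue reference binds to the list's temporary
    const int &b = {2};   // OK: const (non-volatile) lvalue reference binds too
    int &c = {3};         // error: non-const lvalue reference cannot bind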
@@ -3663,7 +3751,8 @@ static void TryListInitialization(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
InitListExpr *InitList,
- InitializationSequence &Sequence) {
+ InitializationSequence &Sequence,
+ bool TreatUnavailableAsInvalid) {
QualType DestType = Entity.getType();
// C++ doesn't allow scalar initialization with more than one argument.
@@ -3674,7 +3763,8 @@ static void TryListInitialization(Sema &S,
return;
}
if (DestType->isReferenceType()) {
- TryReferenceListInitialization(S, Entity, Kind, InitList, Sequence);
+ TryReferenceListInitialization(S, Entity, Kind, InitList, Sequence,
+ TreatUnavailableAsInvalid);
return;
}
@@ -3718,7 +3808,8 @@ static void TryListInitialization(Sema &S,
InitList->getRBraceLoc())
: Kind;
Sequence.InitializeFrom(S, Entity, SubKind, SubInit,
- /*TopLevelOfInitList*/ true);
+ /*TopLevelOfInitList*/ true,
+ TreatUnavailableAsInvalid);
// TryStringLiteralInitialization() (in InitializeFrom()) will fail if
// the element is not an appropriately-typed string literal, in which
@@ -3750,7 +3841,8 @@ static void TryListInitialization(Sema &S,
// - Otherwise, if T is a specialization of std::initializer_list<E>,
// an initializer_list object constructed [...]
- if (TryInitializerListConstruction(S, InitList, DestType, Sequence))
+ if (TryInitializerListConstruction(S, InitList, DestType, Sequence,
+ TreatUnavailableAsInvalid))
return;
// - Otherwise, if T is a class type, constructors are considered.
@@ -3763,8 +3855,48 @@ static void TryListInitialization(Sema &S,
}
if (S.getLangOpts().CPlusPlus && !DestType->isAggregateType() &&
- InitList->getNumInits() == 1 &&
- InitList->getInit(0)->getType()->isRecordType()) {
+ InitList->getNumInits() == 1) {
+ Expr *E = InitList->getInit(0);
+
+ // - Otherwise, if T is an enumeration with a fixed underlying type,
+ // the initializer-list has a single element v, and the initialization
+ // is direct-list-initialization, the object is initialized with the
+ // value T(v); if a narrowing conversion is required to convert v to
+ // the underlying type of T, the program is ill-formed.
+ auto *ET = DestType->getAs<EnumType>();
+ if (S.getLangOpts().CPlusPlus1z &&
+ Kind.getKind() == InitializationKind::IK_DirectList &&
+ ET && ET->getDecl()->isFixed() &&
+ !S.Context.hasSameUnqualifiedType(E->getType(), DestType) &&
+ (E->getType()->isIntegralOrEnumerationType() ||
+ E->getType()->isFloatingType())) {
+ // There are two ways that T(v) can work when T is an enumeration type.
+ // If there is either an implicit conversion sequence from v to T or
+ // a conversion function that can convert from v to T, then we use that.
+ // Otherwise, if v is of integral, enumeration, or floating-point type,
+ // it is converted to the enumeration type via its underlying type.
+ // There is no overlap possible between these two cases (except when the
+ // source value is already of the destination type), and the first
+ // case is handled by the general case for single-element lists below.
+ ImplicitConversionSequence ICS;
+ ICS.setStandard();
+ ICS.Standard.setAsIdentityConversion();
+ // If E is of a floating-point type, then the conversion is ill-formed
+ // due to narrowing, but go through the motions in order to produce the
+ // right diagnostic.
+ ICS.Standard.Second = E->getType()->isFloatingType()
+ ? ICK_Floating_Integral
+ : ICK_Integral_Conversion;
+ ICS.Standard.setFromType(E->getType());
+ ICS.Standard.setToType(0, E->getType());
+ ICS.Standard.setToType(1, DestType);
+ ICS.Standard.setToType(2, DestType);
+ Sequence.AddConversionSequenceStep(ICS, ICS.Standard.getToType(2),
+ /*TopLevelOfInitList*/true);
+ Sequence.RewrapReferenceInitList(Entity.getType(), InitList);
+ return;
+ }
+
// - Otherwise, if the initializer list has a single element of type E
// [...references are handled above...], the object or reference is
// initialized from that element (by copy-initialization for
@@ -3778,22 +3910,25 @@ static void TryListInitialization(Sema &S,
// copy-initialization. This only matters if we might use an 'explicit'
// conversion operator, so we only need to handle the cases where the source
// is of record type.
- InitializationKind SubKind =
- Kind.getKind() == InitializationKind::IK_DirectList
- ? InitializationKind::CreateDirect(Kind.getLocation(),
- InitList->getLBraceLoc(),
- InitList->getRBraceLoc())
- : Kind;
- Expr *SubInit[1] = { InitList->getInit(0) };
- Sequence.InitializeFrom(S, Entity, SubKind, SubInit,
- /*TopLevelOfInitList*/true);
- if (Sequence)
- Sequence.RewrapReferenceInitList(Entity.getType(), InitList);
- return;
+ if (InitList->getInit(0)->getType()->isRecordType()) {
+ InitializationKind SubKind =
+ Kind.getKind() == InitializationKind::IK_DirectList
+ ? InitializationKind::CreateDirect(Kind.getLocation(),
+ InitList->getLBraceLoc(),
+ InitList->getRBraceLoc())
+ : Kind;
+ Expr *SubInit[1] = { InitList->getInit(0) };
+ Sequence.InitializeFrom(S, Entity, SubKind, SubInit,
+ /*TopLevelOfInitList*/true,
+ TreatUnavailableAsInvalid);
+ if (Sequence)
+ Sequence.RewrapReferenceInitList(Entity.getType(), InitList);
+ return;
+ }
}
InitListChecker CheckInitList(S, Entity, InitList,
- DestType, /*VerifyOnly=*/true);
+ DestType, /*VerifyOnly=*/true, TreatUnavailableAsInvalid);
if (CheckInitList.HadError()) {
Sequence.SetFailed(InitializationSequence::FK_ListInitializationFailed);
return;
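The new C++1z branch above (direct-list-initialization of an enumeration with a fixed underlying type) covers cases like the following sketch:

    enum class Flags : unsigned {};  // fixed underlying type

    Flags a{3u};       // OK in C++1z: converted via the underlying type
    Flags b{3.0};      // ill-formed: narrowing, diagnosed through the
                       // ICK_Floating_Integral sequence built above
    Flags c = {3u};    // still ill-formed: only direct-list-init qualifies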
@@ -3847,26 +3982,19 @@ static OverloadingResult TryRefInitWithConversionFunction(Sema &S,
CXXRecordDecl *T1RecordDecl = cast<CXXRecordDecl>(T1RecordType->getDecl());
for (NamedDecl *D : S.LookupConstructors(T1RecordDecl)) {
- DeclAccessPair FoundDecl = DeclAccessPair::make(D, D->getAccess());
-
- // Find the constructor (which may be a template).
- CXXConstructorDecl *Constructor = nullptr;
- FunctionTemplateDecl *ConstructorTmpl = dyn_cast<FunctionTemplateDecl>(D);
- if (ConstructorTmpl)
- Constructor = cast<CXXConstructorDecl>(
- ConstructorTmpl->getTemplatedDecl());
- else
- Constructor = cast<CXXConstructorDecl>(D);
+ auto Info = getConstructorInfo(D);
+ if (!Info.Constructor)
+ continue;
- if (!Constructor->isInvalidDecl() &&
- Constructor->isConvertingConstructor(AllowExplicit)) {
- if (ConstructorTmpl)
- S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl,
+ if (!Info.Constructor->isInvalidDecl() &&
+ Info.Constructor->isConvertingConstructor(AllowExplicit)) {
+ if (Info.ConstructorTmpl)
+ S.AddTemplateOverloadCandidate(Info.ConstructorTmpl, Info.FoundDecl,
/*ExplicitArgs*/ nullptr,
Initializer, CandidateSet,
/*SuppressUserConversions=*/true);
else
- S.AddOverloadCandidate(Constructor, FoundDecl,
+ S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl,
Initializer, CandidateSet,
/*SuppressUserConversions=*/true);
}
@@ -4068,7 +4196,6 @@ convertQualifiersAndValueKindIfNecessary(Sema &S,
return Initializer->getValueKind();
}
-
/// \brief Reference initialization without resolving overloaded functions.
static void TryReferenceInitializationCore(Sema &S,
const InitializedEntity &Entity,
@@ -4303,7 +4430,6 @@ static void TryReferenceInitializationCore(Sema &S,
}
Sequence.AddReferenceBindingStep(cv1T1, /*bindingTemporary=*/true);
- return;
}
/// \brief Attempt character array initialization from a string literal
@@ -4472,27 +4598,19 @@ static void TryUserDefinedConversion(Sema &S,
Con = CopyOfCon.begin(), ConEnd = CopyOfCon.end();
Con != ConEnd; ++Con) {
NamedDecl *D = *Con;
- DeclAccessPair FoundDecl = DeclAccessPair::make(D, D->getAccess());
-
- // Find the constructor (which may be a template).
- CXXConstructorDecl *Constructor = nullptr;
- FunctionTemplateDecl *ConstructorTmpl
- = dyn_cast<FunctionTemplateDecl>(D);
- if (ConstructorTmpl)
- Constructor = cast<CXXConstructorDecl>(
- ConstructorTmpl->getTemplatedDecl());
- else
- Constructor = cast<CXXConstructorDecl>(D);
+ auto Info = getConstructorInfo(D);
+ if (!Info.Constructor)
+ continue;
- if (!Constructor->isInvalidDecl() &&
- Constructor->isConvertingConstructor(AllowExplicit)) {
- if (ConstructorTmpl)
- S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl,
+ if (!Info.Constructor->isInvalidDecl() &&
+ Info.Constructor->isConvertingConstructor(AllowExplicit)) {
+ if (Info.ConstructorTmpl)
+ S.AddTemplateOverloadCandidate(Info.ConstructorTmpl, Info.FoundDecl,
/*ExplicitArgs*/ nullptr,
Initializer, CandidateSet,
/*SuppressUserConversions=*/true);
else
- S.AddOverloadCandidate(Constructor, FoundDecl,
+ S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl,
Initializer, CandidateSet,
/*SuppressUserConversions=*/true);
}
@@ -4689,8 +4807,8 @@ static void checkIndirectCopyRestoreSource(Sema &S, Expr *src) {
// If isWeakAccess is true, there will be an implicit
// load which requires a cleanup.
if (S.getLangOpts().ObjCAutoRefCount && isWeakAccess)
- S.ExprNeedsCleanups = true;
-
+ S.Cleanup.setExprNeedsCleanups(true);
+
if (iik == IIK_okay) return;
S.Diag(src->getExprLoc(), diag::err_arc_nonlocal_writeback)
@@ -4800,9 +4918,11 @@ InitializationSequence::InitializationSequence(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
- bool TopLevelOfInitList)
+ bool TopLevelOfInitList,
+ bool TreatUnavailableAsInvalid)
: FailedCandidateSet(Kind.getLocation(), OverloadCandidateSet::CSK_Normal) {
- InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList);
+ InitializeFrom(S, Entity, Kind, Args, TopLevelOfInitList,
+ TreatUnavailableAsInvalid);
}
/// Tries to get a FunctionDecl out of `E`. If it succeeds and we can take the
@@ -4820,7 +4940,8 @@ void InitializationSequence::InitializeFrom(Sema &S,
const InitializedEntity &Entity,
const InitializationKind &Kind,
MultiExprArg Args,
- bool TopLevelOfInitList) {
+ bool TopLevelOfInitList,
+ bool TreatUnavailableAsInvalid) {
ASTContext &Context = S.Context;
// Eliminate non-overload placeholder types in the arguments. We
@@ -4874,7 +4995,8 @@ void InitializationSequence::InitializeFrom(Sema &S,
// object is list-initialized (8.5.4).
if (Kind.getKind() != InitializationKind::IK_Direct) {
if (InitListExpr *InitList = dyn_cast_or_null<InitListExpr>(Initializer)) {
- TryListInitialization(S, Entity, Kind, InitList, *this);
+ TryListInitialization(S, Entity, Kind, InitList, *this,
+ TreatUnavailableAsInvalid);
return;
}
}
@@ -4958,7 +5080,7 @@ void InitializationSequence::InitializeFrom(Sema &S,
Entity.getKind() == InitializedEntity::EK_Member &&
Initializer && isa<InitListExpr>(Initializer)) {
TryListInitialization(S, Entity, Kind, cast<InitListExpr>(Initializer),
- *this);
+ *this, TreatUnavailableAsInvalid);
AddParenthesizedArrayInitStep(DestType);
} else if (DestAT->getElementType()->isCharType())
SetFailed(FK_ArrayNeedsInitListOrStringLiteral);
@@ -5232,38 +5354,33 @@ static void LookupCopyAndMoveConstructors(Sema &S,
for (SmallVectorImpl<NamedDecl *>::iterator
CI = Ctors.begin(), CE = Ctors.end(); CI != CE; ++CI) {
NamedDecl *D = *CI;
- CXXConstructorDecl *Constructor = nullptr;
+ auto Info = getConstructorInfo(D);
+ if (!Info.Constructor)
+ continue;
- if ((Constructor = dyn_cast<CXXConstructorDecl>(D))) {
- // Handle copy/moveconstructors, only.
- if (!Constructor || Constructor->isInvalidDecl() ||
- !Constructor->isCopyOrMoveConstructor() ||
- !Constructor->isConvertingConstructor(/*AllowExplicit=*/true))
+ if (!Info.ConstructorTmpl) {
+ // Handle copy/move constructors only.
+ if (Info.Constructor->isInvalidDecl() ||
+ !Info.Constructor->isCopyOrMoveConstructor() ||
+ !Info.Constructor->isConvertingConstructor(/*AllowExplicit=*/true))
continue;
- DeclAccessPair FoundDecl
- = DeclAccessPair::make(Constructor, Constructor->getAccess());
- S.AddOverloadCandidate(Constructor, FoundDecl,
+ S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl,
CurInitExpr, CandidateSet);
continue;
}
// Handle constructor templates.
- FunctionTemplateDecl *ConstructorTmpl = cast<FunctionTemplateDecl>(D);
- if (ConstructorTmpl->isInvalidDecl())
+ if (Info.ConstructorTmpl->isInvalidDecl())
continue;
- Constructor = cast<CXXConstructorDecl>(
- ConstructorTmpl->getTemplatedDecl());
- if (!Constructor->isConvertingConstructor(/*AllowExplicit=*/true))
+ if (!Info.Constructor->isConvertingConstructor(/*AllowExplicit=*/true))
continue;
// FIXME: Do we need to limit this to copy-constructor-like
// candidates?
- DeclAccessPair FoundDecl
- = DeclAccessPair::make(ConstructorTmpl, ConstructorTmpl->getAccess());
- S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl, nullptr,
- CurInitExpr, CandidateSet, true);
+ S.AddTemplateOverloadCandidate(Info.ConstructorTmpl, Info.FoundDecl,
+ nullptr, CurInitExpr, CandidateSet, true);
}
}
@@ -5402,8 +5519,8 @@ static ExprResult CopyObject(Sema &S,
SmallVector<Expr*, 8> ConstructorArgs;
CurInit.get(); // Ownership transferred into MultiExprArg, below.
- S.CheckConstructorAccess(Loc, Constructor, Entity,
- Best->FoundDecl.getAccess(), IsExtraneousCopy);
+ S.CheckConstructorAccess(Loc, Constructor, Best->FoundDecl, Entity,
+ IsExtraneousCopy);
if (IsExtraneousCopy) {
// If this is a totally extraneous copy for C++03 reference
@@ -5438,7 +5555,8 @@ static ExprResult CopyObject(Sema &S,
return ExprError();
// Actually perform the constructor call.
- CurInit = S.BuildCXXConstructExpr(Loc, T, Constructor, Elidable,
+ CurInit = S.BuildCXXConstructExpr(Loc, T, Best->FoundDecl, Constructor,
+ Elidable,
ConstructorArgs,
HadMultipleCandidates,
/*ListInit*/ false,
@@ -5485,7 +5603,7 @@ static void CheckCXX98CompatAccessibleCopy(Sema &S,
switch (OR) {
case OR_Success:
S.CheckConstructorAccess(Loc, cast<CXXConstructorDecl>(Best->Function),
- Entity, Best->FoundDecl.getAccess(), Diag);
+ Best->FoundDecl, Entity, Diag);
// FIXME: Check default arguments as far as that's possible.
break;
@@ -5611,7 +5729,6 @@ PerformConstructorInitialization(Sema &S,
if (isExplicitTemporary(Entity, Kind, NumArgs)) {
// An explicitly-constructed temporary, e.g., X(1, 2).
- S.MarkFunctionReferenced(Loc, Constructor);
if (S.DiagnoseUseOfDecl(Constructor, Loc))
return ExprError();
@@ -5623,10 +5740,19 @@ PerformConstructorInitialization(Sema &S,
? SourceRange(LBraceLoc, RBraceLoc)
: Kind.getParenRange();
+ if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(
+ Step.Function.FoundDecl.getDecl())) {
+ Constructor = S.findInheritingConstructor(Loc, Constructor, Shadow);
+ if (S.DiagnoseUseOfDecl(Constructor, Loc))
+ return ExprError();
+ }
+ S.MarkFunctionReferenced(Loc, Constructor);
+
CurInit = new (S.Context) CXXTemporaryObjectExpr(
- S.Context, Constructor, TSInfo, ConstructorArgs, ParenOrBraceRange,
- HadMultipleCandidates, IsListInitialization,
- IsStdInitListInitialization, ConstructorInitRequiresZeroInit);
+ S.Context, Constructor, TSInfo,
+ ConstructorArgs, ParenOrBraceRange, HadMultipleCandidates,
+ IsListInitialization, IsStdInitListInitialization,
+ ConstructorInitRequiresZeroInit);
} else {
CXXConstructExpr::ConstructionKind ConstructKind =
CXXConstructExpr::CK_Complete;
@@ -5651,6 +5777,7 @@ PerformConstructorInitialization(Sema &S,
// unconditionally.
if (Entity.allowsNRVO())
CurInit = S.BuildCXXConstructExpr(Loc, Entity.getType(),
+ Step.Function.FoundDecl,
Constructor, /*Elidable=*/true,
ConstructorArgs,
HadMultipleCandidates,
@@ -5661,6 +5788,7 @@ PerformConstructorInitialization(Sema &S,
ParenOrBraceRange);
else
CurInit = S.BuildCXXConstructExpr(Loc, Entity.getType(),
+ Step.Function.FoundDecl,
Constructor,
ConstructorArgs,
HadMultipleCandidates,
@@ -5674,8 +5802,7 @@ PerformConstructorInitialization(Sema &S,
return ExprError();
// Only check access if all of that succeeded.
- S.CheckConstructorAccess(Loc, Constructor, Entity,
- Step.Function.FoundDecl.getAccess());
+ S.CheckConstructorAccess(Loc, Constructor, Step.Function.FoundDecl, Entity);
if (S.DiagnoseUseOfDecl(Step.Function.FoundDecl, Loc))
return ExprError();
@@ -5777,6 +5904,11 @@ static const InitializedEntity *getEntityForTemporaryLifetimeExtension(
FallbackDecl);
case InitializedEntity::EK_Base:
+ // For subobjects, we look at the complete object.
+ if (Entity->getParent())
+ return getEntityForTemporaryLifetimeExtension(Entity->getParent(),
+ Entity);
+ // Fall through.
case InitializedEntity::EK_Delegating:
// We can reach this case for aggregate initialization in a constructor:
// struct A { int &&r; };
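With the Parent walk added above, a temporary bound to a reference in a base subobject is extended to the lifetime of the complete object. A sketch, assuming C++1z aggregate initialization with bases (enabled elsewhere in this patch):

    struct Base { const int &r; };
    struct Derived : Base {};

    Derived d{{42}};   // the 'int' temporary now lives as long as 'd':
                       // EK_Base defers to the complete-object entity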
@@ -6042,6 +6174,36 @@ static void CheckMoveOnConstruction(Sema &S, const Expr *InitExpr,
<< FixItHint::CreateRemoval(SourceRange(RParen, RParen));
}
+static void CheckForNullPointerDereference(Sema &S, const Expr *E) {
+ // Check to see if we are dereferencing a null pointer. If so, this is
+ // undefined behavior, so warn about it. This only handles the pattern
+ // "*null", which is a very syntactic check.
+ if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(E->IgnoreParenCasts()))
+ if (UO->getOpcode() == UO_Deref &&
+ UO->getSubExpr()->IgnoreParenCasts()->
+ isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull)) {
+ S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
+ S.PDiag(diag::warn_binding_null_to_reference)
+ << UO->getSubExpr()->getSourceRange());
+ }
+}
+
+MaterializeTemporaryExpr *
+Sema::CreateMaterializeTemporaryExpr(QualType T, Expr *Temporary,
+ bool BoundToLvalueReference) {
+ auto MTE = new (Context)
+ MaterializeTemporaryExpr(T, Temporary, BoundToLvalueReference);
+
+ // Order an ExprWithCleanups for lifetime marks.
+ //
+ // TODO: It would be good to have a single place to check the access of the
+ // destructor and generate ExprWithCleanups for various uses. Currently these
+ // are done in both CreateMaterializeTemporaryExpr and MaybeBindToTemporary,
+ // but there may be a chance to merge them.
+ Cleanup.setExprNeedsCleanups(false);
+ return MTE;
+}
+
ExprResult
InitializationSequence::Perform(Sema &S,
const InitializedEntity &Entity,
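CheckForNullPointerDereference above is deliberately shallow: it fires only when the dereferenced operand is syntactically a null pointer constant. Roughly:

    int *p = nullptr;
    const int &r1 = *(int *)nullptr;  // warned: '*null' pattern, binding a
                                      // dereferenced null to a reference
    const int &r2 = *p;               // not warned: would need data-flow
                                      // analysis, not a syntactic match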
@@ -6294,6 +6456,7 @@ InitializationSequence::Perform(Sema &S,
/*IsInitializerList=*/false,
ExtendingEntity->getDecl());
+ CheckForNullPointerDereference(S, CurInit.get());
break;
case SK_BindReferenceToTemporary: {
@@ -6305,7 +6468,7 @@ InitializationSequence::Perform(Sema &S,
return ExprError();
// Materialize the temporary into memory.
- MaterializeTemporaryExpr *MTE = new (S.Context) MaterializeTemporaryExpr(
+ MaterializeTemporaryExpr *MTE = S.CreateMaterializeTemporaryExpr(
Entity.getType().getNonReferenceType(), CurInit.get(),
Entity.getType()->isLValueReferenceType());
@@ -6325,7 +6488,7 @@ InitializationSequence::Perform(Sema &S,
MTE->getType()->isObjCLifetimeType()) ||
(MTE->getStorageDuration() == SD_Automatic &&
MTE->getType().isDestructedType()))
- S.ExprNeedsCleanups = true;
+ S.Cleanup.setExprNeedsCleanups(true);
CurInit = MTE;
break;
@@ -6360,7 +6523,8 @@ InitializationSequence::Perform(Sema &S,
return ExprError();
// Build an expression that constructs a temporary.
- CurInit = S.BuildCXXConstructExpr(Loc, Step->Type, Constructor,
+ CurInit = S.BuildCXXConstructExpr(Loc, Step->Type,
+ FoundFn, Constructor,
ConstructorArgs,
HadMultipleCandidates,
/*ListInit*/ false,
@@ -6371,8 +6535,8 @@ InitializationSequence::Perform(Sema &S,
if (CurInit.isInvalid())
return ExprError();
- S.CheckConstructorAccess(Kind.getLocation(), Constructor, Entity,
- FoundFn.getAccess());
+ S.CheckConstructorAccess(Kind.getLocation(), Constructor, FoundFn,
+ Entity);
if (S.DiagnoseUseOfDecl(FoundFn, Kind.getLocation()))
return ExprError();
@@ -6499,7 +6663,8 @@ InitializationSequence::Perform(Sema &S,
InitializedEntity TempEntity = InitializedEntity::InitializeTemporary(Ty);
InitializedEntity InitEntity = IsTemporary ? TempEntity : Entity;
InitListChecker PerformInitList(S, InitEntity,
- InitList, Ty, /*VerifyOnly=*/false);
+ InitList, Ty, /*VerifyOnly=*/false,
+ /*TreatUnavailableAsInvalid=*/false);
if (PerformInitList.HadError())
return ExprError();
@@ -6715,9 +6880,9 @@ InitializationSequence::Perform(Sema &S,
<< CurInit.get()->getSourceRange();
// Materialize the temporary into memory.
- MaterializeTemporaryExpr *MTE = new (S.Context)
- MaterializeTemporaryExpr(CurInit.get()->getType(), CurInit.get(),
- /*BoundToLvalueReference=*/false);
+ MaterializeTemporaryExpr *MTE = S.CreateMaterializeTemporaryExpr(
+ CurInit.get()->getType(), CurInit.get(),
+ /*BoundToLvalueReference=*/false);
// Maybe lifetime-extend the array temporary's subobjects to match the
// entity's lifetime.
@@ -6870,7 +7035,8 @@ static void diagnoseListInit(Sema &S, const InitializedEntity &Entity,
}
InitListChecker DiagnoseInitList(S, Entity, InitList, DestType,
- /*VerifyOnly=*/false);
+ /*VerifyOnly=*/false,
+ /*TreatUnavailableAsInvalid=*/false);
assert(DiagnoseInitList.HadError() &&
"Inconsistent init list check result.");
}
@@ -7132,17 +7298,20 @@ bool InitializationSequence::Diagnose(Sema &S,
isa<CXXConstructorDecl>(S.CurContext)) {
// This is implicit default initialization of a member or
// base within a constructor. If no viable function was
- // found, notify the user that she needs to explicitly
+ // found, notify the user that they need to explicitly
// initialize this base/member.
CXXConstructorDecl *Constructor
= cast<CXXConstructorDecl>(S.CurContext);
+ const CXXRecordDecl *InheritedFrom = nullptr;
+ if (auto Inherited = Constructor->getInheritedConstructor())
+ InheritedFrom = Inherited.getShadowDecl()->getNominatedBaseClass();
if (Entity.getKind() == InitializedEntity::EK_Base) {
S.Diag(Kind.getLocation(), diag::err_missing_default_ctor)
- << (Constructor->getInheritedConstructor() ? 2 :
- Constructor->isImplicit() ? 1 : 0)
+ << (InheritedFrom ? 2 : Constructor->isImplicit() ? 1 : 0)
<< S.Context.getTypeDeclType(Constructor->getParent())
<< /*base=*/0
- << Entity.getType();
+ << Entity.getType()
+ << InheritedFrom;
RecordDecl *BaseDecl
= Entity.getBaseSpecifier()->getType()->getAs<RecordType>()
@@ -7151,11 +7320,11 @@ bool InitializationSequence::Diagnose(Sema &S,
<< S.Context.getTagDeclType(BaseDecl);
} else {
S.Diag(Kind.getLocation(), diag::err_missing_default_ctor)
- << (Constructor->getInheritedConstructor() ? 2 :
- Constructor->isImplicit() ? 1 : 0)
+ << (InheritedFrom ? 2 : Constructor->isImplicit() ? 1 : 0)
<< S.Context.getTypeDeclType(Constructor->getParent())
<< /*member=*/1
- << Entity.getName();
+ << Entity.getName()
+ << InheritedFrom;
S.Diag(Entity.getDecl()->getLocation(),
diag::note_member_declared_at);
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp
index 884add26e43a..0b3af262cd61 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp
@@ -66,17 +66,20 @@ getStackIndexOfNearestEnclosingCaptureReadyLambda(
// Label failure to capture.
const Optional<unsigned> NoLambdaIsCaptureReady;
+ // Ignore all inner captured regions.
+ unsigned CurScopeIndex = FunctionScopes.size() - 1;
+ while (CurScopeIndex > 0 && isa<clang::sema::CapturedRegionScopeInfo>(
+ FunctionScopes[CurScopeIndex]))
+ --CurScopeIndex;
assert(
- isa<clang::sema::LambdaScopeInfo>(
- FunctionScopes[FunctionScopes.size() - 1]) &&
+ isa<clang::sema::LambdaScopeInfo>(FunctionScopes[CurScopeIndex]) &&
"The function on the top of sema's function-info stack must be a lambda");
-
+
// If VarToCapture is null, we are attempting to capture 'this'.
const bool IsCapturingThis = !VarToCapture;
const bool IsCapturingVariable = !IsCapturingThis;
// Start with the current lambda at the top of the stack (highest index).
- unsigned CurScopeIndex = FunctionScopes.size() - 1;
DeclContext *EnclosingDC =
cast<sema::LambdaScopeInfo>(FunctionScopes[CurScopeIndex])->CallOperator;
@@ -311,18 +314,21 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC,
bool IsInNonspecializedTemplate =
!ActiveTemplateInstantiations.empty() || CurContext->isDependentContext();
switch (Kind) {
- case Normal:
+ case Normal: {
// -- the bodies of non-exported nonspecialized template functions
// -- the bodies of inline functions
if ((IsInNonspecializedTemplate &&
!(ManglingContextDecl && isa<ParmVarDecl>(ManglingContextDecl))) ||
isInInlineFunction(CurContext)) {
ManglingContextDecl = nullptr;
+ while (auto *CD = dyn_cast<CapturedDecl>(DC))
+ DC = CD->getParent();
return &Context.getManglingNumberContext(DC);
}
ManglingContextDecl = nullptr;
return nullptr;
+ }
case StaticDataMember:
// -- the initializers of nonspecialized static members of template classes
@@ -414,11 +420,10 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
// Add parameters.
if (!Params.empty()) {
Method->setParams(Params);
- CheckParmsForFunctionDef(const_cast<ParmVarDecl **>(Params.begin()),
- const_cast<ParmVarDecl **>(Params.end()),
+ CheckParmsForFunctionDef(Params,
/*CheckParameterNames=*/false);
-
- for (auto P : Method->params())
+
+ for (auto P : Method->parameters())
P->setOwningFunction(Method);
}
@@ -617,6 +622,8 @@ void Sema::deduceClosureReturnType(CapturingScopeInfo &CSI) {
assert(CSI.HasImplicitReturnType);
// If it was ever a placeholder, it had to have been deduced to DependentTy.
assert(CSI.ReturnType.isNull() || !CSI.ReturnType->isUndeducedType());
+ assert((!isa<LambdaScopeInfo>(CSI) || !getLangOpts().CPlusPlus14) &&
+ "lambda expressions use auto deduction in C++14 onwards");
// C++ core issue 975:
// If a lambda-expression does not include a trailing-return-type,
@@ -807,19 +814,13 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
bool KnownDependent = false;
LambdaScopeInfo *const LSI = getCurLambda();
assert(LSI && "LambdaScopeInfo should be on stack!");
- TemplateParameterList *TemplateParams =
- getGenericLambdaTemplateParameterList(LSI, *this);
-
- if (Scope *TmplScope = CurScope->getTemplateParamParent()) {
- // Since we have our own TemplateParams, so check if an outer scope
- // has template params, only then are we in a dependent scope.
- if (TemplateParams) {
- TmplScope = TmplScope->getParent();
- TmplScope = TmplScope ? TmplScope->getTemplateParamParent() : nullptr;
- }
- if (TmplScope && !TmplScope->decl_empty())
- KnownDependent = true;
- }
+
+ // The lambda-expression's closure type might be dependent even if its
+ // semantic context isn't, if it appears within a default argument of a
+ // function template.
+ if (CurScope->getTemplateParamParent())
+ KnownDependent = true;
+
// Determine the signature of the call operator.
TypeSourceInfo *MethodTyInfo;
bool ExplicitParams = true;
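The simplified rule above marks any lambda written while template parameters are in scope as dependent, which is exactly what the default-argument case needs; for example:

    template <typename T>
    void f(int n = [] { return 42; }());  // the closure type must be treated
                                          // as dependent even though the
                                          // lambda's semantic context is not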
@@ -922,7 +923,12 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
= Intro.Default == LCD_None? Intro.Range.getBegin() : Intro.DefaultLoc;
for (auto C = Intro.Captures.begin(), E = Intro.Captures.end(); C != E;
PrevCaptureLoc = C->Loc, ++C) {
- if (C->Kind == LCK_This) {
+ if (C->Kind == LCK_This || C->Kind == LCK_StarThis) {
+ if (C->Kind == LCK_StarThis)
+ Diag(C->Loc, !getLangOpts().CPlusPlus1z
+ ? diag::ext_star_this_lambda_capture_cxx1z
+ : diag::warn_cxx14_compat_star_this_lambda_capture);
+
// C++11 [expr.prim.lambda]p8:
// An identifier or this shall not appear more than once in a
// lambda-capture.
@@ -934,10 +940,12 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
continue;
}
- // C++11 [expr.prim.lambda]p8:
- // If a lambda-capture includes a capture-default that is =, the
- // lambda-capture shall not contain this [...].
- if (Intro.Default == LCD_ByCopy) {
+ // C++1z [expr.prim.lambda]p8:
+ // If a lambda-capture includes a capture-default that is =, each
+ // simple-capture of that lambda-capture shall be of the form "&
+ // identifier" or "* this". [ Note: The form [&,this] is redundant but
+ // accepted for compatibility with ISO C++14. --end note ]
+ if (Intro.Default == LCD_ByCopy && C->Kind != LCK_StarThis) {
Diag(C->Loc, diag::err_this_capture_with_copy_default)
<< FixItHint::CreateRemoval(
SourceRange(getLocForEndOfToken(PrevCaptureLoc), C->Loc));
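A brief sketch of the C++1z *this capture accepted by the LCK_StarThis handling above (illustrative code only):

    struct S {
      int x = 0;
      auto f() {
        return [=, *this] { return x; };  // OK in C++1z: captures a copy of *this
        // [=, this] { ... }              // still an error with an '=' default,
                                          // per the quoted p8 wording
      }
    };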
@@ -953,7 +961,9 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
continue;
}
- CheckCXXThisCapture(C->Loc, /*Explicit=*/true);
+ CheckCXXThisCapture(C->Loc, /*Explicit=*/true, /*BuildAndDiagnose*/ true,
+ /*FunctionScopeIndexToStopAtPtr*/ nullptr,
+ C->Kind == LCK_StarThis);
continue;
}
@@ -1144,8 +1154,8 @@ static void addFunctionPointerConversion(Sema &S,
CXXMethodDecl *CallOperator) {
// This conversion is explicitly disabled if the lambda's function has
// pass_object_size attributes on any of its parameters.
- if (std::any_of(CallOperator->param_begin(), CallOperator->param_end(),
- std::mem_fn(&ParmVarDecl::hasAttr<PassObjectSizeAttr>)))
+ if (llvm::any_of(CallOperator->parameters(),
+ std::mem_fn(&ParmVarDecl::hasAttr<PassObjectSizeAttr>)))
return;
// Add the conversion to function pointer.
@@ -1493,7 +1503,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
SourceRange IntroducerRange;
bool ExplicitParams;
bool ExplicitResultType;
- bool LambdaExprNeedsCleanups;
+ CleanupInfo LambdaCleanup;
bool ContainsUnexpandedParameterPack;
SmallVector<VarDecl *, 4> ArrayIndexVars;
SmallVector<unsigned, 4> ArrayIndexStarts;
@@ -1503,7 +1513,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
IntroducerRange = LSI->IntroducerRange;
ExplicitParams = LSI->ExplicitParams;
ExplicitResultType = !LSI->HasImplicitReturnType;
- LambdaExprNeedsCleanups = LSI->ExprNeedsCleanups;
+ LambdaCleanup = LSI->Cleanup;
ContainsUnexpandedParameterPack = LSI->ContainsUnexpandedParameterPack;
CallOperator->setLexicalDeclContext(Class);
@@ -1527,10 +1537,9 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
// Handle 'this' capture.
if (From.isThisCapture()) {
Captures.push_back(
- LambdaCapture(From.getLocation(), IsImplicit, LCK_This));
- CaptureInits.push_back(new (Context) CXXThisExpr(From.getLocation(),
- getCurrentThisType(),
- /*isImplicit=*/true));
+ LambdaCapture(From.getLocation(), IsImplicit,
+ From.isCopyCapture() ? LCK_StarThis : LCK_This));
+ CaptureInits.push_back(From.getInitExpr());
ArrayIndexStarts.push_back(ArrayIndexVars.size());
continue;
}
@@ -1585,9 +1594,8 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
CheckCompletedCXXClass(Class);
}
- if (LambdaExprNeedsCleanups)
- ExprNeedsCleanups = true;
-
+ Cleanup.mergeFrom(LambdaCleanup);
+
LambdaExpr *Lambda = LambdaExpr::Create(Context, Class, IntroducerRange,
CaptureDefault, CaptureDefaultLoc,
Captures,
@@ -1619,6 +1627,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
ExprEvalContexts.back().Lambdas.push_back(Lambda);
break;
+ case DiscardedStatement:
case PotentiallyEvaluated:
case PotentiallyEvaluatedIfUsed:
break;
@@ -1697,7 +1706,7 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation,
// Create the block literal expression.
Expr *BuildBlock = new (Context) BlockExpr(Block, Conv->getConversionType());
ExprCleanupObjects.push_back(Block);
- ExprNeedsCleanups = true;
+ Cleanup.setExprNeedsCleanups(true);
return BuildBlock;
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp
index 45dc2e33da93..e2550824fb69 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp
@@ -29,7 +29,6 @@
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
-#include "clang/Sema/ExternalSemaSource.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
@@ -280,6 +279,10 @@ static inline unsigned getIDNS(Sema::LookupNameKind NameKind,
IDNS = Decl::IDNS_ObjCProtocol;
break;
+ case Sema::LookupOMPReductionName:
+ IDNS = Decl::IDNS_OMPReduction;
+ break;
+
case Sema::LookupAnyName:
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member
| Decl::IDNS_Using | Decl::IDNS_Namespace | Decl::IDNS_ObjCProtocol
@@ -419,6 +422,18 @@ static bool isPreferredLookupResult(Sema &S, Sema::LookupNameKind Kind,
}
}
+ // VarDecl can have incomplete array types, prefer the one with more complete
+ // array type.
+ if (VarDecl *DVD = dyn_cast<VarDecl>(DUnderlying)) {
+ VarDecl *EVD = cast<VarDecl>(EUnderlying);
+ if (EVD->getType()->isIncompleteType() &&
+ !DVD->getType()->isIncompleteType()) {
+ // Prefer the decl with a more complete type if visible.
+ return S.isVisible(DVD);
+ }
+ return false; // Avoid picking up a newer decl just because it was newer.
+ }
+
// For most kinds of declaration, it doesn't really matter which one we pick.
if (!isa<FunctionDecl>(DUnderlying) && !isa<VarDecl>(DUnderlying)) {
// If the existing declaration is hidden, prefer the new one. Otherwise,
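The new VarDecl case above matters when redeclarations differ in array completeness; e.g.:

    extern int table[];   // incomplete array type
    int table[16];        // complete type: lookup now prefers this
                          // declaration when it is visible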
@@ -432,10 +447,6 @@ static bool isPreferredLookupResult(Sema &S, Sema::LookupNameKind Kind,
if (Prev == EUnderlying)
return true;
return false;
-
- // If the existing declaration is hidden, prefer the new one. Otherwise,
- // keep what we've got.
- return !S.isVisible(Existing);
}
/// Determine whether \p D can hide a tag declaration.
@@ -669,24 +680,21 @@ static bool LookupBuiltin(Sema &S, LookupResult &R) {
NameKind == Sema::LookupRedeclarationWithLinkage) {
IdentifierInfo *II = R.getLookupName().getAsIdentifierInfo();
if (II) {
- if (S.getLangOpts().CPlusPlus11 && S.getLangOpts().GNUMode &&
- II == S.getFloat128Identifier()) {
- // libstdc++4.7's type_traits expects type __float128 to exist, so
- // insert a dummy type to make that header build in gnu++11 mode.
- R.addDecl(S.getASTContext().getFloat128StubType());
- return true;
- }
- if (S.getLangOpts().CPlusPlus && NameKind == Sema::LookupOrdinaryName &&
- II == S.getASTContext().getMakeIntegerSeqName()) {
- R.addDecl(S.getASTContext().getMakeIntegerSeqDecl());
- return true;
+ if (S.getLangOpts().CPlusPlus && NameKind == Sema::LookupOrdinaryName) {
+ if (II == S.getASTContext().getMakeIntegerSeqName()) {
+ R.addDecl(S.getASTContext().getMakeIntegerSeqDecl());
+ return true;
+ } else if (II == S.getASTContext().getTypePackElementName()) {
+ R.addDecl(S.getASTContext().getTypePackElementDecl());
+ return true;
+ }
}
// If this is a builtin on this (or all) targets, create the decl.
if (unsigned BuiltinID = II->getBuiltinID()) {
- // In C++, we don't have any predefined library functions like
- // 'malloc'. Instead, we'll just error.
- if (S.getLangOpts().CPlusPlus &&
+ // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined
+ // library functions like 'malloc'. Instead, we'll just error.
+ if ((S.getLangOpts().CPlusPlus || S.getLangOpts().OpenCL) &&
S.Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
return false;
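Effect of the OpenCL extension above, sketched: in C, Sema implicitly declares the library builtin; in C++, and now in OpenCL, the reference is simply an error.

    void *p = malloc(16);  // C: builtin 'malloc' implicitly declared (warning)
                           // C++ / OpenCL: error, undeclared identifier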
@@ -734,11 +742,11 @@ void Sema::ForceDeclarationOfImplicitMembers(CXXRecordDecl *Class) {
if (getLangOpts().CPlusPlus11) {
// If the move constructor has not yet been declared, do so now.
if (Class->needsImplicitMoveConstructor())
- DeclareImplicitMoveConstructor(Class); // might not actually do it
+ DeclareImplicitMoveConstructor(Class);
// If the move assignment operator has not yet been declared, do so now.
if (Class->needsImplicitMoveAssignment())
- DeclareImplicitMoveAssignment(Class); // might not actually do it
+ DeclareImplicitMoveAssignment(Class);
}
// If the destructor has not yet been declared, do so now.
@@ -1074,32 +1082,35 @@ bool Sema::CppLookupName(LookupResult &R, Scope *S) {
for (; S && !isNamespaceOrTranslationUnitScope(S); S = S->getParent()) {
DeclContext *Ctx = S->getEntity();
-
+ bool SearchNamespaceScope = true;
// Check whether the IdResolver has anything in this scope.
- bool Found = false;
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
- if (NameKind == LookupRedeclarationWithLinkage) {
+ if (NameKind == LookupRedeclarationWithLinkage &&
+ !(*I)->isTemplateParameter()) {
+ // If it's a template parameter, we still find it, so we can diagnose
+ // the invalid redeclaration.
+
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !Initial->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
- // does not have linkage, skip it. If it's a template parameter,
- // we still find it, so we can diagnose the invalid redeclaration.
- if (LeftStartingScope && !((*I)->hasLinkage()) &&
- !(*I)->isTemplateParameter()) {
+ // does not have linkage, skip it.
+ if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
+ } else {
+ // We found something in this scope, we should not look at the
+ // namespace scope
+ SearchNamespaceScope = false;
}
-
- Found = true;
R.addDecl(ND);
}
}
- if (Found) {
+ if (!SearchNamespaceScope) {
R.resolveKind();
if (S->isClassScope())
if (CXXRecordDecl *Record = dyn_cast_or_null<CXXRecordDecl>(Ctx))
@@ -1470,6 +1481,35 @@ bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
Modules);
}
+bool Sema::hasVisibleMemberSpecialization(
+ const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
+ assert(isa<CXXRecordDecl>(D->getDeclContext()) &&
+ "not a member specialization");
+ for (auto *Redecl : D->redecls()) {
+ // If the specialization is declared at namespace scope, then it's a member
+ // specialization declaration. If it's lexically inside the class
+ // definition then it was instantiated.
+ //
+ // FIXME: This is a hack. There should be a better way to determine this.
+ // FIXME: What about MS-style explicit specializations declared within a
+ // class definition?
+ if (Redecl->getLexicalDeclContext()->isFileContext()) {
+ auto *NonConstR = const_cast<NamedDecl*>(cast<NamedDecl>(Redecl));
+
+ if (isVisible(NonConstR))
+ return true;
+
+ if (Modules) {
+ Modules->push_back(getOwningModule(NonConstR));
+ const auto &Merged = Context.getModulesWithMergedDefinition(NonConstR);
+ Modules->insert(Modules->end(), Merged.begin(), Merged.end());
+ }
+ }
+ }
+
+ return false;
+}
+
/// \brief Determine whether a declaration is visible to name lookup.
///
/// This routine determines whether the declaration D is visible in the current
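The lexical-context test in hasVisibleMemberSpecialization above separates the two forms sketched here:

    template <typename T> struct A { void f(); };

    template <> void A<int>::f();  // member specialization declaration:
                                   // lexically at namespace (file) scope
    // By contrast, the member A<long>::f() instantiated from the template is
    // lexically inside the class definition and is skipped.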
@@ -1570,19 +1610,58 @@ static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D) {
assert(!LookupResult::isVisible(SemaRef, D) && "not in slow case");
for (auto RD : D->redecls()) {
- if (auto ND = dyn_cast<NamedDecl>(RD)) {
- // FIXME: This is wrong in the case where the previous declaration is not
- // visible in the same scope as D. This needs to be done much more
- // carefully.
- if (LookupResult::isVisible(SemaRef, ND))
- return ND;
- }
+ // Don't bother with extra checks if we already know this one isn't visible.
+ if (RD == D)
+ continue;
+
+ auto ND = cast<NamedDecl>(RD);
+ // FIXME: This is wrong in the case where the previous declaration is not
+ // visible in the same scope as D. This needs to be done much more
+ // carefully.
+ if (LookupResult::isVisible(SemaRef, ND))
+ return ND;
}
return nullptr;
}
+bool Sema::hasVisibleDeclarationSlow(const NamedDecl *D,
+ llvm::SmallVectorImpl<Module *> *Modules) {
+ assert(!isVisible(D) && "not in slow case");
+
+ for (auto *Redecl : D->redecls()) {
+ auto *NonConstR = const_cast<NamedDecl*>(cast<NamedDecl>(Redecl));
+ if (isVisible(NonConstR))
+ return true;
+
+ if (Modules) {
+ Modules->push_back(getOwningModule(NonConstR));
+ const auto &Merged = Context.getModulesWithMergedDefinition(NonConstR);
+ Modules->insert(Modules->end(), Merged.begin(), Merged.end());
+ }
+ }
+
+ return false;
+}
+
NamedDecl *LookupResult::getAcceptableDeclSlow(NamedDecl *D) const {
+ if (auto *ND = dyn_cast<NamespaceDecl>(D)) {
+ // Namespaces are a bit of a special case: we expect there to be a lot of
+ // redeclarations of some namespaces, all declarations of a namespace are
+ // essentially interchangeable, all declarations are found by name lookup
+ // if any is, and namespaces are never looked up during template
+ // instantiation. So we benefit from caching the check in this case, and
+ // it is correct to do so.
+ auto *Key = ND->getCanonicalDecl();
+ if (auto *Acceptable = getSema().VisibleNamespaceCache.lookup(Key))
+ return Acceptable;
+ auto *Acceptable =
+ isVisible(getSema(), Key) ? Key : findAcceptableDecl(getSema(), Key);
+ if (Acceptable)
+ getSema().VisibleNamespaceCache.insert(std::make_pair(Key, Acceptable));
+ return Acceptable;
+ }
+
return findAcceptableDecl(getSema(), D);
}
@@ -1986,6 +2065,10 @@ bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
BaseCallback = &LookupAnyMember;
break;
+ case LookupOMPReductionName:
+ BaseCallback = &CXXRecordDecl::FindOMPReductionMember;
+ break;
+
case LookupUsingDeclName:
// This lookup is for redeclarations only.
@@ -2409,7 +2492,7 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
// FIXME: That's not correct, we may have added this class only because it
// was the enclosing class of another class, and in that case we won't have
// added its base classes yet.
- if (!Result.Classes.insert(Class).second)
+ if (!Result.Classes.insert(Class))
return;
// -- If T is a template-id, its associated namespaces and classes are
@@ -2459,7 +2542,7 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
if (!BaseType)
continue;
CXXRecordDecl *BaseDecl = cast<CXXRecordDecl>(BaseType->getDecl());
- if (Result.Classes.insert(BaseDecl).second) {
+ if (Result.Classes.insert(BaseDecl)) {
// Find the associated namespace for this base class.
DeclContext *BaseCtx = BaseDecl->getDeclContext();
CollectEnclosingNamespace(Result.Namespaces, BaseCtx);
@@ -2864,42 +2947,38 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD,
// from an external source and invalidate lookup_result.
SmallVector<NamedDecl *, 8> Candidates(R.begin(), R.end());
- for (auto *Cand : Candidates) {
- if (Cand->isInvalidDecl())
+ for (NamedDecl *CandDecl : Candidates) {
+ if (CandDecl->isInvalidDecl())
continue;
- if (UsingShadowDecl *U = dyn_cast<UsingShadowDecl>(Cand)) {
- // FIXME: [namespace.udecl]p15 says that we should only consider a
- // using declaration here if it does not match a declaration in the
- // derived class. We do not implement this correctly in other cases
- // either.
- Cand = U->getTargetDecl();
-
- if (Cand->isInvalidDecl())
- continue;
- }
-
- if (CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(Cand)) {
+ DeclAccessPair Cand = DeclAccessPair::make(CandDecl, AS_public);
+ auto CtorInfo = getConstructorInfo(Cand);
+ if (CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
- AddMethodCandidate(M, DeclAccessPair::make(M, AS_public), RD, ThisTy,
- Classification, llvm::makeArrayRef(&Arg, NumArgs),
- OCS, true);
- else
- AddOverloadCandidate(M, DeclAccessPair::make(M, AS_public),
+ AddMethodCandidate(M, Cand, RD, ThisTy, Classification,
+ llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
+ else if (CtorInfo)
+ AddOverloadCandidate(CtorInfo.Constructor, CtorInfo.FoundDecl,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
+ else
+ AddOverloadCandidate(M, Cand, llvm::makeArrayRef(&Arg, NumArgs), OCS,
+ true);
} else if (FunctionTemplateDecl *Tmpl =
- dyn_cast<FunctionTemplateDecl>(Cand)) {
+ dyn_cast<FunctionTemplateDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
- AddMethodTemplateCandidate(Tmpl, DeclAccessPair::make(Tmpl, AS_public),
- RD, nullptr, ThisTy, Classification,
- llvm::makeArrayRef(&Arg, NumArgs),
- OCS, true);
+ AddMethodTemplateCandidate(
+ Tmpl, Cand, RD, nullptr, ThisTy, Classification,
+ llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
+ else if (CtorInfo)
+ AddTemplateOverloadCandidate(
+ CtorInfo.ConstructorTmpl, CtorInfo.FoundDecl, nullptr,
+ llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else
- AddTemplateOverloadCandidate(Tmpl, DeclAccessPair::make(Tmpl, AS_public),
- nullptr, llvm::makeArrayRef(&Arg, NumArgs),
- OCS, true);
+ AddTemplateOverloadCandidate(
+ Tmpl, Cand, nullptr, llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
} else {
- assert(isa<UsingDecl>(Cand) && "illegal Kind of operator = Decl");
+ assert(isa<UsingDecl>(Cand.getDecl()) &&
+ "illegal Kind of operator = Decl");
}
}
@@ -3119,7 +3198,7 @@ Sema::LookupLiteralOperator(Scope *S, LookupResult &R,
if (FoundRaw && FoundTemplate) {
Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName();
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I)
- NoteOverloadCandidate((*I)->getUnderlyingDecl()->getAsFunction());
+ NoteOverloadCandidate(*I, (*I)->getUnderlyingDecl()->getAsFunction());
return LOLR_Error;
}
@@ -3984,8 +4063,8 @@ retry_lookup:
void TypoCorrectionConsumer::performQualifiedLookups() {
unsigned TypoLen = Typo->getName().size();
- for (auto QR : QualifiedResults) {
- for (auto NSI : Namespaces) {
+ for (const TypoCorrection &QR : QualifiedResults) {
+ for (const auto &NSI : Namespaces) {
DeclContext *Ctx = NSI.DeclCtx;
const Type *NSType = NSI.NameSpecifier->getAsType();
@@ -4073,10 +4152,8 @@ TypoCorrectionConsumer::NamespaceSpecifierSet::NamespaceSpecifierSet(
// Build the list of identifiers that would be used for an absolute
// (from the global context) NestedNameSpecifier referring to the current
// context.
- for (DeclContextList::reverse_iterator C = CurContextChain.rbegin(),
- CEnd = CurContextChain.rend();
- C != CEnd; ++C) {
- if (NamespaceDecl *ND = dyn_cast_or_null<NamespaceDecl>(*C))
+ for (DeclContext *C : llvm::reverse(CurContextChain)) {
+ if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C))
CurContextIdentifiers.push_back(ND->getIdentifier());
}
@@ -4104,13 +4181,11 @@ unsigned
TypoCorrectionConsumer::NamespaceSpecifierSet::buildNestedNameSpecifier(
DeclContextList &DeclChain, NestedNameSpecifier *&NNS) {
unsigned NumSpecifiers = 0;
- for (DeclContextList::reverse_iterator C = DeclChain.rbegin(),
- CEnd = DeclChain.rend();
- C != CEnd; ++C) {
- if (NamespaceDecl *ND = dyn_cast_or_null<NamespaceDecl>(*C)) {
+ for (DeclContext *C : llvm::reverse(DeclChain)) {
+ if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, ND);
++NumSpecifiers;
- } else if (RecordDecl *RD = dyn_cast_or_null<RecordDecl>(*C)) {
+ } else if (auto *RD = dyn_cast_or_null<RecordDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, RD->isTemplateDecl(),
RD->getTypeForDecl());
++NumSpecifiers;
@@ -4127,10 +4202,9 @@ void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier(
DeclContextList FullNamespaceDeclChain(NamespaceDeclChain);
// Eliminate common elements from the two DeclContext chains.
- for (DeclContextList::reverse_iterator C = CurContextChain.rbegin(),
- CEnd = CurContextChain.rend();
- C != CEnd && !NamespaceDeclChain.empty() &&
- NamespaceDeclChain.back() == *C; ++C) {
+ for (DeclContext *C : llvm::reverse(CurContextChain)) {
+ if (NamespaceDeclChain.empty() || NamespaceDeclChain.back() != C)
+ break;
NamespaceDeclChain.pop_back();
}
@@ -4207,7 +4281,8 @@ static void LookupPotentialTypoResult(Sema &SemaRef,
}
}
- if (ObjCPropertyDecl *Prop = Class->FindPropertyDeclaration(Name)) {
+ if (ObjCPropertyDecl *Prop = Class->FindPropertyDeclaration(
+ Name, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
Res.addDecl(Prop);
Res.resolveKind();
return;
@@ -4704,11 +4779,20 @@ TypoExpr *Sema::CorrectTypoDelayed(
const ObjCObjectPointerType *OPT) {
assert(CCC && "CorrectTypoDelayed requires a CorrectionCandidateCallback");
- TypoCorrection Empty;
auto Consumer = makeTypoCorrectionConsumer(
TypoName, LookupKind, S, SS, std::move(CCC), MemberContext,
EnteringContext, OPT, Mode == CTK_ErrorRecovery);
+ // Give the external sema source a chance to correct the typo.
+ TypoCorrection ExternalTypo;
+ if (ExternalSource && Consumer) {
+ ExternalTypo = ExternalSource->CorrectTypo(
+ TypoName, LookupKind, S, SS, *Consumer->getCorrectionValidator(),
+ MemberContext, EnteringContext, OPT);
+ if (ExternalTypo)
+ Consumer->addCorrection(ExternalTypo);
+ }
+
if (!Consumer || Consumer->empty())
return nullptr;
@@ -4716,7 +4800,7 @@ TypoExpr *Sema::CorrectTypoDelayed(
// is not more that about a third of the length of the typo's identifier.
unsigned ED = Consumer->getBestEditDistance(true);
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
- if (ED > 0 && Typo->getName().size() / ED < 3)
+ if (!ExternalTypo && ED > 0 && Typo->getName().size() / ED < 3)
return nullptr;
ExprEvalContexts.back().NumTypos++;
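Worked numbers for the guard above (integer division; now bypassed when the correction came from the external sema source):

    // typo "strln" (length 5), best correction at edit distance 1:
    //   5 / 1 = 5 >= 3  -> corrections are considered
    // typo "ab" (length 2), best correction at edit distance 1:
    //   2 / 1 = 2 <  3  -> edit distance too large relative to the
    //                      identifier length; give up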
@@ -4852,8 +4936,8 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction,
static NamedDecl *getDefinitionToImport(NamedDecl *D) {
if (VarDecl *VD = dyn_cast<VarDecl>(D))
return VD->getDefinition();
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
- return FD->isDefined(FD) ? const_cast<FunctionDecl*>(FD) : nullptr;
+ if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
+ return FD->getDefinition();
if (TagDecl *TD = dyn_cast<TagDecl>(D))
return TD->getDefinition();
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D))
@@ -4866,7 +4950,7 @@ static NamedDecl *getDefinitionToImport(NamedDecl *D) {
}
void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
- bool NeedDefinition, bool Recover) {
+ MissingImportKind MIK, bool Recover) {
assert(!isVisible(Decl) && "missing import for non-hidden decl?");
// Suggest importing a module providing the definition of this entity, if
@@ -4875,8 +4959,6 @@ void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
if (!Def)
Def = Decl;
- // FIXME: Add a Fix-It that imports the corresponding module or includes
- // the header.
Module *Owner = getOwningModule(Decl);
assert(Owner && "definition of hidden declaration is not in a module");
@@ -4885,12 +4967,20 @@ void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
auto Merged = Context.getModulesWithMergedDefinition(Decl);
OwningModules.insert(OwningModules.end(), Merged.begin(), Merged.end());
- diagnoseMissingImport(Loc, Decl, Decl->getLocation(), OwningModules,
- NeedDefinition ? MissingImportKind::Definition
- : MissingImportKind::Declaration,
+ diagnoseMissingImport(Loc, Decl, Decl->getLocation(), OwningModules, MIK,
Recover);
}
+/// \brief Get a "quoted.h" or <angled.h> include path to use in a diagnostic
+/// suggesting the addition of a #include of the specified file.
+static std::string getIncludeStringForHeader(Preprocessor &PP,
+ const FileEntry *E) {
+ bool IsSystem;
+ auto Path =
+ PP.getHeaderSearchInfo().suggestPathToFileForDiagnostics(E, &IsSystem);
+ return (IsSystem ? '<' : '"') + Path + (IsSystem ? '>' : '"');
+}
+
void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
SourceLocation DeclLoc,
ArrayRef<Module *> Modules,
@@ -4911,7 +5001,18 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
Diag(UseLoc, diag::err_module_unimported_use_multiple)
<< (int)MIK << Decl << ModuleList;
+ } else if (const FileEntry *E =
+ PP.getModuleHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) {
+ // The right way to make the declaration visible is to include a header;
+ // suggest doing so.
+ //
+ // FIXME: Find a smart place to suggest inserting a #include, and add
+ // a FixItHint there.
+ Diag(UseLoc, diag::err_module_unimported_use_header)
+ << (int)MIK << Decl << Modules[0]->getFullModuleName()
+ << getIncludeStringForHeader(PP, E);
} else {
+ // FIXME: Add a FixItHint that imports the corresponding module.
Diag(UseLoc, diag::err_module_unimported_use)
<< (int)MIK << Decl << Modules[0]->getFullModuleName();
}
@@ -4927,6 +5028,12 @@ void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
case MissingImportKind::DefaultArgument:
DiagID = diag::note_default_argument_declared_here;
break;
+ case MissingImportKind::ExplicitSpecialization:
+ DiagID = diag::note_explicit_specialization_declared_here;
+ break;
+ case MissingImportKind::PartialSpecialization:
+ DiagID = diag::note_partial_specialization_declared_here;
+ break;
}
Diag(DeclLoc, DiagID);
@@ -4962,7 +5069,7 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction,
assert(Decl && "import required but no declaration to import");
diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl,
- /*NeedDefinition*/ false, ErrorRecovery);
+ MissingImportKind::Declaration, ErrorRecovery);
return;
}
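In effect, a header that header search classifies as system-owned is suggested in angle brackets (for example, #include <sys/types.h>) and any other header in double quotes (#include "foo.h"); the paths here are illustrative only.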
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
index 1cb84e448067..5e38751f44a5 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaObjCProperty.cpp
@@ -303,6 +303,8 @@ makePropertyAttributesAsWritten(unsigned Attributes) {
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_nonatomic;
if (Attributes & ObjCDeclSpec::DQ_PR_atomic)
attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_atomic;
+ if (Attributes & ObjCDeclSpec::DQ_PR_class)
+ attributesAsWritten |= ObjCPropertyDecl::OBJC_PR_class;
return (ObjCPropertyDecl::PropertyAttributeKind)attributesAsWritten;
}
@@ -334,7 +336,6 @@ static bool LocPropertyAttribute( ASTContext &Context, const char *attrName,
}
} while (Tok.isNot(tok::r_paren));
return false;
-
}
/// Check for a mismatch in the atomicity of the given properties.
@@ -431,10 +432,13 @@ Sema::HandlePropertyInClassExtension(Scope *S,
return nullptr;
}
+ bool isClassProperty = (AttributesAsWritten & ObjCDeclSpec::DQ_PR_class) ||
+ (Attributes & ObjCDeclSpec::DQ_PR_class);
+
// Find the property in the extended class's primary class or
// extensions.
- ObjCPropertyDecl *PIDecl =
- CCPrimary->FindPropertyVisibleInPrimaryClass(PropertyId);
+ ObjCPropertyDecl *PIDecl = CCPrimary->FindPropertyVisibleInPrimaryClass(
+ PropertyId, ObjCPropertyDecl::getQueryKind(isClassProperty));
// If we found a property in an extension, complain.
if (PIDecl && isa<ObjCCategoryDecl>(PIDecl->getDeclContext())) {
@@ -612,8 +616,11 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
PropertyId, AtLoc,
LParenLoc, T, TInfo);
- if (ObjCPropertyDecl *prevDecl =
- ObjCPropertyDecl::findPropertyDecl(DC, PropertyId)) {
+ bool isClassProperty = (AttributesAsWritten & ObjCDeclSpec::DQ_PR_class) ||
+ (Attributes & ObjCDeclSpec::DQ_PR_class);
+  // A class property and an instance property can have the same name.
+ if (ObjCPropertyDecl *prevDecl = ObjCPropertyDecl::findPropertyDecl(
+ DC, PropertyId, ObjCPropertyDecl::getQueryKind(isClassProperty))) {
Diag(PDecl->getLocation(), diag::err_duplicate_property);
Diag(prevDecl->getLocation(), diag::note_property_declare);
PDecl->setInvalidDecl();
@@ -691,6 +698,9 @@ ObjCPropertyDecl *Sema::CreatePropertyDecl(Scope *S,
if (Attributes & ObjCDeclSpec::DQ_PR_null_resettable)
PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_null_resettable);
+ if (Attributes & ObjCDeclSpec::DQ_PR_class)
+ PDecl->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_class);
+
return PDecl;
}
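For reference, the DQ_PR_class attribute threaded through these hunks corresponds to declarations such as @property (class) NSString *identifier; (an illustrative example), and the query-kind plumbing is what allows such a class property to coexist with an instance property of the same name, as the comment above notes.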
@@ -794,7 +804,6 @@ static void setImpliedPropertyAttributeForReadOnlyProperty(
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_strong);
else if (ivarLifetime == Qualifiers::OCL_Weak)
property->setPropertyAttributes(ObjCPropertyDecl::OBJC_PR_weak);
- return;
}
/// DiagnosePropertyMismatchDeclInProtocols - diagnose properties declared
@@ -847,7 +856,8 @@ DiagnosePropertyMismatchDeclInProtocols(Sema &S, SourceLocation AtLoc,
}
/// Determine whether any storage attributes were written on the property.
-static bool hasWrittenStorageAttribute(ObjCPropertyDecl *Prop) {
+static bool hasWrittenStorageAttribute(ObjCPropertyDecl *Prop,
+ ObjCPropertyQueryKind QueryKind) {
if (Prop->getPropertyAttributesAsWritten() & OwnershipMask) return true;
// If this is a readwrite property in a class extension that refines
@@ -870,8 +880,8 @@ static bool hasWrittenStorageAttribute(ObjCPropertyDecl *Prop) {
// Look through all of the protocols.
for (const auto *Proto : OrigClass->all_referenced_protocols()) {
- if (ObjCPropertyDecl *OrigProp =
- Proto->FindPropertyDeclaration(Prop->getIdentifier()))
+ if (ObjCPropertyDecl *OrigProp = Proto->FindPropertyDeclaration(
+ Prop->getIdentifier(), QueryKind))
return OrigProp->getPropertyAttributesAsWritten() & OwnershipMask;
}
@@ -888,7 +898,8 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
bool Synthesize,
IdentifierInfo *PropertyId,
IdentifierInfo *PropertyIvar,
- SourceLocation PropertyIvarLoc) {
+ SourceLocation PropertyIvarLoc,
+ ObjCPropertyQueryKind QueryKind) {
ObjCContainerDecl *ClassImpDecl =
dyn_cast<ObjCContainerDecl>(CurContext);
// Make sure we have a context for the property implementation declaration.
@@ -915,11 +926,15 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
"ActOnPropertyImplDecl - @implementation without @interface");
// Look for this property declaration in the @implementation's @interface
- property = IDecl->FindPropertyDeclaration(PropertyId);
+ property = IDecl->FindPropertyDeclaration(PropertyId, QueryKind);
if (!property) {
Diag(PropertyLoc, diag::error_bad_property_decl) << IDecl->getDeclName();
return nullptr;
}
+ if (property->isClassProperty() && Synthesize) {
+ Diag(PropertyLoc, diag::error_synthesize_on_class_property) << PropertyId;
+ return nullptr;
+ }
unsigned PIkind = property->getPropertyAttributesAsWritten();
if ((PIkind & (ObjCPropertyDecl::OBJC_PR_atomic |
ObjCPropertyDecl::OBJC_PR_nonatomic) ) == 0) {
@@ -993,7 +1008,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
if (!Category)
return nullptr;
// Look for this property declaration in @implementation's category
- property = Category->FindPropertyDeclaration(PropertyId);
+ property = Category->FindPropertyDeclaration(PropertyId, QueryKind);
if (!property) {
Diag(PropertyLoc, diag::error_bad_category_property_decl)
<< Category->getDeclName();
@@ -1105,7 +1120,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
// It's an error if we have to do this and the user didn't
// explicitly write an ownership attribute on the property.
- if (!hasWrittenStorageAttribute(property) &&
+ if (!hasWrittenStorageAttribute(property, QueryKind) &&
!(kind & ObjCPropertyDecl::OBJC_PR_strong)) {
Diag(PropertyDiagLoc,
diag::err_arc_objc_property_default_assign_on_object);
@@ -1340,7 +1355,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
}
if (ObjCPropertyImplDecl *PPIDecl
- = IC->FindPropertyImplDecl(PropertyId)) {
+ = IC->FindPropertyImplDecl(PropertyId, QueryKind)) {
Diag(PropertyLoc, diag::error_property_implemented) << PropertyId;
Diag(PPIDecl->getLocation(), diag::note_previous_declaration);
return nullptr;
@@ -1379,7 +1394,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
}
if (ObjCPropertyImplDecl *PPIDecl =
- CatImplClass->FindPropertyImplDecl(PropertyId)) {
+ CatImplClass->FindPropertyImplDecl(PropertyId, QueryKind)) {
Diag(PropertyDiagLoc, diag::error_property_implemented) << PropertyId;
Diag(PPIDecl->getLocation(), diag::note_previous_declaration);
return nullptr;
@@ -1478,24 +1493,26 @@ bool Sema::DiagnosePropertyAccessorMismatch(ObjCPropertyDecl *property,
if (!GetterMethod)
return false;
QualType GetterType = GetterMethod->getReturnType().getNonReferenceType();
- QualType PropertyIvarType = property->getType().getNonReferenceType();
- bool compat = Context.hasSameType(PropertyIvarType, GetterType);
+ QualType PropertyRValueType =
+ property->getType().getNonReferenceType().getAtomicUnqualifiedType();
+ bool compat = Context.hasSameType(PropertyRValueType, GetterType);
if (!compat) {
const ObjCObjectPointerType *propertyObjCPtr = nullptr;
const ObjCObjectPointerType *getterObjCPtr = nullptr;
- if ((propertyObjCPtr = PropertyIvarType->getAs<ObjCObjectPointerType>()) &&
+ if ((propertyObjCPtr =
+ PropertyRValueType->getAs<ObjCObjectPointerType>()) &&
(getterObjCPtr = GetterType->getAs<ObjCObjectPointerType>()))
compat = Context.canAssignObjCInterfaces(getterObjCPtr, propertyObjCPtr);
- else if (CheckAssignmentConstraints(Loc, GetterType, PropertyIvarType)
+ else if (CheckAssignmentConstraints(Loc, GetterType, PropertyRValueType)
!= Compatible) {
Diag(Loc, diag::error_property_accessor_type)
- << property->getDeclName() << PropertyIvarType
+ << property->getDeclName() << PropertyRValueType
<< GetterMethod->getSelector() << GetterType;
Diag(GetterMethod->getLocation(), diag::note_declared_at);
return true;
} else {
compat = true;
- QualType lhsType =Context.getCanonicalType(PropertyIvarType).getUnqualifiedType();
+ QualType lhsType = Context.getCanonicalType(PropertyRValueType);
QualType rhsType =Context.getCanonicalType(GetterType).getUnqualifiedType();
if (lhsType != rhsType && lhsType->isArithmeticType())
compat = false;
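The switch from the raw property type to getAtomicUnqualifiedType() drops an _Atomic wrapper along with qualifiers before the comparison, so a property declared with an _Atomic-qualified type is checked against a getter returning the underlying unqualified type.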
@@ -1515,49 +1532,68 @@ bool Sema::DiagnosePropertyAccessorMismatch(ObjCPropertyDecl *property,
/// CollectImmediateProperties - This routine collects all properties in
/// the class and its conforming protocols, but not those in its super class.
-static void CollectImmediateProperties(ObjCContainerDecl *CDecl,
- ObjCContainerDecl::PropertyMap &PropMap,
- ObjCContainerDecl::PropertyMap &SuperPropMap,
- bool IncludeProtocols = true) {
-
+static void
+CollectImmediateProperties(ObjCContainerDecl *CDecl,
+ ObjCContainerDecl::PropertyMap &PropMap,
+ ObjCContainerDecl::PropertyMap &SuperPropMap,
+ bool CollectClassPropsOnly = false,
+ bool IncludeProtocols = true) {
if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(CDecl)) {
- for (auto *Prop : IDecl->properties())
- PropMap[Prop->getIdentifier()] = Prop;
+ for (auto *Prop : IDecl->properties()) {
+ if (CollectClassPropsOnly && !Prop->isClassProperty())
+ continue;
+ PropMap[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] =
+ Prop;
+ }
// Collect the properties from visible extensions.
for (auto *Ext : IDecl->visible_extensions())
- CollectImmediateProperties(Ext, PropMap, SuperPropMap, IncludeProtocols);
+ CollectImmediateProperties(Ext, PropMap, SuperPropMap,
+ CollectClassPropsOnly, IncludeProtocols);
if (IncludeProtocols) {
// Scan through class's protocols.
for (auto *PI : IDecl->all_referenced_protocols())
- CollectImmediateProperties(PI, PropMap, SuperPropMap);
+ CollectImmediateProperties(PI, PropMap, SuperPropMap,
+ CollectClassPropsOnly);
}
}
if (ObjCCategoryDecl *CATDecl = dyn_cast<ObjCCategoryDecl>(CDecl)) {
- for (auto *Prop : CATDecl->properties())
- PropMap[Prop->getIdentifier()] = Prop;
+ for (auto *Prop : CATDecl->properties()) {
+ if (CollectClassPropsOnly && !Prop->isClassProperty())
+ continue;
+ PropMap[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] =
+ Prop;
+ }
if (IncludeProtocols) {
// Scan through class's protocols.
for (auto *PI : CATDecl->protocols())
- CollectImmediateProperties(PI, PropMap, SuperPropMap);
+ CollectImmediateProperties(PI, PropMap, SuperPropMap,
+ CollectClassPropsOnly);
}
}
else if (ObjCProtocolDecl *PDecl = dyn_cast<ObjCProtocolDecl>(CDecl)) {
for (auto *Prop : PDecl->properties()) {
- ObjCPropertyDecl *PropertyFromSuper = SuperPropMap[Prop->getIdentifier()];
+ if (CollectClassPropsOnly && !Prop->isClassProperty())
+ continue;
+ ObjCPropertyDecl *PropertyFromSuper =
+ SuperPropMap[std::make_pair(Prop->getIdentifier(),
+ Prop->isClassProperty())];
      // Exclude a property from protocols which conform to the class's
      // super-class, as the super-class has to implement the property.
if (!PropertyFromSuper ||
PropertyFromSuper->getIdentifier() != Prop->getIdentifier()) {
- ObjCPropertyDecl *&PropEntry = PropMap[Prop->getIdentifier()];
+ ObjCPropertyDecl *&PropEntry =
+ PropMap[std::make_pair(Prop->getIdentifier(),
+ Prop->isClassProperty())];
if (!PropEntry)
PropEntry = Prop;
}
}
- // scan through protocol's protocols.
+ // Scan through protocol's protocols.
for (auto *PI : PDecl->protocols())
- CollectImmediateProperties(PI, PropMap, SuperPropMap);
+ CollectImmediateProperties(PI, PropMap, SuperPropMap,
+ CollectClassPropsOnly);
}
}
@@ -1590,7 +1626,7 @@ Sema::IvarBacksCurrentMethodAccessor(ObjCInterfaceDecl *IFace,
// look up a property declaration whose one of its accessors is implemented
// by this method.
- for (const auto *Property : IFace->properties()) {
+ for (const auto *Property : IFace->instance_properties()) {
if ((Property->getGetterName() == IMD->getSelector() ||
Property->getSetterName() == IMD->getSelector()) &&
(Property->getPropertyIvarDecl() == IV))
@@ -1599,7 +1635,7 @@ Sema::IvarBacksCurrentMethodAccessor(ObjCInterfaceDecl *IFace,
// Also look up property declaration in class extension whose one of its
// accessors is implemented by this method.
for (const auto *Ext : IFace->known_extensions())
- for (const auto *Property : Ext->properties())
+ for (const auto *Property : Ext->instance_properties())
if ((Property->getGetterName() == IMD->getSelector() ||
Property->getSetterName() == IMD->getSelector()) &&
(Property->getPropertyIvarDecl() == IV))
@@ -1632,7 +1668,6 @@ static bool SuperClassImplementsProperty(ObjCInterfaceDecl *IDecl,
/// in class's \@implementation.
void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
ObjCInterfaceDecl *IDecl) {
-
ObjCInterfaceDecl::PropertyMap PropMap;
ObjCInterfaceDecl::PropertyDeclOrder PropertyOrder;
IDecl->collectPropertiesToImplement(PropMap, PropertyOrder);
@@ -1645,10 +1680,12 @@ void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
ObjCPropertyDecl *Prop = PropertyOrder[i];
// Is there a matching property synthesize/dynamic?
if (Prop->isInvalidDecl() ||
+ Prop->isClassProperty() ||
Prop->getPropertyImplementation() == ObjCPropertyDecl::Optional)
continue;
// Property may have been synthesized by user.
- if (IMPDecl->FindPropertyImplDecl(Prop->getIdentifier()))
+ if (IMPDecl->FindPropertyImplDecl(
+ Prop->getIdentifier(), Prop->getQueryKind()))
continue;
if (IMPDecl->getInstanceMethod(Prop->getGetterName())) {
if (Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_readonly)
@@ -1664,7 +1701,9 @@ void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
Diag(PID->getLocation(), diag::note_property_synthesize);
continue;
}
- ObjCPropertyDecl *PropInSuperClass = SuperPropMap[Prop->getIdentifier()];
+ ObjCPropertyDecl *PropInSuperClass =
+ SuperPropMap[std::make_pair(Prop->getIdentifier(),
+ Prop->isClassProperty())];
if (ObjCProtocolDecl *Proto =
dyn_cast<ObjCProtocolDecl>(Prop->getDeclContext())) {
// We won't auto-synthesize properties declared in protocols.
@@ -1707,7 +1746,7 @@ void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl,
true,
/* property = */ Prop->getIdentifier(),
/* ivar = */ Prop->getDefaultSynthIvarName(Context),
- Prop->getLocation()));
+ Prop->getLocation(), Prop->getQueryKind()));
if (PIDecl) {
Diag(Prop->getLocation(), diag::warn_missing_explicit_synthesis);
Diag(IMPDecl->getLocation(), diag::note_while_in_implementation);
@@ -1726,34 +1765,42 @@ void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D) {
DefaultSynthesizeProperties(S, IC, IDecl);
}
-static void DiagnoseUnimplementedAccessor(Sema &S,
- ObjCInterfaceDecl *PrimaryClass,
- Selector Method,
- ObjCImplDecl* IMPDecl,
- ObjCContainerDecl *CDecl,
- ObjCCategoryDecl *C,
- ObjCPropertyDecl *Prop,
- Sema::SelectorSet &SMap) {
+static void DiagnoseUnimplementedAccessor(
+ Sema &S, ObjCInterfaceDecl *PrimaryClass, Selector Method,
+ ObjCImplDecl *IMPDecl, ObjCContainerDecl *CDecl, ObjCCategoryDecl *C,
+ ObjCPropertyDecl *Prop,
+ llvm::SmallPtrSet<const ObjCMethodDecl *, 8> &SMap) {
+ // Check to see if we have a corresponding selector in SMap and with the
+ // right method type.
+ auto I = std::find_if(SMap.begin(), SMap.end(),
+ [&](const ObjCMethodDecl *x) {
+ return x->getSelector() == Method &&
+ x->isClassMethod() == Prop->isClassProperty();
+ });
  // When reporting on missing property setter/getter implementation in
  // categories, do not report when they are declared in the primary class,
  // the class's protocol, or one of its super classes. This is because
  // the class is going to implement them.
- if (!SMap.count(Method) &&
+ if (I == SMap.end() &&
(PrimaryClass == nullptr ||
- !PrimaryClass->lookupPropertyAccessor(Method, C))) {
- S.Diag(IMPDecl->getLocation(),
- isa<ObjCCategoryDecl>(CDecl) ?
- diag::warn_setter_getter_impl_required_in_category :
- diag::warn_setter_getter_impl_required)
- << Prop->getDeclName() << Method;
- S.Diag(Prop->getLocation(),
- diag::note_property_declare);
- if (S.LangOpts.ObjCDefaultSynthProperties &&
- S.LangOpts.ObjCRuntime.isNonFragile())
- if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(CDecl))
- if (const ObjCInterfaceDecl *RID = ID->isObjCRequiresPropertyDefs())
- S.Diag(RID->getLocation(), diag::note_suppressed_class_declare);
- }
+ !PrimaryClass->lookupPropertyAccessor(Method, C,
+ Prop->isClassProperty()))) {
+ unsigned diag =
+ isa<ObjCCategoryDecl>(CDecl)
+ ? (Prop->isClassProperty()
+ ? diag::warn_impl_required_in_category_for_class_property
+ : diag::warn_setter_getter_impl_required_in_category)
+ : (Prop->isClassProperty()
+ ? diag::warn_impl_required_for_class_property
+ : diag::warn_setter_getter_impl_required);
+ S.Diag(IMPDecl->getLocation(), diag) << Prop->getDeclName() << Method;
+ S.Diag(Prop->getLocation(), diag::note_property_declare);
+ if (S.LangOpts.ObjCDefaultSynthProperties &&
+ S.LangOpts.ObjCRuntime.isNonFragile())
+ if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(CDecl))
+ if (const ObjCInterfaceDecl *RID = ID->isObjCRequiresPropertyDefs())
+ S.Diag(RID->getLocation(), diag::note_suppressed_class_declare);
+ }
}
void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
@@ -1762,25 +1809,27 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
ObjCContainerDecl::PropertyMap PropMap;
ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(CDecl);
- if (!SynthesizeProperties) {
- ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
- // Gather properties which need not be implemented in this class
- // or category.
- if (!IDecl)
- if (ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl)) {
- // For categories, no need to implement properties declared in
- // its primary class (and its super classes) if property is
- // declared in one of those containers.
- if ((IDecl = C->getClassInterface())) {
- ObjCInterfaceDecl::PropertyDeclOrder PO;
- IDecl->collectPropertiesToImplement(NoNeedToImplPropMap, PO);
- }
+  // Since we don't synthesize class properties, we should emit diagnostics
+  // even if SynthesizeProperties is true.
+ ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
+ // Gather properties which need not be implemented in this class
+ // or category.
+ if (!IDecl)
+ if (ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl)) {
+    // For categories, no need to implement properties declared in
+    // its primary class (and its super classes) if the property is
+    // declared in one of those containers.
+ if ((IDecl = C->getClassInterface())) {
+ ObjCInterfaceDecl::PropertyDeclOrder PO;
+ IDecl->collectPropertiesToImplement(NoNeedToImplPropMap, PO);
}
- if (IDecl)
- CollectSuperClassPropertyImplementations(IDecl, NoNeedToImplPropMap);
+ }
+ if (IDecl)
+ CollectSuperClassPropertyImplementations(IDecl, NoNeedToImplPropMap);
- CollectImmediateProperties(CDecl, PropMap, NoNeedToImplPropMap);
- }
+ // When SynthesizeProperties is true, we only check class properties.
+ CollectImmediateProperties(CDecl, PropMap, NoNeedToImplPropMap,
+ SynthesizeProperties/*CollectClassPropsOnly*/);
// Scan the @interface to see if any of the protocols it adopts
// require an explicit implementation, via attribute
@@ -1802,14 +1851,17 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
ObjCContainerDecl::PropertyMap NoNeedToImplPropMap;
LazyMap.reset(new ObjCContainerDecl::PropertyMap());
CollectImmediateProperties(CDecl, *LazyMap, NoNeedToImplPropMap,
+ /* CollectClassPropsOnly */ false,
/* IncludeProtocols */ false);
}
// Add the properties of 'PDecl' to the list of properties that
// need to be implemented.
for (auto *PropDecl : PDecl->properties()) {
- if ((*LazyMap)[PropDecl->getIdentifier()])
+ if ((*LazyMap)[std::make_pair(PropDecl->getIdentifier(),
+ PropDecl->isClassProperty())])
continue;
- PropMap[PropDecl->getIdentifier()] = PropDecl;
+ PropMap[std::make_pair(PropDecl->getIdentifier(),
+ PropDecl->isClassProperty())] = PropDecl;
}
}
}
@@ -1821,10 +1873,10 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
for (const auto *I : IMPDecl->property_impls())
PropImplMap.insert(I->getPropertyDecl());
- SelectorSet InsMap;
+ llvm::SmallPtrSet<const ObjCMethodDecl *, 8> InsMap;
// Collect property accessors implemented in current implementation.
- for (const auto *I : IMPDecl->instance_methods())
- InsMap.insert(I->getSelector());
+ for (const auto *I : IMPDecl->methods())
+ InsMap.insert(I);
ObjCCategoryDecl *C = dyn_cast<ObjCCategoryDecl>(CDecl);
ObjCInterfaceDecl *PrimaryClass = nullptr;
@@ -1835,14 +1887,14 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
// When reporting on missing setter/getters, do not report when
// setter/getter is implemented in category's primary class
// implementation.
- for (const auto *I : IMP->instance_methods())
- InsMap.insert(I->getSelector());
+ for (const auto *I : IMP->methods())
+ InsMap.insert(I);
}
for (ObjCContainerDecl::PropertyMap::iterator
P = PropMap.begin(), E = PropMap.end(); P != E; ++P) {
ObjCPropertyDecl *Prop = P->second;
- // Is there a matching propery synthesize/dynamic?
+ // Is there a matching property synthesize/dynamic?
if (Prop->isInvalidDecl() ||
Prop->getPropertyImplementation() == ObjCPropertyDecl::Optional ||
PropImplMap.count(Prop) ||
@@ -1894,13 +1946,13 @@ Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl,
return;
ObjCContainerDecl::PropertyMap PM;
for (auto *Prop : IDecl->properties())
- PM[Prop->getIdentifier()] = Prop;
+ PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
for (const auto *Ext : IDecl->known_extensions())
for (auto *Prop : Ext->properties())
- PM[Prop->getIdentifier()] = Prop;
+ PM[std::make_pair(Prop->getIdentifier(), Prop->isClassProperty())] = Prop;
- for (ObjCContainerDecl::PropertyMap::iterator I = PM.begin(), E = PM.end();
- I != E; ++I) {
+ for (ObjCContainerDecl::PropertyMap::iterator I = PM.begin(), E = PM.end();
+ I != E; ++I) {
const ObjCPropertyDecl *Property = I->second;
ObjCMethodDecl *GetterMethod = nullptr;
ObjCMethodDecl *SetterMethod = nullptr;
@@ -1911,8 +1963,12 @@ Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl,
if (!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_atomic) &&
!(AttributesAsWritten & ObjCPropertyDecl::OBJC_PR_nonatomic)) {
- GetterMethod = IMPDecl->getInstanceMethod(Property->getGetterName());
- SetterMethod = IMPDecl->getInstanceMethod(Property->getSetterName());
+ GetterMethod = Property->isClassProperty() ?
+ IMPDecl->getClassMethod(Property->getGetterName()) :
+ IMPDecl->getInstanceMethod(Property->getGetterName());
+ SetterMethod = Property->isClassProperty() ?
+ IMPDecl->getClassMethod(Property->getSetterName()) :
+ IMPDecl->getInstanceMethod(Property->getSetterName());
LookedUpGetterSetter = true;
if (GetterMethod) {
Diag(GetterMethod->getLocation(),
@@ -1932,13 +1988,17 @@ Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl,
if ((Attributes & ObjCPropertyDecl::OBJC_PR_nonatomic) ||
!(Attributes & ObjCPropertyDecl::OBJC_PR_readwrite))
continue;
- if (const ObjCPropertyImplDecl *PIDecl
- = IMPDecl->FindPropertyImplDecl(Property->getIdentifier())) {
+ if (const ObjCPropertyImplDecl *PIDecl = IMPDecl->FindPropertyImplDecl(
+ Property->getIdentifier(), Property->getQueryKind())) {
if (PIDecl->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
continue;
if (!LookedUpGetterSetter) {
- GetterMethod = IMPDecl->getInstanceMethod(Property->getGetterName());
- SetterMethod = IMPDecl->getInstanceMethod(Property->getSetterName());
+ GetterMethod = Property->isClassProperty() ?
+ IMPDecl->getClassMethod(Property->getGetterName()) :
+ IMPDecl->getInstanceMethod(Property->getGetterName());
+ SetterMethod = Property->isClassProperty() ?
+ IMPDecl->getClassMethod(Property->getSetterName()) :
+ IMPDecl->getInstanceMethod(Property->getSetterName());
}
if ((GetterMethod && !SetterMethod) || (!GetterMethod && SetterMethod)) {
SourceLocation MethodLoc =
@@ -1981,6 +2041,7 @@ void Sema::DiagnoseOwningPropertyGetterSynthesis(const ObjCImplementationDecl *D
for (const auto *PID : D->property_impls()) {
const ObjCPropertyDecl *PD = PID->getPropertyDecl();
if (PD && !PD->hasAttr<NSReturnsNotRetainedAttr>() &&
+ !PD->isClassProperty() &&
!D->getInstanceMethod(PD->getGetterName())) {
ObjCMethodDecl *method = PD->getGetterMethodDecl();
if (!method)
@@ -2086,20 +2147,30 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
if (CD->isInvalidDecl())
return;
- GetterMethod = CD->getInstanceMethod(property->getGetterName());
+ bool IsClassProperty = property->isClassProperty();
+ GetterMethod = IsClassProperty ?
+ CD->getClassMethod(property->getGetterName()) :
+ CD->getInstanceMethod(property->getGetterName());
+
// if setter or getter is not found in class extension, it might be
// in the primary class.
if (!GetterMethod)
if (const ObjCCategoryDecl *CatDecl = dyn_cast<ObjCCategoryDecl>(CD))
if (CatDecl->IsClassExtension())
- GetterMethod = CatDecl->getClassInterface()->
+ GetterMethod = IsClassProperty ? CatDecl->getClassInterface()->
+ getClassMethod(property->getGetterName()) :
+ CatDecl->getClassInterface()->
getInstanceMethod(property->getGetterName());
- SetterMethod = CD->getInstanceMethod(property->getSetterName());
+ SetterMethod = IsClassProperty ?
+ CD->getClassMethod(property->getSetterName()) :
+ CD->getInstanceMethod(property->getSetterName());
if (!SetterMethod)
if (const ObjCCategoryDecl *CatDecl = dyn_cast<ObjCCategoryDecl>(CD))
if (CatDecl->IsClassExtension())
- SetterMethod = CatDecl->getClassInterface()->
+ SetterMethod = IsClassProperty ? CatDecl->getClassInterface()->
+ getClassMethod(property->getSetterName()) :
+ CatDecl->getClassInterface()->
getInstanceMethod(property->getSetterName());
DiagnosePropertyAccessorMismatch(property, GetterMethod,
property->getLocation());
@@ -2130,13 +2201,16 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
  // (which is odd, but allowed). Sema should be typechecking that the
  // declarations jibe in that situation (which it is not currently).
if (!GetterMethod) {
- // No instance method of same name as property getter name was found.
+    // No instance/class method with the same name as the property getter was
+    // found.
// Declare a getter method and add it to the list of methods
// for this class.
SourceLocation Loc = property->getLocation();
+ // The getter returns the declared property type with all qualifiers
+ // removed.
+ QualType resultTy = property->getType().getAtomicUnqualifiedType();
+
// If the property is null_resettable, the getter returns nonnull.
- QualType resultTy = property->getType();
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) {
QualType modifiedTy = resultTy;
@@ -2150,7 +2224,7 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
GetterMethod = ObjCMethodDecl::Create(Context, Loc, Loc,
property->getGetterName(),
resultTy, nullptr, CD,
- /*isInstance=*/true, /*isVariadic=*/false,
+ !IsClassProperty, /*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true, /*isDefined=*/false,
(property->getPropertyImplementation() ==
@@ -2186,7 +2260,8 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
if (!property->isReadOnly()) {
// Find the default setter and if one not found, add one.
if (!SetterMethod) {
- // No instance method of same name as property setter name was found.
+      // No instance/class method with the same name as the property setter
+      // was found.
// Declare a setter method and add it to the list of methods
// for this class.
SourceLocation Loc = property->getLocation();
@@ -2194,7 +2269,7 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
SetterMethod =
ObjCMethodDecl::Create(Context, Loc, Loc,
property->getSetterName(), Context.VoidTy,
- nullptr, CD, /*isInstance=*/true,
+ nullptr, CD, !IsClassProperty,
/*isVariadic=*/false,
/*isPropertyAccessor=*/true,
/*isImplicitlyDeclared=*/true,
@@ -2204,9 +2279,12 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
ObjCMethodDecl::Optional :
ObjCMethodDecl::Required);
+ // Remove all qualifiers from the setter's parameter type.
+ QualType paramTy =
+ property->getType().getUnqualifiedType().getAtomicUnqualifiedType();
+
// If the property is null_resettable, the setter accepts a
// nullable value.
- QualType paramTy = property->getType().getUnqualifiedType();
if (property->getPropertyAttributes() &
ObjCPropertyDecl::OBJC_PR_null_resettable) {
QualType modifiedTy = paramTy;
@@ -2257,10 +2335,17 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property) {
// double bar = [foo bar];
// }
//
- if (GetterMethod)
- AddInstanceMethodToGlobalPool(GetterMethod);
- if (SetterMethod)
- AddInstanceMethodToGlobalPool(SetterMethod);
+ if (!IsClassProperty) {
+ if (GetterMethod)
+ AddInstanceMethodToGlobalPool(GetterMethod);
+ if (SetterMethod)
+ AddInstanceMethodToGlobalPool(SetterMethod);
+ } else {
+ if (GetterMethod)
+ AddFactoryMethodToGlobalPool(GetterMethod);
+ if (SetterMethod)
+ AddFactoryMethodToGlobalPool(SetterMethod);
+ }
ObjCInterfaceDecl *CurrentClass = dyn_cast<ObjCInterfaceDecl>(CD);
if (!CurrentClass) {
@@ -2447,5 +2532,4 @@ void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
(Attributes & ObjCDeclSpec::DQ_PR_setter))
Diag(Loc, diag::warn_objc_readonly_property_has_setter);
-
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
index 0d51ee11d109..b7ac48583e1a 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp
@@ -15,12 +15,14 @@
#include "TreeTransform.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
+#include "clang/AST/CXXInheritance.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
@@ -43,111 +45,91 @@ enum DefaultDataSharingAttributes {
DSA_shared = 1 << 1 /// \brief Default data sharing attribute 'shared'.
};
-template <class T> struct MatchesAny {
- explicit MatchesAny(ArrayRef<T> Arr) : Arr(std::move(Arr)) {}
- bool operator()(T Kind) {
- for (auto KindEl : Arr)
- if (KindEl == Kind)
- return true;
- return false;
- }
-
-private:
- ArrayRef<T> Arr;
-};
-struct MatchesAlways {
- MatchesAlways() {}
- template <class T> bool operator()(T) { return true; }
-};
-
-typedef MatchesAny<OpenMPClauseKind> MatchesAnyClause;
-typedef MatchesAny<OpenMPDirectiveKind> MatchesAnyDirective;
-
/// \brief Stack for tracking declarations used in OpenMP directives and
/// clauses and their data-sharing attributes.
-class DSAStackTy {
+class DSAStackTy final {
public:
- struct DSAVarData {
- OpenMPDirectiveKind DKind;
- OpenMPClauseKind CKind;
- DeclRefExpr *RefExpr;
+ struct DSAVarData final {
+ OpenMPDirectiveKind DKind = OMPD_unknown;
+ OpenMPClauseKind CKind = OMPC_unknown;
+ Expr *RefExpr = nullptr;
+ DeclRefExpr *PrivateCopy = nullptr;
SourceLocation ImplicitDSALoc;
- DSAVarData()
- : DKind(OMPD_unknown), CKind(OMPC_unknown), RefExpr(nullptr),
- ImplicitDSALoc() {}
- };
-
-public:
- struct MapInfo {
- Expr *RefExpr;
+ DSAVarData() {}
};
+ typedef llvm::SmallVector<std::pair<Expr *, OverloadedOperatorKind>, 4>
+ OperatorOffsetTy;
private:
- struct DSAInfo {
- OpenMPClauseKind Attributes;
- DeclRefExpr *RefExpr;
+ struct DSAInfo final {
+ OpenMPClauseKind Attributes = OMPC_unknown;
+ /// Pointer to a reference expression and a flag which shows that the
+ /// variable is marked as lastprivate(true) or not (false).
+ llvm::PointerIntPair<Expr *, 1, bool> RefExpr;
+ DeclRefExpr *PrivateCopy = nullptr;
};
- typedef llvm::SmallDenseMap<VarDecl *, DSAInfo, 64> DeclSAMapTy;
- typedef llvm::SmallDenseMap<VarDecl *, DeclRefExpr *, 64> AlignedMapTy;
- typedef llvm::DenseMap<VarDecl *, unsigned> LoopControlVariablesMapTy;
- typedef llvm::SmallDenseMap<VarDecl *, MapInfo, 64> MappedDeclsTy;
+ typedef llvm::DenseMap<ValueDecl *, DSAInfo> DeclSAMapTy;
+ typedef llvm::DenseMap<ValueDecl *, Expr *> AlignedMapTy;
+ typedef std::pair<unsigned, VarDecl *> LCDeclInfo;
+ typedef llvm::DenseMap<ValueDecl *, LCDeclInfo> LoopControlVariablesMapTy;
+ typedef llvm::DenseMap<
+ ValueDecl *, OMPClauseMappableExprCommon::MappableExprComponentLists>
+ MappedExprComponentsTy;
typedef llvm::StringMap<std::pair<OMPCriticalDirective *, llvm::APSInt>>
CriticalsWithHintsTy;
+ typedef llvm::DenseMap<OMPDependClause *, OperatorOffsetTy>
+ DoacrossDependMapTy;
- struct SharingMapTy {
+ struct SharingMapTy final {
DeclSAMapTy SharingMap;
AlignedMapTy AlignedMap;
- MappedDeclsTy MappedDecls;
+ MappedExprComponentsTy MappedExprComponents;
LoopControlVariablesMapTy LCVMap;
- DefaultDataSharingAttributes DefaultAttr;
+ DefaultDataSharingAttributes DefaultAttr = DSA_unspecified;
SourceLocation DefaultAttrLoc;
- OpenMPDirectiveKind Directive;
+ OpenMPDirectiveKind Directive = OMPD_unknown;
DeclarationNameInfo DirectiveName;
- Scope *CurScope;
+ Scope *CurScope = nullptr;
SourceLocation ConstructLoc;
+    /// Set of 'depend' clauses with 'sink|source' dependence kind. Required
+    /// to get the data (loop counters etc.) about the enclosing loop-based
+    /// construct. This data is required during codegen.
+ DoacrossDependMapTy DoacrossDepends;
    /// \brief The first argument (Expr *) contains the optional argument of
    /// the 'ordered' clause; the second one is true if the region has an
    /// 'ordered' clause, false otherwise.
llvm::PointerIntPair<Expr *, 1, bool> OrderedRegion;
- bool NowaitRegion;
- bool CancelRegion;
- unsigned AssociatedLoops;
+ bool NowaitRegion = false;
+ bool CancelRegion = false;
+ unsigned AssociatedLoops = 1;
SourceLocation InnerTeamsRegionLoc;
SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name,
Scope *CurScope, SourceLocation Loc)
- : SharingMap(), AlignedMap(), LCVMap(), DefaultAttr(DSA_unspecified),
- Directive(DKind), DirectiveName(std::move(Name)), CurScope(CurScope),
- ConstructLoc(Loc), OrderedRegion(), NowaitRegion(false),
- CancelRegion(false), AssociatedLoops(1), InnerTeamsRegionLoc() {}
- SharingMapTy()
- : SharingMap(), AlignedMap(), LCVMap(), DefaultAttr(DSA_unspecified),
- Directive(OMPD_unknown), DirectiveName(), CurScope(nullptr),
- ConstructLoc(), OrderedRegion(), NowaitRegion(false),
- CancelRegion(false), AssociatedLoops(1), InnerTeamsRegionLoc() {}
+ : Directive(DKind), DirectiveName(Name), CurScope(CurScope),
+ ConstructLoc(Loc) {}
+ SharingMapTy() {}
};
- typedef SmallVector<SharingMapTy, 64> StackTy;
+ typedef SmallVector<SharingMapTy, 4> StackTy;
/// \brief Stack of used declaration and their data-sharing attributes.
StackTy Stack;
  /// \brief true if the check for DSA must start from the parent directive,
  /// false if from the current directive.
- OpenMPClauseKind ClauseKindMode;
+ OpenMPClauseKind ClauseKindMode = OMPC_unknown;
Sema &SemaRef;
- bool ForceCapturing;
+ bool ForceCapturing = false;
CriticalsWithHintsTy Criticals;
typedef SmallVector<SharingMapTy, 8>::reverse_iterator reverse_iterator;
- DSAVarData getDSA(StackTy::reverse_iterator Iter, VarDecl *D);
+ DSAVarData getDSA(StackTy::reverse_iterator& Iter, ValueDecl *D);
  /// \brief Checks if the variable is local to an OpenMP region.
bool isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter);
public:
- explicit DSAStackTy(Sema &S)
- : Stack(1), ClauseKindMode(OMPC_unknown), SemaRef(S),
- ForceCapturing(false) {}
+ explicit DSAStackTy(Sema &S) : Stack(1), SemaRef(S) {}
bool isClauseParsingMode() const { return ClauseKindMode != OMPC_unknown; }
void setClauseParsingMode(OpenMPClauseKind K) { ClauseKindMode = K; }
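DSAInfo above packs the "marked as lastprivate" flag into the low bit of the reference-expression pointer. A self-contained sketch of the llvm::PointerIntPair behavior this relies on (demo function hypothetical):

    #include "llvm/ADT/PointerIntPair.h"
    #include <cassert>

    void pointerIntPairDemo() {
      int V = 42;
      // The flag rides in an unused low alignment bit of the pointer, so the
      // pair still has the size of a plain pointer.
      llvm::PointerIntPair<int *, 1, bool> P;
      P.setPointerAndInt(&V, true);
      assert(P.getPointer() == &V && P.getInt());
      P.setInt(false); // flip the flag without disturbing the pointer
      assert(P.getPointer() == &V && !P.getInt());
    }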
@@ -179,51 +161,54 @@ public:
/// \brief If 'aligned' declaration for given variable \a D was not seen yet,
/// add it and return NULL; otherwise return previous occurrence's expression
/// for diagnostics.
- DeclRefExpr *addUniqueAligned(VarDecl *D, DeclRefExpr *NewDE);
+ Expr *addUniqueAligned(ValueDecl *D, Expr *NewDE);
/// \brief Register specified variable as loop control variable.
- void addLoopControlVariable(VarDecl *D);
+ void addLoopControlVariable(ValueDecl *D, VarDecl *Capture);
/// \brief Check if the specified variable is a loop control variable for
  /// the current region.
/// \return The index of the loop control variable in the list of associated
/// for-loops (from outer to inner).
- unsigned isLoopControlVariable(VarDecl *D);
+ LCDeclInfo isLoopControlVariable(ValueDecl *D);
/// \brief Check if the specified variable is a loop control variable for
  /// the parent region.
/// \return The index of the loop control variable in the list of associated
/// for-loops (from outer to inner).
- unsigned isParentLoopControlVariable(VarDecl *D);
+ LCDeclInfo isParentLoopControlVariable(ValueDecl *D);
/// \brief Get the loop control variable for the I-th loop (or nullptr) in
/// parent directive.
- VarDecl *getParentLoopControlVariable(unsigned I);
+ ValueDecl *getParentLoopControlVariable(unsigned I);
/// \brief Adds explicit data sharing attribute to the specified declaration.
- void addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A);
+ void addDSA(ValueDecl *D, Expr *E, OpenMPClauseKind A,
+ DeclRefExpr *PrivateCopy = nullptr);
/// \brief Returns data sharing attributes from top of the stack for the
/// specified declaration.
- DSAVarData getTopDSA(VarDecl *D, bool FromParent);
+ DSAVarData getTopDSA(ValueDecl *D, bool FromParent);
/// \brief Returns data-sharing attributes for the specified declaration.
- DSAVarData getImplicitDSA(VarDecl *D, bool FromParent);
+ DSAVarData getImplicitDSA(ValueDecl *D, bool FromParent);
  /// \brief Checks if the specified variable has data-sharing attributes
  /// which match the specified \a CPred predicate in any directive which
  /// matches the \a DPred predicate.
- template <class ClausesPredicate, class DirectivesPredicate>
- DSAVarData hasDSA(VarDecl *D, ClausesPredicate CPred,
- DirectivesPredicate DPred, bool FromParent);
+ DSAVarData hasDSA(ValueDecl *D,
+ const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+ const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
+ bool FromParent);
  /// \brief Checks if the specified variable has data-sharing attributes
  /// which match the specified \a CPred predicate in any innermost directive
  /// which matches the \a DPred predicate.
- template <class ClausesPredicate, class DirectivesPredicate>
- DSAVarData hasInnermostDSA(VarDecl *D, ClausesPredicate CPred,
- DirectivesPredicate DPred,
- bool FromParent);
+ DSAVarData
+ hasInnermostDSA(ValueDecl *D,
+ const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+ const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
+ bool FromParent);
  /// \brief Checks if the specified variable has explicit data-sharing
  /// attributes which match the specified \a CPred predicate at the
  /// specified OpenMP region.
- bool hasExplicitDSA(VarDecl *D,
+ bool hasExplicitDSA(ValueDecl *D,
const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
- unsigned Level);
+ unsigned Level, bool NotLastprivate = false);
  /// \brief Returns true if the directive at level \a Level matches the
  /// specified \a DPred predicate.
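The deleted MatchesAny/MatchesAlways templates become llvm::function_ref parameters throughout this class, trading template instantiation for a type-erased, non-owning callable. A small sketch of the idiom (the counting function is hypothetical); a function_ref must not outlive the lambda it refers to:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"

    static int countMatching(llvm::ArrayRef<int> Xs,
                             llvm::function_ref<bool(int)> Pred) {
      int N = 0;
      for (int X : Xs)
        if (Pred(X)) // type-erased call, no template parameter needed
          ++N;
      return N;
    }

    // Usage: countMatching(Values, [](int X) { return X > 0; });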
@@ -232,8 +217,10 @@ public:
unsigned Level);
/// \brief Finds a directive which matches specified \a DPred predicate.
- template <class NamedDirectivesPredicate>
- bool hasDirective(NamedDirectivesPredicate DPred, bool FromParent);
+ bool hasDirective(const llvm::function_ref<bool(OpenMPDirectiveKind,
+ const DeclarationNameInfo &,
+ SourceLocation)> &DPred,
+ bool FromParent);
/// \brief Returns currently analyzed directive.
OpenMPDirectiveKind getCurrentDirective() const {
@@ -245,8 +232,6 @@ public:
return Stack[Stack.size() - 2].Directive;
return OMPD_unknown;
}
- /// \brief Return the directive associated with the provided scope.
- OpenMPDirectiveKind getDirectiveForScope(const Scope *S) const;
/// \brief Set default data sharing attribute to none.
void setDefaultDSANone(SourceLocation Loc) {
@@ -338,42 +323,92 @@ public:
Scope *getCurScope() { return Stack.back().CurScope; }
SourceLocation getConstructLoc() { return Stack.back().ConstructLoc; }
- MapInfo getMapInfoForVar(VarDecl *VD) {
- MapInfo VarMI = {0};
- for (auto Cnt = Stack.size() - 1; Cnt > 0; --Cnt) {
- if (Stack[Cnt].MappedDecls.count(VD)) {
- VarMI = Stack[Cnt].MappedDecls[VD];
- break;
- }
+  // Run the check specified in \a Check on all component lists and return true
+ // if any issue is found.
+ bool checkMappableExprComponentListsForDecl(
+ ValueDecl *VD, bool CurrentRegionOnly,
+ const llvm::function_ref<bool(
+ OMPClauseMappableExprCommon::MappableExprComponentListRef)> &Check) {
+ auto SI = Stack.rbegin();
+ auto SE = Stack.rend();
+
+ if (SI == SE)
+ return false;
+
+ if (CurrentRegionOnly) {
+ SE = std::next(SI);
+ } else {
+ ++SI;
}
- return VarMI;
- }
- void addMapInfoForVar(VarDecl *VD, MapInfo MI) {
- if (Stack.size() > 1) {
- Stack.back().MappedDecls[VD] = MI;
+ for (; SI != SE; ++SI) {
+ auto MI = SI->MappedExprComponents.find(VD);
+ if (MI != SI->MappedExprComponents.end())
+ for (auto &L : MI->second)
+ if (Check(L))
+ return true;
}
+ return false;
}
- MapInfo IsMappedInCurrentRegion(VarDecl *VD) {
- assert(Stack.size() > 1 && "Target level is 0");
- MapInfo VarMI = {0};
- if (Stack.size() > 1 && Stack.back().MappedDecls.count(VD)) {
- VarMI = Stack.back().MappedDecls[VD];
+ // Create a new mappable expression component list associated with a given
+ // declaration and initialize it with the provided list of components.
+ void addMappableExpressionComponents(
+ ValueDecl *VD,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components) {
+ assert(Stack.size() > 1 &&
+ "Not expecting to retrieve components from a empty stack!");
+ auto &MEC = Stack.back().MappedExprComponents[VD];
+ // Create new entry and append the new components there.
+ MEC.resize(MEC.size() + 1);
+ MEC.back().append(Components.begin(), Components.end());
+ }
+
+ unsigned getNestingLevel() const {
+ assert(Stack.size() > 1);
+ return Stack.size() - 2;
+ }
+ void addDoacrossDependClause(OMPDependClause *C, OperatorOffsetTy &OpsOffs) {
+ assert(Stack.size() > 2);
+ assert(isOpenMPWorksharingDirective(Stack[Stack.size() - 2].Directive));
+ Stack[Stack.size() - 2].DoacrossDepends.insert({C, OpsOffs});
+ }
+ llvm::iterator_range<DoacrossDependMapTy::const_iterator>
+ getDoacrossDependClauses() const {
+ assert(Stack.size() > 1);
+ if (isOpenMPWorksharingDirective(Stack[Stack.size() - 1].Directive)) {
+ auto &Ref = Stack[Stack.size() - 1].DoacrossDepends;
+ return llvm::make_range(Ref.begin(), Ref.end());
}
- return VarMI;
+ return llvm::make_range(Stack[0].DoacrossDepends.end(),
+ Stack[0].DoacrossDepends.end());
}
};
bool isParallelOrTaskRegion(OpenMPDirectiveKind DKind) {
- return isOpenMPParallelDirective(DKind) || DKind == OMPD_task ||
- isOpenMPTeamsDirective(DKind) || DKind == OMPD_unknown ||
- isOpenMPTaskLoopDirective(DKind);
+ return isOpenMPParallelDirective(DKind) || isOpenMPTaskingDirective(DKind) ||
+ isOpenMPTeamsDirective(DKind) || DKind == OMPD_unknown;
}
} // namespace
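The new DoacrossDepends map records 'depend(sink: ...)' and 'depend(source)' clauses for OpenMP 4.5 doacross loops. A minimal user-level example of the construct being tracked (illustrative, not from this commit):

    void doacross(int *A, int N) {
    #pragma omp parallel for ordered(1)
      for (int i = 1; i < N; ++i) {
    #pragma omp ordered depend(sink : i - 1) // wait for iteration i-1
        A[i] += A[i - 1];
    #pragma omp ordered depend(source)       // publish this iteration
      }
    }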
-DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
- VarDecl *D) {
- D = D->getCanonicalDecl();
+static ValueDecl *getCanonicalDecl(ValueDecl *D) {
+ auto *VD = dyn_cast<VarDecl>(D);
+ auto *FD = dyn_cast<FieldDecl>(D);
+ if (VD != nullptr) {
+ VD = VD->getCanonicalDecl();
+ D = VD;
+ } else {
+ assert(FD);
+ FD = FD->getCanonicalDecl();
+ D = FD;
+ }
+ return D;
+}
+
+DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator& Iter,
+ ValueDecl *D) {
+ D = getCanonicalDecl(D);
+ auto *VD = dyn_cast<VarDecl>(D);
+ auto *FD = dyn_cast<FieldDecl>(D);
DSAVarData DVar;
if (Iter == std::prev(Stack.rend())) {
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
@@ -381,14 +416,18 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// File-scope or namespace-scope variables referenced in called routines
// in the region are shared unless they appear in a threadprivate
// directive.
- if (!D->isFunctionOrMethodVarDecl() && !isa<ParmVarDecl>(D))
+ if (VD && !VD->isFunctionOrMethodVarDecl() && !isa<ParmVarDecl>(D))
DVar.CKind = OMPC_shared;
// OpenMP [2.9.1.2, Data-sharing Attribute Rules for Variables Referenced
// in a region but not in construct]
// Variables with static storage duration that are declared in called
// routines in the region are shared.
- if (D->hasGlobalStorage())
+ if (VD && VD->hasGlobalStorage())
+ DVar.CKind = OMPC_shared;
+
+ // Non-static data members are shared by default.
+ if (FD)
DVar.CKind = OMPC_shared;
return DVar;
@@ -399,8 +438,8 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// in a Construct, C/C++, predetermined, p.1]
// Variables with automatic storage duration that are declared in a scope
// inside the construct are private.
- if (isOpenMPLocal(D, Iter) && D->isLocalVarDecl() &&
- (D->getStorageClass() == SC_Auto || D->getStorageClass() == SC_None)) {
+ if (VD && isOpenMPLocal(VD, Iter) && VD->isLocalVarDecl() &&
+ (VD->getStorageClass() == SC_Auto || VD->getStorageClass() == SC_None)) {
DVar.CKind = OMPC_private;
return DVar;
}
@@ -408,7 +447,8 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// Explicitly specified attributes and local variables with predetermined
// attributes.
if (Iter->SharingMap.count(D)) {
- DVar.RefExpr = Iter->SharingMap[D].RefExpr;
+ DVar.RefExpr = Iter->SharingMap[D].RefExpr.getPointer();
+ DVar.PrivateCopy = Iter->SharingMap[D].PrivateCopy;
DVar.CKind = Iter->SharingMap[D].Attributes;
DVar.ImplicitDSALoc = Iter->DefaultAttrLoc;
return DVar;
@@ -442,27 +482,24 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// In a task construct, if no default clause is present, a variable that in
// the enclosing context is determined to be shared by all implicit tasks
// bound to the current team is shared.
- if (DVar.DKind == OMPD_task) {
+ if (isOpenMPTaskingDirective(DVar.DKind)) {
DSAVarData DVarTemp;
for (StackTy::reverse_iterator I = std::next(Iter), EE = Stack.rend();
I != EE; ++I) {
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
- // Referenced
- // in a Construct, implicitly determined, p.6]
+ // Referenced in a Construct, implicitly determined, p.6]
// In a task construct, if no default clause is present, a variable
// whose data-sharing attribute is not determined by the rules above is
// firstprivate.
DVarTemp = getDSA(I, D);
if (DVarTemp.CKind != OMPC_shared) {
DVar.RefExpr = nullptr;
- DVar.DKind = OMPD_task;
DVar.CKind = OMPC_firstprivate;
return DVar;
}
if (isParallelOrTaskRegion(I->Directive))
break;
}
- DVar.DKind = OMPD_task;
DVar.CKind =
(DVarTemp.CKind == OMPC_unknown) ? OMPC_firstprivate : OMPC_shared;
return DVar;
@@ -473,12 +510,12 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(StackTy::reverse_iterator Iter,
// For constructs other than task, if no default clause is present, these
// variables inherit their data-sharing attributes from the enclosing
// context.
- return getDSA(std::next(Iter), D);
+ return getDSA(++Iter, D);
}
-DeclRefExpr *DSAStackTy::addUniqueAligned(VarDecl *D, DeclRefExpr *NewDE) {
+Expr *DSAStackTy::addUniqueAligned(ValueDecl *D, Expr *NewDE) {
assert(Stack.size() > 1 && "Data sharing attributes stack is empty");
- D = D->getCanonicalDecl();
+ D = getCanonicalDecl(D);
auto It = Stack.back().AlignedMap.find(D);
if (It == Stack.back().AlignedMap.end()) {
assert(NewDE && "Unexpected nullptr expr to be added into aligned map");
@@ -491,46 +528,69 @@ DeclRefExpr *DSAStackTy::addUniqueAligned(VarDecl *D, DeclRefExpr *NewDE) {
return nullptr;
}
-void DSAStackTy::addLoopControlVariable(VarDecl *D) {
+void DSAStackTy::addLoopControlVariable(ValueDecl *D, VarDecl *Capture) {
assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
- D = D->getCanonicalDecl();
- Stack.back().LCVMap.insert(std::make_pair(D, Stack.back().LCVMap.size() + 1));
+ D = getCanonicalDecl(D);
+ Stack.back().LCVMap.insert(
+ std::make_pair(D, LCDeclInfo(Stack.back().LCVMap.size() + 1, Capture)));
}
-unsigned DSAStackTy::isLoopControlVariable(VarDecl *D) {
+DSAStackTy::LCDeclInfo DSAStackTy::isLoopControlVariable(ValueDecl *D) {
assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
- D = D->getCanonicalDecl();
- return Stack.back().LCVMap.count(D) > 0 ? Stack.back().LCVMap[D] : 0;
+ D = getCanonicalDecl(D);
+ return Stack.back().LCVMap.count(D) > 0 ? Stack.back().LCVMap[D]
+ : LCDeclInfo(0, nullptr);
}
-unsigned DSAStackTy::isParentLoopControlVariable(VarDecl *D) {
+DSAStackTy::LCDeclInfo DSAStackTy::isParentLoopControlVariable(ValueDecl *D) {
assert(Stack.size() > 2 && "Data-sharing attributes stack is empty");
- D = D->getCanonicalDecl();
+ D = getCanonicalDecl(D);
return Stack[Stack.size() - 2].LCVMap.count(D) > 0
? Stack[Stack.size() - 2].LCVMap[D]
- : 0;
+ : LCDeclInfo(0, nullptr);
}
-VarDecl *DSAStackTy::getParentLoopControlVariable(unsigned I) {
+ValueDecl *DSAStackTy::getParentLoopControlVariable(unsigned I) {
assert(Stack.size() > 2 && "Data-sharing attributes stack is empty");
if (Stack[Stack.size() - 2].LCVMap.size() < I)
return nullptr;
for (auto &Pair : Stack[Stack.size() - 2].LCVMap) {
- if (Pair.second == I)
+ if (Pair.second.first == I)
return Pair.first;
}
return nullptr;
}
-void DSAStackTy::addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A) {
- D = D->getCanonicalDecl();
+void DSAStackTy::addDSA(ValueDecl *D, Expr *E, OpenMPClauseKind A,
+ DeclRefExpr *PrivateCopy) {
+ D = getCanonicalDecl(D);
if (A == OMPC_threadprivate) {
- Stack[0].SharingMap[D].Attributes = A;
- Stack[0].SharingMap[D].RefExpr = E;
+ auto &Data = Stack[0].SharingMap[D];
+ Data.Attributes = A;
+ Data.RefExpr.setPointer(E);
+ Data.PrivateCopy = nullptr;
} else {
assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
- Stack.back().SharingMap[D].Attributes = A;
- Stack.back().SharingMap[D].RefExpr = E;
+ auto &Data = Stack.back().SharingMap[D];
+ assert(Data.Attributes == OMPC_unknown || (A == Data.Attributes) ||
+ (A == OMPC_firstprivate && Data.Attributes == OMPC_lastprivate) ||
+ (A == OMPC_lastprivate && Data.Attributes == OMPC_firstprivate) ||
+ (isLoopControlVariable(D).first && A == OMPC_private));
+ if (A == OMPC_lastprivate && Data.Attributes == OMPC_firstprivate) {
+ Data.RefExpr.setInt(/*IntVal=*/true);
+ return;
+ }
+ const bool IsLastprivate =
+ A == OMPC_lastprivate || Data.Attributes == OMPC_lastprivate;
+ Data.Attributes = A;
+ Data.RefExpr.setPointerAndInt(E, IsLastprivate);
+ Data.PrivateCopy = PrivateCopy;
+ if (PrivateCopy) {
+ auto &Data = Stack.back().SharingMap[PrivateCopy->getDecl()];
+ Data.Attributes = A;
+ Data.RefExpr.setPointerAndInt(PrivateCopy, IsLastprivate);
+ Data.PrivateCopy = nullptr;
+ }
}
}
@@ -581,29 +641,35 @@ static DeclRefExpr *buildDeclRefExpr(Sema &S, VarDecl *D, QualType Ty,
VK_LValue);
}
-DSAStackTy::DSAVarData DSAStackTy::getTopDSA(VarDecl *D, bool FromParent) {
- D = D->getCanonicalDecl();
+DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) {
+ D = getCanonicalDecl(D);
DSAVarData DVar;
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
// in a Construct, C/C++, predetermined, p.1]
// Variables appearing in threadprivate directives are threadprivate.
- if ((D->getTLSKind() != VarDecl::TLS_None &&
- !(D->hasAttr<OMPThreadPrivateDeclAttr>() &&
+ auto *VD = dyn_cast<VarDecl>(D);
+ if ((VD && VD->getTLSKind() != VarDecl::TLS_None &&
+ !(VD->hasAttr<OMPThreadPrivateDeclAttr>() &&
SemaRef.getLangOpts().OpenMPUseTLS &&
SemaRef.getASTContext().getTargetInfo().isTLSSupported())) ||
- (D->getStorageClass() == SC_Register && D->hasAttr<AsmLabelAttr>() &&
- !D->isLocalVarDecl())) {
- addDSA(D, buildDeclRefExpr(SemaRef, D, D->getType().getNonReferenceType(),
+ (VD && VD->getStorageClass() == SC_Register &&
+ VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())) {
+ addDSA(D, buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
D->getLocation()),
OMPC_threadprivate);
}
if (Stack[0].SharingMap.count(D)) {
- DVar.RefExpr = Stack[0].SharingMap[D].RefExpr;
+ DVar.RefExpr = Stack[0].SharingMap[D].RefExpr.getPointer();
DVar.CKind = OMPC_threadprivate;
return DVar;
}
+ if (Stack.size() == 1) {
+ // Not in OpenMP execution region and top scope was already checked.
+ return DVar;
+ }
+
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
// in a Construct, C/C++, predetermined, p.4]
// Static data members are shared.
@@ -611,9 +677,9 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(VarDecl *D, bool FromParent) {
// in a Construct, C/C++, predetermined, p.7]
// Variables with static storage duration that are declared in a scope
// inside the construct are shared.
- if (D->isStaticDataMember()) {
- DSAVarData DVarTemp =
- hasDSA(D, isOpenMPPrivate, MatchesAlways(), FromParent);
+ auto &&MatchesAlways = [](OpenMPDirectiveKind) -> bool { return true; };
+ if (VD && VD->isStaticDataMember()) {
+ DSAVarData DVarTemp = hasDSA(D, isOpenMPPrivate, MatchesAlways, FromParent);
if (DVarTemp.CKind != OMPC_unknown && DVarTemp.RefExpr)
return DVar;
@@ -638,8 +704,9 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(VarDecl *D, bool FromParent) {
RD->hasMutableFields())) {
// Variables with const-qualified type having no mutable member may be
// listed in a firstprivate clause, even if they are static data members.
- DSAVarData DVarTemp = hasDSA(D, MatchesAnyClause(OMPC_firstprivate),
- MatchesAlways(), FromParent);
+ DSAVarData DVarTemp = hasDSA(
+ D, [](OpenMPClauseKind C) -> bool { return C == OMPC_firstprivate; },
+ MatchesAlways, FromParent);
if (DVarTemp.CKind == OMPC_firstprivate && DVarTemp.RefExpr)
return DVar;
@@ -656,7 +723,8 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(VarDecl *D, bool FromParent) {
}
auto I = std::prev(StartI);
if (I->SharingMap.count(D)) {
- DVar.RefExpr = I->SharingMap[D].RefExpr;
+ DVar.RefExpr = I->SharingMap[D].RefExpr.getPointer();
+ DVar.PrivateCopy = I->SharingMap[D].PrivateCopy;
DVar.CKind = I->SharingMap[D].Attributes;
DVar.ImplicitDSALoc = I->DefaultAttrLoc;
}
@@ -664,8 +732,9 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(VarDecl *D, bool FromParent) {
return DVar;
}
-DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(VarDecl *D, bool FromParent) {
- D = D->getCanonicalDecl();
+DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
+ bool FromParent) {
+ D = getCanonicalDecl(D);
auto StartI = Stack.rbegin();
auto EndI = std::prev(Stack.rend());
if (FromParent && StartI != EndI) {
@@ -674,13 +743,14 @@ DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(VarDecl *D, bool FromParent) {
return getDSA(StartI, D);
}
-template <class ClausesPredicate, class DirectivesPredicate>
-DSAStackTy::DSAVarData DSAStackTy::hasDSA(VarDecl *D, ClausesPredicate CPred,
- DirectivesPredicate DPred,
- bool FromParent) {
- D = D->getCanonicalDecl();
+DSAStackTy::DSAVarData
+DSAStackTy::hasDSA(ValueDecl *D,
+ const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+ const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
+ bool FromParent) {
+ D = getCanonicalDecl(D);
auto StartI = std::next(Stack.rbegin());
- auto EndI = std::prev(Stack.rend());
+ auto EndI = Stack.rend();
if (FromParent && StartI != EndI) {
StartI = std::next(StartI);
}
@@ -694,13 +764,13 @@ DSAStackTy::DSAVarData DSAStackTy::hasDSA(VarDecl *D, ClausesPredicate CPred,
return DSAVarData();
}
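// With the template predicates replaced by llvm::function_ref, callers pass
// plain lambdas; a hypothetical query (sketch, not part of the patch):
//
//   DSAStackTy::DSAVarData DVar = Stack->hasDSA(
//       D, [](OpenMPClauseKind C) { return C == OMPC_shared; },
//       [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K); },
//       /*FromParent=*/false);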
-template <class ClausesPredicate, class DirectivesPredicate>
-DSAStackTy::DSAVarData
-DSAStackTy::hasInnermostDSA(VarDecl *D, ClausesPredicate CPred,
- DirectivesPredicate DPred, bool FromParent) {
- D = D->getCanonicalDecl();
+DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA(
+ ValueDecl *D, const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+ const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
+ bool FromParent) {
+ D = getCanonicalDecl(D);
auto StartI = std::next(Stack.rbegin());
- auto EndI = std::prev(Stack.rend());
+ auto EndI = Stack.rend();
if (FromParent && StartI != EndI) {
StartI = std::next(StartI);
}
@@ -716,37 +786,41 @@ DSAStackTy::hasInnermostDSA(VarDecl *D, ClausesPredicate CPred,
}
bool DSAStackTy::hasExplicitDSA(
- VarDecl *D, const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
- unsigned Level) {
+ ValueDecl *D, const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+ unsigned Level, bool NotLastprivate) {
if (CPred(ClauseKindMode))
return true;
- if (isClauseParsingMode())
- ++Level;
- D = D->getCanonicalDecl();
- auto StartI = Stack.rbegin();
- auto EndI = std::prev(Stack.rend());
+ D = getCanonicalDecl(D);
+ auto StartI = std::next(Stack.begin());
+ auto EndI = Stack.end();
if (std::distance(StartI, EndI) <= (int)Level)
return false;
std::advance(StartI, Level);
- return (StartI->SharingMap.count(D) > 0) && StartI->SharingMap[D].RefExpr &&
- CPred(StartI->SharingMap[D].Attributes);
+ return (StartI->SharingMap.count(D) > 0) &&
+ StartI->SharingMap[D].RefExpr.getPointer() &&
+ CPred(StartI->SharingMap[D].Attributes) &&
+ (!NotLastprivate || !StartI->SharingMap[D].RefExpr.getInt());
}
bool DSAStackTy::hasExplicitDirective(
const llvm::function_ref<bool(OpenMPDirectiveKind)> &DPred,
unsigned Level) {
- if (isClauseParsingMode())
- ++Level;
- auto StartI = Stack.rbegin();
- auto EndI = std::prev(Stack.rend());
+ auto StartI = std::next(Stack.begin());
+ auto EndI = Stack.end();
if (std::distance(StartI, EndI) <= (int)Level)
return false;
std::advance(StartI, Level);
return DPred(StartI->Directive);
}
-template <class NamedDirectivesPredicate>
-bool DSAStackTy::hasDirective(NamedDirectivesPredicate DPred, bool FromParent) {
+bool DSAStackTy::hasDirective(
+ const llvm::function_ref<bool(OpenMPDirectiveKind,
+ const DeclarationNameInfo &, SourceLocation)>
+ &DPred,
+ bool FromParent) {
+ // We look only in the enclosing region.
+ if (Stack.size() < 2)
+ return false;
auto StartI = std::next(Stack.rbegin());
auto EndI = std::prev(Stack.rend());
if (FromParent && StartI != EndI) {
@@ -759,31 +833,22 @@ bool DSAStackTy::hasDirective(NamedDirectivesPredicate DPred, bool FromParent) {
return false;
}
-OpenMPDirectiveKind DSAStackTy::getDirectiveForScope(const Scope *S) const {
- for (auto I = Stack.rbegin(), EE = Stack.rend(); I != EE; ++I)
- if (I->CurScope == S)
- return I->Directive;
- return OMPD_unknown;
-}
-
void Sema::InitDataSharingAttributesStack() {
VarDataSharingAttributesStack = new DSAStackTy(*this);
}
#define DSAStack static_cast<DSAStackTy *>(VarDataSharingAttributesStack)
-bool Sema::IsOpenMPCapturedByRef(VarDecl *VD,
- const CapturedRegionScopeInfo *RSI) {
+bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
auto &Ctx = getASTContext();
bool IsByRef = true;
// Find the directive that is associated with the provided scope.
- auto DKind = DSAStack->getDirectiveForScope(RSI->TheScope);
- auto Ty = VD->getType();
+ auto Ty = D->getType();
- if (isOpenMPTargetDirective(DKind)) {
+ if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective, Level)) {
// This table summarizes how a given variable should be passed to the device
// given its type and the clauses where it appears. This table is based on
// the description in OpenMP 4.5 [2.10.4, target Construct] and
@@ -838,31 +903,83 @@ bool Sema::IsOpenMPCapturedByRef(VarDecl *VD,
// array section, the runtime library may pass the NULL value to the
// device instead of the value passed to it by the compiler.
- // FIXME: Right now, only implicit maps are implemented. Properly mapping
- // values requires having the map, private, and firstprivate clauses SEMA
- // and parsing in place, which we don't yet.
if (Ty->isReferenceType())
Ty = Ty->castAs<ReferenceType>()->getPointeeType();
- IsByRef = !Ty->isScalarType();
+
+ // Locate map clauses and see if the variable being captured is referred to
+ // in any of those clauses. Here we only care about variables, not fields,
+ // because fields are part of aggregates.
+ bool IsVariableUsedInMapClause = false;
+ bool IsVariableAssociatedWithSection = false;
+
+ DSAStack->checkMappableExprComponentListsForDecl(
+ D, /*CurrentRegionOnly=*/true,
+ [&](OMPClauseMappableExprCommon::MappableExprComponentListRef
+ MapExprComponents) {
+
+ auto EI = MapExprComponents.rbegin();
+ auto EE = MapExprComponents.rend();
+
+ assert(EI != EE && "Invalid map expression!");
+
+ if (isa<DeclRefExpr>(EI->getAssociatedExpression()))
+ IsVariableUsedInMapClause |= EI->getAssociatedDeclaration() == D;
+
+ ++EI;
+ if (EI == EE)
+ return false;
+
+ if (isa<ArraySubscriptExpr>(EI->getAssociatedExpression()) ||
+ isa<OMPArraySectionExpr>(EI->getAssociatedExpression()) ||
+ isa<MemberExpr>(EI->getAssociatedExpression())) {
+ IsVariableAssociatedWithSection = true;
+ // There is nothing more we need to know about this variable.
+ return true;
+ }
+
+ // Keep looking for more map info.
+ return false;
+ });
+
+ if (IsVariableUsedInMapClause) {
+ // If the variable is identified in a map clause it is always captured by
+ // reference, except if it is a pointer that is dereferenced somehow.
+ IsByRef = !(Ty->isPointerType() && IsVariableAssociatedWithSection);
+ } else {
+ // By default, all the data that has a scalar type is mapped by copy.
+ IsByRef = !Ty->isScalarType();
+ }
}
- // When passing data by value, we need to make sure it fits the uintptr size
+ if (IsByRef && Ty.getNonReferenceType()->isScalarType()) {
+ IsByRef = !DSAStack->hasExplicitDSA(
+ D, [](OpenMPClauseKind K) -> bool { return K == OMPC_firstprivate; },
+ Level, /*NotLastprivate=*/true);
+ }
+
+ // When passing data by copy, we need to make sure it fits the uintptr size
// and alignment, because the runtime library only deals with uintptr types.
// If it does not fit the uintptr size, we need to pass the data by reference
// instead.
if (!IsByRef &&
(Ctx.getTypeSizeInChars(Ty) >
Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) ||
- Ctx.getDeclAlign(VD) > Ctx.getTypeAlignInChars(Ctx.getUIntPtrType())))
+ Ctx.getDeclAlign(D) > Ctx.getTypeAlignInChars(Ctx.getUIntPtrType()))) {
IsByRef = true;
+ }
return IsByRef;
}
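// An illustrative OpenMP fragment (a sketch of the rules above, not part of
// the patch): a scalar that appears in no map clause is passed to the device
// by copy, while a pointer that is dereferenced through an array section in
// a map clause is also passed by copy, with only its pointee mapped:
//
//   int n = 100;
//   int *p = buf;
//   #pragma omp target map(tofrom: p[0:100])
//   {
//     p[0] = n;  // n captured by copy; p by copy, section p[0:100] mapped
//   }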
-bool Sema::IsOpenMPCapturedVar(VarDecl *VD) {
+unsigned Sema::getOpenMPNestingLevel() const {
+ assert(getLangOpts().OpenMP);
+ return DSAStack->getNestingLevel();
+}
+
+VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
- VD = VD->getCanonicalDecl();
+ D = getCanonicalDecl(D);
// If we are attempting to capture a global variable in a directive with
// 'target' we return true so that this global is also mapped to the device.
@@ -871,52 +988,55 @@ bool Sema::IsOpenMPCapturedVar(VarDecl *VD) {
// then it should not be captured. Therefore, an extra check has to be
// inserted here once support for 'declare target' is added.
//
- if (!VD->hasLocalStorage()) {
+ auto *VD = dyn_cast<VarDecl>(D);
+ if (VD && !VD->hasLocalStorage()) {
if (DSAStack->getCurrentDirective() == OMPD_target &&
- !DSAStack->isClauseParsingMode()) {
- return true;
- }
- if (DSAStack->getCurScope() &&
- DSAStack->hasDirective(
- [](OpenMPDirectiveKind K, const DeclarationNameInfo &DNI,
- SourceLocation Loc) -> bool {
- return isOpenMPTargetDirective(K);
+ !DSAStack->isClauseParsingMode())
+ return VD;
+ if (DSAStack->hasDirective(
+ [](OpenMPDirectiveKind K, const DeclarationNameInfo &,
+ SourceLocation) -> bool {
+ return isOpenMPTargetExecutionDirective(K);
},
- false)) {
- return true;
- }
+ false))
+ return VD;
}
if (DSAStack->getCurrentDirective() != OMPD_unknown &&
(!DSAStack->isClauseParsingMode() ||
DSAStack->getParentDirective() != OMPD_unknown)) {
- if (DSAStack->isLoopControlVariable(VD) ||
- (VD->hasLocalStorage() &&
+ auto &&Info = DSAStack->isLoopControlVariable(D);
+ if (Info.first ||
+ (VD && VD->hasLocalStorage() &&
isParallelOrTaskRegion(DSAStack->getCurrentDirective())) ||
- DSAStack->isForceVarCapturing())
- return true;
- auto DVarPrivate = DSAStack->getTopDSA(VD, DSAStack->isClauseParsingMode());
+ (VD && DSAStack->isForceVarCapturing()))
+ return VD ? VD : Info.second;
+ auto DVarPrivate = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode());
if (DVarPrivate.CKind != OMPC_unknown && isOpenMPPrivate(DVarPrivate.CKind))
- return true;
- DVarPrivate = DSAStack->hasDSA(VD, isOpenMPPrivate, MatchesAlways(),
- DSAStack->isClauseParsingMode());
- return DVarPrivate.CKind != OMPC_unknown;
+ return VD ? VD : cast<VarDecl>(DVarPrivate.PrivateCopy->getDecl());
+ DVarPrivate = DSAStack->hasDSA(
+ D, isOpenMPPrivate, [](OpenMPDirectiveKind) -> bool { return true; },
+ DSAStack->isClauseParsingMode());
+ if (DVarPrivate.CKind != OMPC_unknown)
+ return VD ? VD : cast<VarDecl>(DVarPrivate.PrivateCopy->getDecl());
}
- return false;
+ return nullptr;
}
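// IsOpenMPCapturedVar's boolean result becomes a VarDecl: either the decl
// itself or the private copy recorded on the stack. A hypothetical caller
// (sketch, not part of the patch):
//
//   if (VarDecl *VD = SemaRef.IsOpenMPCapturedDecl(D))
//     SemaRef.MarkVariableReferenced(Loc, VD); // force the capture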
-bool Sema::isOpenMPPrivateVar(VarDecl *VD, unsigned Level) {
+bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
return DSAStack->hasExplicitDSA(
- VD, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
+ D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
}
-bool Sema::isOpenMPTargetCapturedVar(VarDecl *VD, unsigned Level) {
+bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
// Return true if the current level is no longer enclosed in a target region.
- return !VD->hasLocalStorage() &&
- DSAStack->hasExplicitDirective(isOpenMPTargetDirective, Level);
+ auto *VD = dyn_cast<VarDecl>(D);
+ return VD && !VD->hasLocalStorage() &&
+ DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective,
+ Level);
}
void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
@@ -951,7 +1071,8 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
PrivateCopies.push_back(nullptr);
continue;
}
- auto *VD = cast<VarDecl>(cast<DeclRefExpr>(DE)->getDecl());
+ auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens());
+ VarDecl *VD = cast<VarDecl>(DRE->getDecl());
QualType Type = VD->getType().getNonReferenceType();
auto DVar = DSAStack->getTopDSA(VD, false);
if (DVar.CKind == OMPC_lastprivate) {
@@ -975,9 +1096,8 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
}
}
// Set initializers to private copies if no errors were found.
- if (PrivateCopies.size() == Clause->varlist_size()) {
+ if (PrivateCopies.size() == Clause->varlist_size())
Clause->setPrivateCopies(PrivateCopies);
- }
}
}
}
@@ -989,7 +1109,7 @@ void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
Expr *NumIterations, Sema &SemaRef,
- Scope *S);
+ Scope *S, DSAStackTy *Stack);
namespace {
@@ -1009,6 +1129,23 @@ public:
return false;
}
};
+
+class VarOrFuncDeclFilterCCC : public CorrectionCandidateCallback {
+private:
+ Sema &SemaRef;
+
+public:
+ explicit VarOrFuncDeclFilterCCC(Sema &S) : SemaRef(S) {}
+ bool ValidateCandidate(const TypoCorrection &Candidate) override {
+ NamedDecl *ND = Candidate.getCorrectionDecl();
+ if (isa<VarDecl>(ND) || isa<FunctionDecl>(ND)) {
+ return SemaRef.isDeclInScope(ND, SemaRef.getCurLexicalContext(),
+ SemaRef.getCurScope());
+ }
+ return false;
+ }
+};
+
} // namespace
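// A sketch of how a callback such as VarOrFuncDeclFilterCCC is typically
// handed to typo correction (illustrative; Id stands for whatever name is
// being corrected and is not part of the patch):
//
//   TypoCorrection Corrected = SemaRef.CorrectTypo(
//       Id, Sema::LookupOrdinaryName, CurScope, /*SS=*/nullptr,
//       llvm::make_unique<VarOrFuncDeclFilterCCC>(SemaRef),
//       Sema::CTK_ErrorRecovery);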
ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope,
@@ -1131,8 +1268,10 @@ ExprResult Sema::ActOnOpenMPIdExpression(Scope *CurScope,
}
QualType ExprType = VD->getType().getNonReferenceType();
- ExprResult DE = buildDeclRefExpr(*this, VD, ExprType, Id.getLoc());
- return DE;
+ return DeclRefExpr::Create(Context, NestedNameSpecifierLoc(),
+ SourceLocation(), VD,
+ /*RefersToEnclosingVariableOrCapture=*/false,
+ Id.getLoc(), ExprType, VK_LValue);
}
Sema::DeclGroupPtrTy
@@ -1142,7 +1281,7 @@ Sema::ActOnOpenMPThreadprivateDirective(SourceLocation Loc,
CurContext->addDecl(D);
return DeclGroupPtrTy::make(DeclGroupRef(D));
}
- return DeclGroupPtrTy();
+ return nullptr;
}
namespace {
@@ -1182,6 +1321,10 @@ Sema::CheckOMPThreadPrivateDecl(SourceLocation Loc, ArrayRef<Expr *> VarList) {
VarDecl *VD = cast<VarDecl>(DE->getDecl());
SourceLocation ILoc = DE->getExprLoc();
+ // Mark variable as used.
+ VD->setReferenced();
+ VD->markUsed(Context);
+
QualType QType = VD->getType();
if (QType->isDependentType() || QType->isInstantiationDependentType()) {
// It will be analyzed later.
@@ -1252,7 +1395,7 @@ Sema::CheckOMPThreadPrivateDecl(SourceLocation Loc, ArrayRef<Expr *> VarList) {
}
static void ReportOriginalDSA(Sema &SemaRef, DSAStackTy *Stack,
- const VarDecl *VD, DSAStackTy::DSAVarData DVar,
+ const ValueDecl *D, DSAStackTy::DSAVarData DVar,
bool IsLoopIterVar = false) {
if (DVar.RefExpr) {
SemaRef.Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_explicit_dsa)
@@ -1272,7 +1415,8 @@ static void ReportOriginalDSA(Sema &SemaRef, DSAStackTy *Stack,
PDSA_Implicit
} Reason = PDSA_Implicit;
bool ReportHint = false;
- auto ReportLoc = VD->getLocation();
+ auto ReportLoc = D->getLocation();
+ auto *VD = dyn_cast<VarDecl>(D);
if (IsLoopIterVar) {
if (DVar.CKind == OMPC_private)
Reason = PDSA_LoopIterVarPrivate;
@@ -1280,18 +1424,19 @@ static void ReportOriginalDSA(Sema &SemaRef, DSAStackTy *Stack,
Reason = PDSA_LoopIterVarLastprivate;
else
Reason = PDSA_LoopIterVarLinear;
- } else if (DVar.DKind == OMPD_task && DVar.CKind == OMPC_firstprivate) {
+ } else if (isOpenMPTaskingDirective(DVar.DKind) &&
+ DVar.CKind == OMPC_firstprivate) {
Reason = PDSA_TaskVarFirstprivate;
ReportLoc = DVar.ImplicitDSALoc;
- } else if (VD->isStaticLocal())
+ } else if (VD && VD->isStaticLocal())
Reason = PDSA_StaticLocalVarShared;
- else if (VD->isStaticDataMember())
+ else if (VD && VD->isStaticDataMember())
Reason = PDSA_StaticMemberShared;
- else if (VD->isFileVarDecl())
+ else if (VD && VD->isFileVarDecl())
Reason = PDSA_GlobalVarShared;
- else if (VD->getType().isConstant(SemaRef.getASTContext()))
+ else if (D->getType().isConstant(SemaRef.getASTContext()))
Reason = PDSA_ConstVarShared;
- else if (VD->isLocalVarDecl() && DVar.CKind == OMPC_private) {
+ else if (VD && VD->isLocalVarDecl() && DVar.CKind == OMPC_private) {
ReportHint = true;
Reason = PDSA_LocalVarPrivate;
}
@@ -1312,10 +1457,13 @@ class DSAAttrChecker : public StmtVisitor<DSAAttrChecker, void> {
bool ErrorFound;
CapturedStmt *CS;
llvm::SmallVector<Expr *, 8> ImplicitFirstprivate;
- llvm::DenseMap<VarDecl *, Expr *> VarsWithInheritedDSA;
+ llvm::DenseMap<ValueDecl *, Expr *> VarsWithInheritedDSA;
public:
void VisitDeclRefExpr(DeclRefExpr *E) {
+ if (E->isTypeDependent() || E->isValueDependent() ||
+ E->containsUnexpandedParameterPack() || E->isInstantiationDependent())
+ return;
if (auto *VD = dyn_cast<VarDecl>(E->getDecl())) {
// Skip internally declared variables.
if (VD->isLocalVarDecl() && !CS->capturesVariable(VD))
@@ -1342,14 +1490,14 @@ public:
// A list item that appears in a reduction clause of the innermost
// enclosing worksharing or parallel construct may not be accessed in an
// explicit task.
- DVar = Stack->hasInnermostDSA(VD, MatchesAnyClause(OMPC_reduction),
- [](OpenMPDirectiveKind K) -> bool {
- return isOpenMPParallelDirective(K) ||
- isOpenMPWorksharingDirective(K) ||
- isOpenMPTeamsDirective(K);
- },
- false);
- if (DKind == OMPD_task && DVar.CKind == OMPC_reduction) {
+ DVar = Stack->hasInnermostDSA(
+ VD, [](OpenMPClauseKind C) -> bool { return C == OMPC_reduction; },
+ [](OpenMPDirectiveKind K) -> bool {
+ return isOpenMPParallelDirective(K) ||
+ isOpenMPWorksharingDirective(K) || isOpenMPTeamsDirective(K);
+ },
+ false);
+ if (isOpenMPTaskingDirective(DKind) && DVar.CKind == OMPC_reduction) {
ErrorFound = true;
SemaRef.Diag(ELoc, diag::err_omp_reduction_in_task);
ReportOriginalDSA(SemaRef, Stack, VD, DVar);
@@ -1358,10 +1506,52 @@ public:
// Define implicit data-sharing attributes for task.
DVar = Stack->getImplicitDSA(VD, false);
- if (DKind == OMPD_task && DVar.CKind != OMPC_shared)
+ if (isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared &&
+ !Stack->isLoopControlVariable(VD).first)
ImplicitFirstprivate.push_back(E);
}
}
+ void VisitMemberExpr(MemberExpr *E) {
+ if (E->isTypeDependent() || E->isValueDependent() ||
+ E->containsUnexpandedParameterPack() || E->isInstantiationDependent())
+ return;
+ if (isa<CXXThisExpr>(E->getBase()->IgnoreParens())) {
+ if (auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) {
+ auto DVar = Stack->getTopDSA(FD, false);
+ // Check if the variable has explicit DSA set and stop analysis if so.
+ if (DVar.RefExpr)
+ return;
+
+ auto ELoc = E->getExprLoc();
+ auto DKind = Stack->getCurrentDirective();
+ // OpenMP [2.9.3.6, Restrictions, p.2]
+ // A list item that appears in a reduction clause of the innermost
+ // enclosing worksharing or parallel construct may not be accessed in
+ // an explicit task.
+ DVar = Stack->hasInnermostDSA(
+ FD, [](OpenMPClauseKind C) -> bool { return C == OMPC_reduction; },
+ [](OpenMPDirectiveKind K) -> bool {
+ return isOpenMPParallelDirective(K) ||
+ isOpenMPWorksharingDirective(K) ||
+ isOpenMPTeamsDirective(K);
+ },
+ false);
+ if (isOpenMPTaskingDirective(DKind) && DVar.CKind == OMPC_reduction) {
+ ErrorFound = true;
+ SemaRef.Diag(ELoc, diag::err_omp_reduction_in_task);
+ ReportOriginalDSA(SemaRef, Stack, FD, DVar);
+ return;
+ }
+
+ // Define implicit data-sharing attributes for task.
+ DVar = Stack->getImplicitDSA(FD, false);
+ if (isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared &&
+ !Stack->isLoopControlVariable(FD).first)
+ ImplicitFirstprivate.push_back(E);
+ }
+ }
+ }
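// The new VisitMemberExpr handles fields accessed through 'this' inside
// tasking regions; a minimal sketch (illustrative, not part of the patch):
//
//   struct S {
//     int a;
//     void foo() {
//   #pragma omp task
//       a = 1; // 'this->a' is analyzed here and implicitly firstprivatized
//     }
//   };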
void VisitOMPExecutableDirective(OMPExecutableDirective *S) {
for (auto *C : S->clauses()) {
// Skip analysis of arguments of implicitly defined firstprivate clause
@@ -1382,7 +1572,7 @@ public:
bool isErrorFound() { return ErrorFound; }
ArrayRef<Expr *> getImplicitFirstprivate() { return ImplicitFirstprivate; }
- llvm::DenseMap<VarDecl *, Expr *> &getVarsWithInheritedDSA() {
+ llvm::DenseMap<ValueDecl *, Expr *> &getVarsWithInheritedDSA() {
return VarsWithInheritedDSA;
}
@@ -1393,84 +1583,11 @@ public:
void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
switch (DKind) {
- case OMPD_parallel: {
- QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
- QualType KmpInt32PtrTy =
- Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(".global_tid.", KmpInt32PtrTy),
- std::make_pair(".bound_tid.", KmpInt32PtrTy),
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_simd: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_for: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_for_simd: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_sections: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_section: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_single: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_master: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_critical: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_parallel_for: {
+ case OMPD_parallel:
+ case OMPD_parallel_for:
+ case OMPD_parallel_for_simd:
+ case OMPD_parallel_sections:
+ case OMPD_teams: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
@@ -1483,46 +1600,78 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
- case OMPD_parallel_for_simd: {
- QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
- QualType KmpInt32PtrTy =
- Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
+ case OMPD_simd:
+ case OMPD_for:
+ case OMPD_for_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskgroup:
+ case OMPD_distribute:
+ case OMPD_ordered:
+ case OMPD_atomic:
+ case OMPD_target_data:
+ case OMPD_target:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd: {
Sema::CapturedParamNameType Params[] = {
- std::make_pair(".global_tid.", KmpInt32PtrTy),
- std::make_pair(".bound_tid.", KmpInt32PtrTy),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params);
break;
}
- case OMPD_parallel_sections: {
+ case OMPD_task: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
- QualType KmpInt32PtrTy =
- Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
+ QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+ FunctionProtoType::ExtProtoInfo EPI;
+ EPI.Variadic = true;
+ QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
Sema::CapturedParamNameType Params[] = {
- std::make_pair(".global_tid.", KmpInt32PtrTy),
- std::make_pair(".bound_tid.", KmpInt32PtrTy),
+ std::make_pair(".global_tid.", KmpInt32Ty),
+ std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+ std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
+ std::make_pair(".copy_fn.",
+ Context.getPointerType(CopyFnType).withConst()),
+ std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params);
+ // Mark this captured region as inlined, because we don't use the outlined
+ // function directly.
+ getCurCapturedRegion()->TheCapturedDecl->addAttr(
+ AlwaysInlineAttr::CreateImplicit(
+ Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
break;
}
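// The captured parameters above mirror a task entry of roughly this shape
// (a sketch assuming the usual kmpc conventions; the exact runtime
// signature is not spelled out in the patch):
//
//   void task_entry(kmp_int32 global_tid, kmp_int32 *part_id,
//                   void *privates, void (*copy_fn)(void *, ...),
//                   void *task_t /*, captured context follows */);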
- case OMPD_task: {
- QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd: {
+ QualType KmpInt32Ty =
+ Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ QualType KmpUInt64Ty =
+ Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ QualType KmpInt64Ty =
+ Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = true;
QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
Sema::CapturedParamNameType Params[] = {
std::make_pair(".global_tid.", KmpInt32Ty),
- std::make_pair(".part_id.", KmpInt32Ty),
+ std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
std::make_pair(".privates.",
Context.VoidPtrTy.withConst().withRestrict()),
std::make_pair(
".copy_fn.",
Context.getPointerType(CopyFnType).withConst().withRestrict()),
+ std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
+ std::make_pair(".lb.", KmpUInt64Ty),
+ std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty),
+ std::make_pair(".liter.", KmpInt32Ty),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
@@ -1534,70 +1683,17 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange()));
break;
}
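// The extra .lb./.ub./.st./.liter. parameters carry the per-task chunk of
// the iteration space; an illustrative taskloop (sketch, not part of the
// patch; N and work() are placeholders) whose generated tasks receive
// exactly those bounds:
//
//   #pragma omp taskloop grainsize(4)
//   for (unsigned long long i = 0; i < N; ++i)
//     work(i); // each task gets its own [lb, ub] range and stride st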
- case OMPD_ordered: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_atomic: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_target_data:
- case OMPD_target: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_teams: {
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
Sema::CapturedParamNameType Params[] = {
std::make_pair(".global_tid.", KmpInt32PtrTy),
std::make_pair(".bound_tid.", KmpInt32PtrTy),
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_taskgroup: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_taskloop: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_taskloop_simd: {
- Sema::CapturedParamNameType Params[] = {
- std::make_pair(StringRef(), QualType()) // __context with shared vars
- };
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- Params);
- break;
- }
- case OMPD_distribute: {
- Sema::CapturedParamNameType Params[] = {
+ std::make_pair(".previous.lb.", Context.getSizeType()),
+ std::make_pair(".previous.ub.", Context.getSizeType()),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
@@ -1611,12 +1707,78 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
case OMPD_cancellation_point:
case OMPD_cancel:
case OMPD_flush:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_declare_reduction:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_target_update:
llvm_unreachable("OpenMP Directive is not allowed");
case OMPD_unknown:
llvm_unreachable("Unknown OpenMP directive");
}
}
+static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id,
+ Expr *CaptureExpr, bool WithInit,
+ bool AsExpression) {
+ assert(CaptureExpr);
+ ASTContext &C = S.getASTContext();
+ Expr *Init = AsExpression ? CaptureExpr : CaptureExpr->IgnoreImpCasts();
+ QualType Ty = Init->getType();
+ if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) {
+ if (S.getLangOpts().CPlusPlus)
+ Ty = C.getLValueReferenceType(Ty);
+ else {
+ Ty = C.getPointerType(Ty);
+ ExprResult Res =
+ S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_AddrOf, Init);
+ if (!Res.isUsable())
+ return nullptr;
+ Init = Res.get();
+ }
+ WithInit = true;
+ }
+ auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty);
+ if (!WithInit)
+ CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange()));
+ S.CurContext->addHiddenDecl(CED);
+ S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false,
+ /*TypeMayContainAuto=*/true);
+ return CED;
+}
+
+static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr,
+ bool WithInit) {
+ OMPCapturedExprDecl *CD;
+ if (auto *VD = S.IsOpenMPCapturedDecl(D))
+ CD = cast<OMPCapturedExprDecl>(VD);
+ else
+ CD = buildCaptureDecl(S, D->getIdentifier(), CaptureExpr, WithInit,
+ /*AsExpression=*/false);
+ return buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
+ CaptureExpr->getExprLoc());
+}
+
+static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) {
+ if (!Ref) {
+ auto *CD =
+ buildCaptureDecl(S, &S.getASTContext().Idents.get(".capture_expr."),
+ CaptureExpr, /*WithInit=*/true, /*AsExpression=*/true);
+ Ref = buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
+ CaptureExpr->getExprLoc());
+ }
+ ExprResult Res = Ref;
+ if (!S.getLangOpts().CPlusPlus &&
+ CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue() &&
+ Ref->getType()->isPointerType())
+ Res = S.CreateBuiltinUnaryOp(CaptureExpr->getExprLoc(), UO_Deref, Ref);
+ if (!Res.isUsable())
+ return ExprError();
+ return CaptureExpr->isGLValue() ? Res : S.DefaultLvalueConversion(Res.get());
+}
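// A hypothetical use of the expression-capturing helper (sketch, not part
// of the patch; ChunkSizeExpr is a placeholder): materialize a clause
// expression once so later code can reuse the captured value:
//
//   DeclRefExpr *Ref = nullptr;
//   ExprResult Captured = buildCapture(S, ChunkSizeExpr, Ref);
//   if (Captured.isInvalid())
//     return ExprError();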
+
StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
ArrayRef<OMPClause *> Clauses) {
if (!S.isUsable()) {
@@ -1642,14 +1804,20 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
}
}
DSAStack->setForceVarCapturing(/*V=*/false);
- } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective()) &&
- Clause->getClauseKind() == OMPC_schedule) {
+ } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) {
// Mark all variables in private list clauses as used in inner region.
// Required for proper codegen of combined directives.
// TODO: add processing for other clauses.
- if (auto *E = cast_or_null<Expr>(
- cast<OMPScheduleClause>(Clause)->getHelperChunkSize()))
- MarkDeclarationsReferencedInExpr(E);
+ if (auto *C = OMPClauseWithPreInit::get(Clause)) {
+ if (auto *DS = cast_or_null<DeclStmt>(C->getPreInitStmt())) {
+ for (auto *D : DS->decls())
+ MarkVariableReferenced(D->getLocation(), cast<VarDecl>(D));
+ }
+ }
+ if (auto *C = OMPClauseWithPostUpdate::get(Clause)) {
+ if (auto *E = C->getPostUpdateExpr())
+ MarkDeclarationsReferencedInExpr(E);
+ }
}
if (Clause->getClauseKind() == OMPC_schedule)
SC = cast<OMPScheduleClause>(Clause);
@@ -1725,13 +1893,25 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel | ordered | + |
// | parallel | atomic | * |
// | parallel | target | * |
+ // | parallel | target parallel | * |
+ // | parallel | target parallel | * |
+ // | | for | |
+ // | parallel | target enter | * |
+ // | | data | |
+ // | parallel | target exit | * |
+ // | | data | |
// | parallel | teams | + |
// | parallel | cancellation | |
// | | point | ! |
// | parallel | cancel | ! |
// | parallel | taskloop | * |
// | parallel | taskloop simd | * |
- // | parallel | distribute | |
+ // | parallel | distribute | + |
+ // | parallel | distribute | + |
+ // | | parallel for | |
+ // | parallel | distribute | + |
+ // | |parallel for simd| |
+ // | parallel | distribute simd | + |
// +------------------+-----------------+------------------------------------+
// | for | parallel | * |
// | for | for | + |
@@ -1754,13 +1934,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | for | ordered | * (if construct is ordered) |
// | for | atomic | * |
// | for | target | * |
+ // | for | target parallel | * |
+ // | for | target parallel | * |
+ // | | for | |
+ // | for | target enter | * |
+ // | | data | |
+ // | for | target exit | * |
+ // | | data | |
// | for | teams | + |
// | for | cancellation | |
// | | point | ! |
// | for | cancel | ! |
// | for | taskloop | * |
// | for | taskloop simd | * |
- // | for | distribute | |
+ // | for | distribute | + |
+ // | for | distribute | + |
+ // | | parallel for | |
+ // | for | distribute | + |
+ // | |parallel for simd| |
+ // | for | distribute simd | + |
+ // | for | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | master | parallel | * |
// | master | for | + |
@@ -1783,13 +1977,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | master | ordered | + |
// | master | atomic | * |
// | master | target | * |
+ // | master | target parallel | * |
+ // | master | target parallel | * |
+ // | | for | |
+ // | master | target enter | * |
+ // | | data | |
+ // | master | target exit | * |
+ // | | data | |
// | master | teams | + |
// | master | cancellation | |
// | | point | |
// | master | cancel | |
// | master | taskloop | * |
// | master | taskloop simd | * |
- // | master | distribute | |
+ // | master | distribute | + |
+ // | master | distribute | + |
+ // | | parallel for | |
+ // | master | distribute | + |
+ // | |parallel for simd| |
+ // | master | distribute simd | + |
+ // | master | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | critical | parallel | * |
// | critical | for | + |
@@ -1811,20 +2019,34 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | critical | ordered | + |
// | critical | atomic | * |
// | critical | target | * |
+ // | critical | target parallel | * |
+ // | critical | target parallel | * |
+ // | | for | |
+ // | critical | target enter | * |
+ // | | data | |
+ // | critical | target exit | * |
+ // | | data | |
// | critical | teams | + |
// | critical | cancellation | |
// | | point | |
// | critical | cancel | |
// | critical | taskloop | * |
// | critical | taskloop simd | * |
- // | critical | distribute | |
+ // | critical | distribute | + |
+ // | critical | distribute | + |
+ // | | parallel for | |
+ // | critical | distribute | + |
+ // | |parallel for simd| |
+ // | critical | distribute simd | + |
+ // | critical | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | simd | parallel | |
// | simd | for | |
// | simd | for simd | |
// | simd | master | |
// | simd | critical | |
- // | simd | simd | |
+ // | simd | simd | * |
// | simd | sections | |
// | simd | section | |
// | simd | single | |
@@ -1840,6 +2062,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | simd | ordered | + (with simd clause) |
// | simd | atomic | |
// | simd | target | |
+ // | simd | target parallel | |
+ // | simd | target parallel | |
+ // | | for | |
+ // | simd | target enter | |
+ // | | data | |
+ // | simd | target exit | |
+ // | | data | |
// | simd | teams | |
// | simd | cancellation | |
// | | point | |
@@ -1847,13 +2076,20 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | simd | taskloop | |
// | simd | taskloop simd | |
// | simd | distribute | |
+ // | simd | distribute | |
+ // | | parallel for | |
+ // | simd | distribute | |
+ // | |parallel for simd| |
+ // | simd | distribute simd | |
+ // | simd | target parallel | |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | for simd | parallel | |
// | for simd | for | |
// | for simd | for simd | |
// | for simd | master | |
// | for simd | critical | |
- // | for simd | simd | |
+ // | for simd | simd | * |
// | for simd | sections | |
// | for simd | section | |
// | for simd | single | |
@@ -1869,6 +2105,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | for simd | ordered | + (with simd clause) |
// | for simd | atomic | |
// | for simd | target | |
+ // | for simd | target parallel | |
+ // | for simd | target parallel | |
+ // | | for | |
+ // | for simd | target enter | |
+ // | | data | |
+ // | for simd | target exit | |
+ // | | data | |
// | for simd | teams | |
// | for simd | cancellation | |
// | | point | |
@@ -1876,13 +2119,20 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | for simd | taskloop | |
// | for simd | taskloop simd | |
// | for simd | distribute | |
+ // | for simd | distribute | |
+ // | | parallel for | |
+ // | for simd | distribute | |
+ // | |parallel for simd| |
+ // | for simd | distribute simd | |
+ // | for simd | target parallel | |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | parallel for simd| parallel | |
// | parallel for simd| for | |
// | parallel for simd| for simd | |
// | parallel for simd| master | |
// | parallel for simd| critical | |
- // | parallel for simd| simd | |
+ // | parallel for simd| simd | * |
// | parallel for simd| sections | |
// | parallel for simd| section | |
// | parallel for simd| single | |
@@ -1898,6 +2148,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel for simd| ordered | + (with simd clause) |
// | parallel for simd| atomic | |
// | parallel for simd| target | |
+ // | parallel for simd| target parallel | |
+ // | parallel for simd| target parallel | |
+ // | | for | |
+ // | parallel for simd| target enter | |
+ // | | data | |
+ // | parallel for simd| target exit | |
+ // | | data | |
// | parallel for simd| teams | |
// | parallel for simd| cancellation | |
// | | point | |
@@ -1905,6 +2162,12 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel for simd| taskloop | |
// | parallel for simd| taskloop simd | |
// | parallel for simd| distribute | |
+ // | parallel for simd| distribute | |
+ // | | parallel for | |
+ // | parallel for simd| distribute | |
+ // | |parallel for simd| |
+ // | parallel for simd| distribute simd | |
+ // | parallel for simd| target parallel | |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | sections | parallel | * |
// | sections | for | + |
@@ -1927,13 +2190,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | sections | ordered | + |
// | sections | atomic | * |
// | sections | target | * |
+ // | sections | target parallel | * |
+ // | sections | target parallel | * |
+ // | | for | |
+ // | sections | target enter | * |
+ // | | data | |
+ // | sections | target exit | * |
+ // | | data | |
// | sections | teams | + |
// | sections | cancellation | |
// | | point | ! |
// | sections | cancel | ! |
// | sections | taskloop | * |
// | sections | taskloop simd | * |
- // | sections | distribute | |
+ // | sections | distribute | + |
+ // | sections | distribute | + |
+ // | | parallel for | |
+ // | sections | distribute | + |
+ // | |parallel for simd| |
+ // | sections | distribute simd | + |
+ // | sections | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | section | parallel | * |
// | section | for | + |
@@ -1956,13 +2233,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | section | ordered | + |
// | section | atomic | * |
// | section | target | * |
+ // | section | target parallel | * |
+ // | section | target parallel | * |
+ // | | for | |
+ // | section | target enter | * |
+ // | | data | |
+ // | section | target exit | * |
+ // | | data | |
// | section | teams | + |
// | section | cancellation | |
// | | point | ! |
// | section | cancel | ! |
// | section | taskloop | * |
// | section | taskloop simd | * |
- // | section | distribute | |
+ // | section | distribute | + |
+ // | section | distribute | + |
+ // | | parallel for | |
+ // | section | distribute | + |
+ // | |parallel for simd| |
+ // | section | distribute simd | + |
+ // | section | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | single | parallel | * |
// | single | for | + |
@@ -1985,13 +2276,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | single | ordered | + |
// | single | atomic | * |
// | single | target | * |
+ // | single | target parallel | * |
+ // | single | target parallel | * |
+ // | | for | |
+ // | single | target enter | * |
+ // | | data | |
+ // | single | target exit | * |
+ // | | data | |
// | single | teams | + |
// | single | cancellation | |
// | | point | |
// | single | cancel | |
// | single | taskloop | * |
// | single | taskloop simd | * |
- // | single | distribute | |
+ // | single | distribute | + |
+ // | single | distribute | + |
+ // | | parallel for | |
+ // | single | distribute | + |
+ // | |parallel for simd| |
+ // | single | distribute simd | + |
+ // | single | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | parallel for | parallel | * |
// | parallel for | for | + |
@@ -2014,13 +2319,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel for | ordered | * (if construct is ordered) |
// | parallel for | atomic | * |
// | parallel for | target | * |
+ // | parallel for | target parallel | * |
+ // | parallel for | target parallel | * |
+ // | | for | |
+ // | parallel for | target enter | * |
+ // | | data | |
+ // | parallel for | target exit | * |
+ // | | data | |
// | parallel for | teams | + |
// | parallel for | cancellation | |
// | | point | ! |
// | parallel for | cancel | ! |
// | parallel for | taskloop | * |
// | parallel for | taskloop simd | * |
- // | parallel for | distribute | |
+ // | parallel for | distribute | + |
+ // | parallel for | distribute | + |
+ // | | parallel for | |
+ // | parallel for | distribute | + |
+ // | |parallel for simd| |
+ // | parallel for | distribute simd | + |
+ // | parallel for | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | parallel sections| parallel | * |
// | parallel sections| for | + |
@@ -2043,13 +2362,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | parallel sections| ordered | + |
// | parallel sections| atomic | * |
// | parallel sections| target | * |
+ // | parallel sections| target parallel | * |
+ // | parallel sections| target parallel | * |
+ // | | for | |
+ // | parallel sections| target enter | * |
+ // | | data | |
+ // | parallel sections| target exit | * |
+ // | | data | |
// | parallel sections| teams | + |
// | parallel sections| cancellation | |
// | | point | ! |
// | parallel sections| cancel | ! |
// | parallel sections| taskloop | * |
// | parallel sections| taskloop simd | * |
- // | parallel sections| distribute | |
+ // | parallel sections| distribute | + |
+ // | parallel sections| distribute | + |
+ // | | parallel for | |
+ // | parallel sections| distribute | + |
+ // | |parallel for simd| |
+ // | parallel sections| distribute simd | + |
+ // | parallel sections| target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | task | parallel | * |
// | task | for | + |
@@ -2072,13 +2405,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | task | ordered | + |
// | task | atomic | * |
// | task | target | * |
+ // | task | target parallel | * |
+ // | task | target parallel | * |
+ // | | for | |
+ // | task | target enter | * |
+ // | | data | |
+ // | task | target exit | * |
+ // | | data | |
// | task | teams | + |
// | task | cancellation | |
// | | point | ! |
// | task | cancel | ! |
// | task | taskloop | * |
// | task | taskloop simd | * |
- // | task | distribute | |
+ // | task | distribute | + |
+ // | task | distribute | + |
+ // | | parallel for | |
+ // | task | distribute | + |
+ // | |parallel for simd| |
+ // | task | distribute simd | + |
+ // | task | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | ordered | parallel | * |
// | ordered | for | + |
@@ -2101,13 +2448,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | ordered | ordered | + |
// | ordered | atomic | * |
// | ordered | target | * |
+ // | ordered | target parallel | * |
+ // | ordered | target parallel | * |
+ // | | for | |
+ // | ordered | target enter | * |
+ // | | data | |
+ // | ordered | target exit | * |
+ // | | data | |
// | ordered | teams | + |
// | ordered | cancellation | |
// | | point | |
// | ordered | cancel | |
// | ordered | taskloop | * |
// | ordered | taskloop simd | * |
- // | ordered | distribute | |
+ // | ordered | distribute | + |
+ // | ordered | distribute | + |
+ // | | parallel for | |
+ // | ordered | distribute | + |
+ // | |parallel for simd| |
+ // | ordered | distribute simd | + |
+ // | ordered | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | atomic | parallel | |
// | atomic | for | |
@@ -2130,13 +2491,27 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | atomic | ordered | |
// | atomic | atomic | |
// | atomic | target | |
+ // | atomic | target parallel | |
+ // | atomic | target parallel | |
+ // | | for | |
+ // | atomic | target enter | |
+ // | | data | |
+ // | atomic | target exit | |
+ // | | data | |
// | atomic | teams | |
// | atomic | cancellation | |
// | | point | |
// | atomic | cancel | |
// | atomic | taskloop | |
// | atomic | taskloop simd | |
- // | atomic | distribute | |
+ // | atomic | distribute | |
+ // | atomic | distribute | |
+ // | | parallel for | |
+ // | atomic | distribute | |
+ // | |parallel for simd| |
+ // | atomic | distribute simd | |
+ // | atomic | target parallel | |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | target | parallel | * |
// | target | for | * |
@@ -2158,14 +2533,142 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | target | flush | * |
// | target | ordered | * |
// | target | atomic | * |
- // | target | target | * |
+ // | target | target | |
+ // | target | target parallel | |
+ // | target | target parallel | |
+ // | | for | |
+ // | target | target enter | |
+ // | | data | |
+ // | target | target exit | |
+ // | | data | |
// | target | teams | * |
// | target | cancellation | |
// | | point | |
// | target | cancel | |
// | target | taskloop | * |
// | target | taskloop simd | * |
- // | target | distribute | |
+ // | target | distribute | + |
+ // | target | distribute | + |
+ // | | parallel for | |
+ // | target | distribute | + |
+ // | |parallel for simd| |
+ // | target | distribute simd | + |
+ // | target | target parallel | |
+ // | | for simd | |
+ // +------------------+-----------------+------------------------------------+
+ // | target parallel | parallel | * |
+ // | target parallel | for | * |
+ // | target parallel | for simd | * |
+ // | target parallel | master | * |
+ // | target parallel | critical | * |
+ // | target parallel | simd | * |
+ // | target parallel | sections | * |
+ // | target parallel | section | * |
+ // | target parallel | single | * |
+ // | target parallel | parallel for | * |
+ // | target parallel |parallel for simd| * |
+ // | target parallel |parallel sections| * |
+ // | target parallel | task | * |
+ // | target parallel | taskyield | * |
+ // | target parallel | barrier | * |
+ // | target parallel | taskwait | * |
+ // | target parallel | taskgroup | * |
+ // | target parallel | flush | * |
+ // | target parallel | ordered | * |
+ // | target parallel | atomic | * |
+ // | target parallel | target | |
+ // | target parallel | target parallel | |
+ // | target parallel | target parallel | |
+ // | | for | |
+ // | target parallel | target enter | |
+ // | | data | |
+ // | target parallel | target exit | |
+ // | | data | |
+ // | target parallel | teams | |
+ // | target parallel | cancellation | |
+ // | | point | ! |
+ // | target parallel | cancel | ! |
+ // | target parallel | taskloop | * |
+ // | target parallel | taskloop simd | * |
+ // | target parallel | distribute | |
+ // | target parallel | distribute | |
+ // | | parallel for | |
+ // | target parallel | distribute | |
+ // | |parallel for simd| |
+ // | target parallel | distribute simd | |
+ // | target parallel | target parallel | |
+ // | | for simd | |
+ // +------------------+-----------------+------------------------------------+
+ // | target parallel | parallel | * |
+ // | for | | |
+ // | target parallel | for | * |
+ // | for | | |
+ // | target parallel | for simd | * |
+ // | for | | |
+ // | target parallel | master | * |
+ // | for | | |
+ // | target parallel | critical | * |
+ // | for | | |
+ // | target parallel | simd | * |
+ // | for | | |
+ // | target parallel | sections | * |
+ // | for | | |
+ // | target parallel | section | * |
+ // | for | | |
+ // | target parallel | single | * |
+ // | for | | |
+ // | target parallel | parallel for | * |
+ // | for | | |
+ // | target parallel |parallel for simd| * |
+ // | for | | |
+ // | target parallel |parallel sections| * |
+ // | for | | |
+ // | target parallel | task | * |
+ // | for | | |
+ // | target parallel | taskyield | * |
+ // | for | | |
+ // | target parallel | barrier | * |
+ // | for | | |
+ // | target parallel | taskwait | * |
+ // | for | | |
+ // | target parallel | taskgroup | * |
+ // | for | | |
+ // | target parallel | flush | * |
+ // | for | | |
+ // | target parallel | ordered | * |
+ // | for | | |
+ // | target parallel | atomic | * |
+ // | for | | |
+ // | target parallel | target | |
+ // | for | | |
+ // | target parallel | target parallel | |
+ // | for | | |
+ // | target parallel | target parallel | |
+ // | for | for | |
+ // | target parallel | target enter | |
+ // | for | data | |
+ // | target parallel | target exit | |
+ // | for | data | |
+ // | target parallel | teams | |
+ // | for | | |
+ // | target parallel | cancellation | |
+ // | for | point | ! |
+ // | target parallel | cancel | ! |
+ // | for | | |
+ // | target parallel | taskloop | * |
+ // | for | | |
+ // | target parallel | taskloop simd | * |
+ // | for | | |
+ // | target parallel | distribute | |
+ // | for | | |
+ // | target parallel | distribute | |
+ // | for | parallel for | |
+ // | target parallel | distribute | |
+ // | for |parallel for simd| |
+ // | target parallel | distribute simd | |
+ // | for | | |
+ // | target parallel | target parallel | |
+ // | for | for simd | |
// +------------------+-----------------+------------------------------------+
// | teams | parallel | * |
// | teams | for | + |
@@ -2188,6 +2691,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | teams | ordered | + |
// | teams | atomic | + |
// | teams | target | + |
+ // | teams | target parallel | + |
+ // | teams | target parallel | + |
+ // | | for | |
+ // | teams | target enter | + |
+ // | | data | |
+ // | teams | target exit | + |
+ // | | data | |
// | teams | teams | + |
// | teams | cancellation | |
// | | point | |
@@ -2195,6 +2705,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | teams | taskloop | + |
// | teams | taskloop simd | + |
// | teams | distribute | ! |
+ // | teams | distribute | ! |
+ // | | parallel for | |
+ // | teams | distribute | ! |
+ // | |parallel for simd| |
+ // | teams | distribute simd | ! |
+ // | teams | target parallel | + |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | taskloop | parallel | * |
// | taskloop | for | + |
@@ -2217,19 +2734,33 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | taskloop | ordered | + |
// | taskloop | atomic | * |
// | taskloop | target | * |
+ // | taskloop | target parallel | * |
+ // | taskloop | target parallel | * |
+ // | | for | |
+ // | taskloop | target enter | * |
+ // | | data | |
+ // | taskloop | target exit | * |
+ // | | data | |
// | taskloop | teams | + |
// | taskloop | cancellation | |
// | | point | |
// | taskloop | cancel | |
// | taskloop | taskloop | * |
- // | taskloop | distribute | |
+ // | taskloop | distribute | + |
+ // | taskloop | distribute | + |
+ // | | parallel for | |
+ // | taskloop | distribute | + |
+ // | |parallel for simd| |
+ // | taskloop | distribute simd | + |
+ // | taskloop | target parallel | * |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | taskloop simd | parallel | |
// | taskloop simd | for | |
// | taskloop simd | for simd | |
// | taskloop simd | master | |
// | taskloop simd | critical | |
- // | taskloop simd | simd | |
+ // | taskloop simd | simd | * |
// | taskloop simd | sections | |
// | taskloop simd | section | |
// | taskloop simd | single | |
@@ -2245,6 +2776,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | taskloop simd | ordered | + (with simd clause) |
// | taskloop simd | atomic | |
// | taskloop simd | target | |
+ // | taskloop simd | target parallel | |
+ // | taskloop simd | target parallel | |
+ // | | for | |
+ // | taskloop simd | target enter | |
+ // | | data | |
+ // | taskloop simd | target exit | |
+ // | | data | |
// | taskloop simd | teams | |
// | taskloop simd | cancellation | |
// | | point | |
@@ -2252,6 +2790,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | taskloop simd | taskloop | |
// | taskloop simd | taskloop simd | |
// | taskloop simd | distribute | |
+ // | taskloop simd | distribute | |
+ // | | parallel for | |
+ // | taskloop simd | distribute | |
+ // | |parallel for simd| |
+ // | taskloop simd | distribute simd | |
+ // | taskloop simd | target parallel | |
+ // | | for simd | |
// +------------------+-----------------+------------------------------------+
// | distribute | parallel | * |
// | distribute | for | * |
@@ -2274,6 +2819,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | distribute | ordered | + |
// | distribute | atomic | * |
// | distribute | target | |
+ // | distribute | target parallel | |
+ // | distribute | target parallel | |
+ // | | for | |
+ // | distribute | target enter | |
+ // | | data | |
+ // | distribute | target exit | |
+ // | | data | |
// | distribute | teams | |
// | distribute | cancellation | + |
// | | point | |
@@ -2281,9 +2833,274 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// | distribute | taskloop | * |
// | distribute | taskloop simd | * |
// | distribute | distribute | |
+ // | distribute | distribute | |
+ // | | parallel for | |
+ // | distribute | distribute | |
+ // | |parallel for simd| |
+ // | distribute | distribute simd | |
+ // | distribute | target parallel | |
+ // | | for simd | |
+ // +------------------+-----------------+------------------------------------+
+ // | distribute | parallel | * |
+ // | parallel for | | |
+ // | distribute | for | * |
+ // | parallel for | | |
+ // | distribute | for simd | * |
+ // | parallel for | | |
+ // | distribute | master | * |
+ // | parallel for | | |
+ // | distribute | critical | * |
+ // | parallel for | | |
+ // | distribute | simd | * |
+ // | parallel for | | |
+ // | distribute | sections | * |
+ // | parallel for | | |
+ // | distribute | section | * |
+ // | parallel for | | |
+ // | distribute | single | * |
+ // | parallel for | | |
+ // | distribute | parallel for | * |
+ // | parallel for | | |
+ // | distribute |parallel for simd| * |
+ // | parallel for | | |
+ // | distribute |parallel sections| * |
+ // | parallel for | | |
+ // | distribute | task | * |
+ // | parallel for | | |
+ // | distribute | taskyield | * |
+ // | parallel for | | |
+ // | distribute | barrier | * |
+ // | parallel for | | |
+ // | distribute | taskwait | * |
+ // | parallel for | | |
+ // | distribute | taskgroup | * |
+ // | parallel for | | |
+ // | distribute | flush | * |
+ // | parallel for | | |
+ // | distribute | ordered | + |
+ // | parallel for | | |
+ // | distribute | atomic | * |
+ // | parallel for | | |
+ // | distribute | target | |
+ // | parallel for | | |
+ // | distribute | target parallel | |
+ // | parallel for | | |
+ // | distribute | target parallel | |
+ // | parallel for | for | |
+ // | distribute | target enter | |
+ // | parallel for | data | |
+ // | distribute | target exit | |
+ // | parallel for | data | |
+ // | distribute | teams | |
+ // | parallel for | | |
+ // | distribute | cancellation | + |
+ // | parallel for | point | |
+ // | distribute | cancel | + |
+ // | parallel for | | |
+ // | distribute | taskloop | * |
+ // | parallel for | | |
+ // | distribute | taskloop simd | * |
+ // | parallel for | | |
+ // | distribute | distribute | |
+ // | parallel for | | |
+ // | distribute | distribute | |
+ // | parallel for | parallel for | |
+ // | distribute | distribute | |
+ // | parallel for |parallel for simd| |
+ // | distribute | distribute simd | |
+ // | parallel for | | |
+ // | distribute | target parallel | |
+ // | parallel for | for simd | |
+ // +------------------+-----------------+------------------------------------+
+ // | distribute | parallel | * |
+ // | parallel for simd| | |
+ // | distribute | for | * |
+ // | parallel for simd| | |
+ // | distribute | for simd | * |
+ // | parallel for simd| | |
+ // | distribute | master | * |
+ // | parallel for simd| | |
+ // | distribute | critical | * |
+ // | parallel for simd| | |
+ // | distribute | simd | * |
+ // | parallel for simd| | |
+ // | distribute | sections | * |
+ // | parallel for simd| | |
+ // | distribute | section | * |
+ // | parallel for simd| | |
+ // | distribute | single | * |
+ // | parallel for simd| | |
+ // | distribute | parallel for | * |
+ // | parallel for simd| | |
+ // | distribute |parallel for simd| * |
+ // | parallel for simd| | |
+ // | distribute |parallel sections| * |
+ // | parallel for simd| | |
+ // | distribute | task | * |
+ // | parallel for simd| | |
+ // | distribute | taskyield | * |
+ // | parallel for simd| | |
+ // | distribute | barrier | * |
+ // | parallel for simd| | |
+ // | distribute | taskwait | * |
+ // | parallel for simd| | |
+ // | distribute | taskgroup | * |
+ // | parallel for simd| | |
+ // | distribute | flush | * |
+ // | parallel for simd| | |
+ // | distribute | ordered | + |
+ // | parallel for simd| | |
+ // | distribute | atomic | * |
+ // | parallel for simd| | |
+ // | distribute | target | |
+ // | parallel for simd| | |
+ // | distribute | target parallel | |
+ // | parallel for simd| | |
+ // | distribute | target parallel | |
+ // | parallel for simd| for | |
+ // | distribute | target enter | |
+ // | parallel for simd| data | |
+ // | distribute | target exit | |
+ // | parallel for simd| data | |
+ // | distribute | teams | |
+ // | parallel for simd| | |
+ // | distribute | cancellation | + |
+ // | parallel for simd| point | |
+ // | distribute | cancel | + |
+ // | parallel for simd| | |
+ // | distribute | taskloop | * |
+ // | parallel for simd| | |
+ // | distribute | taskloop simd | * |
+ // | parallel for simd| | |
+ // | distribute | distribute | |
+ // | parallel for simd| | |
+ // | distribute | distribute | * |
+ // | parallel for simd| parallel for | |
+ // | distribute | distribute | * |
+ // | parallel for simd|parallel for simd| |
+ // | distribute | distribute simd | * |
+ // | parallel for simd| | |
+ // | distribute | target parallel | |
+ // | parallel for simd| for simd | |
+ // +------------------+-----------------+------------------------------------+
+ // | distribute simd | parallel | * |
+ // | distribute simd | for | * |
+ // | distribute simd | for simd | * |
+ // | distribute simd | master | * |
+ // | distribute simd | critical | * |
+ // | distribute simd | simd | * |
+ // | distribute simd | sections | * |
+ // | distribute simd | section | * |
+ // | distribute simd | single | * |
+ // | distribute simd | parallel for | * |
+ // | distribute simd |parallel for simd| * |
+ // | distribute simd |parallel sections| * |
+ // | distribute simd | task | * |
+ // | distribute simd | taskyield | * |
+ // | distribute simd | barrier | * |
+ // | distribute simd | taskwait | * |
+ // | distribute simd | taskgroup | * |
+ // | distribute simd | flush | * |
+ // | distribute simd | ordered | + |
+ // | distribute simd | atomic | * |
+ // | distribute simd | target | * |
+ // | distribute simd | target parallel | * |
+ // | distribute simd | target parallel | * |
+ // | | for | |
+ // | distribute simd | target enter | * |
+ // | | data | |
+ // | distribute simd | target exit | * |
+ // | | data | |
+ // | distribute simd | teams | * |
+ // | distribute simd | cancellation | + |
+ // | | point | |
+ // | distribute simd | cancel | + |
+ // | distribute simd | taskloop | * |
+ // | distribute simd | taskloop simd | * |
+ // | distribute simd | distribute | |
+ // | distribute simd | distribute | * |
+ // | | parallel for | |
+ // | distribute simd | distribute | * |
+ // | |parallel for simd| |
+ // | distribute simd | distribute simd | * |
+ // | distribute simd | target parallel | * |
+ // | | for simd | |
+ // +------------------+-----------------+------------------------------------+
+ // | target parallel | parallel | * |
+ // | for simd | | |
+ // | target parallel | for | * |
+ // | for simd | | |
+ // | target parallel | for simd | * |
+ // | for simd | | |
+ // | target parallel | master | * |
+ // | for simd | | |
+ // | target parallel | critical | * |
+ // | for simd | | |
+ // | target parallel | simd | ! |
+ // | for simd | | |
+ // | target parallel | sections | * |
+ // | for simd | | |
+ // | target parallel | section | * |
+ // | for simd | | |
+ // | target parallel | single | * |
+ // | for simd | | |
+ // | target parallel | parallel for | * |
+ // | for simd | | |
+ // | target parallel |parallel for simd| * |
+ // | for simd | | |
+ // | target parallel |parallel sections| * |
+ // | for simd | | |
+ // | target parallel | task | * |
+ // | for simd | | |
+ // | target parallel | taskyield | * |
+ // | for simd | | |
+ // | target parallel | barrier | * |
+ // | for simd | | |
+ // | target parallel | taskwait | * |
+ // | for simd | | |
+ // | target parallel | taskgroup | * |
+ // | for simd | | |
+ // | target parallel | flush | * |
+ // | for simd | | |
+ // | target parallel | ordered | + (with simd clause) |
+ // | for simd | | |
+ // | target parallel | atomic | * |
+ // | for simd | | |
+ // | target parallel | target | * |
+ // | for simd | | |
+ // | target parallel | target parallel | * |
+ // | for simd | | |
+ // | target parallel | target parallel | * |
+ // | for simd | for | |
+ // | target parallel | target enter | * |
+ // | for simd | data | |
+ // | target parallel | target exit | * |
+ // | for simd | data | |
+ // | target parallel | teams | * |
+ // | for simd | | |
+ // | target parallel | cancellation | * |
+ // | for simd | point | |
+ // | target parallel | cancel | * |
+ // | for simd | | |
+ // | target parallel | taskloop | * |
+ // | for simd | | |
+ // | target parallel | taskloop simd | * |
+ // | for simd | | |
+ // | target parallel | distribute | * |
+ // | for simd | | |
+ // | target parallel | distribute | * |
+ // | for simd | parallel for | |
+ // | target parallel | distribute | * |
+ // | for simd |parallel for simd| |
+ // | target parallel | distribute simd | * |
+ // | for simd | | |
+ // | target parallel | target parallel | * |
+ // | for simd | for simd | |
// +------------------+-----------------+------------------------------------+
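[Editor's note: a hedged sketch of the nesting the distribute rows above describe; array names, bounds, and clause choices are invented. A distribute-class region is accepted only when closely nested inside a teams region, which the ShouldBeInTeamsRegion check below enforces:

  #pragma omp target
  #pragma omp teams
  #pragma omp distribute parallel for simd
  for (int i = 0; i < n; ++i)
    a[i] += b[i];
]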
if (Stack->getCurScope()) {
auto ParentRegion = Stack->getParentDirective();
+ auto OffendingRegion = ParentRegion;
bool NestingProhibited = false;
bool CloseNesting = true;
enum {
@@ -2297,10 +3114,15 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// OpenMP [2.16, Nesting of Regions]
// OpenMP constructs may not be nested inside a simd region.
// OpenMP [2.8.1,simd Construct, Restrictions]
- // An ordered construct with the simd clause is the only OpenMP construct
- // that can appear in the simd region.
- SemaRef.Diag(StartLoc, diag::err_omp_prohibited_region_simd);
- return true;
+ // An ordered construct with the simd clause is the only OpenMP
+ // construct that can appear in the simd region.
+ // Allowing a SIMD construct nested in another SIMD construct is an
+ // extension. The OpenMP 4.5 spec does not allow it. Issue a warning
+ // message.
+ SemaRef.Diag(StartLoc, (CurrentRegion != OMPD_simd)
+ ? diag::err_omp_prohibited_region_simd
+ : diag::warn_omp_nesting_simd);
+ return CurrentRegion != OMPD_simd;
}
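[Editor's note: an illustration of the relaxed rule above; loop bounds and arrays are invented. Nesting one simd region in another now draws warn_omp_nesting_simd instead of err_omp_prohibited_region_simd, and compilation continues:

  #pragma omp simd
  for (int i = 0; i < n; ++i) {
  #pragma omp simd            // warning (extension), no longer a hard error
    for (int j = 0; j < m; ++j)
      a[i][j] = 0.0f;
  }
]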
if (ParentRegion == OMPD_atomic) {
// OpenMP [2.16, Nesting of Regions]
@@ -2340,9 +3162,12 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// OpenMP construct that matches the type specified in
// construct-type-clause.
NestingProhibited =
- !((CancelRegion == OMPD_parallel && ParentRegion == OMPD_parallel) ||
+ !((CancelRegion == OMPD_parallel &&
+ (ParentRegion == OMPD_parallel ||
+ ParentRegion == OMPD_target_parallel)) ||
(CancelRegion == OMPD_for &&
- (ParentRegion == OMPD_for || ParentRegion == OMPD_parallel_for)) ||
+ (ParentRegion == OMPD_for || ParentRegion == OMPD_parallel_for ||
+ ParentRegion == OMPD_target_parallel_for)) ||
(CancelRegion == OMPD_taskgroup && ParentRegion == OMPD_task) ||
(CancelRegion == OMPD_sections &&
(ParentRegion == OMPD_section || ParentRegion == OMPD_sections ||
@@ -2352,8 +3177,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// A master region may not be closely nested inside a worksharing,
// atomic, or explicit task region.
NestingProhibited = isOpenMPWorksharingDirective(ParentRegion) ||
- ParentRegion == OMPD_task ||
- isOpenMPTaskLoopDirective(ParentRegion);
+ isOpenMPTaskingDirective(ParentRegion);
} else if (CurrentRegion == OMPD_critical && CurrentName.getName()) {
// OpenMP [2.16, Nesting of Regions]
// A critical region may not be nested (closely or otherwise) inside a
@@ -2387,21 +3211,21 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// OpenMP [2.16, Nesting of Regions]
// A barrier region may not be closely nested inside a worksharing,
// explicit task, critical, ordered, atomic, or master region.
- NestingProhibited =
- isOpenMPWorksharingDirective(ParentRegion) ||
- ParentRegion == OMPD_task || ParentRegion == OMPD_master ||
- ParentRegion == OMPD_critical || ParentRegion == OMPD_ordered ||
- isOpenMPTaskLoopDirective(ParentRegion);
+ NestingProhibited = isOpenMPWorksharingDirective(ParentRegion) ||
+ isOpenMPTaskingDirective(ParentRegion) ||
+ ParentRegion == OMPD_master ||
+ ParentRegion == OMPD_critical ||
+ ParentRegion == OMPD_ordered;
} else if (isOpenMPWorksharingDirective(CurrentRegion) &&
!isOpenMPParallelDirective(CurrentRegion)) {
// OpenMP [2.16, Nesting of Regions]
// A worksharing region may not be closely nested inside a worksharing,
// explicit task, critical, ordered, atomic, or master region.
- NestingProhibited =
- isOpenMPWorksharingDirective(ParentRegion) ||
- ParentRegion == OMPD_task || ParentRegion == OMPD_master ||
- ParentRegion == OMPD_critical || ParentRegion == OMPD_ordered ||
- isOpenMPTaskLoopDirective(ParentRegion);
+ NestingProhibited = isOpenMPWorksharingDirective(ParentRegion) ||
+ isOpenMPTaskingDirective(ParentRegion) ||
+ ParentRegion == OMPD_master ||
+ ParentRegion == OMPD_critical ||
+ ParentRegion == OMPD_ordered;
Recommend = ShouldBeInParallelRegion;
} else if (CurrentRegion == OMPD_ordered) {
// OpenMP [2.16, Nesting of Regions]
@@ -2413,8 +3237,7 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
// An ordered construct with the simd clause is the only OpenMP construct
// that can appear in the simd region.
NestingProhibited = ParentRegion == OMPD_critical ||
- ParentRegion == OMPD_task ||
- isOpenMPTaskLoopDirective(ParentRegion) ||
+ isOpenMPTaskingDirective(ParentRegion) ||
!(isOpenMPSimdDirective(ParentRegion) ||
Stack->isParentOrderedRegion());
Recommend = ShouldBeInOrderedRegion;
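[Editor's note: the isOpenMPTaskingDirective predicate substituted into the master, barrier, worksharing, and ordered checks above folds the old two-part test into one call. A sketch equivalent to the expression it replaces; the real definition presumably lives alongside the other isOpenMP* predicates in OpenMPKinds:

  static bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind) {
    return Kind == OMPD_task || isOpenMPTaskLoopDirective(Kind);
  }
]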
@@ -2442,10 +3265,29 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
NestingProhibited = !isOpenMPTeamsDirective(ParentRegion);
Recommend = ShouldBeInTeamsRegion;
}
+ if (!NestingProhibited &&
+ (isOpenMPTargetExecutionDirective(CurrentRegion) ||
+ isOpenMPTargetDataManagementDirective(CurrentRegion))) {
+ // OpenMP 4.5 [2.17 Nesting of Regions]
+ // If a target, target update, target data, target enter data, or
+ // target exit data construct is encountered during execution of a
+ // target region, the behavior is unspecified.
+ NestingProhibited = Stack->hasDirective(
+ [&OffendingRegion](OpenMPDirectiveKind K, const DeclarationNameInfo &,
+ SourceLocation) -> bool {
+ if (isOpenMPTargetExecutionDirective(K)) {
+ OffendingRegion = K;
+ return true;
+ }
+ return false;
+ },
+ false /* don't skip top directive */);
+ CloseNesting = false;
+ }
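[Editor's note: a minimal sketch of the new check; the variable name is invented. Any target construct, including the data-management forms, encountered inside an enclosing target region is now diagnosed, with the enclosing directive reported as the offending region:

  #pragma omp target
  {
  #pragma omp target update to(x)   // unspecified per OpenMP 4.5 [2.17];
  }                                 // now diagnosed via the hasDirective walk
]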
if (NestingProhibited) {
SemaRef.Diag(StartLoc, diag::err_omp_prohibited_region)
- << CloseNesting << getOpenMPDirectiveName(ParentRegion) << Recommend
- << getOpenMPDirectiveName(CurrentRegion);
+ << CloseNesting << getOpenMPDirectiveName(OffendingRegion)
+ << Recommend << getOpenMPDirectiveName(CurrentRegion);
return true;
}
}
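[Editor's note: a sketch of the widened cancel rule handled earlier in this function; err and work are invented. A cancel parallel region is now also accepted when its binding region comes from a combined target parallel:

  #pragma omp target parallel
  {
    if (err) {
  #pragma omp cancel parallel     // parent may now be target parallel
    }
    work();
  }
]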
@@ -2544,7 +3386,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
return StmtError();
llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit;
- llvm::DenseMap<VarDecl *, Expr *> VarsWithInheritedDSA;
+ llvm::DenseMap<ValueDecl *, Expr *> VarsWithInheritedDSA;
bool ErrorFound = false;
ClausesWithImplicit.append(Clauses.begin(), Clauses.end());
if (AStmt) {
@@ -2679,6 +3521,18 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
EndLoc);
AllowedNameModifiers.push_back(OMPD_target);
break;
+ case OMPD_target_parallel:
+ Res = ActOnOpenMPTargetParallelDirective(ClausesWithImplicit, AStmt,
+ StartLoc, EndLoc);
+ AllowedNameModifiers.push_back(OMPD_target);
+ AllowedNameModifiers.push_back(OMPD_parallel);
+ break;
+ case OMPD_target_parallel_for:
+ Res = ActOnOpenMPTargetParallelForDirective(
+ ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA);
+ AllowedNameModifiers.push_back(OMPD_target);
+ AllowedNameModifiers.push_back(OMPD_parallel);
+ break;
case OMPD_cancellation_point:
assert(ClausesWithImplicit.empty() &&
"No clauses are allowed for 'omp cancellation point' directive");
@@ -2698,6 +3552,16 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
EndLoc);
AllowedNameModifiers.push_back(OMPD_target_data);
break;
+ case OMPD_target_enter_data:
+ Res = ActOnOpenMPTargetEnterDataDirective(ClausesWithImplicit, StartLoc,
+ EndLoc);
+ AllowedNameModifiers.push_back(OMPD_target_enter_data);
+ break;
+ case OMPD_target_exit_data:
+ Res = ActOnOpenMPTargetExitDataDirective(ClausesWithImplicit, StartLoc,
+ EndLoc);
+ AllowedNameModifiers.push_back(OMPD_target_exit_data);
+ break;
case OMPD_taskloop:
Res = ActOnOpenMPTaskLoopDirective(ClausesWithImplicit, AStmt, StartLoc,
EndLoc, VarsWithInheritedDSA);
@@ -2712,7 +3576,37 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
Res = ActOnOpenMPDistributeDirective(ClausesWithImplicit, AStmt, StartLoc,
EndLoc, VarsWithInheritedDSA);
break;
+ case OMPD_target_update:
+ assert(!AStmt && "Statement is not allowed for target update");
+ Res =
+ ActOnOpenMPTargetUpdateDirective(ClausesWithImplicit, StartLoc, EndLoc);
+ AllowedNameModifiers.push_back(OMPD_target_update);
+ break;
+ case OMPD_distribute_parallel_for:
+ Res = ActOnOpenMPDistributeParallelForDirective(
+ ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA);
+ AllowedNameModifiers.push_back(OMPD_parallel);
+ break;
+ case OMPD_distribute_parallel_for_simd:
+ Res = ActOnOpenMPDistributeParallelForSimdDirective(
+ ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA);
+ AllowedNameModifiers.push_back(OMPD_parallel);
+ break;
+ case OMPD_distribute_simd:
+ Res = ActOnOpenMPDistributeSimdDirective(
+ ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA);
+ break;
+ case OMPD_target_parallel_for_simd:
+ Res = ActOnOpenMPTargetParallelForSimdDirective(
+ ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA);
+ AllowedNameModifiers.push_back(OMPD_target);
+ AllowedNameModifiers.push_back(OMPD_parallel);
+ break;
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
case OMPD_threadprivate:
+ case OMPD_declare_reduction:
+ case OMPD_declare_simd:
llvm_unreachable("OpenMP Directive is not allowed");
case OMPD_unknown:
llvm_unreachable("Unknown OpenMP directive");
@@ -2733,6 +3627,252 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
return Res;
}
+Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareSimdDirective(
+ DeclGroupPtrTy DG, OMPDeclareSimdDeclAttr::BranchStateTy BS, Expr *Simdlen,
+ ArrayRef<Expr *> Uniforms, ArrayRef<Expr *> Aligneds,
+ ArrayRef<Expr *> Alignments, ArrayRef<Expr *> Linears,
+ ArrayRef<unsigned> LinModifiers, ArrayRef<Expr *> Steps, SourceRange SR) {
+ assert(Aligneds.size() == Alignments.size());
+ assert(Linears.size() == LinModifiers.size());
+ assert(Linears.size() == Steps.size());
+ if (!DG || DG.get().isNull())
+ return DeclGroupPtrTy();
+
+ if (!DG.get().isSingleDecl()) {
+ Diag(SR.getBegin(), diag::err_omp_single_decl_in_declare_simd);
+ return DG;
+ }
+ auto *ADecl = DG.get().getSingleDecl();
+ if (auto *FTD = dyn_cast<FunctionTemplateDecl>(ADecl))
+ ADecl = FTD->getTemplatedDecl();
+
+ auto *FD = dyn_cast<FunctionDecl>(ADecl);
+ if (!FD) {
+ Diag(ADecl->getLocation(), diag::err_omp_function_expected);
+ return DeclGroupPtrTy();
+ }
+
+ // OpenMP [2.8.2, declare simd construct, Description]
+ // The parameter of the simdlen clause must be a constant positive integer
+ // expression.
+ ExprResult SL;
+ if (Simdlen)
+ SL = VerifyPositiveIntegerConstantInClause(Simdlen, OMPC_simdlen);
+ // OpenMP [2.8.2, declare simd construct, Description]
+ // The special this pointer can be used as if it was one of the arguments
+ // to the function in any of the linear, aligned, or uniform clauses.
+ // The uniform clause declares one or more arguments to have an invariant
+ // value for all concurrent invocations of the function in the execution of a
+ // single SIMD loop.
+ llvm::DenseMap<Decl *, Expr *> UniformedArgs;
+ Expr *UniformedLinearThis = nullptr;
+ for (auto *E : Uniforms) {
+ E = E->IgnoreParenImpCasts();
+ if (auto *DRE = dyn_cast<DeclRefExpr>(E))
+ if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl()))
+ if (FD->getNumParams() > PVD->getFunctionScopeIndex() &&
+ FD->getParamDecl(PVD->getFunctionScopeIndex())
+ ->getCanonicalDecl() == PVD->getCanonicalDecl()) {
+ UniformedArgs.insert(std::make_pair(PVD->getCanonicalDecl(), E));
+ continue;
+ }
+ if (isa<CXXThisExpr>(E)) {
+ UniformedLinearThis = E;
+ continue;
+ }
+ Diag(E->getExprLoc(), diag::err_omp_param_or_this_in_clause)
+ << FD->getDeclName() << (isa<CXXMethodDecl>(ADecl) ? 1 : 0);
+ }
+ // OpenMP [2.8.2, declare simd construct, Description]
+ // The aligned clause declares that the object to which each list item points
+ // is aligned to the number of bytes expressed in the optional parameter of
+ // the aligned clause.
+ // The special this pointer can be used as if it was one of the arguments
+ // to the function in any of the linear, aligned, or uniform clauses.
+ // The type of list items appearing in the aligned clause must be array,
+ // pointer, reference to array, or reference to pointer.
+ llvm::DenseMap<Decl *, Expr *> AlignedArgs;
+ Expr *AlignedThis = nullptr;
+ for (auto *E : Aligneds) {
+ E = E->IgnoreParenImpCasts();
+ if (auto *DRE = dyn_cast<DeclRefExpr>(E))
+ if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
+ auto *CanonPVD = PVD->getCanonicalDecl();
+ if (FD->getNumParams() > PVD->getFunctionScopeIndex() &&
+ FD->getParamDecl(PVD->getFunctionScopeIndex())
+ ->getCanonicalDecl() == CanonPVD) {
+ // OpenMP [2.8.1, simd construct, Restrictions]
+ // A list-item cannot appear in more than one aligned clause.
+ if (AlignedArgs.count(CanonPVD) > 0) {
+ Diag(E->getExprLoc(), diag::err_omp_aligned_twice)
+ << 1 << E->getSourceRange();
+ Diag(AlignedArgs[CanonPVD]->getExprLoc(),
+ diag::note_omp_explicit_dsa)
+ << getOpenMPClauseName(OMPC_aligned);
+ continue;
+ }
+ AlignedArgs[CanonPVD] = E;
+ QualType QTy = PVD->getType()
+ .getNonReferenceType()
+ .getUnqualifiedType()
+ .getCanonicalType();
+ const Type *Ty = QTy.getTypePtrOrNull();
+ if (!Ty || (!Ty->isArrayType() && !Ty->isPointerType())) {
+ Diag(E->getExprLoc(), diag::err_omp_aligned_expected_array_or_ptr)
+ << QTy << getLangOpts().CPlusPlus << E->getSourceRange();
+ Diag(PVD->getLocation(), diag::note_previous_decl) << PVD;
+ }
+ continue;
+ }
+ }
+ if (isa<CXXThisExpr>(E)) {
+ if (AlignedThis) {
+ Diag(E->getExprLoc(), diag::err_omp_aligned_twice)
+ << 2 << E->getSourceRange();
+ Diag(AlignedThis->getExprLoc(), diag::note_omp_explicit_dsa)
+ << getOpenMPClauseName(OMPC_aligned);
+ }
+ AlignedThis = E;
+ continue;
+ }
+ Diag(E->getExprLoc(), diag::err_omp_param_or_this_in_clause)
+ << FD->getDeclName() << (isa<CXXMethodDecl>(ADecl) ? 1 : 0);
+ }
+ // The optional parameter of the aligned clause, alignment, must be a constant
+ // positive integer expression. If no optional parameter is specified,
+ // implementation-defined default alignments for SIMD instructions on the
+ // target platforms are assumed.
+ SmallVector<Expr *, 4> NewAligns;
+ for (auto *E : Alignments) {
+ ExprResult Align;
+ if (E)
+ Align = VerifyPositiveIntegerConstantInClause(E, OMPC_aligned);
+ NewAligns.push_back(Align.get());
+ }
+ // OpenMP [2.8.2, declare simd construct, Description]
+ // The linear clause declares one or more list items to be private to a SIMD
+ // lane and to have a linear relationship with respect to the iteration space
+ // of a loop.
+ // The special this pointer can be used as if it was one of the arguments
+ // to the function in any of the linear, aligned, or uniform clauses.
+ // When a linear-step expression is specified in a linear clause it must be
+ // either a constant integer expression or an integer-typed parameter that is
+ // specified in a uniform clause on the directive.
+ llvm::DenseMap<Decl *, Expr *> LinearArgs;
+ const bool IsUniformedThis = UniformedLinearThis != nullptr;
+ auto MI = LinModifiers.begin();
+ for (auto *E : Linears) {
+ auto LinKind = static_cast<OpenMPLinearClauseKind>(*MI);
+ ++MI;
+ E = E->IgnoreParenImpCasts();
+ if (auto *DRE = dyn_cast<DeclRefExpr>(E))
+ if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
+ auto *CanonPVD = PVD->getCanonicalDecl();
+ if (FD->getNumParams() > PVD->getFunctionScopeIndex() &&
+ FD->getParamDecl(PVD->getFunctionScopeIndex())
+ ->getCanonicalDecl() == CanonPVD) {
+ // OpenMP [2.15.3.7, linear Clause, Restrictions]
+ // A list-item cannot appear in more than one linear clause.
+ if (LinearArgs.count(CanonPVD) > 0) {
+ Diag(E->getExprLoc(), diag::err_omp_wrong_dsa)
+ << getOpenMPClauseName(OMPC_linear)
+ << getOpenMPClauseName(OMPC_linear) << E->getSourceRange();
+ Diag(LinearArgs[CanonPVD]->getExprLoc(),
+ diag::note_omp_explicit_dsa)
+ << getOpenMPClauseName(OMPC_linear);
+ continue;
+ }
+ // Each argument can appear in at most one uniform or linear clause.
+ if (UniformedArgs.count(CanonPVD) > 0) {
+ Diag(E->getExprLoc(), diag::err_omp_wrong_dsa)
+ << getOpenMPClauseName(OMPC_linear)
+ << getOpenMPClauseName(OMPC_uniform) << E->getSourceRange();
+ Diag(UniformedArgs[CanonPVD]->getExprLoc(),
+ diag::note_omp_explicit_dsa)
+ << getOpenMPClauseName(OMPC_uniform);
+ continue;
+ }
+ LinearArgs[CanonPVD] = E;
+ if (E->isValueDependent() || E->isTypeDependent() ||
+ E->isInstantiationDependent() ||
+ E->containsUnexpandedParameterPack())
+ continue;
+ (void)CheckOpenMPLinearDecl(CanonPVD, E->getExprLoc(), LinKind,
+ PVD->getOriginalType());
+ continue;
+ }
+ }
+ if (isa<CXXThisExpr>(E)) {
+ if (UniformedLinearThis) {
+ Diag(E->getExprLoc(), diag::err_omp_wrong_dsa)
+ << getOpenMPClauseName(OMPC_linear)
+ << getOpenMPClauseName(IsUniformedThis ? OMPC_uniform : OMPC_linear)
+ << E->getSourceRange();
+ Diag(UniformedLinearThis->getExprLoc(), diag::note_omp_explicit_dsa)
+ << getOpenMPClauseName(IsUniformedThis ? OMPC_uniform
+ : OMPC_linear);
+ continue;
+ }
+ UniformedLinearThis = E;
+ if (E->isValueDependent() || E->isTypeDependent() ||
+ E->isInstantiationDependent() || E->containsUnexpandedParameterPack())
+ continue;
+ (void)CheckOpenMPLinearDecl(/*D=*/nullptr, E->getExprLoc(), LinKind,
+ E->getType());
+ continue;
+ }
+ Diag(E->getExprLoc(), diag::err_omp_param_or_this_in_clause)
+ << FD->getDeclName() << (isa<CXXMethodDecl>(ADecl) ? 1 : 0);
+ }
+ Expr *Step = nullptr;
+ Expr *NewStep = nullptr;
+ SmallVector<Expr *, 4> NewSteps;
+ for (auto *E : Steps) {
+ // Skip the same step expression; it was checked already.
+ if (Step == E || !E) {
+ NewSteps.push_back(E ? NewStep : nullptr);
+ continue;
+ }
+ Step = E;
+ if (auto *DRE = dyn_cast<DeclRefExpr>(Step))
+ if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
+ auto *CanonPVD = PVD->getCanonicalDecl();
+ if (UniformedArgs.count(CanonPVD) == 0) {
+ Diag(Step->getExprLoc(), diag::err_omp_expected_uniform_param)
+ << Step->getSourceRange();
+ } else if (E->isValueDependent() || E->isTypeDependent() ||
+ E->isInstantiationDependent() ||
+ E->containsUnexpandedParameterPack() ||
+ CanonPVD->getType()->hasIntegerRepresentation())
+ NewSteps.push_back(Step);
+ else {
+ Diag(Step->getExprLoc(), diag::err_omp_expected_int_param)
+ << Step->getSourceRange();
+ }
+ continue;
+ }
+ NewStep = Step;
+ if (Step && !Step->isValueDependent() && !Step->isTypeDependent() &&
+ !Step->isInstantiationDependent() &&
+ !Step->containsUnexpandedParameterPack()) {
+ NewStep = PerformOpenMPImplicitIntegerConversion(Step->getExprLoc(), Step)
+ .get();
+ if (NewStep)
+ NewStep = VerifyIntegerConstantExpression(NewStep).get();
+ }
+ NewSteps.push_back(NewStep);
+ }
+ auto *NewAttr = OMPDeclareSimdDeclAttr::CreateImplicit(
+ Context, BS, SL.get(), const_cast<Expr **>(Uniforms.data()),
+ Uniforms.size(), const_cast<Expr **>(Aligneds.data()), Aligneds.size(),
+ const_cast<Expr **>(NewAligns.data()), NewAligns.size(),
+ const_cast<Expr **>(Linears.data()), Linears.size(),
+ const_cast<unsigned *>(LinModifiers.data()), LinModifiers.size(),
+ NewSteps.data(), NewSteps.size(), SR);
+ ADecl->addAttr(NewAttr);
+ return ConvertDeclToDeclGroup(ADecl);
+}
+
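[Editor's note: taken together, the checks above validate declarations like this hedged example; the function and its parameters are invented. n is uniform, i is linear with a constant step, and p satisfies the aligned pointer-or-array requirement:

  #pragma omp declare simd simdlen(8) uniform(n) linear(i : 1) aligned(p : 64)
  float scale(float *p, int i, int n);
]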
StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
Stmt *AStmt,
SourceLocation StartLoc,
@@ -2772,33 +3912,29 @@ class OpenMPIterationSpaceChecker {
/// \brief A source location for referring to increment later.
SourceRange IncrementSrcRange;
/// \brief Loop variable.
- VarDecl *Var;
+ ValueDecl *LCDecl = nullptr;
/// \brief Reference to loop variable.
- DeclRefExpr *VarRef;
+ Expr *LCRef = nullptr;
/// \brief Lower bound (initializer for the var).
- Expr *LB;
+ Expr *LB = nullptr;
/// \brief Upper bound.
- Expr *UB;
+ Expr *UB = nullptr;
/// \brief Loop step (increment).
- Expr *Step;
+ Expr *Step = nullptr;
/// \brief This flag is true when condition is one of:
/// Var < UB
/// Var <= UB
/// UB > Var
/// UB >= Var
- bool TestIsLessOp;
+ bool TestIsLessOp = false;
/// \brief This flag is true when condition is strict ( < or > ).
- bool TestIsStrictOp;
+ bool TestIsStrictOp = false;
/// \brief This flag is true when step is subtracted on each iteration.
- bool SubtractStep;
+ bool SubtractStep = false;
public:
OpenMPIterationSpaceChecker(Sema &SemaRef, SourceLocation DefaultLoc)
- : SemaRef(SemaRef), DefaultLoc(DefaultLoc), ConditionLoc(DefaultLoc),
- InitSrcRange(SourceRange()), ConditionSrcRange(SourceRange()),
- IncrementSrcRange(SourceRange()), Var(nullptr), VarRef(nullptr),
- LB(nullptr), UB(nullptr), Step(nullptr), TestIsLessOp(false),
- TestIsStrictOp(false), SubtractStep(false) {}
+ : SemaRef(SemaRef), DefaultLoc(DefaultLoc), ConditionLoc(DefaultLoc) {}
/// \brief Check init-expr for canonical loop form and save loop counter
/// variable - #LCDecl and its initialization value - #LB.
bool CheckInit(Stmt *S, bool EmitDiags = true);
@@ -2809,9 +3945,9 @@ public:
/// does not conform, otherwise save loop step (#Step).
bool CheckInc(Expr *S);
/// \brief Return the loop counter variable.
- VarDecl *GetLoopVar() const { return Var; }
+ ValueDecl *GetLoopDecl() const { return LCDecl; }
/// \brief Return the reference expression to loop counter variable.
- DeclRefExpr *GetLoopVarRefExpr() const { return VarRef; }
+ Expr *GetLoopDeclRefExpr() const { return LCRef; }
/// \brief Source range of the loop init.
SourceRange GetInitSrcRange() const { return InitSrcRange; }
/// \brief Source range of the loop condition.
@@ -2821,11 +3957,15 @@ public:
/// \brief True if the step should be subtracted.
bool ShouldSubtractStep() const { return SubtractStep; }
/// \brief Build the expression to calculate the number of iterations.
- Expr *BuildNumIterations(Scope *S, const bool LimitedType) const;
+ Expr *
+ BuildNumIterations(Scope *S, const bool LimitedType,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) const;
/// \brief Build the precondition expression for the loops.
- Expr *BuildPreCond(Scope *S, Expr *Cond) const;
+ Expr *BuildPreCond(Scope *S, Expr *Cond,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) const;
/// \brief Build reference expression to the counter to be used for codegen.
- Expr *BuildCounterVar() const;
+ DeclRefExpr *BuildCounterVar(llvm::MapVector<Expr *, DeclRefExpr *> &Captures,
+ DSAStackTy &DSA) const;
/// \brief Build reference expression to the private counter to be used for
/// codegen.
Expr *BuildPrivateCounterVar() const;
@@ -2841,7 +3981,7 @@ private:
/// expression.
bool CheckIncRHS(Expr *RHS);
/// \brief Helper to set loop counter variable and its initializer.
- bool SetVarAndLB(VarDecl *NewVar, DeclRefExpr *NewVarRefExpr, Expr *NewLB);
+ bool SetLCDeclAndLB(ValueDecl *NewLCDecl, Expr *NewDeclRefExpr, Expr *NewLB);
/// \brief Helper to set upper bound.
bool SetUB(Expr *NewUB, bool LessOp, bool StrictOp, SourceRange SR,
SourceLocation SL);
@@ -2850,16 +3990,16 @@ private:
};
bool OpenMPIterationSpaceChecker::Dependent() const {
- if (!Var) {
+ if (!LCDecl) {
assert(!LB && !UB && !Step);
return false;
}
- return Var->getType()->isDependentType() || (LB && LB->isValueDependent()) ||
- (UB && UB->isValueDependent()) || (Step && Step->isValueDependent());
+ return LCDecl->getType()->isDependentType() ||
+ (LB && LB->isValueDependent()) || (UB && UB->isValueDependent()) ||
+ (Step && Step->isValueDependent());
}
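[Editor's note: for reference, the canonical loop form the checker's CheckInit/CheckCond/CheckInc methods validate (OpenMP [2.6, Canonical Loop Form]); identifiers are illustrative. The test may use <, <=, >, or >=, and the increment any of ++/--/+=/-=/var = var + step:

  for (i = lb; i < ub; i += step)   // init-expr; test-expr; incr-expr
    body();
]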
-template <typename T>
-static T *getExprAsWritten(T *E) {
+static Expr *getExprAsWritten(Expr *E) {
if (auto *ExprTemp = dyn_cast<ExprWithCleanups>(E))
E = ExprTemp->getSubExpr();
@@ -2874,16 +4014,16 @@ static T *getExprAsWritten(T *E) {
return E->IgnoreParens();
}
-bool OpenMPIterationSpaceChecker::SetVarAndLB(VarDecl *NewVar,
- DeclRefExpr *NewVarRefExpr,
- Expr *NewLB) {
+bool OpenMPIterationSpaceChecker::SetLCDeclAndLB(ValueDecl *NewLCDecl,
+ Expr *NewLCRefExpr,
+ Expr *NewLB) {
// State consistency checking to ensure correct usage.
- assert(Var == nullptr && LB == nullptr && VarRef == nullptr &&
+ assert(LCDecl == nullptr && LB == nullptr && LCRef == nullptr &&
UB == nullptr && Step == nullptr && !TestIsLessOp && !TestIsStrictOp);
- if (!NewVar || !NewLB)
+ if (!NewLCDecl || !NewLB)
return true;
- Var = NewVar;
- VarRef = NewVarRefExpr;
+ LCDecl = getCanonicalDecl(NewLCDecl);
+ LCRef = NewLCRefExpr;
if (auto *CE = dyn_cast_or_null<CXXConstructExpr>(NewLB))
if (const CXXConstructorDecl *Ctor = CE->getConstructor())
if ((Ctor->isCopyOrMoveConstructor() ||
@@ -2897,8 +4037,8 @@ bool OpenMPIterationSpaceChecker::SetVarAndLB(VarDecl *NewVar,
bool OpenMPIterationSpaceChecker::SetUB(Expr *NewUB, bool LessOp, bool StrictOp,
SourceRange SR, SourceLocation SL) {
// State consistency checking to ensure correct usage.
- assert(Var != nullptr && LB != nullptr && UB == nullptr && Step == nullptr &&
- !TestIsLessOp && !TestIsStrictOp);
+ assert(LCDecl != nullptr && LB != nullptr && UB == nullptr &&
+ Step == nullptr && !TestIsLessOp && !TestIsStrictOp);
if (!NewUB)
return true;
UB = NewUB;
@@ -2911,7 +4051,7 @@ bool OpenMPIterationSpaceChecker::SetUB(Expr *NewUB, bool LessOp, bool StrictOp,
bool OpenMPIterationSpaceChecker::SetStep(Expr *NewStep, bool Subtract) {
// State consistency checking to ensure correct usage.
- assert(Var != nullptr && LB != nullptr && Step == nullptr);
+ assert(LCDecl != nullptr && LB != nullptr && Step == nullptr);
if (!NewStep)
return true;
if (!NewStep->isValueDependent()) {
@@ -2947,7 +4087,7 @@ bool OpenMPIterationSpaceChecker::SetStep(Expr *NewStep, bool Subtract) {
: (IsConstPos || (IsUnsigned && !Subtract))))) {
SemaRef.Diag(NewStep->getExprLoc(),
diag::err_omp_loop_incr_not_compatible)
- << Var << TestIsLessOp << NewStep->getSourceRange();
+ << LCDecl << TestIsLessOp << NewStep->getSourceRange();
SemaRef.Diag(ConditionLoc,
diag::note_omp_loop_cond_requres_compatible_incr)
<< TestIsLessOp << ConditionSrcRange;
@@ -2980,14 +4120,28 @@ bool OpenMPIterationSpaceChecker::CheckInit(Stmt *S, bool EmitDiags) {
}
return true;
}
+ if (auto *ExprTemp = dyn_cast<ExprWithCleanups>(S))
+ if (!ExprTemp->cleanupsHaveSideEffects())
+ S = ExprTemp->getSubExpr();
+
InitSrcRange = S->getSourceRange();
if (Expr *E = dyn_cast<Expr>(S))
S = E->IgnoreParens();
if (auto BO = dyn_cast<BinaryOperator>(S)) {
- if (BO->getOpcode() == BO_Assign)
- if (auto DRE = dyn_cast<DeclRefExpr>(BO->getLHS()->IgnoreParens()))
- return SetVarAndLB(dyn_cast<VarDecl>(DRE->getDecl()), DRE,
- BO->getRHS());
+ if (BO->getOpcode() == BO_Assign) {
+ auto *LHS = BO->getLHS()->IgnoreParens();
+ if (auto *DRE = dyn_cast<DeclRefExpr>(LHS)) {
+ if (auto *CED = dyn_cast<OMPCapturedExprDecl>(DRE->getDecl()))
+ if (auto *ME = dyn_cast<MemberExpr>(getExprAsWritten(CED->getInit())))
+ return SetLCDeclAndLB(ME->getMemberDecl(), ME, BO->getRHS());
+ return SetLCDeclAndLB(DRE->getDecl(), DRE, BO->getRHS());
+ }
+ if (auto *ME = dyn_cast<MemberExpr>(LHS)) {
+ if (ME->isArrow() &&
+ isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()))
+ return SetLCDeclAndLB(ME->getMemberDecl(), ME, BO->getRHS());
+ }
+ }
} else if (auto DS = dyn_cast<DeclStmt>(S)) {
if (DS->isSingleDecl()) {
if (auto Var = dyn_cast_or_null<VarDecl>(DS->getSingleDecl())) {
@@ -2997,16 +4151,29 @@ bool OpenMPIterationSpaceChecker::CheckInit(Stmt *S, bool EmitDiags) {
SemaRef.Diag(S->getLocStart(),
diag::ext_omp_loop_not_canonical_init)
<< S->getSourceRange();
- return SetVarAndLB(Var, nullptr, Var->getInit());
+ return SetLCDeclAndLB(Var, nullptr, Var->getInit());
}
}
}
- } else if (auto CE = dyn_cast<CXXOperatorCallExpr>(S))
- if (CE->getOperator() == OO_Equal)
- if (auto DRE = dyn_cast<DeclRefExpr>(CE->getArg(0)))
- return SetVarAndLB(dyn_cast<VarDecl>(DRE->getDecl()), DRE,
- CE->getArg(1));
+ } else if (auto CE = dyn_cast<CXXOperatorCallExpr>(S)) {
+ if (CE->getOperator() == OO_Equal) {
+ auto *LHS = CE->getArg(0);
+ if (auto DRE = dyn_cast<DeclRefExpr>(LHS)) {
+ if (auto *CED = dyn_cast<OMPCapturedExprDecl>(DRE->getDecl()))
+ if (auto *ME = dyn_cast<MemberExpr>(getExprAsWritten(CED->getInit())))
+ return SetLCDeclAndLB(ME->getMemberDecl(), ME, CE->getArg(1));
+ return SetLCDeclAndLB(DRE->getDecl(), DRE, CE->getArg(1));
+ }
+ if (auto *ME = dyn_cast<MemberExpr>(LHS)) {
+ if (ME->isArrow() &&
+ isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()))
+ return SetLCDeclAndLB(ME->getMemberDecl(), ME, CE->getArg(1));
+ }
+ }
+ }
+ if (Dependent() || SemaRef.CurContext->isDependentContext())
+ return false;
if (EmitDiags) {
SemaRef.Diag(S->getLocStart(), diag::err_omp_loop_not_canonical_init)
<< S->getSourceRange();
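[Editor's note: the new MemberExpr path above lets a non-static data member serve as the loop counter. A hedged sketch with an invented class; the implicit this->i is matched by the isArrow()/CXXThisExpr test and recorded via SetLCDeclAndLB:

  struct S {
    int i;
    void f(int n, float *a) {
  #pragma omp parallel for
      for (i = 0; i < n; ++i)   // 'i' is this->i, accepted as loop counter
        a[i] = 0.0f;
    }
  };
]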
@@ -3016,7 +4183,7 @@ bool OpenMPIterationSpaceChecker::CheckInit(Stmt *S, bool EmitDiags) {
/// \brief Ignore parentheses, implicit casts, copy constructors and return the
/// variable (which may be the loop variable) if possible.
-static const VarDecl *GetInitVarDecl(const Expr *E) {
+static const ValueDecl *GetInitLCDecl(Expr *E) {
if (!E)
return nullptr;
E = getExprAsWritten(E);
@@ -3026,10 +4193,18 @@ static const VarDecl *GetInitVarDecl(const Expr *E) {
Ctor->isConvertingConstructor(/*AllowExplicit=*/false)) &&
CE->getNumArgs() > 0 && CE->getArg(0) != nullptr)
E = CE->getArg(0)->IgnoreParenImpCasts();
- auto DRE = dyn_cast_or_null<DeclRefExpr>(E);
- if (!DRE)
- return nullptr;
- return dyn_cast<VarDecl>(DRE->getDecl());
+ if (auto *DRE = dyn_cast_or_null<DeclRefExpr>(E)) {
+ if (auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ if (auto *CED = dyn_cast<OMPCapturedExprDecl>(VD))
+ if (auto *ME = dyn_cast<MemberExpr>(getExprAsWritten(CED->getInit())))
+ return getCanonicalDecl(ME->getMemberDecl());
+ return getCanonicalDecl(VD);
+ }
+ }
+ if (auto *ME = dyn_cast_or_null<MemberExpr>(E))
+ if (ME->isArrow() && isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()))
+ return getCanonicalDecl(ME->getMemberDecl());
+ return nullptr;
}
bool OpenMPIterationSpaceChecker::CheckCond(Expr *S) {
@@ -3040,19 +4215,19 @@ bool OpenMPIterationSpaceChecker::CheckCond(Expr *S) {
// b relational-op var
//
if (!S) {
- SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_cond) << Var;
+ SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_cond) << LCDecl;
return true;
}
S = getExprAsWritten(S);
SourceLocation CondLoc = S->getLocStart();
if (auto BO = dyn_cast<BinaryOperator>(S)) {
if (BO->isRelationalOp()) {
- if (GetInitVarDecl(BO->getLHS()) == Var)
+ if (GetInitLCDecl(BO->getLHS()) == LCDecl)
return SetUB(BO->getRHS(),
(BO->getOpcode() == BO_LT || BO->getOpcode() == BO_LE),
(BO->getOpcode() == BO_LT || BO->getOpcode() == BO_GT),
BO->getSourceRange(), BO->getOperatorLoc());
- if (GetInitVarDecl(BO->getRHS()) == Var)
+ if (GetInitLCDecl(BO->getRHS()) == LCDecl)
return SetUB(BO->getLHS(),
(BO->getOpcode() == BO_GT || BO->getOpcode() == BO_GE),
(BO->getOpcode() == BO_LT || BO->getOpcode() == BO_GT),
@@ -3066,11 +4241,11 @@ bool OpenMPIterationSpaceChecker::CheckCond(Expr *S) {
case OO_GreaterEqual:
case OO_Less:
case OO_LessEqual:
- if (GetInitVarDecl(CE->getArg(0)) == Var)
+ if (GetInitLCDecl(CE->getArg(0)) == LCDecl)
return SetUB(CE->getArg(1), Op == OO_Less || Op == OO_LessEqual,
Op == OO_Less || Op == OO_Greater, CE->getSourceRange(),
CE->getOperatorLoc());
- if (GetInitVarDecl(CE->getArg(1)) == Var)
+ if (GetInitLCDecl(CE->getArg(1)) == LCDecl)
return SetUB(CE->getArg(0), Op == OO_Greater || Op == OO_GreaterEqual,
Op == OO_Less || Op == OO_Greater, CE->getSourceRange(),
CE->getOperatorLoc());
@@ -3080,8 +4255,10 @@ bool OpenMPIterationSpaceChecker::CheckCond(Expr *S) {
}
}
}
+ if (Dependent() || SemaRef.CurContext->isDependentContext())
+ return false;
SemaRef.Diag(CondLoc, diag::err_omp_loop_not_canonical_cond)
- << S->getSourceRange() << Var;
+ << S->getSourceRange() << LCDecl;
return true;
}
@@ -3095,22 +4272,24 @@ bool OpenMPIterationSpaceChecker::CheckIncRHS(Expr *RHS) {
if (auto BO = dyn_cast<BinaryOperator>(RHS)) {
if (BO->isAdditiveOp()) {
bool IsAdd = BO->getOpcode() == BO_Add;
- if (GetInitVarDecl(BO->getLHS()) == Var)
+ if (GetInitLCDecl(BO->getLHS()) == LCDecl)
return SetStep(BO->getRHS(), !IsAdd);
- if (IsAdd && GetInitVarDecl(BO->getRHS()) == Var)
+ if (IsAdd && GetInitLCDecl(BO->getRHS()) == LCDecl)
return SetStep(BO->getLHS(), false);
}
} else if (auto CE = dyn_cast<CXXOperatorCallExpr>(RHS)) {
bool IsAdd = CE->getOperator() == OO_Plus;
if ((IsAdd || CE->getOperator() == OO_Minus) && CE->getNumArgs() == 2) {
- if (GetInitVarDecl(CE->getArg(0)) == Var)
+ if (GetInitLCDecl(CE->getArg(0)) == LCDecl)
return SetStep(CE->getArg(1), !IsAdd);
- if (IsAdd && GetInitVarDecl(CE->getArg(1)) == Var)
+ if (IsAdd && GetInitLCDecl(CE->getArg(1)) == LCDecl)
return SetStep(CE->getArg(0), false);
}
}
+ if (Dependent() || SemaRef.CurContext->isDependentContext())
+ return false;
SemaRef.Diag(RHS->getLocStart(), diag::err_omp_loop_not_canonical_incr)
- << RHS->getSourceRange() << Var;
+ << RHS->getSourceRange() << LCDecl;
return true;
}
@@ -3129,13 +4308,18 @@ bool OpenMPIterationSpaceChecker::CheckInc(Expr *S) {
// var = var - incr
//
if (!S) {
- SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_incr) << Var;
+ SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_incr) << LCDecl;
return true;
}
+ if (auto *ExprTemp = dyn_cast<ExprWithCleanups>(S))
+ if (!ExprTemp->cleanupsHaveSideEffects())
+ S = ExprTemp->getSubExpr();
+
IncrementSrcRange = S->getSourceRange();
S = S->IgnoreParens();
if (auto UO = dyn_cast<UnaryOperator>(S)) {
- if (UO->isIncrementDecrementOp() && GetInitVarDecl(UO->getSubExpr()) == Var)
+ if (UO->isIncrementDecrementOp() &&
+ GetInitLCDecl(UO->getSubExpr()) == LCDecl)
return SetStep(
SemaRef.ActOnIntegerConstant(UO->getLocStart(),
(UO->isDecrementOp() ? -1 : 1)).get(),
@@ -3144,11 +4328,11 @@ bool OpenMPIterationSpaceChecker::CheckInc(Expr *S) {
switch (BO->getOpcode()) {
case BO_AddAssign:
case BO_SubAssign:
- if (GetInitVarDecl(BO->getLHS()) == Var)
+ if (GetInitLCDecl(BO->getLHS()) == LCDecl)
return SetStep(BO->getRHS(), BO->getOpcode() == BO_SubAssign);
break;
case BO_Assign:
- if (GetInitVarDecl(BO->getLHS()) == Var)
+ if (GetInitLCDecl(BO->getLHS()) == LCDecl)
return CheckIncRHS(BO->getRHS());
break;
default:
@@ -3158,7 +4342,7 @@ bool OpenMPIterationSpaceChecker::CheckInc(Expr *S) {
switch (CE->getOperator()) {
case OO_PlusPlus:
case OO_MinusMinus:
- if (GetInitVarDecl(CE->getArg(0)) == Var)
+ if (GetInitLCDecl(CE->getArg(0)) == LCDecl)
return SetStep(
SemaRef.ActOnIntegerConstant(
CE->getLocStart(),
@@ -3167,103 +4351,55 @@ bool OpenMPIterationSpaceChecker::CheckInc(Expr *S) {
break;
case OO_PlusEqual:
case OO_MinusEqual:
- if (GetInitVarDecl(CE->getArg(0)) == Var)
+ if (GetInitLCDecl(CE->getArg(0)) == LCDecl)
return SetStep(CE->getArg(1), CE->getOperator() == OO_MinusEqual);
break;
case OO_Equal:
- if (GetInitVarDecl(CE->getArg(0)) == Var)
+ if (GetInitLCDecl(CE->getArg(0)) == LCDecl)
return CheckIncRHS(CE->getArg(1));
break;
default:
break;
}
}
+ if (Dependent() || SemaRef.CurContext->isDependentContext())
+ return false;
SemaRef.Diag(S->getLocStart(), diag::err_omp_loop_not_canonical_incr)
- << S->getSourceRange() << Var;
+ << S->getSourceRange() << LCDecl;
return true;
}
-namespace {
-// Transform variables declared in GNU statement expressions to new ones to
-// avoid crash on codegen.
-class TransformToNewDefs : public TreeTransform<TransformToNewDefs> {
- typedef TreeTransform<TransformToNewDefs> BaseTransform;
-
-public:
- TransformToNewDefs(Sema &SemaRef) : BaseTransform(SemaRef) {}
-
- Decl *TransformDefinition(SourceLocation Loc, Decl *D) {
- if (auto *VD = cast<VarDecl>(D))
- if (!isa<ParmVarDecl>(D) && !isa<VarTemplateSpecializationDecl>(D) &&
- !isa<ImplicitParamDecl>(D)) {
- auto *NewVD = VarDecl::Create(
- SemaRef.Context, VD->getDeclContext(), VD->getLocStart(),
- VD->getLocation(), VD->getIdentifier(), VD->getType(),
- VD->getTypeSourceInfo(), VD->getStorageClass());
- NewVD->setTSCSpec(VD->getTSCSpec());
- NewVD->setInit(VD->getInit());
- NewVD->setInitStyle(VD->getInitStyle());
- NewVD->setExceptionVariable(VD->isExceptionVariable());
- NewVD->setNRVOVariable(VD->isNRVOVariable());
- NewVD->setCXXForRangeDecl(VD->isCXXForRangeDecl());
- NewVD->setConstexpr(VD->isConstexpr());
- NewVD->setInitCapture(VD->isInitCapture());
- NewVD->setPreviousDeclInSameBlockScope(
- VD->isPreviousDeclInSameBlockScope());
- VD->getDeclContext()->addHiddenDecl(NewVD);
- if (VD->hasAttrs())
- NewVD->setAttrs(VD->getAttrs());
- transformedLocalDecl(VD, NewVD);
- return NewVD;
- }
- return BaseTransform::TransformDefinition(Loc, D);
- }
-
- ExprResult TransformDeclRefExpr(DeclRefExpr *E) {
- if (auto *NewD = TransformDecl(E->getExprLoc(), E->getDecl()))
- if (E->getDecl() != NewD) {
- NewD->setReferenced();
- NewD->markUsed(SemaRef.Context);
- return DeclRefExpr::Create(
- SemaRef.Context, E->getQualifierLoc(), E->getTemplateKeywordLoc(),
- cast<ValueDecl>(NewD), E->refersToEnclosingVariableOrCapture(),
- E->getNameInfo(), E->getType(), E->getValueKind());
- }
- return BaseTransform::TransformDeclRefExpr(E);
- }
-};
+static ExprResult
+tryBuildCapture(Sema &SemaRef, Expr *Capture,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) {
+ if (SemaRef.CurContext->isDependentContext())
+ return ExprResult(Capture);
+ if (Capture->isEvaluatable(SemaRef.Context, Expr::SE_AllowSideEffects))
+ return SemaRef.PerformImplicitConversion(
+ Capture->IgnoreImpCasts(), Capture->getType(), Sema::AA_Converting,
+ /*AllowExplicit=*/true);
+ auto I = Captures.find(Capture);
+ if (I != Captures.end())
+ return buildCapture(SemaRef, Capture, I->second);
+ DeclRefExpr *Ref = nullptr;
+ ExprResult Res = buildCapture(SemaRef, Capture, Ref);
+ Captures[Capture] = Ref;
+ return Res;
}
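[Editor's note: tryBuildCapture replaces the deleted TransformToNewDefs machinery. Expressions that can be evaluated are passed through with only an implicit conversion, dependent contexts are left untouched, and anything else is captured exactly once and cached. A comment-level sketch of the intended call pattern; variable names are invented:

  // llvm::MapVector<Expr *, DeclRefExpr *> Captures;
  // ExprResult NewUB = tryBuildCapture(SemaRef, UB, Captures); // new capture
  // ExprResult Same  = tryBuildCapture(SemaRef, UB, Captures); // cache hit
]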
/// \brief Build the expression to calculate the number of iterations.
-Expr *
-OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S,
- const bool LimitedType) const {
- TransformToNewDefs Transform(SemaRef);
+Expr *OpenMPIterationSpaceChecker::BuildNumIterations(
+ Scope *S, const bool LimitedType,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) const {
ExprResult Diff;
- auto VarType = Var->getType().getNonReferenceType();
+ auto VarType = LCDecl->getType().getNonReferenceType();
if (VarType->isIntegerType() || VarType->isPointerType() ||
SemaRef.getLangOpts().CPlusPlus) {
// Upper - Lower
auto *UBExpr = TestIsLessOp ? UB : LB;
auto *LBExpr = TestIsLessOp ? LB : UB;
- Expr *Upper = Transform.TransformExpr(UBExpr).get();
- Expr *Lower = Transform.TransformExpr(LBExpr).get();
- if (!Upper || !Lower)
- return nullptr;
- if (!SemaRef.Context.hasSameType(Upper->getType(), UBExpr->getType())) {
- Upper = SemaRef
- .PerformImplicitConversion(Upper, UBExpr->getType(),
- Sema::AA_Converting,
- /*AllowExplicit=*/true)
- .get();
- }
- if (!SemaRef.Context.hasSameType(Lower->getType(), LBExpr->getType())) {
- Lower = SemaRef
- .PerformImplicitConversion(Lower, LBExpr->getType(),
- Sema::AA_Converting,
- /*AllowExplicit=*/true)
- .get();
- }
+ Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures).get();
+ Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures).get();
if (!Upper || !Lower)
return nullptr;
@@ -3290,18 +4426,9 @@ OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S,
return nullptr;
// Upper - Lower [- 1] + Step
- auto *StepNoImp = Step->IgnoreImplicit();
- auto NewStep = Transform.TransformExpr(StepNoImp);
- if (NewStep.isInvalid())
+ auto NewStep = tryBuildCapture(SemaRef, Step, Captures);
+ if (!NewStep.isUsable())
return nullptr;
- if (!SemaRef.Context.hasSameType(NewStep.get()->getType(),
- StepNoImp->getType())) {
- NewStep = SemaRef.PerformImplicitConversion(
- NewStep.get(), StepNoImp->getType(), Sema::AA_Converting,
- /*AllowExplicit=*/true);
- if (NewStep.isInvalid())
- return nullptr;
- }
Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(), NewStep.get());
if (!Diff.isUsable())
return nullptr;
@@ -3312,17 +4439,6 @@ OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S,
return nullptr;
// (Upper - Lower [- 1] + Step) / Step
- NewStep = Transform.TransformExpr(StepNoImp);
- if (NewStep.isInvalid())
- return nullptr;
- if (!SemaRef.Context.hasSameType(NewStep.get()->getType(),
- StepNoImp->getType())) {
- NewStep = SemaRef.PerformImplicitConversion(
- NewStep.get(), StepNoImp->getType(), Sema::AA_Converting,
- /*AllowExplicit=*/true);
- if (NewStep.isInvalid())
- return nullptr;
- }
Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Div, Diff.get(), NewStep.get());
if (!Diff.isUsable())
return nullptr;
@@ -3368,35 +4484,25 @@ OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S,
return Diff.get();
}
-Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const {
+Expr *OpenMPIterationSpaceChecker::BuildPreCond(
+ Scope *S, Expr *Cond,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) const {
// Try to build LB <op> UB, where <op> is <, >, <=, or >=.
bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics();
SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
- TransformToNewDefs Transform(SemaRef);
-
- auto NewLB = Transform.TransformExpr(LB);
- auto NewUB = Transform.TransformExpr(UB);
- if (NewLB.isInvalid() || NewUB.isInvalid())
- return Cond;
- if (!SemaRef.Context.hasSameType(NewLB.get()->getType(), LB->getType())) {
- NewLB = SemaRef.PerformImplicitConversion(NewLB.get(), LB->getType(),
- Sema::AA_Converting,
- /*AllowExplicit=*/true);
- }
- if (!SemaRef.Context.hasSameType(NewUB.get()->getType(), UB->getType())) {
- NewUB = SemaRef.PerformImplicitConversion(NewUB.get(), UB->getType(),
- Sema::AA_Converting,
- /*AllowExplicit=*/true);
- }
- if (NewLB.isInvalid() || NewUB.isInvalid())
- return Cond;
+
+ auto NewLB = tryBuildCapture(SemaRef, LB, Captures);
+ auto NewUB = tryBuildCapture(SemaRef, UB, Captures);
+ if (!NewLB.isUsable() || !NewUB.isUsable())
+ return nullptr;
+
auto CondExpr = SemaRef.BuildBinOp(
S, DefaultLoc, TestIsLessOp ? (TestIsStrictOp ? BO_LT : BO_LE)
: (TestIsStrictOp ? BO_GT : BO_GE),
NewLB.get(), NewUB.get());
if (CondExpr.isUsable()) {
- if (!SemaRef.Context.hasSameType(CondExpr.get()->getType(),
- SemaRef.Context.BoolTy))
+ if (!SemaRef.Context.hasSameUnqualifiedType(CondExpr.get()->getType(),
+ SemaRef.Context.BoolTy))
CondExpr = SemaRef.PerformImplicitConversion(
CondExpr.get(), SemaRef.Context.BoolTy, /*Action=*/Sema::AA_Casting,
/*AllowExplicit=*/true);
@@ -3407,17 +4513,30 @@ Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const {
}
/// \brief Build reference expression to the counter to be used for codegen.
-Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const {
- return buildDeclRefExpr(SemaRef, Var, Var->getType().getNonReferenceType(),
+DeclRefExpr *OpenMPIterationSpaceChecker::BuildCounterVar(
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures, DSAStackTy &DSA) const {
+ auto *VD = dyn_cast<VarDecl>(LCDecl);
+ if (!VD) {
+ VD = SemaRef.IsOpenMPCapturedDecl(LCDecl);
+ auto *Ref = buildDeclRefExpr(
+ SemaRef, VD, VD->getType().getNonReferenceType(), DefaultLoc);
+ DSAStackTy::DSAVarData Data = DSA.getTopDSA(LCDecl, /*FromParent=*/false);
+ // If the loop control decl is explicitly marked as private, do not mark it
+ // as captured again.
+ if (!isOpenMPPrivate(Data.CKind) || !Data.RefExpr)
+ Captures.insert(std::make_pair(LCRef, Ref));
+ return Ref;
+ }
+ return buildDeclRefExpr(SemaRef, VD, VD->getType().getNonReferenceType(),
DefaultLoc);
}
Expr *OpenMPIterationSpaceChecker::BuildPrivateCounterVar() const {
- if (Var && !Var->isInvalidDecl()) {
- auto Type = Var->getType().getNonReferenceType();
+ if (LCDecl && !LCDecl->isInvalidDecl()) {
+ auto Type = LCDecl->getType().getNonReferenceType();
auto *PrivateVar =
- buildVarDecl(SemaRef, DefaultLoc, Type, Var->getName(),
- Var->hasAttrs() ? &Var->getAttrs() : nullptr);
+ buildVarDecl(SemaRef, DefaultLoc, Type, LCDecl->getName(),
+ LCDecl->hasAttrs() ? &LCDecl->getAttrs() : nullptr);
if (PrivateVar->isInvalidDecl())
return nullptr;
return buildDeclRefExpr(SemaRef, PrivateVar, Type, DefaultLoc);
@@ -3432,23 +4551,23 @@ Expr *OpenMPIterationSpaceChecker::BuildCounterInit() const { return LB; }
Expr *OpenMPIterationSpaceChecker::BuildCounterStep() const { return Step; }
/// \brief Iteration space of a single for loop.
-struct LoopIterationSpace {
+struct LoopIterationSpace final {
/// \brief Condition of the loop.
- Expr *PreCond;
+ Expr *PreCond = nullptr;
/// \brief This expression calculates the number of iterations in the loop.
/// It is always possible to calculate it before starting the loop.
- Expr *NumIterations;
+ Expr *NumIterations = nullptr;
/// \brief The loop counter variable.
- Expr *CounterVar;
+ Expr *CounterVar = nullptr;
/// \brief Private loop counter variable.
- Expr *PrivateCounterVar;
+ Expr *PrivateCounterVar = nullptr;
/// \brief This is initializer for the initial value of #CounterVar.
- Expr *CounterInit;
+ Expr *CounterInit = nullptr;
/// \brief This is step for the #CounterVar used to generate its update:
/// #CounterVar = #CounterInit + #CounterStep * CurrentIteration.
- Expr *CounterStep;
+ Expr *CounterStep = nullptr;
/// \brief Should step be subtracted?
- bool Subtract;
+ bool Subtract = false;
/// \brief Source range of the loop init.
SourceRange InitSrcRange;
/// \brief Source range of the loop condition.
@@ -3466,8 +4585,21 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) {
if (AssociatedLoops > 0 &&
isOpenMPLoopDirective(DSAStack->getCurrentDirective())) {
OpenMPIterationSpaceChecker ISC(*this, ForLoc);
- if (!ISC.CheckInit(Init, /*EmitDiags=*/false))
- DSAStack->addLoopControlVariable(ISC.GetLoopVar());
+ if (!ISC.CheckInit(Init, /*EmitDiags=*/false)) {
+ if (auto *D = ISC.GetLoopDecl()) {
+ auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD) {
+ if (auto *Private = IsOpenMPCapturedDecl(D))
+ VD = Private;
+ else {
+ auto *Ref = buildCapture(*this, D, ISC.GetLoopDeclRefExpr(),
+ /*WithInit=*/false);
+ VD = cast<VarDecl>(Ref->getDecl());
+ }
+ }
+ DSAStack->addLoopControlVariable(D, VD);
+ }
+ }
DSAStack->setAssociatedLoops(AssociatedLoops - 1);
}
}
@@ -3478,8 +4610,9 @@ static bool CheckOpenMPIterationSpace(
OpenMPDirectiveKind DKind, Stmt *S, Sema &SemaRef, DSAStackTy &DSA,
unsigned CurrentNestedLoopCount, unsigned NestedLoopCount,
Expr *CollapseLoopCountExpr, Expr *OrderedLoopCountExpr,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA,
- LoopIterationSpace &ResultIterSpace) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA,
+ LoopIterationSpace &ResultIterSpace,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) {
// OpenMP [2.6, Canonical Loop Form]
// for (init-expr; test-expr; incr-expr) structured-block
auto For = dyn_cast_or_null<ForStmt>(S);
@@ -3511,98 +4644,102 @@ static bool CheckOpenMPIterationSpace(
// Check init.
auto Init = For->getInit();
- if (ISC.CheckInit(Init)) {
+ if (ISC.CheckInit(Init))
return true;
- }
bool HasErrors = false;
// Check loop variable's type.
- auto Var = ISC.GetLoopVar();
+ if (auto *LCDecl = ISC.GetLoopDecl()) {
+ auto *LoopDeclRefExpr = ISC.GetLoopDeclRefExpr();
+
+ // OpenMP [2.6, Canonical Loop Form]
+ // Var is one of the following:
+ // A variable of signed or unsigned integer type.
+ // For C++, a variable of a random access iterator type.
+ // For C, a variable of a pointer type.
+ auto VarType = LCDecl->getType().getNonReferenceType();
+ if (!VarType->isDependentType() && !VarType->isIntegerType() &&
+ !VarType->isPointerType() &&
+ !(SemaRef.getLangOpts().CPlusPlus && VarType->isOverloadableType())) {
+ SemaRef.Diag(Init->getLocStart(), diag::err_omp_loop_variable_type)
+ << SemaRef.getLangOpts().CPlusPlus;
+ HasErrors = true;
+ }
- // OpenMP [2.6, Canonical Loop Form]
- // Var is one of the following:
- // A variable of signed or unsigned integer type.
- // For C++, a variable of a random access iterator type.
- // For C, a variable of a pointer type.
- auto VarType = Var->getType().getNonReferenceType();
- if (!VarType->isDependentType() && !VarType->isIntegerType() &&
- !VarType->isPointerType() &&
- !(SemaRef.getLangOpts().CPlusPlus && VarType->isOverloadableType())) {
- SemaRef.Diag(Init->getLocStart(), diag::err_omp_loop_variable_type)
- << SemaRef.getLangOpts().CPlusPlus;
- HasErrors = true;
- }
-
- // OpenMP, 2.14.1.1 Data-sharing Attribute Rules for Variables Referenced in a
- // Construct
- // The loop iteration variable(s) in the associated for-loop(s) of a for or
- // parallel for construct is (are) private.
- // The loop iteration variable in the associated for-loop of a simd construct
- // with just one associated for-loop is linear with a constant-linear-step
- // that is the increment of the associated for-loop.
- // Exclude loop var from the list of variables with implicitly defined data
- // sharing attributes.
- VarsWithImplicitDSA.erase(Var);
-
- // OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables Referenced in
- // a Construct, C/C++].
- // The loop iteration variable in the associated for-loop of a simd construct
- // with just one associated for-loop may be listed in a linear clause with a
- // constant-linear-step that is the increment of the associated for-loop.
- // The loop iteration variable(s) in the associated for-loop(s) of a for or
- // parallel for construct may be listed in a private or lastprivate clause.
- DSAStackTy::DSAVarData DVar = DSA.getTopDSA(Var, false);
- auto LoopVarRefExpr = ISC.GetLoopVarRefExpr();
- // If LoopVarRefExpr is nullptr it means the corresponding loop variable is
- // declared in the loop and it is predetermined as a private.
- auto PredeterminedCKind =
- isOpenMPSimdDirective(DKind)
- ? ((NestedLoopCount == 1) ? OMPC_linear : OMPC_lastprivate)
- : OMPC_private;
- if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
- DVar.CKind != PredeterminedCKind) ||
- ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop ||
- isOpenMPDistributeDirective(DKind)) &&
- !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
- DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) &&
- (DVar.CKind != OMPC_private || DVar.RefExpr != nullptr)) {
- SemaRef.Diag(Init->getLocStart(), diag::err_omp_loop_var_dsa)
- << getOpenMPClauseName(DVar.CKind) << getOpenMPDirectiveName(DKind)
- << getOpenMPClauseName(PredeterminedCKind);
- if (DVar.RefExpr == nullptr)
- DVar.CKind = PredeterminedCKind;
- ReportOriginalDSA(SemaRef, &DSA, Var, DVar, /*IsLoopIterVar=*/true);
- HasErrors = true;
- } else if (LoopVarRefExpr != nullptr) {
- // Make the loop iteration variable private (for worksharing constructs),
- // linear (for simd directives with the only one associated loop) or
- // lastprivate (for simd directives with several collapsed or ordered
- // loops).
- if (DVar.CKind == OMPC_unknown)
- DVar = DSA.hasDSA(Var, isOpenMPPrivate, MatchesAlways(),
- /*FromParent=*/false);
- DSA.addDSA(Var, LoopVarRefExpr, PredeterminedCKind);
- }
-
- assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars");
-
- // Check test-expr.
- HasErrors |= ISC.CheckCond(For->getCond());
-
- // Check incr-expr.
- HasErrors |= ISC.CheckInc(For->getInc());
+ // OpenMP, 2.14.1.1 Data-sharing Attribute Rules for Variables Referenced in
+ // a Construct
+ // The loop iteration variable(s) in the associated for-loop(s) of a for or
+ // parallel for construct is (are) private.
+ // The loop iteration variable in the associated for-loop of a simd
+ // construct with just one associated for-loop is linear with a
+ // constant-linear-step that is the increment of the associated for-loop.
+ // Exclude loop var from the list of variables with implicitly defined data
+ // sharing attributes.
+ VarsWithImplicitDSA.erase(LCDecl);
+
+ // OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables Referenced
+ // in a Construct, C/C++].
+ // The loop iteration variable in the associated for-loop of a simd
+ // construct with just one associated for-loop may be listed in a linear
+ // clause with a constant-linear-step that is the increment of the
+ // associated for-loop.
+ // The loop iteration variable(s) in the associated for-loop(s) of a for or
+ // parallel for construct may be listed in a private or lastprivate clause.
+ DSAStackTy::DSAVarData DVar = DSA.getTopDSA(LCDecl, false);
+ // If LoopDeclRefExpr is nullptr it means the corresponding loop variable
+ // is declared in the loop and it is predetermined as private.
+ auto PredeterminedCKind =
+ isOpenMPSimdDirective(DKind)
+ ? ((NestedLoopCount == 1) ? OMPC_linear : OMPC_lastprivate)
+ : OMPC_private;
+ if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
+ DVar.CKind != PredeterminedCKind) ||
+ ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop ||
+ isOpenMPDistributeDirective(DKind)) &&
+ !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
+ DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) &&
+ (DVar.CKind != OMPC_private || DVar.RefExpr != nullptr)) {
+ SemaRef.Diag(Init->getLocStart(), diag::err_omp_loop_var_dsa)
+ << getOpenMPClauseName(DVar.CKind) << getOpenMPDirectiveName(DKind)
+ << getOpenMPClauseName(PredeterminedCKind);
+ if (DVar.RefExpr == nullptr)
+ DVar.CKind = PredeterminedCKind;
+ ReportOriginalDSA(SemaRef, &DSA, LCDecl, DVar, /*IsLoopIterVar=*/true);
+ HasErrors = true;
+ } else if (LoopDeclRefExpr != nullptr) {
+ // Make the loop iteration variable private (for worksharing constructs),
+ // linear (for simd directives with the only one associated loop) or
+ // lastprivate (for simd directives with several collapsed or ordered
+ // loops).
+ if (DVar.CKind == OMPC_unknown)
+ DVar = DSA.hasDSA(LCDecl, isOpenMPPrivate,
+ [](OpenMPDirectiveKind) -> bool { return true; },
+ /*FromParent=*/false);
+ DSA.addDSA(LCDecl, LoopDeclRefExpr, PredeterminedCKind);
+ }
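+ // E.g. (hypothetical user code) the diagnostic branch above fires when
+ // an explicit clause conflicts with the predetermined kind:
+ //   #pragma omp simd private(i) // error: i is predetermined linear here
+ //   for (i = 0; i < n; ++i) {}
+ // whereas '#pragma omp for private(i)' is accepted, since 'private' is
+ // the predetermined kind for worksharing loops.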
+
+ assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars");
+
+ // Check test-expr.
+ HasErrors |= ISC.CheckCond(For->getCond());
+
+ // Check incr-expr.
+ HasErrors |= ISC.CheckInc(For->getInc());
+ }
if (ISC.Dependent() || SemaRef.CurContext->isDependentContext() || HasErrors)
return HasErrors;
// Build the loop's iteration space representation.
- ResultIterSpace.PreCond = ISC.BuildPreCond(DSA.getCurScope(), For->getCond());
+ ResultIterSpace.PreCond =
+ ISC.BuildPreCond(DSA.getCurScope(), For->getCond(), Captures);
ResultIterSpace.NumIterations = ISC.BuildNumIterations(
- DSA.getCurScope(), (isOpenMPWorksharingDirective(DKind) ||
- isOpenMPTaskLoopDirective(DKind) ||
- isOpenMPDistributeDirective(DKind)));
- ResultIterSpace.CounterVar = ISC.BuildCounterVar();
+ DSA.getCurScope(),
+ (isOpenMPWorksharingDirective(DKind) ||
+ isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)),
+ Captures);
+ ResultIterSpace.CounterVar = ISC.BuildCounterVar(Captures, DSA);
ResultIterSpace.PrivateCounterVar = ISC.BuildPrivateCounterVar();
ResultIterSpace.CounterInit = ISC.BuildCounterInit();
ResultIterSpace.CounterStep = ISC.BuildCounterStep();
@@ -3622,23 +4759,15 @@ static bool CheckOpenMPIterationSpace(
}
/// \brief Build 'VarRef = Start'.
-static ExprResult BuildCounterInit(Sema &SemaRef, Scope *S, SourceLocation Loc,
- ExprResult VarRef, ExprResult Start) {
- TransformToNewDefs Transform(SemaRef);
+static ExprResult
+BuildCounterInit(Sema &SemaRef, Scope *S, SourceLocation Loc, ExprResult VarRef,
+ ExprResult Start,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) {
// Build 'VarRef = Start'.
- auto *StartNoImp = Start.get()->IgnoreImplicit();
- auto NewStart = Transform.TransformExpr(StartNoImp);
- if (NewStart.isInvalid())
+ auto NewStart = tryBuildCapture(SemaRef, Start.get(), Captures);
+ if (!NewStart.isUsable())
return ExprError();
if (!SemaRef.Context.hasSameType(NewStart.get()->getType(),
- StartNoImp->getType())) {
- NewStart = SemaRef.PerformImplicitConversion(
- NewStart.get(), StartNoImp->getType(), Sema::AA_Converting,
- /*AllowExplicit=*/true);
- if (NewStart.isInvalid())
- return ExprError();
- }
- if (!SemaRef.Context.hasSameType(NewStart.get()->getType(),
VarRef.get()->getType())) {
NewStart = SemaRef.PerformImplicitConversion(
NewStart.get(), VarRef.get()->getType(), Sema::AA_Converting,
@@ -3653,58 +4782,74 @@ static ExprResult BuildCounterInit(Sema &SemaRef, Scope *S, SourceLocation Loc,
}
/// \brief Build 'VarRef = Start + Iter * Step'.
-static ExprResult BuildCounterUpdate(Sema &SemaRef, Scope *S,
- SourceLocation Loc, ExprResult VarRef,
- ExprResult Start, ExprResult Iter,
- ExprResult Step, bool Subtract) {
+static ExprResult
+BuildCounterUpdate(Sema &SemaRef, Scope *S, SourceLocation Loc,
+ ExprResult VarRef, ExprResult Start, ExprResult Iter,
+ ExprResult Step, bool Subtract,
+ llvm::MapVector<Expr *, DeclRefExpr *> *Captures = nullptr) {
// Add parentheses (for debugging purposes only).
Iter = SemaRef.ActOnParenExpr(Loc, Loc, Iter.get());
if (!VarRef.isUsable() || !Start.isUsable() || !Iter.isUsable() ||
!Step.isUsable())
return ExprError();
- auto *StepNoImp = Step.get()->IgnoreImplicit();
- TransformToNewDefs Transform(SemaRef);
- auto NewStep = Transform.TransformExpr(StepNoImp);
+ ExprResult NewStep = Step;
+ if (Captures)
+ NewStep = tryBuildCapture(SemaRef, Step.get(), *Captures);
if (NewStep.isInvalid())
return ExprError();
- if (!SemaRef.Context.hasSameType(NewStep.get()->getType(),
- StepNoImp->getType())) {
- NewStep = SemaRef.PerformImplicitConversion(
- NewStep.get(), StepNoImp->getType(), Sema::AA_Converting,
- /*AllowExplicit=*/true);
- if (NewStep.isInvalid())
- return ExprError();
- }
ExprResult Update =
SemaRef.BuildBinOp(S, Loc, BO_Mul, Iter.get(), NewStep.get());
if (!Update.isUsable())
return ExprError();
- // Build 'VarRef = Start + Iter * Step'.
- auto *StartNoImp = Start.get()->IgnoreImplicit();
- auto NewStart = Transform.TransformExpr(StartNoImp);
+ // Try to build 'VarRef = Start, VarRef (+|-)= Iter * Step' or
+ // 'VarRef = Start (+|-) Iter * Step'.
+ ExprResult NewStart = Start;
+ if (Captures)
+ NewStart = tryBuildCapture(SemaRef, Start.get(), *Captures);
if (NewStart.isInvalid())
return ExprError();
- if (!SemaRef.Context.hasSameType(NewStart.get()->getType(),
- StartNoImp->getType())) {
- NewStart = SemaRef.PerformImplicitConversion(
- NewStart.get(), StartNoImp->getType(), Sema::AA_Converting,
- /*AllowExplicit=*/true);
- if (NewStart.isInvalid())
- return ExprError();
+
+ // First attempt: try to build 'VarRef = Start, VarRef += Iter * Step'.
+ ExprResult SavedUpdate = Update;
+ ExprResult UpdateVal;
+ if (VarRef.get()->getType()->isOverloadableType() ||
+ NewStart.get()->getType()->isOverloadableType() ||
+ Update.get()->getType()->isOverloadableType()) {
+ bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics();
+ SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
+ Update =
+ SemaRef.BuildBinOp(S, Loc, BO_Assign, VarRef.get(), NewStart.get());
+ if (Update.isUsable()) {
+ UpdateVal =
+ SemaRef.BuildBinOp(S, Loc, Subtract ? BO_SubAssign : BO_AddAssign,
+ VarRef.get(), SavedUpdate.get());
+ if (UpdateVal.isUsable()) {
+ Update = SemaRef.CreateBuiltinBinOp(Loc, BO_Comma, Update.get(),
+ UpdateVal.get());
+ }
+ }
+ SemaRef.getDiagnostics().setSuppressAllDiagnostics(Suppress);
}
- Update = SemaRef.BuildBinOp(S, Loc, (Subtract ? BO_Sub : BO_Add),
- NewStart.get(), Update.get());
- if (!Update.isUsable())
- return ExprError();
- Update = SemaRef.PerformImplicitConversion(
- Update.get(), VarRef.get()->getType(), Sema::AA_Converting, true);
- if (!Update.isUsable())
- return ExprError();
+ // Second attempt: try to build 'VarRef = Start (+|-) Iter * Step'.
+ if (!Update.isUsable() || !UpdateVal.isUsable()) {
+ Update = SemaRef.BuildBinOp(S, Loc, Subtract ? BO_Sub : BO_Add,
+ NewStart.get(), SavedUpdate.get());
+ if (!Update.isUsable())
+ return ExprError();
- Update = SemaRef.BuildBinOp(S, Loc, BO_Assign, VarRef.get(), Update.get());
+ if (!SemaRef.Context.hasSameType(Update.get()->getType(),
+ VarRef.get()->getType())) {
+ Update = SemaRef.PerformImplicitConversion(
+ Update.get(), VarRef.get()->getType(), Sema::AA_Converting, true);
+ if (!Update.isUsable())
+ return ExprError();
+ }
+
+ Update = SemaRef.BuildBinOp(S, Loc, BO_Assign, VarRef.get(), Update.get());
+ }
return Update;
}
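// Schematically, the two forms attempted above are:
//   VarRef = Start, VarRef (+|-)= Iter * Step;  // compound form, tried for
//                                               // overloadable types
//   VarRef = Start (+|-) Iter * Step;           // plain-arithmetic fallback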
@@ -3736,6 +4881,49 @@ static bool FitsInto(unsigned Bits, bool Signed, Expr *E, Sema &SemaRef) {
return false;
}
+/// Build a preinits statement for the given declarations.
+static Stmt *buildPreInits(ASTContext &Context,
+ SmallVectorImpl<Decl *> &PreInits) {
+ if (!PreInits.empty()) {
+ return new (Context) DeclStmt(
+ DeclGroupRef::Create(Context, PreInits.begin(), PreInits.size()),
+ SourceLocation(), SourceLocation());
+ }
+ return nullptr;
+}
+
+/// Build a preinits statement for the given captured expressions.
+static Stmt *buildPreInits(ASTContext &Context,
+ llvm::MapVector<Expr *, DeclRefExpr *> &Captures) {
+ if (!Captures.empty()) {
+ SmallVector<Decl *, 16> PreInits;
+ for (auto &Pair : Captures)
+ PreInits.push_back(Pair.second->getDecl());
+ return buildPreInits(Context, PreInits);
+ }
+ return nullptr;
+}
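+// Schematically: for captured expressions e0 and e1 (hypothetical), the
+// statement built above is a single DeclStmt declaring both capture
+// temporaries, emitted once before the loop so that each captured
+// expression is evaluated exactly once.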
+
+/// Build a postupdate expression for the given list of postupdate expressions.
+static Expr *buildPostUpdate(Sema &S, ArrayRef<Expr *> PostUpdates) {
+ Expr *PostUpdate = nullptr;
+ if (!PostUpdates.empty()) {
+ for (auto *E : PostUpdates) {
+ Expr *ConvE = S.BuildCStyleCastExpr(
+ E->getExprLoc(),
+ S.Context.getTrivialTypeSourceInfo(S.Context.VoidTy),
+ E->getExprLoc(), E)
+ .get();
+ PostUpdate = PostUpdate
+ ? S.CreateBuiltinBinOp(ConvE->getExprLoc(), BO_Comma,
+ PostUpdate, ConvE)
+ .get()
+ : ConvE;
+ }
+ }
+ return PostUpdate;
+}
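+// Schematically: for postupdates {e1, e2} (hypothetical), the expression
+// built above is the comma chain '(void)e1, (void)e2', with each operand
+// cast to void to discard its value.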
+
/// \brief Called on a for stmt to check itself and nested loops (if any).
/// \return Returns 0 if one of the collapsed stmts is not a canonical for
/// loop, or the number of collapsed loops otherwise.
@@ -3743,7 +4931,7 @@ static unsigned
CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Expr *OrderedLoopCountExpr, Stmt *AStmt, Sema &SemaRef,
DSAStackTy &DSA,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA,
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA,
OMPLoopDirective::HelperExprs &Built) {
unsigned NestedLoopCount = 1;
if (CollapseLoopCountExpr) {
@@ -3769,6 +4957,7 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
}
// This is a helper routine for loop directives (e.g., 'for', 'simd',
// 'for simd', etc.).
+ llvm::MapVector<Expr *, DeclRefExpr *> Captures;
SmallVector<LoopIterationSpace, 4> IterSpaces;
IterSpaces.resize(NestedLoopCount);
Stmt *CurStmt = AStmt->IgnoreContainers(/* IgnoreCaptured */ true);
@@ -3776,7 +4965,7 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
if (CheckOpenMPIterationSpace(DKind, CurStmt, SemaRef, DSA, Cnt,
NestedLoopCount, CollapseLoopCountExpr,
OrderedLoopCountExpr, VarsWithImplicitDSA,
- IterSpaces[Cnt]))
+ IterSpaces[Cnt], Captures))
return 0;
// Move on to the next nested for loop, or to the loop body.
// OpenMP [2.8.1, simd construct, Restrictions]
@@ -3877,6 +5066,15 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
LastIteration32.get()->getType()->hasSignedIntegerRepresentation(),
LastIteration64.get(), SemaRef)))
LastIteration = LastIteration32;
+ QualType VType = LastIteration.get()->getType();
+ QualType RealVType = VType;
+ QualType StrideVType = VType;
+ if (isOpenMPTaskLoopDirective(DKind)) {
+ VType =
+ SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ StrideVType =
+ SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
+ }
if (!LastIteration.isUsable())
return 0;
@@ -3898,19 +5096,13 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
LastIteration.get()->isIntegerConstantExpr(Result, SemaRef.Context);
ExprResult CalcLastIteration;
if (!IsConstant) {
- SourceLocation SaveLoc;
- VarDecl *SaveVar =
- buildVarDecl(SemaRef, SaveLoc, LastIteration.get()->getType(),
- ".omp.last.iteration");
- ExprResult SaveRef = buildDeclRefExpr(
- SemaRef, SaveVar, LastIteration.get()->getType(), SaveLoc);
- CalcLastIteration = SemaRef.BuildBinOp(CurScope, SaveLoc, BO_Assign,
- SaveRef.get(), LastIteration.get());
+ ExprResult SaveRef =
+ tryBuildCapture(SemaRef, LastIteration.get(), Captures);
LastIteration = SaveRef;
// Prepare SaveRef + 1.
NumIterations = SemaRef.BuildBinOp(
- CurScope, SaveLoc, BO_Add, SaveRef.get(),
+ CurScope, SourceLocation(), BO_Add, SaveRef.get(),
SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get());
if (!NumIterations.isUsable())
return 0;
@@ -3918,9 +5110,8 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
- QualType VType = LastIteration.get()->getType();
// Build variables passed into the runtime, necessary for worksharing directives.
- ExprResult LB, UB, IL, ST, EUB;
+ ExprResult LB, UB, IL, ST, EUB, PrevLB, PrevUB;
if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind)) {
// Lower bound variable, initialized with zero.
@@ -3947,8 +5138,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);
// Stride variable returned by runtime (we initialize it to 1 by default).
- VarDecl *STDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
- ST = buildDeclRefExpr(SemaRef, STDecl, VType, InitLoc);
+ VarDecl *STDecl =
+ buildVarDecl(SemaRef, InitLoc, StrideVType, ".omp.stride");
+ ST = buildDeclRefExpr(SemaRef, STDecl, StrideVType, InitLoc);
SemaRef.AddInitializerToDecl(
STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(),
/*DirectInit*/ false, /*TypeMayContainAuto*/ false);
@@ -3962,14 +5154,39 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
EUB = SemaRef.BuildBinOp(CurScope, InitLoc, BO_Assign, UB.get(),
CondOp.get());
EUB = SemaRef.ActOnFinishFullExpr(EUB.get());
+
+ // If we have a combined directive that combines 'distribute', 'for' or
+ // 'simd' we need to be able to access the bounds of the schedule of the
+ // enclosing region. E.g. in 'distribute parallel for' the bounds obtained
+ // by scheduling 'distribute' have to be passed to the schedule of 'for'.
+ if (isOpenMPLoopBoundSharingDirective(DKind)) {
+ auto *CD = cast<CapturedStmt>(AStmt)->getCapturedDecl();
+
+ // We expect to have at least 2 more parameters than the 'parallel'
+ // directive does - the lower and upper bounds of the previous schedule.
+ assert(CD->getNumParams() >= 4 &&
+ "Unexpected number of parameters in loop combined directive");
+
+ // Set the proper type for the bounds given what we learned from the
+ // enclosed loops.
+ auto *PrevLBDecl = CD->getParam(/*PrevLB=*/2);
+ auto *PrevUBDecl = CD->getParam(/*PrevUB=*/3);
+
+ // Previous lower and upper bounds are obtained from the region
+ // parameters.
+ PrevLB =
+ buildDeclRefExpr(SemaRef, PrevLBDecl, PrevLBDecl->getType(), InitLoc);
+ PrevUB =
+ buildDeclRefExpr(SemaRef, PrevUBDecl, PrevUBDecl->getType(), InitLoc);
+ }
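+ // E.g. (hypothetical) in '#pragma omp distribute parallel for', the
+ // chunk [PrevLB, PrevUB] produced by the 'distribute' schedule is the
+ // range that the inner 'for' schedule then partitions among threads.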
}
// Build the iteration variable and its initialization before loop.
ExprResult IV;
ExprResult Init;
{
- VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
- IV = buildDeclRefExpr(SemaRef, IVDecl, VType, InitLoc);
+ VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv");
+ IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc);
Expr *RHS = (isOpenMPWorksharingDirective(DKind) ||
isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind))
@@ -4033,6 +5250,7 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Built.Inits.resize(NestedLoopCount);
Built.Updates.resize(NestedLoopCount);
Built.Finals.resize(NestedLoopCount);
+ SmallVector<Expr *, 4> LoopMultipliers;
{
ExprResult Div;
// Go from inner nested loop to outer.
@@ -4060,19 +5278,19 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
}
// Build update: IS.CounterVar(Private) = IS.Start + Iter * IS.Step
- auto *CounterVar = buildDeclRefExpr(
- SemaRef, cast<VarDecl>(cast<DeclRefExpr>(IS.CounterVar)->getDecl()),
- IS.CounterVar->getType(), IS.CounterVar->getExprLoc(),
- /*RefersToCapture=*/true);
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IS.CounterVar)->getDecl());
+ auto *CounterVar = buildDeclRefExpr(SemaRef, VD, IS.CounterVar->getType(),
+ IS.CounterVar->getExprLoc(),
+ /*RefersToCapture=*/true);
ExprResult Init = BuildCounterInit(SemaRef, CurScope, UpdLoc, CounterVar,
- IS.CounterInit);
+ IS.CounterInit, Captures);
if (!Init.isUsable()) {
HasErrors = true;
break;
}
- ExprResult Update =
- BuildCounterUpdate(SemaRef, CurScope, UpdLoc, CounterVar,
- IS.CounterInit, Iter, IS.CounterStep, IS.Subtract);
+ ExprResult Update = BuildCounterUpdate(
+ SemaRef, CurScope, UpdLoc, CounterVar, IS.CounterInit, Iter,
+ IS.CounterStep, IS.Subtract, &Captures);
if (!Update.isUsable()) {
HasErrors = true;
break;
@@ -4081,7 +5299,7 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
// Build final: IS.CounterVar = IS.Start + IS.NumIters * IS.Step
ExprResult Final = BuildCounterUpdate(
SemaRef, CurScope, UpdLoc, CounterVar, IS.CounterInit,
- IS.NumIterations, IS.CounterStep, IS.Subtract);
+ IS.NumIterations, IS.CounterStep, IS.Subtract, &Captures);
if (!Final.isUsable()) {
HasErrors = true;
break;
@@ -4097,11 +5315,12 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
// Add parentheses (for debugging purposes only).
if (Div.isUsable())
- Div = SemaRef.ActOnParenExpr(UpdLoc, UpdLoc, Div.get());
+ Div = tryBuildCapture(SemaRef, Div.get(), Captures);
if (!Div.isUsable()) {
HasErrors = true;
break;
}
+ LoopMultipliers.push_back(Div.get());
}
if (!Update.isUsable() || !Final.isUsable()) {
HasErrors = true;
@@ -4126,6 +5345,7 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Built.CalcLastIteration =
SemaRef.ActOnFinishFullExpr(CalcLastIteration.get()).get();
Built.PreCond = PreCond.get();
+ Built.PreInits = buildPreInits(C, Captures);
Built.Cond = Cond.get();
Built.Init = Init.get();
Built.Inc = Inc.get();
@@ -4136,6 +5356,56 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Built.EUB = EUB.get();
Built.NLB = NextLB.get();
Built.NUB = NextUB.get();
+ Built.PrevLB = PrevLB.get();
+ Built.PrevUB = PrevUB.get();
+
+ Expr *CounterVal = SemaRef.DefaultLvalueConversion(IV.get()).get();
+ // Fill data for doacross depend clauses.
+ for (auto Pair : DSA.getDoacrossDependClauses()) {
+ if (Pair.first->getDependencyKind() == OMPC_DEPEND_source)
+ Pair.first->setCounterValue(CounterVal);
+ else {
+ if (NestedLoopCount != Pair.second.size() ||
+ NestedLoopCount != LoopMultipliers.size() + 1) {
+ // Erroneous case: the sink vector does not match the loop nest, so
+ // fall back to the plain counter value.
+ Pair.first->setCounterValue(CounterVal);
+ continue;
+ }
+ assert(Pair.first->getDependencyKind() == OMPC_DEPEND_sink);
+ auto I = Pair.second.rbegin();
+ auto IS = IterSpaces.rbegin();
+ auto ILM = LoopMultipliers.rbegin();
+ Expr *UpCounterVal = CounterVal;
+ Expr *Multiplier = nullptr;
+ for (int Cnt = NestedLoopCount - 1; Cnt >= 0; --Cnt) {
+ if (I->first) {
+ assert(IS->CounterStep);
+ Expr *NormalizedOffset =
+ SemaRef
+ .BuildBinOp(CurScope, I->first->getExprLoc(), BO_Div,
+ I->first, IS->CounterStep)
+ .get();
+ if (Multiplier) {
+ NormalizedOffset =
+ SemaRef
+ .BuildBinOp(CurScope, I->first->getExprLoc(), BO_Mul,
+ NormalizedOffset, Multiplier)
+ .get();
+ }
+ assert(I->second == OO_Plus || I->second == OO_Minus);
+ BinaryOperatorKind BOK = (I->second == OO_Plus) ? BO_Add : BO_Sub;
+ UpCounterVal =
+ SemaRef.BuildBinOp(CurScope, I->first->getExprLoc(), BOK,
+ UpCounterVal, NormalizedOffset).get();
+ }
+ Multiplier = *ILM;
+ ++I;
+ ++IS;
+ ++ILM;
+ }
+ Pair.first->setCounterValue(UpCounterVal);
+ }
+ }
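+ // Worked example (hypothetical): for two nested loops of sizes N1 x N2
+ // with unit steps, 'depend(sink: i-1, j+2)' folds above into the
+ // flattened value IV - 1*N2 + 2; each offset is divided by its loop's
+ // step and scaled by the trip counts of the loops nested inside it.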
return NestedLoopCount;
}
@@ -4156,26 +5426,44 @@ static Expr *getOrderedNumberExpr(ArrayRef<OMPClause *> Clauses) {
return nullptr;
}
-static bool checkSimdlenSafelenValues(Sema &S, const Expr *Simdlen,
- const Expr *Safelen) {
- llvm::APSInt SimdlenRes, SafelenRes;
- if (Simdlen->isValueDependent() || Simdlen->isTypeDependent() ||
- Simdlen->isInstantiationDependent() ||
- Simdlen->containsUnexpandedParameterPack())
- return false;
- if (Safelen->isValueDependent() || Safelen->isTypeDependent() ||
- Safelen->isInstantiationDependent() ||
- Safelen->containsUnexpandedParameterPack())
- return false;
- Simdlen->EvaluateAsInt(SimdlenRes, S.Context);
- Safelen->EvaluateAsInt(SafelenRes, S.Context);
- // OpenMP 4.1 [2.8.1, simd Construct, Restrictions]
- // If both simdlen and safelen clauses are specified, the value of the simdlen
- // parameter must be less than or equal to the value of the safelen parameter.
- if (SimdlenRes > SafelenRes) {
- S.Diag(Simdlen->getExprLoc(), diag::err_omp_wrong_simdlen_safelen_values)
- << Simdlen->getSourceRange() << Safelen->getSourceRange();
- return true;
+static bool checkSimdlenSafelenSpecified(Sema &S,
+ ArrayRef<OMPClause *> Clauses) {
+ OMPSafelenClause *Safelen = nullptr;
+ OMPSimdlenClause *Simdlen = nullptr;
+
+ for (auto *Clause : Clauses) {
+ if (Clause->getClauseKind() == OMPC_safelen)
+ Safelen = cast<OMPSafelenClause>(Clause);
+ else if (Clause->getClauseKind() == OMPC_simdlen)
+ Simdlen = cast<OMPSimdlenClause>(Clause);
+ if (Safelen && Simdlen)
+ break;
+ }
+
+ if (Simdlen && Safelen) {
+ llvm::APSInt SimdlenRes, SafelenRes;
+ auto SimdlenLength = Simdlen->getSimdlen();
+ auto SafelenLength = Safelen->getSafelen();
+ if (SimdlenLength->isValueDependent() || SimdlenLength->isTypeDependent() ||
+ SimdlenLength->isInstantiationDependent() ||
+ SimdlenLength->containsUnexpandedParameterPack())
+ return false;
+ if (SafelenLength->isValueDependent() || SafelenLength->isTypeDependent() ||
+ SafelenLength->isInstantiationDependent() ||
+ SafelenLength->containsUnexpandedParameterPack())
+ return false;
+ SimdlenLength->EvaluateAsInt(SimdlenRes, S.Context);
+ SafelenLength->EvaluateAsInt(SafelenRes, S.Context);
+ // OpenMP 4.5 [2.8.1, simd Construct, Restrictions]
+ // If both simdlen and safelen clauses are specified, the value of the
+ // simdlen parameter must be less than or equal to the value of the safelen
+ // parameter.
+ if (SimdlenRes > SafelenRes) {
+ S.Diag(SimdlenLength->getExprLoc(),
+ diag::err_omp_wrong_simdlen_safelen_values)
+ << SimdlenLength->getSourceRange() << SafelenLength->getSourceRange();
+ return true;
+ }
}
return false;
}
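// E.g. (hypothetical user code) the following is diagnosed by the check
// above, since simdlen must not exceed safelen:
//   #pragma omp simd simdlen(8) safelen(4)
//   for (int i = 0; i < n; ++i) {}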
@@ -4183,7 +5471,7 @@ static bool checkSimdlenSafelenValues(Sema &S, const Expr *Simdlen,
StmtResult Sema::ActOnOpenMPSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -4205,27 +5493,13 @@ StmtResult Sema::ActOnOpenMPSimdDirective(
for (auto C : Clauses) {
if (auto LC = dyn_cast<OMPLinearClause>(C))
if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
- B.NumIterations, *this, CurScope))
+ B.NumIterations, *this, CurScope,
+ DSAStack))
return StmtError();
}
}
- // OpenMP 4.1 [2.8.1, simd Construct, Restrictions]
- // If both simdlen and safelen clauses are specified, the value of the simdlen
- // parameter must be less than or equal to the value of the safelen parameter.
- OMPSafelenClause *Safelen = nullptr;
- OMPSimdlenClause *Simdlen = nullptr;
- for (auto *Clause : Clauses) {
- if (Clause->getClauseKind() == OMPC_safelen)
- Safelen = cast<OMPSafelenClause>(Clause);
- else if (Clause->getClauseKind() == OMPC_simdlen)
- Simdlen = cast<OMPSimdlenClause>(Clause);
- if (Safelen && Simdlen)
- break;
- }
- if (Simdlen && Safelen &&
- checkSimdlenSafelenValues(*this, Simdlen->getSimdlen(),
- Safelen->getSafelen()))
+ if (checkSimdlenSafelenSpecified(*this, Clauses))
return StmtError();
getCurFunction()->setHasBranchProtectedScope();
@@ -4236,7 +5510,7 @@ StmtResult Sema::ActOnOpenMPSimdDirective(
StmtResult Sema::ActOnOpenMPForDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -4258,7 +5532,8 @@ StmtResult Sema::ActOnOpenMPForDirective(
for (auto C : Clauses) {
if (auto LC = dyn_cast<OMPLinearClause>(C))
if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
- B.NumIterations, *this, CurScope))
+ B.NumIterations, *this, CurScope,
+ DSAStack))
return StmtError();
}
}
@@ -4271,7 +5546,7 @@ StmtResult Sema::ActOnOpenMPForDirective(
StmtResult Sema::ActOnOpenMPForSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -4294,27 +5569,13 @@ StmtResult Sema::ActOnOpenMPForSimdDirective(
for (auto C : Clauses) {
if (auto LC = dyn_cast<OMPLinearClause>(C))
if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
- B.NumIterations, *this, CurScope))
+ B.NumIterations, *this, CurScope,
+ DSAStack))
return StmtError();
}
}
- // OpenMP 4.1 [2.8.1, simd Construct, Restrictions]
- // If both simdlen and safelen clauses are specified, the value of the simdlen
- // parameter must be less than or equal to the value of the safelen parameter.
- OMPSafelenClause *Safelen = nullptr;
- OMPSimdlenClause *Simdlen = nullptr;
- for (auto *Clause : Clauses) {
- if (Clause->getClauseKind() == OMPC_safelen)
- Safelen = cast<OMPSafelenClause>(Clause);
- else if (Clause->getClauseKind() == OMPC_simdlen)
- Simdlen = cast<OMPSimdlenClause>(Clause);
- if (Safelen && Simdlen)
- break;
- }
- if (Simdlen && Safelen &&
- checkSimdlenSafelenValues(*this, Simdlen->getSimdlen(),
- Safelen->getSafelen()))
+ if (checkSimdlenSafelenSpecified(*this, Clauses))
return StmtError();
getCurFunction()->setHasBranchProtectedScope();
@@ -4480,7 +5741,7 @@ StmtResult Sema::ActOnOpenMPCriticalDirective(
StmtResult Sema::ActOnOpenMPParallelForDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -4510,7 +5771,8 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
for (auto C : Clauses) {
if (auto LC = dyn_cast<OMPLinearClause>(C))
if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
- B.NumIterations, *this, CurScope))
+ B.NumIterations, *this, CurScope,
+ DSAStack))
return StmtError();
}
}
@@ -4524,7 +5786,7 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -4551,27 +5813,13 @@ StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
for (auto C : Clauses) {
if (auto LC = dyn_cast<OMPLinearClause>(C))
if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
- B.NumIterations, *this, CurScope))
+ B.NumIterations, *this, CurScope,
+ DSAStack))
return StmtError();
}
}
- // OpenMP 4.1 [2.8.1, simd Construct, Restrictions]
- // If both simdlen and safelen clauses are specified, the value of the simdlen
- // parameter must be less than or equal to the value of the safelen parameter.
- OMPSafelenClause *Safelen = nullptr;
- OMPSimdlenClause *Simdlen = nullptr;
- for (auto *Clause : Clauses) {
- if (Clause->getClauseKind() == OMPC_safelen)
- Safelen = cast<OMPSafelenClause>(Clause);
- else if (Clause->getClauseKind() == OMPC_simdlen)
- Simdlen = cast<OMPSimdlenClause>(Clause);
- if (Safelen && Simdlen)
- break;
- }
- if (Simdlen && Safelen &&
- checkSimdlenSafelenValues(*this, Simdlen->getSimdlen(),
- Safelen->getSafelen()))
+ if (checkSimdlenSafelenSpecified(*this, Clauses))
return StmtError();
getCurFunction()->setHasBranchProtectedScope();
@@ -5453,6 +6701,9 @@ StmtResult Sema::ActOnOpenMPTargetDirective(ArrayRef<OMPClause *> Clauses,
}
assert(I != CS->body_end() && "Not found statement");
S = *I;
+ } else {
+ auto *OED = dyn_cast<OMPExecutableDirective>(S);
+ OMPTeamsFound = OED && isOpenMPTeamsDirective(OED->getDirectiveKind());
}
if (!OMPTeamsFound) {
Diag(StartLoc, diag::err_omp_target_contains_not_only_teams);
@@ -5469,6 +6720,84 @@ StmtResult Sema::ActOnOpenMPTargetDirective(ArrayRef<OMPClause *> Clauses,
return OMPTargetDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt);
}
+StmtResult
+Sema::ActOnOpenMPTargetParallelDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ if (!AStmt)
+ return StmtError();
+
+ CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ getCurFunction()->setHasBranchProtectedScope();
+
+ return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses,
+ AStmt);
+}
+
+StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
+ if (!AStmt)
+ return StmtError();
+
+ CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ OMPLoopDirective::HelperExprs B;
+ // If a 'collapse' or 'ordered' clause specifies a number of loops, it
+ // determines how many nested loops are associated with the directive.
+ unsigned NestedLoopCount =
+ CheckOpenMPLoop(OMPD_target_parallel_for, getCollapseNumberExpr(Clauses),
+ getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+ VarsWithImplicitDSA, B);
+ if (NestedLoopCount == 0)
+ return StmtError();
+
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp target parallel for loop exprs were not built");
+
+ if (!CurContext->isDependentContext()) {
+ // Finalize the clauses that need pre-built expressions for CodeGen.
+ for (auto C : Clauses) {
+ if (auto LC = dyn_cast<OMPLinearClause>(C))
+ if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+ B.NumIterations, *this, CurScope,
+ DSAStack))
+ return StmtError();
+ }
+ }
+
+ getCurFunction()->setHasBranchProtectedScope();
+ return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc,
+ NestedLoopCount, Clauses, AStmt,
+ B, DSAStack->isCancelRegion());
+}
+
+/// \brief Check for existence of a map clause in the list of clauses.
+static bool HasMapClause(ArrayRef<OMPClause *> Clauses) {
+ for (const OMPClause *C : Clauses)
+ if (C != nullptr && C->getClauseKind() == OMPC_map)
+ return true;
+
+ return false;
+}
+
StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef<OMPClause *> Clauses,
Stmt *AStmt,
SourceLocation StartLoc,
@@ -5478,12 +6807,66 @@ StmtResult Sema::ActOnOpenMPTargetDataDirective(ArrayRef<OMPClause *> Clauses,
assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
+ // OpenMP [2.10.1, Restrictions, p. 97]
+ // At least one map clause must appear on the directive.
+ if (!HasMapClause(Clauses)) {
+ Diag(StartLoc, diag::err_omp_no_map_for_directive)
+ << getOpenMPDirectiveName(OMPD_target_data);
+ return StmtError();
+ }
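+ // E.g. (hypothetical user code) a minimal well-formed directive:
+ //   #pragma omp target data map(tofrom: a)
+ //   { /* structured block */ }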
+
getCurFunction()->setHasBranchProtectedScope();
return OMPTargetDataDirective::Create(Context, StartLoc, EndLoc, Clauses,
AStmt);
}
+StmtResult
+Sema::ActOnOpenMPTargetEnterDataDirective(ArrayRef<OMPClause *> Clauses,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ // OpenMP [2.10.2, Restrictions, p. 99]
+ // At least one map clause must appear on the directive.
+ if (!HasMapClause(Clauses)) {
+ Diag(StartLoc, diag::err_omp_no_map_for_directive)
+ << getOpenMPDirectiveName(OMPD_target_enter_data);
+ return StmtError();
+ }
+
+ return OMPTargetEnterDataDirective::Create(Context, StartLoc, EndLoc,
+ Clauses);
+}
+
+StmtResult
+Sema::ActOnOpenMPTargetExitDataDirective(ArrayRef<OMPClause *> Clauses,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ // OpenMP [2.10.3, Restrictions, p. 102]
+ // At least one map clause must appear on the directive.
+ if (!HasMapClause(Clauses)) {
+ Diag(StartLoc, diag::err_omp_no_map_for_directive)
+ << getOpenMPDirectiveName(OMPD_target_exit_data);
+ return StmtError();
+ }
+
+ return OMPTargetExitDataDirective::Create(Context, StartLoc, EndLoc, Clauses);
+}
+
+StmtResult Sema::ActOnOpenMPTargetUpdateDirective(ArrayRef<OMPClause *> Clauses,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc) {
+ bool SeenMotionClause = false;
+ for (auto *C : Clauses) {
+ if (C->getClauseKind() == OMPC_to || C->getClauseKind() == OMPC_from)
+ SeenMotionClause = true;
+ }
+ if (!SeenMotionClause) {
+ Diag(StartLoc, diag::err_omp_at_least_one_motion_clause_required);
+ return StmtError();
+ }
+ return OMPTargetUpdateDirective::Create(Context, StartLoc, EndLoc, Clauses);
+}
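+// E.g. (hypothetical user code) at least one 'to' or 'from' motion clause
+// is required, so the following is accepted while a bare
+// '#pragma omp target update' is diagnosed:
+//   #pragma omp target update to(a) from(b)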
+
StmtResult Sema::ActOnOpenMPTeamsDirective(ArrayRef<OMPClause *> Clauses,
Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc) {
@@ -5575,7 +6958,7 @@ static bool checkGrainsizeNumTasksClauses(Sema &S,
StmtResult Sema::ActOnOpenMPTaskLoopDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -5607,7 +6990,7 @@ StmtResult Sema::ActOnOpenMPTaskLoopDirective(
StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -5625,6 +7008,17 @@ StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective(
assert((CurContext->isDependentContext() || B.builtAll()) &&
"omp for loop exprs were not built");
+ if (!CurContext->isDependentContext()) {
+ // Finalize the clauses that need pre-built expressions for CodeGen.
+ for (auto C : Clauses) {
+ if (auto LC = dyn_cast<OMPLinearClause>(C))
+ if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+ B.NumIterations, *this, CurScope,
+ DSAStack))
+ return StmtError();
+ }
+ }
+
// OpenMP, [2.9.2 taskloop Construct, Restrictions]
// The grainsize clause and num_tasks clause are mutually exclusive and may
// not appear on the same taskloop directive.
@@ -5639,7 +7033,7 @@ StmtResult Sema::ActOnOpenMPTaskLoopSimdDirective(
StmtResult Sema::ActOnOpenMPDistributeDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
- llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
if (!AStmt)
return StmtError();
@@ -5662,6 +7056,157 @@ StmtResult Sema::ActOnOpenMPDistributeDirective(
NestedLoopCount, Clauses, AStmt, B);
}
+StmtResult Sema::ActOnOpenMPDistributeParallelForDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
+ if (!AStmt)
+ return StmtError();
+
+ CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ OMPLoopDirective::HelperExprs B;
+ // If a 'collapse' clause specifies a number of loops, it determines how
+ // many nested loops are associated with the directive.
+ unsigned NestedLoopCount = CheckOpenMPLoop(
+ OMPD_distribute_parallel_for, getCollapseNumberExpr(Clauses),
+ nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+ VarsWithImplicitDSA, B);
+ if (NestedLoopCount == 0)
+ return StmtError();
+
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp for loop exprs were not built");
+
+ getCurFunction()->setHasBranchProtectedScope();
+ return OMPDistributeParallelForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+}
+
+StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
+ if (!AStmt)
+ return StmtError();
+
+ CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ OMPLoopDirective::HelperExprs B;
+ // If a 'collapse' clause specifies a number of loops, it determines how
+ // many nested loops are associated with the directive.
+ unsigned NestedLoopCount = CheckOpenMPLoop(
+ OMPD_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses),
+ nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+ VarsWithImplicitDSA, B);
+ if (NestedLoopCount == 0)
+ return StmtError();
+
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp for loop exprs were not built");
+
+ if (checkSimdlenSafelenSpecified(*this, Clauses))
+ return StmtError();
+
+ getCurFunction()->setHasBranchProtectedScope();
+ return OMPDistributeParallelForSimdDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+}
+
+StmtResult Sema::ActOnOpenMPDistributeSimdDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
+ if (!AStmt)
+ return StmtError();
+
+ CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ OMPLoopDirective::HelperExprs B;
+ // If a 'collapse' clause specifies a number of loops, it determines how
+ // many nested loops are associated with the directive.
+ unsigned NestedLoopCount =
+ CheckOpenMPLoop(OMPD_distribute_simd, getCollapseNumberExpr(Clauses),
+ nullptr /*ordered not a clause on distribute*/, AStmt,
+ *this, *DSAStack, VarsWithImplicitDSA, B);
+ if (NestedLoopCount == 0)
+ return StmtError();
+
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp for loop exprs were not built");
+
+ if (checkSimdlenSafelenSpecified(*this, Clauses))
+ return StmtError();
+
+ getCurFunction()->setHasBranchProtectedScope();
+ return OMPDistributeSimdDirective::Create(Context, StartLoc, EndLoc,
+ NestedLoopCount, Clauses, AStmt, B);
+}
+
+StmtResult Sema::ActOnOpenMPTargetParallelForSimdDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) {
+ if (!AStmt)
+ return StmtError();
+
+ CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ OMPLoopDirective::HelperExprs B;
+ // If a 'collapse' or 'ordered' clause specifies a number of loops, it
+ // determines how many nested loops are associated with the directive.
+ unsigned NestedLoopCount = CheckOpenMPLoop(
+ OMPD_target_parallel_for_simd, getCollapseNumberExpr(Clauses),
+ getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+ VarsWithImplicitDSA, B);
+ if (NestedLoopCount == 0)
+ return StmtError();
+
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp target parallel for simd loop exprs were not built");
+
+ if (!CurContext->isDependentContext()) {
+ // Finalize the clauses that need pre-built expressions for CodeGen.
+ for (auto C : Clauses) {
+ if (auto LC = dyn_cast<OMPLinearClause>(C))
+ if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+ B.NumIterations, *this, CurScope,
+ DSAStack))
+ return StmtError();
+ }
+ }
+ if (checkSimdlenSafelenSpecified(*this, Clauses))
+ return StmtError();
+
+ getCurFunction()->setHasBranchProtectedScope();
+ return OMPTargetParallelForSimdDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+}
+
OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
SourceLocation StartLoc,
SourceLocation LParenLoc,
@@ -5735,7 +7280,14 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
case OMPC_simd:
case OMPC_map:
case OMPC_nogroup:
+ case OMPC_dist_schedule:
+ case OMPC_defaultmap:
case OMPC_unknown:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
llvm_unreachable("Clause is not allowed.");
}
return Res;
@@ -5751,12 +7303,11 @@ OMPClause *Sema::ActOnOpenMPIfClause(OpenMPDirectiveKind NameModifier,
if (!Condition->isValueDependent() && !Condition->isTypeDependent() &&
!Condition->isInstantiationDependent() &&
!Condition->containsUnexpandedParameterPack()) {
- ExprResult Val = ActOnBooleanCondition(DSAStack->getCurScope(),
- Condition->getExprLoc(), Condition);
+ ExprResult Val = CheckBooleanCondition(StartLoc, Condition);
if (Val.isInvalid())
return nullptr;
- ValExpr = Val.get();
+ ValExpr = MakeFullExpr(Val.get()).get();
}
return new (Context) OMPIfClause(NameModifier, ValExpr, StartLoc, LParenLoc,
@@ -5771,12 +7322,11 @@ OMPClause *Sema::ActOnOpenMPFinalClause(Expr *Condition,
if (!Condition->isValueDependent() && !Condition->isTypeDependent() &&
!Condition->isInstantiationDependent() &&
!Condition->containsUnexpandedParameterPack()) {
- ExprResult Val = ActOnBooleanCondition(DSAStack->getCurScope(),
- Condition->getExprLoc(), Condition);
+ ExprResult Val = CheckBooleanCondition(StartLoc, Condition);
if (Val.isInvalid())
return nullptr;
- ValExpr = Val.get();
+ ValExpr = MakeFullExpr(Val.get()).get();
}
return new (Context) OMPFinalClause(ValExpr, StartLoc, LParenLoc, EndLoc);
@@ -6018,7 +7568,14 @@ OMPClause *Sema::ActOnOpenMPSimpleClause(
case OMPC_nogroup:
case OMPC_num_tasks:
case OMPC_hint:
+ case OMPC_dist_schedule:
+ case OMPC_defaultmap:
case OMPC_unknown:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
llvm_unreachable("Clause is not allowed.");
}
return Res;
@@ -6116,6 +7673,19 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
Expr, StartLoc, LParenLoc, ArgumentLoc.back(),
DelimLoc, EndLoc);
break;
+ case OMPC_dist_schedule:
+ Res = ActOnOpenMPDistScheduleClause(
+ static_cast<OpenMPDistScheduleClauseKind>(Argument.back()), Expr,
+ StartLoc, LParenLoc, ArgumentLoc.back(), DelimLoc, EndLoc);
+ break;
+ case OMPC_defaultmap:
+ enum { Modifier, DefaultmapKind };
+ Res = ActOnOpenMPDefaultmapClause(
+ static_cast<OpenMPDefaultmapClauseModifier>(Argument[Modifier]),
+ static_cast<OpenMPDefaultmapClauseKind>(Argument[DefaultmapKind]),
+ StartLoc, LParenLoc, ArgumentLoc[Modifier],
+ ArgumentLoc[DefaultmapKind], EndLoc);
+ break;
case OMPC_final:
case OMPC_num_threads:
case OMPC_safelen:
@@ -6156,6 +7726,11 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
case OMPC_num_tasks:
case OMPC_hint:
case OMPC_unknown:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
llvm_unreachable("Clause is not allowed.");
}
return Res;
@@ -6230,7 +7805,7 @@ OMPClause *Sema::ActOnOpenMPScheduleClause(
return nullptr;
}
Expr *ValExpr = ChunkSize;
- Expr *HelperValExpr = nullptr;
+ Stmt *HelperValStmt = nullptr;
if (ChunkSize) {
if (!ChunkSize->isValueDependent() && !ChunkSize->isTypeDependent() &&
!ChunkSize->isInstantiationDependent() &&
@@ -6253,20 +7828,18 @@ OMPClause *Sema::ActOnOpenMPScheduleClause(
<< "schedule" << 1 << ChunkSize->getSourceRange();
return nullptr;
}
- } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) {
- auto *ImpVar = buildVarDecl(*this, ChunkSize->getExprLoc(),
- ChunkSize->getType(), ".chunk.");
- auto *ImpVarRef = buildDeclRefExpr(*this, ImpVar, ChunkSize->getType(),
- ChunkSize->getExprLoc(),
- /*RefersToCapture=*/true);
- HelperValExpr = ImpVarRef;
+ } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective()) &&
+ !CurContext->isDependentContext()) {
+ llvm::MapVector<Expr *, DeclRefExpr *> Captures;
+ ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
+ HelperValStmt = buildPreInits(Context, Captures);
}
}
}
return new (Context)
OMPScheduleClause(StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc, Kind,
- ValExpr, HelperValExpr, M1, M1Loc, M2, M2Loc);
+ ValExpr, HelperValStmt, M1, M1Loc, M2, M2Loc);
}
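// E.g. (hypothetical) for 'schedule(dynamic, N*2)' inside a parallel
// region, N*2 is captured into a temporary whose declaration becomes
// HelperValStmt, so CodeGen evaluates the chunk size once on entry to
// the region.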
OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
@@ -6339,7 +7912,14 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
case OMPC_grainsize:
case OMPC_num_tasks:
case OMPC_hint:
+ case OMPC_dist_schedule:
+ case OMPC_defaultmap:
case OMPC_unknown:
+ case OMPC_uniform:
+ case OMPC_to:
+ case OMPC_from:
+ case OMPC_use_device_ptr:
+ case OMPC_is_device_ptr:
llvm_unreachable("Clause is not allowed.");
}
return Res;
@@ -6406,8 +7986,9 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc,
SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec,
const DeclarationNameInfo &ReductionId, OpenMPDependClauseKind DepKind,
- OpenMPLinearClauseKind LinKind, OpenMPMapClauseKind MapTypeModifier,
- OpenMPMapClauseKind MapType, SourceLocation DepLinMapLoc) {
+ OpenMPLinearClauseKind LinKind, OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
+ SourceLocation DepLinMapLoc) {
OMPClause *Res = nullptr;
switch (Kind) {
case OMPC_private:
@@ -6448,8 +8029,21 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
StartLoc, LParenLoc, EndLoc);
break;
case OMPC_map:
- Res = ActOnOpenMPMapClause(MapTypeModifier, MapType, DepLinMapLoc, ColonLoc,
- VarList, StartLoc, LParenLoc, EndLoc);
+ Res = ActOnOpenMPMapClause(MapTypeModifier, MapType, IsMapTypeImplicit,
+ DepLinMapLoc, ColonLoc, VarList, StartLoc,
+ LParenLoc, EndLoc);
+ break;
+ case OMPC_to:
+ Res = ActOnOpenMPToClause(VarList, StartLoc, LParenLoc, EndLoc);
+ break;
+ case OMPC_from:
+ Res = ActOnOpenMPFromClause(VarList, StartLoc, LParenLoc, EndLoc);
+ break;
+ case OMPC_use_device_ptr:
+ Res = ActOnOpenMPUseDevicePtrClause(VarList, StartLoc, LParenLoc, EndLoc);
+ break;
+ case OMPC_is_device_ptr:
+ Res = ActOnOpenMPIsDevicePtrClause(VarList, StartLoc, LParenLoc, EndLoc);
break;
case OMPC_if:
case OMPC_final:
@@ -6480,12 +8074,93 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
case OMPC_nogroup:
case OMPC_num_tasks:
case OMPC_hint:
+ case OMPC_dist_schedule:
+ case OMPC_defaultmap:
case OMPC_unknown:
+ case OMPC_uniform:
llvm_unreachable("Clause is not allowed.");
}
return Res;
}
+ExprResult Sema::getOpenMPCapturedExpr(VarDecl *Capture, ExprValueKind VK,
+ ExprObjectKind OK, SourceLocation Loc) {
+ ExprResult Res = BuildDeclRefExpr(
+ Capture, Capture->getType().getNonReferenceType(), VK_LValue, Loc);
+ if (!Res.isUsable())
+ return ExprError();
+ if (OK == OK_Ordinary && !getLangOpts().CPlusPlus) {
+ Res = CreateBuiltinUnaryOp(Loc, UO_Deref, Res.get());
+ if (!Res.isUsable())
+ return ExprError();
+ }
+ if (VK != VK_LValue && Res.get()->isGLValue()) {
+ Res = DefaultLvalueConversion(Res.get());
+ if (!Res.isUsable())
+ return ExprError();
+ }
+ return Res;
+}
+
+static std::pair<ValueDecl *, bool>
+getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc,
+ SourceRange &ERange, bool AllowArraySection = false) {
+ if (RefExpr->isTypeDependent() || RefExpr->isValueDependent() ||
+ RefExpr->containsUnexpandedParameterPack())
+ return std::make_pair(nullptr, true);
+
+ // OpenMP [3.1, C/C++]
+ // A list item is a variable name.
+ // OpenMP [2.9.3.3, Restrictions, p.1]
+ // A variable that is part of another variable (as an array or
+ // structure element) cannot appear in a private clause.
+ RefExpr = RefExpr->IgnoreParens();
+ enum {
+ NoArrayExpr = -1,
+ ArraySubscript = 0,
+ OMPArraySection = 1
+ } IsArrayExpr = NoArrayExpr;
+ if (AllowArraySection) {
+ if (auto *ASE = dyn_cast_or_null<ArraySubscriptExpr>(RefExpr)) {
+ auto *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base;
+ IsArrayExpr = ArraySubscript;
+ } else if (auto *OASE = dyn_cast_or_null<OMPArraySectionExpr>(RefExpr)) {
+ auto *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = TempOASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base;
+ IsArrayExpr = OMPArraySection;
+ }
+ }
+ ELoc = RefExpr->getExprLoc();
+ ERange = RefExpr->getSourceRange();
+ RefExpr = RefExpr->IgnoreParenImpCasts();
+ auto *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
+ auto *ME = dyn_cast_or_null<MemberExpr>(RefExpr);
+ if ((!DE || !isa<VarDecl>(DE->getDecl())) &&
+ (S.getCurrentThisType().isNull() || !ME ||
+ !isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts()) ||
+ !isa<FieldDecl>(ME->getMemberDecl()))) {
+ if (IsArrayExpr != NoArrayExpr)
+ S.Diag(ELoc, diag::err_omp_expected_base_var_name) << IsArrayExpr
+ << ERange;
+ else {
+ S.Diag(ELoc,
+ AllowArraySection
+ ? diag::err_omp_expected_var_name_member_expr_or_array_item
+ : diag::err_omp_expected_var_name_member_expr)
+ << (S.getCurrentThisType().isNull() ? 0 : 1) << ERange;
+ }
+ return std::make_pair(nullptr, false);
+ }
+ return std::make_pair(DE ? DE->getDecl() : ME->getMemberDecl(), false);
+}
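+// For illustration (hypothetical clauses): with AllowArraySection=true,
+// 'reduction(+: a[1])' resolves to the base declaration of 'a';
+// 'private(s.x)' is rejected, while a bare field name 'x' inside a member
+// function (an implicit 'this->x' member expression) is accepted.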
+
OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
@@ -6494,42 +8169,27 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
SmallVector<Expr *, 8> PrivateCopies;
for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP private clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
PrivateCopies.push_back(nullptr);
- continue;
}
-
- SourceLocation ELoc = RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- // OpenMP [2.9.3.3, Restrictions, p.1]
- // A variable that is part of another variable (as an array or
- // structure element) cannot appear in a private clause.
- DeclRefExpr *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
- Decl *D = DE->getDecl();
- VarDecl *VD = cast<VarDecl>(D);
- QualType Type = VD->getType();
- if (Type->isDependentType() || Type->isInstantiationDependentType()) {
- // It will be analyzed later.
- Vars.push_back(DE);
- PrivateCopies.push_back(nullptr);
- continue;
- }
+ QualType Type = D->getType();
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.9.3.3, Restrictions, C/C++, p.3]
// A variable that appears in a private clause must not have an incomplete
// type or a reference type.
- if (RequireCompleteType(ELoc, Type,
- diag::err_omp_private_incomplete_type)) {
+ if (RequireCompleteType(ELoc, Type, diag::err_omp_private_incomplete_type))
continue;
- }
Type = Type.getNonReferenceType();
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
@@ -6539,28 +8199,45 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
// listed below. For these exceptions only, listing a predetermined
// variable in a data-sharing attribute clause is allowed and overrides
// the variable's predetermined data-sharing attributes.
- DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
+ DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private) {
Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_private);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
// Variably modified types are not supported for tasks.
if (!Type->isAnyPointerType() && Type->isVariablyModifiedType() &&
- DSAStack->getCurrentDirective() == OMPD_task) {
+ isOpenMPTaskingDirective(DSAStack->getCurrentDirective())) {
Diag(ELoc, diag::err_omp_variably_modified_type_not_supported)
<< getOpenMPClauseName(OMPC_private) << Type
<< getOpenMPDirectiveName(DSAStack->getCurrentDirective());
bool IsDecl =
+ !VD ||
VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
+ Diag(D->getLocation(),
IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
+ << D;
continue;
}
+ // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
+ // A list item cannot appear in both a map clause and a data-sharing
+ // attribute clause on the same construct.
+ if (DSAStack->getCurrentDirective() == OMPD_target) {
+ if (DSAStack->checkMappableExprComponentListsForDecl(
+ VD, /* CurrentRegionOnly = */ true,
+ [&](OMPClauseMappableExprCommon::MappableExprComponentListRef)
+ -> bool { return true; })) {
+ Diag(ELoc, diag::err_omp_variable_in_map_and_dsa)
+ << getOpenMPClauseName(OMPC_private)
+ << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
+ continue;
+ }
+ }
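+ // E.g. '#pragma omp target map(x) private(x)' is rejected here with
+ // err_omp_variable_in_map_and_dsa (illustrative only).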
+
// OpenMP [2.9.3.3, Restrictions, C/C++, p.1]
// A variable of class type (or array thereof) that appears in a private
// clause requires an accessible, unambiguous default constructor for the
@@ -6571,16 +8248,21 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
// IdResolver, so the code in the OpenMP region uses the original variable
// for proper diagnostics.
Type = Type.getUnqualifiedType();
- auto VDPrivate = buildVarDecl(*this, DE->getExprLoc(), Type, VD->getName(),
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
+ auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(),
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
ActOnUninitializedDecl(VDPrivate, /*TypeMayContainAuto=*/false);
if (VDPrivate->isInvalidDecl())
continue;
auto VDPrivateRefExpr = buildDeclRefExpr(
- *this, VDPrivate, DE->getType().getUnqualifiedType(), DE->getExprLoc());
-
- DSAStack->addDSA(VD, DE, OMPC_private);
- Vars.push_back(DE);
+ *this, VDPrivate, RefExpr->getType().getUnqualifiedType(), ELoc);
+
+ DeclRefExpr *Ref = nullptr;
+ if (!VD && !CurContext->isDependentContext())
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false);
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_private, Ref);
+ Vars.push_back((VD || CurContext->isDependentContext())
+ ? RefExpr->IgnoreParens()
+ : Ref);
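+ // For class members (VD == nullptr) the capture 'Ref' stands in for the
+ // original list item inside the captured region.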
PrivateCopies.push_back(VDPrivateRefExpr);
}
@@ -6621,51 +8303,37 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
SmallVector<Expr *, 8> Vars;
SmallVector<Expr *, 8> PrivateCopies;
SmallVector<Expr *, 8> Inits;
+ SmallVector<Decl *, 4> ExprCaptures;
bool IsImplicitClause =
StartLoc.isInvalid() && LParenLoc.isInvalid() && EndLoc.isInvalid();
auto ImplicitClauseLoc = DSAStack->getConstructLoc();
for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP firstprivate clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
PrivateCopies.push_back(nullptr);
Inits.push_back(nullptr);
- continue;
}
-
- SourceLocation ELoc =
- IsImplicitClause ? ImplicitClauseLoc : RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- // OpenMP [2.9.3.3, Restrictions, p.1]
- // A variable that is part of another variable (as an array or
- // structure element) cannot appear in a private clause.
- DeclRefExpr *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
- Decl *D = DE->getDecl();
- VarDecl *VD = cast<VarDecl>(D);
- QualType Type = VD->getType();
- if (Type->isDependentType() || Type->isInstantiationDependentType()) {
- // It will be analyzed later.
- Vars.push_back(DE);
- PrivateCopies.push_back(nullptr);
- Inits.push_back(nullptr);
- continue;
- }
+ ELoc = IsImplicitClause ? ImplicitClauseLoc : ELoc;
+ QualType Type = D->getType();
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.9.3.3, Restrictions, C/C++, p.3]
// A variable that appears in a private clause must not have an incomplete
// type or a reference type.
if (RequireCompleteType(ELoc, Type,
- diag::err_omp_firstprivate_incomplete_type)) {
+ diag::err_omp_firstprivate_incomplete_type))
continue;
- }
Type = Type.getNonReferenceType();
// OpenMP [2.9.3.4, Restrictions, C/C++, p.1]
@@ -6675,8 +8343,10 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
auto ElemType = Context.getBaseElementType(Type).getNonReferenceType();
// If an implicit firstprivate variable was found, it was checked already.
+ DSAStackTy::DSAVarData TopDVar;
if (!IsImplicitClause) {
- DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
+ DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
+ TopDVar = DVar;
bool IsConstant = ElemType.isConstant(Context);
// OpenMP [2.4.13, Data-sharing Attribute Clauses]
// A list item that specifies a given variable may not appear in more
@@ -6687,7 +8357,7 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
Diag(ELoc, diag::err_omp_wrong_dsa)
<< getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_firstprivate);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
@@ -6702,12 +8372,12 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// in a Construct, C/C++, p.2]
// Variables with const-qualified type having no mutable member may be
// listed in a firstprivate clause, even if they are static data members.
- if (!(IsConstant || VD->isStaticDataMember()) && !DVar.RefExpr &&
+ if (!(IsConstant || (VD && VD->isStaticDataMember())) && !DVar.RefExpr &&
DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_shared) {
Diag(ELoc, diag::err_omp_wrong_dsa)
<< getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_firstprivate);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
@@ -6719,14 +8389,14 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// to any of the parallel regions arising from the parallel construct.
if (isOpenMPWorksharingDirective(CurrDir) &&
!isOpenMPParallelDirective(CurrDir)) {
- DVar = DSAStack->getImplicitDSA(VD, true);
+ DVar = DSAStack->getImplicitDSA(D, true);
if (DVar.CKind != OMPC_shared &&
(isOpenMPParallelDirective(DVar.DKind) ||
DVar.DKind == OMPD_unknown)) {
Diag(ELoc, diag::err_omp_required_access)
<< getOpenMPClauseName(OMPC_firstprivate)
<< getOpenMPClauseName(OMPC_shared);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
}
@@ -6741,20 +8411,20 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// construct must not appear in a firstprivate clause in a task construct
// encountered during execution of any of the worksharing regions arising
// from the worksharing construct.
- if (CurrDir == OMPD_task) {
- DVar =
- DSAStack->hasInnermostDSA(VD, MatchesAnyClause(OMPC_reduction),
- [](OpenMPDirectiveKind K) -> bool {
- return isOpenMPParallelDirective(K) ||
- isOpenMPWorksharingDirective(K);
- },
- false);
+ if (isOpenMPTaskingDirective(CurrDir)) {
+ DVar = DSAStack->hasInnermostDSA(
+ D, [](OpenMPClauseKind C) -> bool { return C == OMPC_reduction; },
+ [](OpenMPDirectiveKind K) -> bool {
+ return isOpenMPParallelDirective(K) ||
+ isOpenMPWorksharingDirective(K);
+ },
+ false);
if (DVar.CKind == OMPC_reduction &&
(isOpenMPParallelDirective(DVar.DKind) ||
isOpenMPWorksharingDirective(DVar.DKind))) {
Diag(ELoc, diag::err_omp_parallel_reduction_in_task_firstprivate)
<< getOpenMPDirectiveName(DVar.DKind);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
}
@@ -6773,31 +8443,48 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// A list item may appear in a firstprivate or lastprivate clause but not
// both.
if (CurrDir == OMPD_distribute) {
- DVar = DSAStack->hasInnermostDSA(VD, MatchesAnyClause(OMPC_private),
- [](OpenMPDirectiveKind K) -> bool {
- return isOpenMPTeamsDirective(K);
- },
- false);
+ DVar = DSAStack->hasInnermostDSA(
+ D, [](OpenMPClauseKind C) -> bool { return C == OMPC_private; },
+ [](OpenMPDirectiveKind K) -> bool {
+ return isOpenMPTeamsDirective(K);
+ },
+ false);
if (DVar.CKind == OMPC_private && isOpenMPTeamsDirective(DVar.DKind)) {
Diag(ELoc, diag::err_omp_firstprivate_distribute_private_teams);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
- DVar = DSAStack->hasInnermostDSA(VD, MatchesAnyClause(OMPC_reduction),
- [](OpenMPDirectiveKind K) -> bool {
- return isOpenMPTeamsDirective(K);
- },
- false);
+ DVar = DSAStack->hasInnermostDSA(
+ D, [](OpenMPClauseKind C) -> bool { return C == OMPC_reduction; },
+ [](OpenMPDirectiveKind K) -> bool {
+ return isOpenMPTeamsDirective(K);
+ },
+ false);
if (DVar.CKind == OMPC_reduction &&
isOpenMPTeamsDirective(DVar.DKind)) {
Diag(ELoc, diag::err_omp_firstprivate_distribute_in_teams_reduction);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
- DVar = DSAStack->getTopDSA(VD, false);
+ DVar = DSAStack->getTopDSA(D, false);
if (DVar.CKind == OMPC_lastprivate) {
Diag(ELoc, diag::err_omp_firstprivate_and_lastprivate_in_distribute);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
+ continue;
+ }
+ }
+ // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
+ // A list item cannot appear in both a map clause and a data-sharing
+ // attribute clause on the same construct.
+ if (CurrDir == OMPD_target) {
+ if (DSAStack->checkMappableExprComponentListsForDecl(
+ VD, /* CurrentRegionOnly = */ true,
+ [&](OMPClauseMappableExprCommon::MappableExprComponentListRef)
+ -> bool { return true; })) {
+ Diag(ELoc, diag::err_omp_variable_in_map_and_dsa)
+ << getOpenMPClauseName(OMPC_firstprivate)
+ << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
}
@@ -6805,21 +8492,22 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// Variably modified types are not supported for tasks.
if (!Type->isAnyPointerType() && Type->isVariablyModifiedType() &&
- DSAStack->getCurrentDirective() == OMPD_task) {
+ isOpenMPTaskingDirective(DSAStack->getCurrentDirective())) {
Diag(ELoc, diag::err_omp_variably_modified_type_not_supported)
<< getOpenMPClauseName(OMPC_firstprivate) << Type
<< getOpenMPDirectiveName(DSAStack->getCurrentDirective());
bool IsDecl =
+ !VD ||
VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
+ Diag(D->getLocation(),
IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
+ << D;
continue;
}
Type = Type.getUnqualifiedType();
- auto VDPrivate = buildVarDecl(*this, ELoc, Type, VD->getName(),
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
+ auto VDPrivate = buildVarDecl(*this, ELoc, Type, D->getName(),
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
// Generate helper private variable and initialize it with the value of the
// original variable. The address of the original variable is replaced by
// the address of the new private variable in the CodeGen. This new variable
@@ -6830,11 +8518,11 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// original array element in CodeGen.
if (Type->isArrayType()) {
auto VDInit =
- buildVarDecl(*this, DE->getExprLoc(), ElemType, VD->getName());
+ buildVarDecl(*this, RefExpr->getExprLoc(), ElemType, D->getName());
VDInitRefExpr = buildDeclRefExpr(*this, VDInit, ElemType, ELoc);
auto Init = DefaultLvalueConversion(VDInitRefExpr).get();
ElemType = ElemType.getUnqualifiedType();
- auto *VDInitTemp = buildVarDecl(*this, DE->getLocStart(), ElemType,
+ auto *VDInitTemp = buildVarDecl(*this, RefExpr->getExprLoc(), ElemType,
".firstprivate.temp");
InitializedEntity Entity =
InitializedEntity::InitializeVariable(VDInitTemp);
@@ -6849,26 +8537,39 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
// Remove temp variable declaration.
Context.Deallocate(VDInitTemp);
} else {
- auto *VDInit =
- buildVarDecl(*this, DE->getLocStart(), Type, ".firstprivate.temp");
- VDInitRefExpr =
- buildDeclRefExpr(*this, VDInit, DE->getType(), DE->getExprLoc());
+ auto *VDInit = buildVarDecl(*this, RefExpr->getExprLoc(), Type,
+ ".firstprivate.temp");
+ VDInitRefExpr = buildDeclRefExpr(*this, VDInit, RefExpr->getType(),
+ RefExpr->getExprLoc());
AddInitializerToDecl(VDPrivate,
DefaultLvalueConversion(VDInitRefExpr).get(),
/*DirectInit=*/false, /*TypeMayContainAuto=*/false);
}
if (VDPrivate->isInvalidDecl()) {
if (IsImplicitClause) {
- Diag(DE->getExprLoc(),
+ Diag(RefExpr->getExprLoc(),
diag::note_omp_task_predetermined_firstprivate_here);
}
continue;
}
CurContext->addDecl(VDPrivate);
auto VDPrivateRefExpr = buildDeclRefExpr(
- *this, VDPrivate, DE->getType().getUnqualifiedType(), DE->getExprLoc());
- DSAStack->addDSA(VD, DE, OMPC_firstprivate);
- Vars.push_back(DE);
+ *this, VDPrivate, RefExpr->getType().getUnqualifiedType(),
+ RefExpr->getExprLoc());
+ DeclRefExpr *Ref = nullptr;
+ if (!VD && !CurContext->isDependentContext()) {
+ if (TopDVar.CKind == OMPC_lastprivate)
+ Ref = TopDVar.PrivateCopy;
+ else {
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
+ if (!IsOpenMPCapturedDecl(D))
+ ExprCaptures.push_back(Ref->getDecl());
+ }
+ }
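+ // If the member already had a lastprivate copy on this construct, that
+ // capture (TopDVar.PrivateCopy) was reused above instead of building a
+ // second one.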
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_firstprivate, Ref);
+ Vars.push_back((VD || CurContext->isDependentContext())
+ ? RefExpr->IgnoreParens()
+ : Ref);
PrivateCopies.push_back(VDPrivateRefExpr);
Inits.push_back(VDInitRefExpr);
}
@@ -6877,7 +8578,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
return nullptr;
return OMPFirstprivateClause::Create(Context, StartLoc, LParenLoc, EndLoc,
- Vars, PrivateCopies, Inits);
+ Vars, PrivateCopies, Inits,
+ buildPreInits(Context, ExprCaptures));
}
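+// The declarations collected in ExprCaptures are wrapped by buildPreInits
+// into statements that run before the construct; a sketch of the intent,
+// the helper itself lives elsewhere in this file.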
OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef<Expr *> VarList,
@@ -6888,48 +8590,34 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef<Expr *> VarList,
SmallVector<Expr *, 8> SrcExprs;
SmallVector<Expr *, 8> DstExprs;
SmallVector<Expr *, 8> AssignmentOps;
+ SmallVector<Decl *, 4> ExprCaptures;
+ SmallVector<Expr *, 4> ExprPostUpdates;
for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP lastprivate clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
SrcExprs.push_back(nullptr);
DstExprs.push_back(nullptr);
AssignmentOps.push_back(nullptr);
- continue;
}
-
- SourceLocation ELoc = RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- // OpenMP [2.14.3.5, Restrictions, p.1]
- // A variable that is part of another variable (as an array or structure
- // element) cannot appear in a lastprivate clause.
- DeclRefExpr *DE = dyn_cast_or_null<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
- Decl *D = DE->getDecl();
- VarDecl *VD = cast<VarDecl>(D);
- QualType Type = VD->getType();
- if (Type->isDependentType() || Type->isInstantiationDependentType()) {
- // It will be analyzed later.
- Vars.push_back(DE);
- SrcExprs.push_back(nullptr);
- DstExprs.push_back(nullptr);
- AssignmentOps.push_back(nullptr);
- continue;
- }
+ QualType Type = D->getType();
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.14.3.5, Restrictions, C/C++, p.2]
// A variable that appears in a lastprivate clause must not have an
// incomplete type or a reference type.
if (RequireCompleteType(ELoc, Type,
- diag::err_omp_lastprivate_incomplete_type)) {
+ diag::err_omp_lastprivate_incomplete_type))
continue;
- }
Type = Type.getNonReferenceType();
// OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables Referenced
@@ -6937,14 +8625,14 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef<Expr *> VarList,
// Variables with the predetermined data-sharing attributes may not be
// listed in data-sharing attributes clauses, except for the cases
// listed below.
- DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
+ DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_lastprivate &&
DVar.CKind != OMPC_firstprivate &&
(DVar.CKind != OMPC_private || DVar.RefExpr != nullptr)) {
Diag(ELoc, diag::err_omp_wrong_dsa)
<< getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_lastprivate);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
@@ -6958,15 +8646,28 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef<Expr *> VarList,
DSAStackTy::DSAVarData TopDVar = DVar;
if (isOpenMPWorksharingDirective(CurrDir) &&
!isOpenMPParallelDirective(CurrDir)) {
- DVar = DSAStack->getImplicitDSA(VD, true);
+ DVar = DSAStack->getImplicitDSA(D, true);
if (DVar.CKind != OMPC_shared) {
Diag(ELoc, diag::err_omp_required_access)
<< getOpenMPClauseName(OMPC_lastprivate)
<< getOpenMPClauseName(OMPC_shared);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
}
+
+ // OpenMP 4.5 [2.10.8, Distribute Construct, p.3]
+ // A list item may appear in a firstprivate or lastprivate clause but not
+ // both.
+ if (CurrDir == OMPD_distribute) {
+ DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
+ if (DVar.CKind == OMPC_firstprivate) {
+ Diag(ELoc, diag::err_omp_firstprivate_and_lastprivate_in_distribute);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
+ continue;
+ }
+ }
+
// OpenMP [2.14.3.5, Restrictions, C++, p.1,2]
// A variable of class type (or array thereof) that appears in a
// lastprivate clause requires an accessible, unambiguous default
@@ -6976,42 +8677,54 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef<Expr *> VarList,
// lastprivate clause requires an accessible, unambiguous copy assignment
// operator for the class type.
Type = Context.getBaseElementType(Type).getNonReferenceType();
- auto *SrcVD = buildVarDecl(*this, DE->getLocStart(),
+ auto *SrcVD = buildVarDecl(*this, ERange.getBegin(),
Type.getUnqualifiedType(), ".lastprivate.src",
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
- auto *PseudoSrcExpr = buildDeclRefExpr(
- *this, SrcVD, Type.getUnqualifiedType(), DE->getExprLoc());
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
+ auto *PseudoSrcExpr =
+ buildDeclRefExpr(*this, SrcVD, Type.getUnqualifiedType(), ELoc);
auto *DstVD =
- buildVarDecl(*this, DE->getLocStart(), Type, ".lastprivate.dst",
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
- auto *PseudoDstExpr =
- buildDeclRefExpr(*this, DstVD, Type, DE->getExprLoc());
+ buildVarDecl(*this, ERange.getBegin(), Type, ".lastprivate.dst",
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
+ auto *PseudoDstExpr = buildDeclRefExpr(*this, DstVD, Type, ELoc);
// For arrays generate assignment operation for single element and replace
// it by the original array element in CodeGen.
- auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign,
+ auto AssignmentOp = BuildBinOp(/*S=*/nullptr, ELoc, BO_Assign,
PseudoDstExpr, PseudoSrcExpr);
if (AssignmentOp.isInvalid())
continue;
- AssignmentOp = ActOnFinishFullExpr(AssignmentOp.get(), DE->getExprLoc(),
+ AssignmentOp = ActOnFinishFullExpr(AssignmentOp.get(), ELoc,
/*DiscardedValue=*/true);
if (AssignmentOp.isInvalid())
continue;
- // OpenMP 4.5 [2.10.8, Distribute Construct, p.3]
- // A list item may appear in a firstprivate or lastprivate clause but not
- // both.
- if (CurrDir == OMPD_distribute) {
- DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
- if (DVar.CKind == OMPC_firstprivate) {
- Diag(ELoc, diag::err_omp_firstprivate_and_lastprivate_in_distribute);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
- continue;
+ DeclRefExpr *Ref = nullptr;
+ if (!VD && !CurContext->isDependentContext()) {
+ if (TopDVar.CKind == OMPC_firstprivate)
+ Ref = TopDVar.PrivateCopy;
+ else {
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false);
+ if (!IsOpenMPCapturedDecl(D))
+ ExprCaptures.push_back(Ref->getDecl());
+ }
+ if (TopDVar.CKind == OMPC_firstprivate ||
+ (!IsOpenMPCapturedDecl(D) &&
+ Ref->getDecl()->hasAttr<OMPCaptureNoInitAttr>())) {
+ ExprResult RefRes = DefaultLvalueConversion(Ref);
+ if (!RefRes.isUsable())
+ continue;
+ ExprResult PostUpdateRes =
+ BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign, SimpleRefExpr,
+ RefRes.get());
+ if (!PostUpdateRes.isUsable())
+ continue;
+ ExprPostUpdates.push_back(
+ IgnoredValueConversions(PostUpdateRes.get()).get());
}
}
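+ // The post-update assignment built above writes the captured value back
+ // into the original member ('SimpleRefExpr = Ref') once the region ends.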
-
- if (TopDVar.CKind != OMPC_firstprivate)
- DSAStack->addDSA(VD, DE, OMPC_lastprivate);
- Vars.push_back(DE);
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_lastprivate, Ref);
+ Vars.push_back((VD || CurContext->isDependentContext())
+ ? RefExpr->IgnoreParens()
+ : Ref);
SrcExprs.push_back(PseudoSrcExpr);
DstExprs.push_back(PseudoDstExpr);
AssignmentOps.push_back(AssignmentOp.get());
@@ -7021,7 +8734,9 @@ OMPClause *Sema::ActOnOpenMPLastprivateClause(ArrayRef<Expr *> VarList,
return nullptr;
return OMPLastprivateClause::Create(Context, StartLoc, LParenLoc, EndLoc,
- Vars, SrcExprs, DstExprs, AssignmentOps);
+ Vars, SrcExprs, DstExprs, AssignmentOps,
+ buildPreInits(Context, ExprCaptures),
+ buildPostUpdate(*this, ExprPostUpdates));
}
OMPClause *Sema::ActOnOpenMPSharedClause(ArrayRef<Expr *> VarList,
@@ -7030,35 +8745,20 @@ OMPClause *Sema::ActOnOpenMPSharedClause(ArrayRef<Expr *> VarList,
SourceLocation EndLoc) {
SmallVector<Expr *, 8> Vars;
for (auto &RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP shared clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+ assert(RefExpr && "NULL expr in OpenMP lastprivate clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
- continue;
- }
-
- SourceLocation ELoc = RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- // OpenMP [2.14.3.2, Restrictions, p.1]
- // A variable that is part of another variable (as an array or structure
- // element) cannot appear in a shared unless it is a static data member
- // of a C++ class.
- DeclRefExpr *DE = dyn_cast<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
- continue;
}
- Decl *D = DE->getDecl();
- VarDecl *VD = cast<VarDecl>(D);
-
- QualType Type = VD->getType();
- if (Type->isDependentType() || Type->isInstantiationDependentType()) {
- // It will be analyzed later.
- Vars.push_back(DE);
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
// in a Construct]
// Variables with the predetermined data-sharing attributes may not be
@@ -7066,17 +8766,22 @@ OMPClause *Sema::ActOnOpenMPSharedClause(ArrayRef<Expr *> VarList,
// listed below. For these exceptions only, listing a predetermined
// variable in a data-sharing attribute clause is allowed and overrides
// the variable's predetermined data-sharing attributes.
- DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
+ DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_shared &&
DVar.RefExpr) {
Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_shared);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
- DSAStack->addDSA(VD, DE, OMPC_shared);
- Vars.push_back(DE);
+ DeclRefExpr *Ref = nullptr;
+ if (!VD && IsOpenMPCapturedDecl(D) && !CurContext->isDependentContext())
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_shared, Ref);
+ Vars.push_back((VD || !Ref || CurContext->isDependentContext())
+ ? RefExpr->IgnoreParens()
+ : Ref);
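+ // Class members get an explicit capture here (WithInit=true) so that the
+ // outlined body can reference the enclosing object's field.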
}
if (Vars.empty())
@@ -7097,8 +8802,9 @@ public:
return false;
if (DVar.CKind != OMPC_unknown)
return true;
- DSAStackTy::DSAVarData DVarPrivate =
- Stack->hasDSA(VD, isOpenMPPrivate, MatchesAlways(), false);
+ DSAStackTy::DSAVarData DVarPrivate = Stack->hasDSA(
+ VD, isOpenMPPrivate, [](OpenMPDirectiveKind) -> bool { return true; },
+ false);
if (DVarPrivate.CKind != OMPC_unknown)
return true;
return false;
@@ -7116,16 +8822,137 @@ public:
};
} // namespace
+namespace {
+// Transforms a MemberExpr for the specified FieldDecl of the current class
+// into a DeclRefExpr to the specified OMPCapturedExprDecl.
+class TransformExprToCaptures : public TreeTransform<TransformExprToCaptures> {
+ typedef TreeTransform<TransformExprToCaptures> BaseTransform;
+ ValueDecl *Field;
+ DeclRefExpr *CapturedExpr;
+
+public:
+ TransformExprToCaptures(Sema &SemaRef, ValueDecl *FieldDecl)
+ : BaseTransform(SemaRef), Field(FieldDecl), CapturedExpr(nullptr) {}
+
+ ExprResult TransformMemberExpr(MemberExpr *E) {
+ if (isa<CXXThisExpr>(E->getBase()->IgnoreParenImpCasts()) &&
+ E->getMemberDecl() == Field) {
+ CapturedExpr = buildCapture(SemaRef, Field, E, /*WithInit=*/false);
+ return CapturedExpr;
+ }
+ return BaseTransform::TransformMemberExpr(E);
+ }
+ DeclRefExpr *getCapturedExpr() { return CapturedExpr; }
+};
+} // namespace
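+
+// TransformExprToCaptures is used below for reduction list items that are
+// array subscripts/sections over a member (e.g. 'this->a[0:n]'): the member
+// reference is rebuilt as a reference to its capture (illustrative; see
+// ActOnOpenMPReductionClause).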
+
+template <typename T>
+static T filterLookupForUDR(SmallVectorImpl<UnresolvedSet<8>> &Lookups,
+ const llvm::function_ref<T(ValueDecl *)> &Gen) {
+ for (auto &Set : Lookups) {
+ for (auto *D : Set) {
+ if (auto Res = Gen(cast<ValueDecl>(D)))
+ return Res;
+ }
+ }
+ return T();
+}
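+// Returns the first non-null value Gen produces over the lookup sets,
+// walking innermost scopes first; otherwise a default-constructed T.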
+
+static ExprResult
+buildDeclareReductionRef(Sema &SemaRef, SourceLocation Loc, SourceRange Range,
+ Scope *S, CXXScopeSpec &ReductionIdScopeSpec,
+ const DeclarationNameInfo &ReductionId, QualType Ty,
+ CXXCastPath &BasePath, Expr *UnresolvedReduction) {
+ if (ReductionIdScopeSpec.isInvalid())
+ return ExprError();
+ SmallVector<UnresolvedSet<8>, 4> Lookups;
+ if (S) {
+ LookupResult Lookup(SemaRef, ReductionId, Sema::LookupOMPReductionName);
+ Lookup.suppressDiagnostics();
+ while (S && SemaRef.LookupParsedName(Lookup, S, &ReductionIdScopeSpec)) {
+ auto *D = Lookup.getRepresentativeDecl();
+ do {
+ S = S->getParent();
+ } while (S && !S->isDeclScope(D));
+ if (S)
+ S = S->getParent();
+ Lookups.push_back(UnresolvedSet<8>());
+ Lookups.back().append(Lookup.begin(), Lookup.end());
+ Lookup.clear();
+ }
+ } else if (auto *ULE =
+ cast_or_null<UnresolvedLookupExpr>(UnresolvedReduction)) {
+ Lookups.push_back(UnresolvedSet<8>());
+ Decl *PrevD = nullptr;
+ for(auto *D : ULE->decls()) {
+ if (D == PrevD)
+ Lookups.push_back(UnresolvedSet<8>());
+ else if (auto *DRD = cast<OMPDeclareReductionDecl>(D))
+ Lookups.back().addDecl(DRD);
+ PrevD = D;
+ }
+ }
+ if (Ty->isDependentType() || Ty->isInstantiationDependentType() ||
+ Ty->containsUnexpandedParameterPack() ||
+ filterLookupForUDR<bool>(Lookups, [](ValueDecl *D) -> bool {
+ return !D->isInvalidDecl() &&
+ (D->getType()->isDependentType() ||
+ D->getType()->isInstantiationDependentType() ||
+ D->getType()->containsUnexpandedParameterPack());
+ })) {
+ UnresolvedSet<8> ResSet;
+ for (auto &Set : Lookups) {
+ ResSet.append(Set.begin(), Set.end());
+ // The last item marks the end of all declarations at the specified scope.
+ ResSet.addDecl(Set[Set.size() - 1]);
+ }
+ return UnresolvedLookupExpr::Create(
+ SemaRef.Context, /*NamingClass=*/nullptr,
+ ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context), ReductionId,
+ /*ADL=*/true, /*Overloaded=*/true, ResSet.begin(), ResSet.end());
+ }
+ if (auto *VD = filterLookupForUDR<ValueDecl *>(
+ Lookups, [&SemaRef, Ty](ValueDecl *D) -> ValueDecl * {
+ if (!D->isInvalidDecl() &&
+ SemaRef.Context.hasSameType(D->getType(), Ty))
+ return D;
+ return nullptr;
+ }))
+ return SemaRef.BuildDeclRefExpr(VD, Ty, VK_LValue, Loc);
+ if (auto *VD = filterLookupForUDR<ValueDecl *>(
+ Lookups, [&SemaRef, Ty, Loc](ValueDecl *D) -> ValueDecl * {
+ if (!D->isInvalidDecl() &&
+ SemaRef.IsDerivedFrom(Loc, Ty, D->getType()) &&
+ !Ty.isMoreQualifiedThan(D->getType()))
+ return D;
+ return nullptr;
+ })) {
+ CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
+ /*DetectVirtual=*/false);
+ if (SemaRef.IsDerivedFrom(Loc, Ty, VD->getType(), Paths)) {
+ if (!Paths.isAmbiguous(SemaRef.Context.getCanonicalType(
+ VD->getType().getUnqualifiedType()))) {
+ if (SemaRef.CheckBaseClassAccess(Loc, VD->getType(), Ty, Paths.front(),
+ /*DiagID=*/0) !=
+ Sema::AR_inaccessible) {
+ SemaRef.BuildBasePathArray(Paths, BasePath);
+ return SemaRef.BuildDeclRefExpr(VD, Ty, VK_LValue, Loc);
+ }
+ }
+ }
+ }
+ if (ReductionIdScopeSpec.isSet()) {
+ SemaRef.Diag(Loc, diag::err_omp_not_resolved_reduction_identifier) << Range;
+ return ExprError();
+ }
+ return ExprEmpty();
+}
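+
+// Illustrative only: given
+//   #pragma omp declare reduction(myop : int : omp_out += omp_in)
+// a later 'reduction(myop : x)' resolves 'myop' through this helper, and a
+// qualified 'reduction(N::myop : x)' searches via ReductionIdScopeSpec.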
+
OMPClause *Sema::ActOnOpenMPReductionClause(
ArrayRef<Expr *> VarList, SourceLocation StartLoc, SourceLocation LParenLoc,
SourceLocation ColonLoc, SourceLocation EndLoc,
- CXXScopeSpec &ReductionIdScopeSpec,
- const DeclarationNameInfo &ReductionId) {
- // TODO: Allow scope specification search when 'declare reduction' is
- // supported.
- assert(ReductionIdScopeSpec.isEmpty() &&
- "No support for scoped reduction identifiers yet.");
-
+ CXXScopeSpec &ReductionIdScopeSpec, const DeclarationNameInfo &ReductionId,
+ ArrayRef<Expr *> UnresolvedReductions) {
auto DN = ReductionId.getName();
auto OOK = DN.getCXXOverloadedOperator();
BinaryOperatorKind BOK = BO_Comma;
@@ -7209,48 +9036,21 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
break;
}
SourceRange ReductionIdRange;
- if (ReductionIdScopeSpec.isValid()) {
+ if (ReductionIdScopeSpec.isValid())
ReductionIdRange.setBegin(ReductionIdScopeSpec.getBeginLoc());
- }
ReductionIdRange.setEnd(ReductionId.getEndLoc());
- if (BOK == BO_Comma) {
- // Not allowed reduction identifier is found.
- Diag(ReductionId.getLocStart(), diag::err_omp_unknown_reduction_identifier)
- << ReductionIdRange;
- return nullptr;
- }
SmallVector<Expr *, 8> Vars;
SmallVector<Expr *, 8> Privates;
SmallVector<Expr *, 8> LHSs;
SmallVector<Expr *, 8> RHSs;
SmallVector<Expr *, 8> ReductionOps;
+ SmallVector<Decl *, 4> ExprCaptures;
+ SmallVector<Expr *, 4> ExprPostUpdates;
+ auto IR = UnresolvedReductions.begin(), ER = UnresolvedReductions.end();
+ bool FirstIter = true;
for (auto RefExpr : VarList) {
assert(RefExpr && "nullptr expr in OpenMP reduction clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
- // It will be analyzed later.
- Vars.push_back(RefExpr);
- Privates.push_back(nullptr);
- LHSs.push_back(nullptr);
- RHSs.push_back(nullptr);
- ReductionOps.push_back(nullptr);
- continue;
- }
-
- if (RefExpr->isTypeDependent() || RefExpr->isValueDependent() ||
- RefExpr->isInstantiationDependent() ||
- RefExpr->containsUnexpandedParameterPack()) {
- // It will be analyzed later.
- Vars.push_back(RefExpr);
- Privates.push_back(nullptr);
- LHSs.push_back(nullptr);
- RHSs.push_back(nullptr);
- ReductionOps.push_back(nullptr);
- continue;
- }
-
- auto ELoc = RefExpr->getExprLoc();
- auto ERange = RefExpr->getSourceRange();
// OpenMP [2.1, C/C++]
// A list item is a variable or array section, subject to the restrictions
// specified in Section 2.4 on page 42 and in each of the sections
@@ -7258,52 +9058,53 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
// OpenMP [2.14.3.3, Restrictions, p.1]
// A variable that is part of another variable (as an array or
// structure element) cannot appear in a private clause.
- auto *DE = dyn_cast<DeclRefExpr>(RefExpr);
- auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr);
- auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr);
- if (!ASE && !OASE && (!DE || !isa<VarDecl>(DE->getDecl()))) {
- Diag(ELoc, diag::err_omp_expected_var_name_or_array_item) << ERange;
- continue;
+ if (!FirstIter && IR != ER)
+ ++IR;
+ FirstIter = false;
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange,
+ /*AllowArraySection=*/true);
+ if (Res.second) {
+ // It will be analyzed later.
+ Vars.push_back(RefExpr);
+ Privates.push_back(nullptr);
+ LHSs.push_back(nullptr);
+ RHSs.push_back(nullptr);
+ // Try to find the corresponding 'declare reduction' construct before
+ // using builtin/overloaded operators.
+ QualType Type = Context.DependentTy;
+ CXXCastPath BasePath;
+ ExprResult DeclareReductionRef = buildDeclareReductionRef(
+ *this, ELoc, ERange, DSAStack->getCurScope(), ReductionIdScopeSpec,
+ ReductionId, Type, BasePath, IR == ER ? nullptr : *IR);
+ if (CurContext->isDependentContext() &&
+ (DeclareReductionRef.isUnset() ||
+ isa<UnresolvedLookupExpr>(DeclareReductionRef.get())))
+ ReductionOps.push_back(DeclareReductionRef.get());
+ else
+ ReductionOps.push_back(nullptr);
}
+ ValueDecl *D = Res.first;
+ if (!D)
+ continue;
+
QualType Type;
- VarDecl *VD = nullptr;
- if (DE) {
- auto D = DE->getDecl();
- VD = cast<VarDecl>(D);
- Type = VD->getType();
- } else if (ASE) {
- Type = ASE->getType();
- auto *Base = ASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
- Base = TempASE->getBase()->IgnoreParenImpCasts();
- DE = dyn_cast<DeclRefExpr>(Base);
- if (DE)
- VD = dyn_cast<VarDecl>(DE->getDecl());
- if (!VD) {
- Diag(Base->getExprLoc(), diag::err_omp_expected_base_var_name)
- << 0 << Base->getSourceRange();
- continue;
- }
- } else if (OASE) {
+ auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr->IgnoreParens());
+ auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr->IgnoreParens());
+ if (ASE)
+ Type = ASE->getType().getNonReferenceType();
+ else if (OASE) {
auto BaseType = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
if (auto *ATy = BaseType->getAsArrayTypeUnsafe())
Type = ATy->getElementType();
else
Type = BaseType->getPointeeType();
- auto *Base = OASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
- Base = TempOASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
- Base = TempASE->getBase()->IgnoreParenImpCasts();
- DE = dyn_cast<DeclRefExpr>(Base);
- if (DE)
- VD = dyn_cast<VarDecl>(DE->getDecl());
- if (!VD) {
- Diag(Base->getExprLoc(), diag::err_omp_expected_base_var_name)
- << 1 << Base->getSourceRange();
- continue;
- }
- }
+ Type = Type.getNonReferenceType();
+ } else
+ Type = Context.getBaseElementType(D->getType().getNonReferenceType());
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.9.3.3, Restrictions, C/C++, p.3]
// A variable that appears in a private clause must not have an incomplete
@@ -7312,39 +9113,27 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
diag::err_omp_reduction_incomplete_type))
continue;
// OpenMP [2.14.3.6, reduction clause, Restrictions]
- // Arrays may not appear in a reduction clause.
- if (Type.getNonReferenceType()->isArrayType()) {
- Diag(ELoc, diag::err_omp_reduction_type_array) << Type << ERange;
- if (!ASE && !OASE) {
- bool IsDecl = VD->isThisDeclarationADefinition(Context) ==
- VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
- IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
- }
- continue;
- }
- // OpenMP [2.14.3.6, reduction clause, Restrictions]
// A list item that appears in a reduction clause must not be
// const-qualified.
if (Type.getNonReferenceType().isConstant(Context)) {
Diag(ELoc, diag::err_omp_const_reduction_list_item)
<< getOpenMPClauseName(OMPC_reduction) << Type << ERange;
if (!ASE && !OASE) {
- bool IsDecl = VD->isThisDeclarationADefinition(Context) ==
- VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
+ bool IsDecl = !VD ||
+ VD->isThisDeclarationADefinition(Context) ==
+ VarDecl::DeclarationOnly;
+ Diag(D->getLocation(),
IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
+ << D;
}
continue;
}
// OpenMP [2.9.3.6, Restrictions, C/C++, p.4]
// If a list-item is a reference type then it must bind to the same object
// for all threads of the team.
- if (!ASE && !OASE) {
+ if (!ASE && !OASE && VD) {
VarDecl *VDDef = VD->getDefinition();
- if (Type->isReferenceType() && VDDef) {
+ if (VD->getType()->isReferenceType() && VDDef && VDDef->hasInit()) {
DSARefChecker Check(DSAStack);
if (Check.Visit(VDDef->getInit())) {
Diag(ELoc, diag::err_omp_reduction_ref_type_arg) << ERange;
@@ -7353,40 +9142,7 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
}
}
}
- // OpenMP [2.14.3.6, reduction clause, Restrictions]
- // The type of a list item that appears in a reduction clause must be valid
- // for the reduction-identifier. For a max or min reduction in C, the type
- // of the list item must be an allowed arithmetic data type: char, int,
- // float, double, or _Bool, possibly modified with long, short, signed, or
- // unsigned. For a max or min reduction in C++, the type of the list item
- // must be an allowed arithmetic data type: char, wchar_t, int, float,
- // double, or bool, possibly modified with long, short, signed, or unsigned.
- if ((BOK == BO_GT || BOK == BO_LT) &&
- !(Type->isScalarType() ||
- (getLangOpts().CPlusPlus && Type->isArithmeticType()))) {
- Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg)
- << getLangOpts().CPlusPlus;
- if (!ASE && !OASE) {
- bool IsDecl = VD->isThisDeclarationADefinition(Context) ==
- VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
- IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
- }
- continue;
- }
- if ((BOK == BO_OrAssign || BOK == BO_AndAssign || BOK == BO_XorAssign) &&
- !getLangOpts().CPlusPlus && Type->isFloatingType()) {
- Diag(ELoc, diag::err_omp_clause_floating_type_arg);
- if (!ASE && !OASE) {
- bool IsDecl = VD->isThisDeclarationADefinition(Context) ==
- VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
- IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
- }
- continue;
- }
+
// OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables Referenced
// in a Construct]
// Variables with the predetermined data-sharing attributes may not be
@@ -7399,18 +9155,17 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
// but a list item can appear only once in the reduction clauses for that
// directive.
DSAStackTy::DSAVarData DVar;
- DVar = DSAStack->getTopDSA(VD, false);
+ DVar = DSAStack->getTopDSA(D, false);
if (DVar.CKind == OMPC_reduction) {
Diag(ELoc, diag::err_omp_once_referenced)
<< getOpenMPClauseName(OMPC_reduction);
- if (DVar.RefExpr) {
+ if (DVar.RefExpr)
Diag(DVar.RefExpr->getExprLoc(), diag::note_omp_referenced);
- }
} else if (DVar.CKind != OMPC_unknown) {
Diag(ELoc, diag::err_omp_wrong_dsa)
<< getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_reduction);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
@@ -7421,24 +9176,91 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
OpenMPDirectiveKind CurrDir = DSAStack->getCurrentDirective();
if (isOpenMPWorksharingDirective(CurrDir) &&
!isOpenMPParallelDirective(CurrDir)) {
- DVar = DSAStack->getImplicitDSA(VD, true);
+ DVar = DSAStack->getImplicitDSA(D, true);
if (DVar.CKind != OMPC_shared) {
Diag(ELoc, diag::err_omp_required_access)
<< getOpenMPClauseName(OMPC_reduction)
<< getOpenMPClauseName(OMPC_shared);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
+ continue;
+ }
+ }
+
+ // Try to find the corresponding 'declare reduction' construct before
+ // using builtin/overloaded operators.
+ CXXCastPath BasePath;
+ ExprResult DeclareReductionRef = buildDeclareReductionRef(
+ *this, ELoc, ERange, DSAStack->getCurScope(), ReductionIdScopeSpec,
+ ReductionId, Type, BasePath, IR == ER ? nullptr : *IR);
+ if (DeclareReductionRef.isInvalid())
+ continue;
+ if (CurContext->isDependentContext() &&
+ (DeclareReductionRef.isUnset() ||
+ isa<UnresolvedLookupExpr>(DeclareReductionRef.get()))) {
+ Vars.push_back(RefExpr);
+ Privates.push_back(nullptr);
+ LHSs.push_back(nullptr);
+ RHSs.push_back(nullptr);
+ ReductionOps.push_back(DeclareReductionRef.get());
+ continue;
+ }
+ if (BOK == BO_Comma && DeclareReductionRef.isUnset()) {
+ // A reduction identifier that is not allowed was found.
+ Diag(ReductionId.getLocStart(),
+ diag::err_omp_unknown_reduction_identifier)
+ << Type << ReductionIdRange;
+ continue;
+ }
+
+ // OpenMP [2.14.3.6, reduction clause, Restrictions]
+ // The type of a list item that appears in a reduction clause must be valid
+ // for the reduction-identifier. For a max or min reduction in C, the type
+ // of the list item must be an allowed arithmetic data type: char, int,
+ // float, double, or _Bool, possibly modified with long, short, signed, or
+ // unsigned. For a max or min reduction in C++, the type of the list item
+ // must be an allowed arithmetic data type: char, wchar_t, int, float,
+ // double, or bool, possibly modified with long, short, signed, or unsigned.
+ if (DeclareReductionRef.isUnset()) {
+ if ((BOK == BO_GT || BOK == BO_LT) &&
+ !(Type->isScalarType() ||
+ (getLangOpts().CPlusPlus && Type->isArithmeticType()))) {
+ Diag(ELoc, diag::err_omp_clause_not_arithmetic_type_arg)
+ << getLangOpts().CPlusPlus;
+ if (!ASE && !OASE) {
+ bool IsDecl = !VD ||
+ VD->isThisDeclarationADefinition(Context) ==
+ VarDecl::DeclarationOnly;
+ Diag(D->getLocation(),
+ IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+ << D;
+ }
+ continue;
+ }
+ if ((BOK == BO_OrAssign || BOK == BO_AndAssign || BOK == BO_XorAssign) &&
+ !getLangOpts().CPlusPlus && Type->isFloatingType()) {
+ Diag(ELoc, diag::err_omp_clause_floating_type_arg);
+ if (!ASE && !OASE) {
+ bool IsDecl = !VD ||
+ VD->isThisDeclarationADefinition(Context) ==
+ VarDecl::DeclarationOnly;
+ Diag(D->getLocation(),
+ IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+ << D;
+ }
continue;
}
}
Type = Type.getNonLValueExprType(Context).getUnqualifiedType();
auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs",
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
- auto *RHSVD = buildVarDecl(*this, ELoc, Type, VD->getName(),
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
+ auto *RHSVD = buildVarDecl(*this, ELoc, Type, D->getName(),
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
auto PrivateTy = Type;
- if (OASE) {
- // For array sections only:
+ if (OASE ||
+ (!ASE &&
+ D->getType().getNonReferenceType()->isVariablyModifiedType())) {
+ // For arrays/array sections only:
// Create pseudo array type for private copy. The size for this array will
// be generated during codegen.
// For array subscripts or single variables Private Ty is the same as Type
@@ -7447,162 +9269,227 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
Type, new (Context) OpaqueValueExpr(SourceLocation(),
Context.getSizeType(), VK_RValue),
ArrayType::Normal, /*IndexTypeQuals=*/0, SourceRange());
- }
+ } else if (!ASE && !OASE &&
+ Context.getAsArrayType(D->getType().getNonReferenceType()))
+ PrivateTy = D->getType().getNonReferenceType();
// Private copy.
- auto *PrivateVD = buildVarDecl(*this, ELoc, PrivateTy, VD->getName(),
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
+ auto *PrivateVD = buildVarDecl(*this, ELoc, PrivateTy, D->getName(),
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
// Add initializer for private variable.
Expr *Init = nullptr;
- switch (BOK) {
- case BO_Add:
- case BO_Xor:
- case BO_Or:
- case BO_LOr:
- // '+', '-', '^', '|', '||' reduction ops - initializer is '0'.
- if (Type->isScalarType() || Type->isAnyComplexType()) {
- Init = ActOnIntegerConstant(ELoc, /*Val=*/0).get();
- }
- break;
- case BO_Mul:
- case BO_LAnd:
- if (Type->isScalarType() || Type->isAnyComplexType()) {
- // '*' and '&&' reduction ops - initializer is '1'.
- Init = ActOnIntegerConstant(ELoc, /*Val=*/1).get();
- }
- break;
- case BO_And: {
- // '&' reduction op - initializer is '~0'.
- QualType OrigType = Type;
- if (auto *ComplexTy = OrigType->getAs<ComplexType>()) {
- Type = ComplexTy->getElementType();
- }
- if (Type->isRealFloatingType()) {
- llvm::APFloat InitValue =
- llvm::APFloat::getAllOnesValue(Context.getTypeSize(Type),
- /*isIEEE=*/true);
- Init = FloatingLiteral::Create(Context, InitValue, /*isexact=*/true,
- Type, ELoc);
- } else if (Type->isScalarType()) {
- auto Size = Context.getTypeSize(Type);
- QualType IntTy = Context.getIntTypeForBitwidth(Size, /*Signed=*/0);
- llvm::APInt InitValue = llvm::APInt::getAllOnesValue(Size);
- Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc);
+ auto *LHSDRE = buildDeclRefExpr(*this, LHSVD, Type, ELoc);
+ auto *RHSDRE = buildDeclRefExpr(*this, RHSVD, Type, ELoc);
+ if (DeclareReductionRef.isUsable()) {
+ auto *DRDRef = DeclareReductionRef.getAs<DeclRefExpr>();
+ auto *DRD = cast<OMPDeclareReductionDecl>(DRDRef->getDecl());
+ if (DRD->getInitializer()) {
+ Init = DRDRef;
+ RHSVD->setInit(DRDRef);
+ RHSVD->setInitStyle(VarDecl::CallInit);
}
- if (Init && OrigType->isAnyComplexType()) {
- // Init = 0xFFFF + 0xFFFFi;
- auto *Im = new (Context) ImaginaryLiteral(Init, OrigType);
- Init = CreateBuiltinBinOp(ELoc, BO_Add, Init, Im).get();
+ } else {
+ switch (BOK) {
+ case BO_Add:
+ case BO_Xor:
+ case BO_Or:
+ case BO_LOr:
+ // '+', '-', '^', '|', '||' reduction ops - initializer is '0'.
+ if (Type->isScalarType() || Type->isAnyComplexType())
+ Init = ActOnIntegerConstant(ELoc, /*Val=*/0).get();
+ break;
+ case BO_Mul:
+ case BO_LAnd:
+ if (Type->isScalarType() || Type->isAnyComplexType()) {
+ // '*' and '&&' reduction ops - initializer is '1'.
+ Init = ActOnIntegerConstant(ELoc, /*Val=*/1).get();
+ }
+ break;
+ case BO_And: {
+ // '&' reduction op - initializer is '~0'.
+ QualType OrigType = Type;
+ if (auto *ComplexTy = OrigType->getAs<ComplexType>())
+ Type = ComplexTy->getElementType();
+ if (Type->isRealFloatingType()) {
+ llvm::APFloat InitValue =
+ llvm::APFloat::getAllOnesValue(Context.getTypeSize(Type),
+ /*isIEEE=*/true);
+ Init = FloatingLiteral::Create(Context, InitValue, /*isexact=*/true,
+ Type, ELoc);
+ } else if (Type->isScalarType()) {
+ auto Size = Context.getTypeSize(Type);
+ QualType IntTy = Context.getIntTypeForBitwidth(Size, /*Signed=*/0);
+ llvm::APInt InitValue = llvm::APInt::getAllOnesValue(Size);
+ Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc);
+ }
+ if (Init && OrigType->isAnyComplexType()) {
+ // Init = 0xFFFF + 0xFFFFi;
+ auto *Im = new (Context) ImaginaryLiteral(Init, OrigType);
+ Init = CreateBuiltinBinOp(ELoc, BO_Add, Init, Im).get();
+ }
+ Type = OrigType;
+ break;
}
- Type = OrigType;
- break;
- }
- case BO_LT:
- case BO_GT: {
- // 'min' reduction op - initializer is 'Largest representable number in
- // the reduction list item type'.
- // 'max' reduction op - initializer is 'Least representable number in
- // the reduction list item type'.
- if (Type->isIntegerType() || Type->isPointerType()) {
- bool IsSigned = Type->hasSignedIntegerRepresentation();
- auto Size = Context.getTypeSize(Type);
- QualType IntTy =
- Context.getIntTypeForBitwidth(Size, /*Signed=*/IsSigned);
- llvm::APInt InitValue =
- (BOK != BO_LT)
- ? IsSigned ? llvm::APInt::getSignedMinValue(Size)
- : llvm::APInt::getMinValue(Size)
- : IsSigned ? llvm::APInt::getSignedMaxValue(Size)
- : llvm::APInt::getMaxValue(Size);
- Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc);
- if (Type->isPointerType()) {
- // Cast to pointer type.
- auto CastExpr = BuildCStyleCastExpr(
- SourceLocation(), Context.getTrivialTypeSourceInfo(Type, ELoc),
- SourceLocation(), Init);
- if (CastExpr.isInvalid())
- continue;
- Init = CastExpr.get();
+ case BO_LT:
+ case BO_GT: {
+ // 'min' reduction op - initializer is 'Largest representable number in
+ // the reduction list item type'.
+ // 'max' reduction op - initializer is 'Least representable number in
+ // the reduction list item type'.
+ if (Type->isIntegerType() || Type->isPointerType()) {
+ bool IsSigned = Type->hasSignedIntegerRepresentation();
+ auto Size = Context.getTypeSize(Type);
+ QualType IntTy =
+ Context.getIntTypeForBitwidth(Size, /*Signed=*/IsSigned);
+ llvm::APInt InitValue =
+ (BOK != BO_LT)
+ ? IsSigned ? llvm::APInt::getSignedMinValue(Size)
+ : llvm::APInt::getMinValue(Size)
+ : IsSigned ? llvm::APInt::getSignedMaxValue(Size)
+ : llvm::APInt::getMaxValue(Size);
+ Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc);
+ if (Type->isPointerType()) {
+ // Cast to pointer type.
+ auto CastExpr = BuildCStyleCastExpr(
+ SourceLocation(), Context.getTrivialTypeSourceInfo(Type, ELoc),
+ SourceLocation(), Init);
+ if (CastExpr.isInvalid())
+ continue;
+ Init = CastExpr.get();
+ }
+ } else if (Type->isRealFloatingType()) {
+ llvm::APFloat InitValue = llvm::APFloat::getLargest(
+ Context.getFloatTypeSemantics(Type), BOK != BO_LT);
+ Init = FloatingLiteral::Create(Context, InitValue, /*isexact=*/true,
+ Type, ELoc);
}
- } else if (Type->isRealFloatingType()) {
- llvm::APFloat InitValue = llvm::APFloat::getLargest(
- Context.getFloatTypeSemantics(Type), BOK != BO_LT);
- Init = FloatingLiteral::Create(Context, InitValue, /*isexact=*/true,
- Type, ELoc);
+ break;
+ }
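+ // E.g. 'reduction(min : x)' with 'int x' seeds the private copy with
+ // INT_MAX, and 'reduction(max : x)' with INT_MIN (illustrative values).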
+ case BO_PtrMemD:
+ case BO_PtrMemI:
+ case BO_MulAssign:
+ case BO_Div:
+ case BO_Rem:
+ case BO_Sub:
+ case BO_Shl:
+ case BO_Shr:
+ case BO_LE:
+ case BO_GE:
+ case BO_EQ:
+ case BO_NE:
+ case BO_AndAssign:
+ case BO_XorAssign:
+ case BO_OrAssign:
+ case BO_Assign:
+ case BO_AddAssign:
+ case BO_SubAssign:
+ case BO_DivAssign:
+ case BO_RemAssign:
+ case BO_ShlAssign:
+ case BO_ShrAssign:
+ case BO_Comma:
+ llvm_unreachable("Unexpected reduction operation");
}
- break;
}
- case BO_PtrMemD:
- case BO_PtrMemI:
- case BO_MulAssign:
- case BO_Div:
- case BO_Rem:
- case BO_Sub:
- case BO_Shl:
- case BO_Shr:
- case BO_LE:
- case BO_GE:
- case BO_EQ:
- case BO_NE:
- case BO_AndAssign:
- case BO_XorAssign:
- case BO_OrAssign:
- case BO_Assign:
- case BO_AddAssign:
- case BO_SubAssign:
- case BO_DivAssign:
- case BO_RemAssign:
- case BO_ShlAssign:
- case BO_ShrAssign:
- case BO_Comma:
- llvm_unreachable("Unexpected reduction operation");
- }
- if (Init) {
+ if (Init && DeclareReductionRef.isUnset()) {
AddInitializerToDecl(RHSVD, Init, /*DirectInit=*/false,
/*TypeMayContainAuto=*/false);
- } else
+ } else if (!Init)
ActOnUninitializedDecl(RHSVD, /*TypeMayContainAuto=*/false);
- if (!RHSVD->hasInit()) {
+ if (RHSVD->isInvalidDecl())
+ continue;
+ if (!RHSVD->hasInit() && DeclareReductionRef.isUnset()) {
Diag(ELoc, diag::err_omp_reduction_id_not_compatible) << Type
<< ReductionIdRange;
- if (VD) {
- bool IsDecl = VD->isThisDeclarationADefinition(Context) ==
- VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
- IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
- }
+ bool IsDecl =
+ !VD ||
+ VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
+ Diag(D->getLocation(),
+ IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+ << D;
continue;
}
// Store initializer for single element in private copy. Will be used during
// codegen.
PrivateVD->setInit(RHSVD->getInit());
PrivateVD->setInitStyle(RHSVD->getInitStyle());
- auto *LHSDRE = buildDeclRefExpr(*this, LHSVD, Type, ELoc);
- auto *RHSDRE = buildDeclRefExpr(*this, RHSVD, Type, ELoc);
auto *PrivateDRE = buildDeclRefExpr(*this, PrivateVD, PrivateTy, ELoc);
- ExprResult ReductionOp =
- BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(), BOK,
- LHSDRE, RHSDRE);
- if (ReductionOp.isUsable()) {
- if (BOK != BO_LT && BOK != BO_GT) {
- ReductionOp =
- BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
- BO_Assign, LHSDRE, ReductionOp.get());
- } else {
- auto *ConditionalOp = new (Context) ConditionalOperator(
- ReductionOp.get(), SourceLocation(), LHSDRE, SourceLocation(),
- RHSDRE, Type, VK_LValue, OK_Ordinary);
- ReductionOp =
- BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
- BO_Assign, LHSDRE, ConditionalOp);
+ ExprResult ReductionOp;
+ if (DeclareReductionRef.isUsable()) {
+ QualType RedTy = DeclareReductionRef.get()->getType();
+ QualType PtrRedTy = Context.getPointerType(RedTy);
+ ExprResult LHS = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, LHSDRE);
+ ExprResult RHS = CreateBuiltinUnaryOp(ELoc, UO_AddrOf, RHSDRE);
+ if (!BasePath.empty()) {
+ LHS = DefaultLvalueConversion(LHS.get());
+ RHS = DefaultLvalueConversion(RHS.get());
+ LHS = ImplicitCastExpr::Create(Context, PtrRedTy,
+ CK_UncheckedDerivedToBase, LHS.get(),
+ &BasePath, LHS.get()->getValueKind());
+ RHS = ImplicitCastExpr::Create(Context, PtrRedTy,
+ CK_UncheckedDerivedToBase, RHS.get(),
+ &BasePath, RHS.get()->getValueKind());
}
- ReductionOp = ActOnFinishFullExpr(ReductionOp.get());
+ FunctionProtoType::ExtProtoInfo EPI;
+ QualType Params[] = {PtrRedTy, PtrRedTy};
+ QualType FnTy = Context.getFunctionType(Context.VoidTy, Params, EPI);
+ auto *OVE = new (Context) OpaqueValueExpr(
+ ELoc, Context.getPointerType(FnTy), VK_RValue, OK_Ordinary,
+ DefaultLvalueConversion(DeclareReductionRef.get()).get());
+ Expr *Args[] = {LHS.get(), RHS.get()};
+ ReductionOp = new (Context)
+ CallExpr(Context, OVE, Args, Context.VoidTy, VK_RValue, ELoc);
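+ // The resulting ReductionOp is, in effect, 'combiner(&LHS, &RHS)', where
+ // 'combiner' comes from the matched 'declare reduction' directive.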
+ } else {
+ ReductionOp = BuildBinOp(DSAStack->getCurScope(),
+ ReductionId.getLocStart(), BOK, LHSDRE, RHSDRE);
+ if (ReductionOp.isUsable()) {
+ if (BOK != BO_LT && BOK != BO_GT) {
+ ReductionOp =
+ BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
+ BO_Assign, LHSDRE, ReductionOp.get());
+ } else {
+ auto *ConditionalOp = new (Context) ConditionalOperator(
+ ReductionOp.get(), SourceLocation(), LHSDRE, SourceLocation(),
+ RHSDRE, Type, VK_LValue, OK_Ordinary);
+ ReductionOp =
+ BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
+ BO_Assign, LHSDRE, ConditionalOp);
+ }
+ ReductionOp = ActOnFinishFullExpr(ReductionOp.get());
+ }
+ if (ReductionOp.isInvalid())
+ continue;
}
- if (ReductionOp.isInvalid())
- continue;
- DSAStack->addDSA(VD, DE, OMPC_reduction);
- Vars.push_back(RefExpr);
+ DeclRefExpr *Ref = nullptr;
+ Expr *VarsExpr = RefExpr->IgnoreParens();
+ if (!VD && !CurContext->isDependentContext()) {
+ if (ASE || OASE) {
+ TransformExprToCaptures RebuildToCapture(*this, D);
+ VarsExpr =
+ RebuildToCapture.TransformExpr(RefExpr->IgnoreParens()).get();
+ Ref = RebuildToCapture.getCapturedExpr();
+ } else {
+ VarsExpr = Ref =
+ buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false);
+ }
+ if (!IsOpenMPCapturedDecl(D)) {
+ ExprCaptures.push_back(Ref->getDecl());
+ if (Ref->getDecl()->hasAttr<OMPCaptureNoInitAttr>()) {
+ ExprResult RefRes = DefaultLvalueConversion(Ref);
+ if (!RefRes.isUsable())
+ continue;
+ ExprResult PostUpdateRes =
+ BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign,
+ SimpleRefExpr, RefRes.get());
+ if (!PostUpdateRes.isUsable())
+ continue;
+ ExprPostUpdates.push_back(
+ IgnoredValueConversions(PostUpdateRes.get()).get());
+ }
+ }
+ }
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref);
+ Vars.push_back(VarsExpr);
Privates.push_back(PrivateDRE);
LHSs.push_back(LHSDRE);
RHSs.push_back(RHSDRE);
@@ -7615,7 +9502,67 @@ OMPClause *Sema::ActOnOpenMPReductionClause(
return OMPReductionClause::Create(
Context, StartLoc, LParenLoc, ColonLoc, EndLoc, Vars,
ReductionIdScopeSpec.getWithLocInContext(Context), ReductionId, Privates,
- LHSs, RHSs, ReductionOps);
+ LHSs, RHSs, ReductionOps, buildPreInits(Context, ExprCaptures),
+ buildPostUpdate(*this, ExprPostUpdates));
+}
+
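+// Diagnose an invalid linear-clause modifier; e.g. 'linear(uval(x) : 1)' is
+// rejected in C, where only the 'val' modifier is meaningful.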
+bool Sema::CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,
+ SourceLocation LinLoc) {
+ if ((!LangOpts.CPlusPlus && LinKind != OMPC_LINEAR_val) ||
+ LinKind == OMPC_LINEAR_unknown) {
+ Diag(LinLoc, diag::err_omp_wrong_linear_modifier) << LangOpts.CPlusPlus;
+ return true;
+ }
+ return false;
+}
+
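+// Check that a linear-clause list item has a complete, non-const type of
+// integral or pointer kind; the 'uval' and 'ref' modifiers additionally
+// require a reference type.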
+bool Sema::CheckOpenMPLinearDecl(ValueDecl *D, SourceLocation ELoc,
+ OpenMPLinearClauseKind LinKind,
+ QualType Type) {
+ auto *VD = dyn_cast_or_null<VarDecl>(D);
+ // A variable must not have an incomplete type or a reference type.
+ if (RequireCompleteType(ELoc, Type, diag::err_omp_linear_incomplete_type))
+ return true;
+ if ((LinKind == OMPC_LINEAR_uval || LinKind == OMPC_LINEAR_ref) &&
+ !Type->isReferenceType()) {
+ Diag(ELoc, diag::err_omp_wrong_linear_modifier_non_reference)
+ << Type << getOpenMPSimpleClauseTypeName(OMPC_linear, LinKind);
+ return true;
+ }
+ Type = Type.getNonReferenceType();
+
+ // A list item must not be const-qualified.
+ if (Type.isConstant(Context)) {
+ Diag(ELoc, diag::err_omp_const_variable)
+ << getOpenMPClauseName(OMPC_linear);
+ if (D) {
+ bool IsDecl =
+ !VD ||
+ VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
+ Diag(D->getLocation(),
+ IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+ << D;
+ }
+ return true;
+ }
+
+ // A list item must be of integral or pointer type.
+ Type = Type.getUnqualifiedType().getCanonicalType();
+ const auto *Ty = Type.getTypePtrOrNull();
+ if (!Ty || (!Ty->isDependentType() && !Ty->isIntegralType(Context) &&
+ !Ty->isPointerType())) {
+ Diag(ELoc, diag::err_omp_linear_expected_int_or_ptr) << Type;
+ if (D) {
+ bool IsDecl =
+ !VD ||
+ VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
+ Diag(D->getLocation(),
+ IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+ << D;
+ }
+ return true;
+ }
+ return false;
}
OMPClause *Sema::ActOnOpenMPLinearClause(
@@ -7625,121 +9572,84 @@ OMPClause *Sema::ActOnOpenMPLinearClause(
SmallVector<Expr *, 8> Vars;
SmallVector<Expr *, 8> Privates;
SmallVector<Expr *, 8> Inits;
- if ((!LangOpts.CPlusPlus && LinKind != OMPC_LINEAR_val) ||
- LinKind == OMPC_LINEAR_unknown) {
- Diag(LinLoc, diag::err_omp_wrong_linear_modifier) << LangOpts.CPlusPlus;
+ SmallVector<Decl *, 4> ExprCaptures;
+ SmallVector<Expr *, 4> ExprPostUpdates;
+ if (CheckOpenMPLinearModifier(LinKind, LinLoc))
LinKind = OMPC_LINEAR_val;
- }
for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP linear clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange,
+ /*AllowArraySection=*/false);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
Privates.push_back(nullptr);
Inits.push_back(nullptr);
- continue;
}
-
- // OpenMP [2.14.3.7, linear clause]
- // A list item that appears in a linear clause is subject to the private
- // clause semantics described in Section 2.14.3.3 on page 159 except as
- // noted. In addition, the value of the new list item on each iteration
- // of the associated loop(s) corresponds to the value of the original
- // list item before entering the construct plus the logical number of
- // the iteration times linear-step.
-
- SourceLocation ELoc = RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- // OpenMP [2.14.3.3, Restrictions, p.1]
- // A variable that is part of another variable (as an array or
- // structure element) cannot appear in a private clause.
- DeclRefExpr *DE = dyn_cast<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
- VarDecl *VD = cast<VarDecl>(DE->getDecl());
+ QualType Type = D->getType();
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.14.3.7, linear clause]
// A list-item cannot appear in more than one linear clause.
// A list-item that appears in a linear clause cannot appear in any
// other data-sharing attribute clause.
- DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
+ DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(D, false);
if (DVar.RefExpr) {
Diag(ELoc, diag::err_omp_wrong_dsa) << getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_linear);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
- continue;
- }
-
- QualType QType = VD->getType();
- if (QType->isDependentType() || QType->isInstantiationDependentType()) {
- // It will be analyzed later.
- Vars.push_back(DE);
- Privates.push_back(nullptr);
- Inits.push_back(nullptr);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
- // A variable must not have an incomplete type or a reference type.
- if (RequireCompleteType(ELoc, QType,
- diag::err_omp_linear_incomplete_type)) {
+ if (CheckOpenMPLinearDecl(D, ELoc, LinKind, Type))
continue;
- }
- if ((LinKind == OMPC_LINEAR_uval || LinKind == OMPC_LINEAR_ref) &&
- !QType->isReferenceType()) {
- Diag(ELoc, diag::err_omp_wrong_linear_modifier_non_reference)
- << QType << getOpenMPSimpleClauseTypeName(OMPC_linear, LinKind);
- continue;
- }
- QType = QType.getNonReferenceType();
-
- // A list item must not be const-qualified.
- if (QType.isConstant(Context)) {
- Diag(ELoc, diag::err_omp_const_variable)
- << getOpenMPClauseName(OMPC_linear);
- bool IsDecl =
- VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
- IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
- continue;
- }
-
- // A list item must be of integral or pointer type.
- QType = QType.getUnqualifiedType().getCanonicalType();
- const Type *Ty = QType.getTypePtrOrNull();
- if (!Ty || (!Ty->isDependentType() && !Ty->isIntegralType(Context) &&
- !Ty->isPointerType())) {
- Diag(ELoc, diag::err_omp_linear_expected_int_or_ptr) << QType;
- bool IsDecl =
- VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
- IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
- continue;
- }
+ Type = Type.getNonReferenceType().getUnqualifiedType().getCanonicalType();
// Build private copy of original var.
- auto *Private = buildVarDecl(*this, ELoc, QType, VD->getName(),
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
- auto *PrivateRef = buildDeclRefExpr(
- *this, Private, DE->getType().getUnqualifiedType(), DE->getExprLoc());
+ auto *Private = buildVarDecl(*this, ELoc, Type, D->getName(),
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
+ auto *PrivateRef = buildDeclRefExpr(*this, Private, Type, ELoc);
// Build var to save initial value.
- VarDecl *Init = buildVarDecl(*this, ELoc, QType, ".linear.start");
+ VarDecl *Init = buildVarDecl(*this, ELoc, Type, ".linear.start");
Expr *InitExpr;
+ DeclRefExpr *Ref = nullptr;
+ if (!VD && !CurContext->isDependentContext()) {
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false);
+ if (!IsOpenMPCapturedDecl(D)) {
+ ExprCaptures.push_back(Ref->getDecl());
+ if (Ref->getDecl()->hasAttr<OMPCaptureNoInitAttr>()) {
+ ExprResult RefRes = DefaultLvalueConversion(Ref);
+ if (!RefRes.isUsable())
+ continue;
+ ExprResult PostUpdateRes =
+ BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign,
+ SimpleRefExpr, RefRes.get());
+ if (!PostUpdateRes.isUsable())
+ continue;
+ ExprPostUpdates.push_back(
+ IgnoredValueConversions(PostUpdateRes.get()).get());
+ }
+ }
+ }
if (LinKind == OMPC_LINEAR_uval)
- InitExpr = VD->getInit();
+ InitExpr = VD ? VD->getInit() : SimpleRefExpr;
else
- InitExpr = DE;
+ InitExpr = VD ? SimpleRefExpr : Ref;
AddInitializerToDecl(Init, DefaultLvalueConversion(InitExpr).get(),
- /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
- auto InitRef = buildDeclRefExpr(
- *this, Init, DE->getType().getUnqualifiedType(), DE->getExprLoc());
- DSAStack->addDSA(VD, DE, OMPC_linear);
- Vars.push_back(DE);
+ /*DirectInit=*/false, /*TypeMayContainAuto=*/false);
+ auto InitRef = buildDeclRefExpr(*this, Init, Type, ELoc);
+
+ DSAStack->addDSA(D, RefExpr->IgnoreParens(), OMPC_linear, Ref);
+ Vars.push_back((VD || CurContext->isDependentContext())
+ ? RefExpr->IgnoreParens()
+ : Ref);
Privates.push_back(PrivateRef);
Inits.push_back(InitRef);
}
@@ -7783,12 +9693,14 @@ OMPClause *Sema::ActOnOpenMPLinearClause(
return OMPLinearClause::Create(Context, StartLoc, LParenLoc, LinKind, LinLoc,
ColonLoc, EndLoc, Vars, Privates, Inits,
- StepExpr, CalcStepExpr);
+ StepExpr, CalcStepExpr,
+ buildPreInits(Context, ExprCaptures),
+ buildPostUpdate(*this, ExprPostUpdates));
}
static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
Expr *NumIterations, Sema &SemaRef,
- Scope *S) {
+ Scope *S, DSAStackTy *Stack) {
// Walk the vars and build update/final expressions for the CodeGen.
SmallVector<Expr *, 8> Updates;
SmallVector<Expr *, 8> Finals;
@@ -7798,17 +9710,35 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
// If linear-step is not specified it is assumed to be 1.
if (Step == nullptr)
Step = SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get();
- else if (CalcStep)
+ else if (CalcStep) {
Step = cast<BinaryOperator>(CalcStep)->getLHS();
+ }
bool HasErrors = false;
auto CurInit = Clause.inits().begin();
auto CurPrivate = Clause.privates().begin();
auto LinKind = Clause.getModifier();
for (auto &RefExpr : Clause.varlists()) {
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange,
+ /*AllowArraySection=*/false);
+ ValueDecl *D = Res.first;
+ if (Res.second || !D) {
+ Updates.push_back(nullptr);
+ Finals.push_back(nullptr);
+ HasErrors = true;
+ continue;
+ }
+ if (auto *CED = dyn_cast<OMPCapturedExprDecl>(D)) {
+ D = cast<MemberExpr>(CED->getInit()->IgnoreParenImpCasts())
+ ->getMemberDecl();
+ }
+ auto &&Info = Stack->isLoopControlVariable(D);
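+    // Loop control variables receive their update/final expressions from the
+    // loop itself, so the privatized reference is reused for them below.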
Expr *InitExpr = *CurInit;
// Build privatized reference to the current linear var.
- auto DE = cast<DeclRefExpr>(RefExpr);
+ auto DE = cast<DeclRefExpr>(SimpleRefExpr);
Expr *CapturedRef;
if (LinKind == OMPC_LINEAR_uval)
CapturedRef = cast<VarDecl>(DE->getDecl())->getInit();
@@ -7819,18 +9749,27 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
/*RefersToCapture=*/true);
// Build update: Var = InitExpr + IV * Step
- ExprResult Update =
- BuildCounterUpdate(SemaRef, S, RefExpr->getExprLoc(), *CurPrivate,
- InitExpr, IV, Step, /* Subtract */ false);
+ ExprResult Update;
+ if (!Info.first) {
+ Update =
+ BuildCounterUpdate(SemaRef, S, RefExpr->getExprLoc(), *CurPrivate,
+ InitExpr, IV, Step, /* Subtract */ false);
+ } else
+ Update = *CurPrivate;
Update = SemaRef.ActOnFinishFullExpr(Update.get(), DE->getLocStart(),
/*DiscardedValue=*/true);
// Build final: Var = InitExpr + NumIterations * Step
- ExprResult Final =
- BuildCounterUpdate(SemaRef, S, RefExpr->getExprLoc(), CapturedRef,
- InitExpr, NumIterations, Step, /* Subtract */ false);
+ ExprResult Final;
+ if (!Info.first) {
+ Final = BuildCounterUpdate(SemaRef, S, RefExpr->getExprLoc(), CapturedRef,
+ InitExpr, NumIterations, Step,
+ /* Subtract */ false);
+ } else
+ Final = *CurPrivate;
Final = SemaRef.ActOnFinishFullExpr(Final.get(), DE->getLocStart(),
/*DiscardedValue=*/true);
+
if (!Update.isUsable() || !Final.isUsable()) {
Updates.push_back(nullptr);
Finals.push_back(nullptr);
@@ -7839,7 +9778,8 @@ static bool FinishOpenMPLinearClause(OMPLinearClause &Clause, DeclRefExpr *IV,
Updates.push_back(Update.get());
Finals.push_back(Final.get());
}
- ++CurInit, ++CurPrivate;
+ ++CurInit;
+ ++CurPrivate;
}
Clause.setUpdates(Updates);
Clause.setFinals(Finals);
@@ -7852,52 +9792,55 @@ OMPClause *Sema::ActOnOpenMPAlignedClause(
SmallVector<Expr *, 8> Vars;
for (auto &RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP aligned clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+    assert(RefExpr && "NULL expr in OpenMP aligned clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange,
+ /*AllowArraySection=*/false);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
- continue;
}
-
- SourceLocation ELoc = RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- DeclRefExpr *DE = dyn_cast<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
- VarDecl *VD = cast<VarDecl>(DE->getDecl());
+ QualType QType = D->getType();
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.8.1, simd construct, Restrictions]
// The type of list items appearing in the aligned clause must be
// array, pointer, reference to array, or reference to pointer.
- QualType QType = VD->getType();
QType = QType.getNonReferenceType().getUnqualifiedType().getCanonicalType();
const Type *Ty = QType.getTypePtrOrNull();
- if (!Ty || (!Ty->isDependentType() && !Ty->isArrayType() &&
- !Ty->isPointerType())) {
+ if (!Ty || (!Ty->isArrayType() && !Ty->isPointerType())) {
Diag(ELoc, diag::err_omp_aligned_expected_array_or_ptr)
- << QType << getLangOpts().CPlusPlus << RefExpr->getSourceRange();
+ << QType << getLangOpts().CPlusPlus << ERange;
bool IsDecl =
+ !VD ||
VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
+ Diag(D->getLocation(),
IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
+ << D;
continue;
}
// OpenMP [2.8.1, simd construct, Restrictions]
// A list-item cannot appear in more than one aligned clause.
- if (DeclRefExpr *PrevRef = DSAStack->addUniqueAligned(VD, DE)) {
- Diag(ELoc, diag::err_omp_aligned_twice) << RefExpr->getSourceRange();
+ if (Expr *PrevRef = DSAStack->addUniqueAligned(D, SimpleRefExpr)) {
+ Diag(ELoc, diag::err_omp_aligned_twice) << 0 << ERange;
Diag(PrevRef->getExprLoc(), diag::note_omp_explicit_dsa)
<< getOpenMPClauseName(OMPC_aligned);
continue;
}
- Vars.push_back(DE);
+ DeclRefExpr *Ref = nullptr;
+ if (!VD && IsOpenMPCapturedDecl(D))
+ Ref = buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/true);
+ Vars.push_back(DefaultFunctionArrayConversion(
+ (VD || !Ref) ? RefExpr->IgnoreParens() : Ref)
+ .get());
}
// OpenMP [2.8.1, simd construct, Description]
@@ -7945,7 +9888,8 @@ OMPClause *Sema::ActOnOpenMPCopyinClause(ArrayRef<Expr *> VarList,
// A list item that appears in a copyin clause must be threadprivate.
DeclRefExpr *DE = dyn_cast<DeclRefExpr>(RefExpr);
if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ Diag(ELoc, diag::err_omp_expected_var_name_member_expr)
+ << 0 << RefExpr->getSourceRange();
continue;
}
@@ -8020,51 +9964,37 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef<Expr *> VarList,
SmallVector<Expr *, 8> DstExprs;
SmallVector<Expr *, 8> AssignmentOps;
for (auto &RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP copyprivate clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+    assert(RefExpr && "NULL expr in OpenMP copyprivate clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange,
+ /*AllowArraySection=*/false);
+ if (Res.second) {
// It will be analyzed later.
Vars.push_back(RefExpr);
SrcExprs.push_back(nullptr);
DstExprs.push_back(nullptr);
AssignmentOps.push_back(nullptr);
- continue;
}
-
- SourceLocation ELoc = RefExpr->getExprLoc();
- // OpenMP [2.1, C/C++]
- // A list item is a variable name.
- // OpenMP [2.14.4.1, Restrictions, p.1]
- // A list item that appears in a copyin clause must be threadprivate.
- DeclRefExpr *DE = dyn_cast<DeclRefExpr>(RefExpr);
- if (!DE || !isa<VarDecl>(DE->getDecl())) {
- Diag(ELoc, diag::err_omp_expected_var_name) << RefExpr->getSourceRange();
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
- }
-
- Decl *D = DE->getDecl();
- VarDecl *VD = cast<VarDecl>(D);
- QualType Type = VD->getType();
- if (Type->isDependentType() || Type->isInstantiationDependentType()) {
- // It will be analyzed later.
- Vars.push_back(DE);
- SrcExprs.push_back(nullptr);
- DstExprs.push_back(nullptr);
- AssignmentOps.push_back(nullptr);
- continue;
- }
+ QualType Type = D->getType();
+ auto *VD = dyn_cast<VarDecl>(D);
// OpenMP [2.14.4.2, Restrictions, p.2]
// A list item that appears in a copyprivate clause may not appear in a
// private or firstprivate clause on the single construct.
- if (!DSAStack->isThreadPrivate(VD)) {
- auto DVar = DSAStack->getTopDSA(VD, false);
+ if (!VD || !DSAStack->isThreadPrivate(VD)) {
+ auto DVar = DSAStack->getTopDSA(D, false);
if (DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_copyprivate &&
DVar.RefExpr) {
Diag(ELoc, diag::err_omp_wrong_dsa)
<< getOpenMPClauseName(DVar.CKind)
<< getOpenMPClauseName(OMPC_copyprivate);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
@@ -8072,12 +10002,12 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef<Expr *> VarList,
// All list items that appear in a copyprivate clause must be either
// threadprivate or private in the enclosing context.
if (DVar.CKind == OMPC_unknown) {
- DVar = DSAStack->getImplicitDSA(VD, false);
+ DVar = DSAStack->getImplicitDSA(D, false);
if (DVar.CKind == OMPC_shared) {
Diag(ELoc, diag::err_omp_required_access)
<< getOpenMPClauseName(OMPC_copyprivate)
<< "threadprivate or private in the enclosing context";
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ ReportOriginalDSA(*this, DSAStack, D, DVar);
continue;
}
}
@@ -8089,10 +10019,11 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef<Expr *> VarList,
<< getOpenMPClauseName(OMPC_copyprivate) << Type
<< getOpenMPDirectiveName(DSAStack->getCurrentDirective());
bool IsDecl =
+ !VD ||
VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
- Diag(VD->getLocation(),
+ Diag(D->getLocation(),
IsDecl ? diag::note_previous_decl : diag::note_defined_here)
- << VD;
+ << D;
continue;
}
@@ -8103,27 +10034,29 @@ OMPClause *Sema::ActOnOpenMPCopyprivateClause(ArrayRef<Expr *> VarList,
Type = Context.getBaseElementType(Type.getNonReferenceType())
.getUnqualifiedType();
auto *SrcVD =
- buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.src",
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
- auto *PseudoSrcExpr =
- buildDeclRefExpr(*this, SrcVD, Type, DE->getExprLoc());
+ buildVarDecl(*this, RefExpr->getLocStart(), Type, ".copyprivate.src",
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
+ auto *PseudoSrcExpr = buildDeclRefExpr(*this, SrcVD, Type, ELoc);
auto *DstVD =
- buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.dst",
- VD->hasAttrs() ? &VD->getAttrs() : nullptr);
+ buildVarDecl(*this, RefExpr->getLocStart(), Type, ".copyprivate.dst",
+ D->hasAttrs() ? &D->getAttrs() : nullptr);
auto *PseudoDstExpr =
- buildDeclRefExpr(*this, DstVD, Type, DE->getExprLoc());
- auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign,
+ buildDeclRefExpr(*this, DstVD, Type, ELoc);
+ auto AssignmentOp = BuildBinOp(DSAStack->getCurScope(), ELoc, BO_Assign,
PseudoDstExpr, PseudoSrcExpr);
if (AssignmentOp.isInvalid())
continue;
- AssignmentOp = ActOnFinishFullExpr(AssignmentOp.get(), DE->getExprLoc(),
+ AssignmentOp = ActOnFinishFullExpr(AssignmentOp.get(), ELoc,
/*DiscardedValue=*/true);
if (AssignmentOp.isInvalid())
continue;
// No need to mark vars as copyprivate, they are already threadprivate or
// implicitly private.
- Vars.push_back(DE);
+ assert(VD || IsOpenMPCapturedDecl(D));
+ Vars.push_back(
+ VD ? RefExpr->IgnoreParens()
+ : buildCapture(*this, D, SimpleRefExpr, /*WithInit=*/false));
SrcExprs.push_back(PseudoSrcExpr);
DstExprs.push_back(PseudoDstExpr);
AssignmentOps.push_back(AssignmentOp.get());
@@ -8168,6 +10101,7 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
return nullptr;
}
SmallVector<Expr *, 8> Vars;
+ DSAStackTy::OperatorOffsetTy OpsOffs;
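+  // Offsets and operators collected from 'depend(sink : vec)' items; they are
+  // attached to the clause below for doacross-loop bookkeeping.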
llvm::APSInt DepCounter(/*BitWidth=*/32);
llvm::APSInt TotalDepCount(/*BitWidth=*/32);
if (DepKind == OMPC_DEPEND_sink) {
@@ -8180,8 +10114,7 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
DSAStack->getParentOrderedRegionParam()) {
for (auto &RefExpr : VarList) {
assert(RefExpr && "NULL expr in OpenMP shared clause.");
- if (isa<DependentScopeDeclRefExpr>(RefExpr) ||
- (DepKind == OMPC_DEPEND_sink && CurContext->isDependentContext())) {
+ if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
// It will be analyzed later.
Vars.push_back(RefExpr);
continue;
@@ -8203,61 +10136,66 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
// directive, xi denotes the loop iteration variable of the i-th nested
// loop associated with the loop directive, and di is a constant
// non-negative integer.
+ if (CurContext->isDependentContext()) {
+ // It will be analyzed later.
+ Vars.push_back(RefExpr);
+ continue;
+ }
SimpleExpr = SimpleExpr->IgnoreImplicit();
- auto *DE = dyn_cast<DeclRefExpr>(SimpleExpr);
- if (!DE) {
- OverloadedOperatorKind OOK = OO_None;
- SourceLocation OOLoc;
- Expr *LHS, *RHS;
- if (auto *BO = dyn_cast<BinaryOperator>(SimpleExpr)) {
- OOK = BinaryOperator::getOverloadedOperator(BO->getOpcode());
- OOLoc = BO->getOperatorLoc();
- LHS = BO->getLHS()->IgnoreParenImpCasts();
- RHS = BO->getRHS()->IgnoreParenImpCasts();
- } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(SimpleExpr)) {
- OOK = OCE->getOperator();
- OOLoc = OCE->getOperatorLoc();
- LHS = OCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
- RHS = OCE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
- } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(SimpleExpr)) {
- OOK = MCE->getMethodDecl()
- ->getNameInfo()
- .getName()
- .getCXXOverloadedOperator();
- OOLoc = MCE->getCallee()->getExprLoc();
- LHS = MCE->getImplicitObjectArgument()->IgnoreParenImpCasts();
- RHS = MCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
- } else {
- Diag(ELoc, diag::err_omp_depend_sink_wrong_expr);
- continue;
- }
- DE = dyn_cast<DeclRefExpr>(LHS);
- if (!DE) {
- Diag(LHS->getExprLoc(),
- diag::err_omp_depend_sink_expected_loop_iteration)
- << DSAStack->getParentLoopControlVariable(
- DepCounter.getZExtValue());
- continue;
- }
- if (OOK != OO_Plus && OOK != OO_Minus) {
- Diag(OOLoc, diag::err_omp_depend_sink_expected_plus_minus);
- continue;
- }
- ExprResult Res = VerifyPositiveIntegerConstantInClause(
+ OverloadedOperatorKind OOK = OO_None;
+ SourceLocation OOLoc;
+ Expr *LHS = SimpleExpr;
+ Expr *RHS = nullptr;
+ if (auto *BO = dyn_cast<BinaryOperator>(SimpleExpr)) {
+ OOK = BinaryOperator::getOverloadedOperator(BO->getOpcode());
+ OOLoc = BO->getOperatorLoc();
+ LHS = BO->getLHS()->IgnoreParenImpCasts();
+ RHS = BO->getRHS()->IgnoreParenImpCasts();
+ } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(SimpleExpr)) {
+ OOK = OCE->getOperator();
+ OOLoc = OCE->getOperatorLoc();
+ LHS = OCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+ RHS = OCE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
+ } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(SimpleExpr)) {
+ OOK = MCE->getMethodDecl()
+ ->getNameInfo()
+ .getName()
+ .getCXXOverloadedOperator();
+ OOLoc = MCE->getCallee()->getExprLoc();
+ LHS = MCE->getImplicitObjectArgument()->IgnoreParenImpCasts();
+ RHS = MCE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+ }
+ SourceLocation ELoc;
+ SourceRange ERange;
+ auto Res = getPrivateItem(*this, LHS, ELoc, ERange,
+ /*AllowArraySection=*/false);
+ if (Res.second) {
+ // It will be analyzed later.
+ Vars.push_back(RefExpr);
+ }
+ ValueDecl *D = Res.first;
+ if (!D)
+ continue;
+
+ if (OOK != OO_Plus && OOK != OO_Minus && (RHS || OOK != OO_None)) {
+ Diag(OOLoc, diag::err_omp_depend_sink_expected_plus_minus);
+ continue;
+ }
+ if (RHS) {
+ ExprResult RHSRes = VerifyPositiveIntegerConstantInClause(
RHS, OMPC_depend, /*StrictlyPositive=*/false);
- if (Res.isInvalid())
+ if (RHSRes.isInvalid())
continue;
}
- auto *VD = dyn_cast<VarDecl>(DE->getDecl());
if (!CurContext->isDependentContext() &&
DSAStack->getParentOrderedRegionParam() &&
- (!VD || DepCounter != DSAStack->isParentLoopControlVariable(VD))) {
- Diag(DE->getExprLoc(),
- diag::err_omp_depend_sink_expected_loop_iteration)
+ DepCounter != DSAStack->isParentLoopControlVariable(D).first) {
+ Diag(ELoc, diag::err_omp_depend_sink_expected_loop_iteration)
<< DSAStack->getParentLoopControlVariable(
- DepCounter.getZExtValue());
+ DepCounter.getZExtValue());
continue;
}
+ OpsOffs.push_back({RHS, OOK});
} else {
// OpenMP [2.11.1.1, Restrictions, p.3]
// A variable that is part of another variable (such as a field of a
@@ -8268,14 +10206,17 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
auto *OASE = dyn_cast<OMPArraySectionExpr>(SimpleExpr);
if (!RefExpr->IgnoreParenImpCasts()->isLValue() ||
(!ASE && !DE && !OASE) || (DE && !isa<VarDecl>(DE->getDecl())) ||
- (ASE && !ASE->getBase()->getType()->isAnyPointerType() &&
- !ASE->getBase()->getType()->isArrayType())) {
- Diag(ELoc, diag::err_omp_expected_var_name_or_array_item)
- << RefExpr->getSourceRange();
+ (ASE &&
+ !ASE->getBase()
+ ->getType()
+ .getNonReferenceType()
+ ->isPointerType() &&
+ !ASE->getBase()->getType().getNonReferenceType()->isArrayType())) {
+ Diag(ELoc, diag::err_omp_expected_var_name_member_expr_or_array_item)
+ << 0 << RefExpr->getSourceRange();
continue;
}
}
-
Vars.push_back(RefExpr->IgnoreParenImpCasts());
}
@@ -8289,9 +10230,11 @@ Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
Vars.empty())
return nullptr;
}
-
- return OMPDependClause::Create(Context, StartLoc, LParenLoc, EndLoc, DepKind,
- DepLoc, ColonLoc, Vars);
+ auto *C = OMPDependClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+ DepKind, DepLoc, ColonLoc, Vars);
+ if (DepKind == OMPC_DEPEND_sink || DepKind == OMPC_DEPEND_source)
+ DSAStack->addDoacrossDependClause(C, OpsOffs);
+ return C;
}
OMPClause *Sema::ActOnOpenMPDeviceClause(Expr *Device, SourceLocation StartLoc,
@@ -8366,125 +10309,969 @@ static bool CheckTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef,
return true;
}
-OMPClause *Sema::ActOnOpenMPMapClause(
- OpenMPMapClauseKind MapTypeModifier, OpenMPMapClauseKind MapType,
- SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
- SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) {
- SmallVector<Expr *, 4> Vars;
+/// \brief Return true if it can be proven that the provided array expression
+/// (array section or array subscript) does NOT specify the whole size of the
+/// array whose base type is \a BaseQTy.
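+/// For example, given 'int a[5]', 'a[0:5]' covers the whole size, while
+/// 'a[1:3]' or 'a[2]' provably does not.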
+static bool CheckArrayExpressionDoesNotReferToWholeSize(Sema &SemaRef,
+ const Expr *E,
+ QualType BaseQTy) {
+ auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
+
+ // If this is an array subscript, it refers to the whole size if the size of
+ // the dimension is constant and equals 1. Also, an array section assumes the
+ // format of an array subscript if no colon is used.
+ if (isa<ArraySubscriptExpr>(E) || (OASE && OASE->getColonLoc().isInvalid())) {
+ if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
+ return ATy->getSize().getSExtValue() != 1;
+ // Size can't be evaluated statically.
+ return false;
+ }
- for (auto &RE : VarList) {
- assert(RE && "Null expr in omp map");
- if (isa<DependentScopeDeclRefExpr>(RE)) {
- // It will be analyzed later.
- Vars.push_back(RE);
+ assert(OASE && "Expecting array section if not an array subscript.");
+ auto *LowerBound = OASE->getLowerBound();
+ auto *Length = OASE->getLength();
+
+  // If there is a lower bound that does not evaluate to zero, we are not
+  // covering the whole dimension.
+ if (LowerBound) {
+ llvm::APSInt ConstLowerBound;
+ if (!LowerBound->EvaluateAsInt(ConstLowerBound, SemaRef.getASTContext()))
+ return false; // Can't get the integer value as a constant.
+ if (ConstLowerBound.getSExtValue())
+ return true;
+ }
+
+  // If we don't have a length, we are covering the whole dimension.
+ if (!Length)
+ return false;
+
+ // If the base is a pointer, we don't have a way to get the size of the
+ // pointee.
+ if (BaseQTy->isPointerType())
+ return false;
+
+ // We can only check if the length is the same as the size of the dimension
+ // if we have a constant array.
+ auto *CATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr());
+ if (!CATy)
+ return false;
+
+ llvm::APSInt ConstLength;
+ if (!Length->EvaluateAsInt(ConstLength, SemaRef.getASTContext()))
+ return false; // Can't get the integer value as a constant.
+
+ return CATy->getSize().getSExtValue() != ConstLength.getSExtValue();
+}
+
+// Return true if it can be proven that the provided array expression (array
+// section or array subscript) does NOT specify a single element of the array
+// whose base type is \a BaseQTy.
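+// For example, given 'int a[5]', 'a[3]' and 'a[3:1]' denote a single element,
+// while 'a[0:5]' provably does not.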
+static bool CheckArrayExpressionDoesNotReferToUnitySize(Sema &SemaRef,
+ const Expr *E,
+ QualType BaseQTy) {
+ auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
+
+  // An array subscript always refers to a single element. Also, an array
+  // section assumes the format of an array subscript if no colon is used.
+ // assumes the format of an array subscript if no colon is used.
+ if (isa<ArraySubscriptExpr>(E) || (OASE && OASE->getColonLoc().isInvalid()))
+ return false;
+
+ assert(OASE && "Expecting array section if not an array subscript.");
+ auto *Length = OASE->getLength();
+
+  // If we don't have a length, we have to check if the array has unitary size
+  // for this dimension. Also, we should always expect a length if the base
+  // type is a pointer.
+ if (!Length) {
+ if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
+ return ATy->getSize().getSExtValue() != 1;
+ // We cannot assume anything.
+ return false;
+ }
+
+ // Check if the length evaluates to 1.
+ llvm::APSInt ConstLength;
+ if (!Length->EvaluateAsInt(ConstLength, SemaRef.getASTContext()))
+ return false; // Can't get the integer value as a constant.
+
+ return ConstLength.getSExtValue() != 1;
+}
+
+// Return the base expression of the mappable expression, or null if it cannot
+// be determined, after performing all the checks needed for the expression to
+// be valid as a standalone mappable expression. In the process, record all
+// the components of the expression.
+static Expr *CheckMapClauseExpressionBase(
+ Sema &SemaRef, Expr *E,
+ OMPClauseMappableExprCommon::MappableExprComponentList &CurComponents,
+ OpenMPClauseKind CKind) {
+ SourceLocation ELoc = E->getExprLoc();
+ SourceRange ERange = E->getSourceRange();
+
+  // The base of a list element in a map clause has to be either:
+ // - a reference to variable or field.
+ // - a member expression.
+ // - an array expression.
+ //
+ // E.g. if we have the expression 'r.S.Arr[:12]', we want to retrieve the
+ // reference to 'r'.
+ //
+ // If we have:
+ //
+ // struct SS {
+ // Bla S;
+ // foo() {
+ // #pragma omp target map (S.Arr[:12]);
+ // }
+ // }
+ //
+ // We want to retrieve the member expression 'this->S';
+
+ Expr *RelevantExpr = nullptr;
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.2]
+ // If a list item is an array section, it must specify contiguous storage.
+ //
+  // For this restriction it is sufficient to make sure that only references
+  // to variables or fields and array expressions are used, and that no array
+  // sections appear except in the rightmost expression (unless they cover the
+  // whole dimension of the array). E.g. these would be invalid:
+ //
+ // r.ArrS[3:5].Arr[6:7]
+ //
+ // r.ArrS[3:5].x
+ //
+ // but these would be valid:
+ // r.ArrS[3].Arr[6:7]
+ //
+ // r.ArrS[3].x
+
+ bool AllowUnitySizeArraySection = true;
+ bool AllowWholeSizeArraySection = true;
+
+ while (!RelevantExpr) {
+ E = E->IgnoreParenImpCasts();
+
+ if (auto *CurE = dyn_cast<DeclRefExpr>(E)) {
+ if (!isa<VarDecl>(CurE->getDecl()))
+ break;
+
+ RelevantExpr = CurE;
+
+ // If we got a reference to a declaration, we should not expect any array
+ // section before that.
+ AllowUnitySizeArraySection = false;
+ AllowWholeSizeArraySection = false;
+
+ // Record the component.
+ CurComponents.push_back(OMPClauseMappableExprCommon::MappableComponent(
+ CurE, CurE->getDecl()));
+ continue;
+ }
+
+ if (auto *CurE = dyn_cast<MemberExpr>(E)) {
+ auto *BaseE = CurE->getBase()->IgnoreParenImpCasts();
+
+ if (isa<CXXThisExpr>(BaseE))
+ // We found a base expression: this->Val.
+ RelevantExpr = CurE;
+ else
+ E = BaseE;
+
+ if (!isa<FieldDecl>(CurE->getMemberDecl())) {
+ SemaRef.Diag(ELoc, diag::err_omp_expected_access_to_data_field)
+ << CurE->getSourceRange();
+ break;
+ }
+
+ auto *FD = cast<FieldDecl>(CurE->getMemberDecl());
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C/C++, p.3]
+ // A bit-field cannot appear in a map clause.
+ //
+ if (FD->isBitField()) {
+ SemaRef.Diag(ELoc, diag::err_omp_bit_fields_forbidden_in_clause)
+ << CurE->getSourceRange() << getOpenMPClauseName(CKind);
+ break;
+ }
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1]
+ // If the type of a list item is a reference to a type T then the type
+ // will be considered to be T for all purposes of this clause.
+ QualType CurType = BaseE->getType().getNonReferenceType();
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C/C++, p.2]
+ // A list item cannot be a variable that is a member of a structure with
+ // a union type.
+ //
+ if (auto *RT = CurType->getAs<RecordType>())
+ if (RT->isUnionType()) {
+ SemaRef.Diag(ELoc, diag::err_omp_union_type_not_allowed)
+ << CurE->getSourceRange();
+ break;
+ }
+
+ // If we got a member expression, we should not expect any array section
+ // before that:
+ //
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.7]
+ // If a list item is an element of a structure, only the rightmost symbol
+ // of the variable reference can be an array section.
+ //
+ AllowUnitySizeArraySection = false;
+ AllowWholeSizeArraySection = false;
+
+ // Record the component.
+ CurComponents.push_back(
+ OMPClauseMappableExprCommon::MappableComponent(CurE, FD));
+ continue;
+ }
+
+ if (auto *CurE = dyn_cast<ArraySubscriptExpr>(E)) {
+ E = CurE->getBase()->IgnoreParenImpCasts();
+
+ if (!E->getType()->isAnyPointerType() && !E->getType()->isArrayType()) {
+ SemaRef.Diag(ELoc, diag::err_omp_expected_base_var_name)
+ << 0 << CurE->getSourceRange();
+ break;
+ }
+
+      // If we got an array subscript that expresses the whole dimension, we
+      // can have any array expressions before it. If it expresses only part
+      // of the dimension, we can only have unitary-size array expressions.
+ if (CheckArrayExpressionDoesNotReferToWholeSize(SemaRef, CurE,
+ E->getType()))
+ AllowWholeSizeArraySection = false;
+
+ // Record the component - we don't have any declaration associated.
+ CurComponents.push_back(
+ OMPClauseMappableExprCommon::MappableComponent(CurE, nullptr));
+ continue;
+ }
+
+ if (auto *CurE = dyn_cast<OMPArraySectionExpr>(E)) {
+ E = CurE->getBase()->IgnoreParenImpCasts();
+
+ auto CurType =
+ OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType();
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1]
+ // If the type of a list item is a reference to a type T then the type
+ // will be considered to be T for all purposes of this clause.
+ if (CurType->isReferenceType())
+ CurType = CurType->getPointeeType();
+
+ bool IsPointer = CurType->isAnyPointerType();
+
+ if (!IsPointer && !CurType->isArrayType()) {
+ SemaRef.Diag(ELoc, diag::err_omp_expected_base_var_name)
+ << 0 << CurE->getSourceRange();
+ break;
+ }
+
+ bool NotWhole =
+ CheckArrayExpressionDoesNotReferToWholeSize(SemaRef, CurE, CurType);
+ bool NotUnity =
+ CheckArrayExpressionDoesNotReferToUnitySize(SemaRef, CurE, CurType);
+
+ if (AllowWholeSizeArraySection && AllowUnitySizeArraySection) {
+ // Any array section is currently allowed.
+ //
+ // If this array section refers to the whole dimension we can still
+ // accept other array sections before this one, except if the base is a
+ // pointer. Otherwise, only unitary sections are accepted.
+ if (NotWhole || IsPointer)
+ AllowWholeSizeArraySection = false;
+ } else if ((AllowUnitySizeArraySection && NotUnity) ||
+ (AllowWholeSizeArraySection && NotWhole)) {
+        // Only a unity-size or whole-size array section is allowed here, and
+        // the current array section is not compatible with that restriction.
+ SemaRef.Diag(
+ ELoc, diag::err_array_section_does_not_specify_contiguous_storage)
+ << CurE->getSourceRange();
+ break;
+ }
+
+ // Record the component - we don't have any declaration associated.
+ CurComponents.push_back(
+ OMPClauseMappableExprCommon::MappableComponent(CurE, nullptr));
continue;
}
+
+ // If nothing else worked, this is not a valid map clause expression.
+ SemaRef.Diag(ELoc,
+ diag::err_omp_expected_named_var_member_or_array_expression)
+ << ERange;
+ break;
+ }
+
+ return RelevantExpr;
+}
+
+// Return true if expression E associated with value VD has conflicts with other
+// map information.
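+// E.g. mapping both a pointer 'p' and a section 'p[0:10]' on the same
+// construct is a conflict, while remapping exactly the same expression from
+// an enclosing data environment is accepted.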
+static bool CheckMapConflicts(
+ Sema &SemaRef, DSAStackTy *DSAS, ValueDecl *VD, Expr *E,
+ bool CurrentRegionOnly,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef CurComponents,
+ OpenMPClauseKind CKind) {
+ assert(VD && E);
+ SourceLocation ELoc = E->getExprLoc();
+ SourceRange ERange = E->getSourceRange();
+
+ // In order to easily check the conflicts we need to match each component of
+ // the expression under test with the components of the expressions that are
+ // already in the stack.
+
+ assert(!CurComponents.empty() && "Map clause expression with no components!");
+ assert(CurComponents.back().getAssociatedDeclaration() == VD &&
+ "Map clause expression with unexpected base!");
+
+ // Variables to help detecting enclosing problems in data environment nests.
+ bool IsEnclosedByDataEnvironmentExpr = false;
+ const Expr *EnclosingExpr = nullptr;
+
+ bool FoundError = DSAS->checkMappableExprComponentListsForDecl(
+ VD, CurrentRegionOnly,
+ [&](OMPClauseMappableExprCommon::MappableExprComponentListRef
+ StackComponents) -> bool {
+
+ assert(!StackComponents.empty() &&
+ "Map clause expression with no components!");
+ assert(StackComponents.back().getAssociatedDeclaration() == VD &&
+ "Map clause expression with unexpected base!");
+
+ // The whole expression in the stack.
+ auto *RE = StackComponents.front().getAssociatedExpression();
+
+        // Expressions must start from the same base. Here we detect at which
+        // point both expressions diverge from each other and see if the
+        // memory referred to by both expressions is contiguous and does not
+        // overlap.
+ auto CI = CurComponents.rbegin();
+ auto CE = CurComponents.rend();
+ auto SI = StackComponents.rbegin();
+ auto SE = StackComponents.rend();
+ for (; CI != CE && SI != SE; ++CI, ++SI) {
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.3]
+ // At most one list item can be an array item derived from a given
+ // variable in map clauses of the same construct.
+ if (CurrentRegionOnly &&
+ (isa<ArraySubscriptExpr>(CI->getAssociatedExpression()) ||
+ isa<OMPArraySectionExpr>(CI->getAssociatedExpression())) &&
+ (isa<ArraySubscriptExpr>(SI->getAssociatedExpression()) ||
+ isa<OMPArraySectionExpr>(SI->getAssociatedExpression()))) {
+ SemaRef.Diag(CI->getAssociatedExpression()->getExprLoc(),
+ diag::err_omp_multiple_array_items_in_map_clause)
+ << CI->getAssociatedExpression()->getSourceRange();
+ SemaRef.Diag(SI->getAssociatedExpression()->getExprLoc(),
+ diag::note_used_here)
+ << SI->getAssociatedExpression()->getSourceRange();
+ return true;
+ }
+
+ // Do both expressions have the same kind?
+ if (CI->getAssociatedExpression()->getStmtClass() !=
+ SI->getAssociatedExpression()->getStmtClass())
+ break;
+
+ // Are we dealing with different variables/fields?
+ if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
+ break;
+ }
+ // Check if the extra components of the expressions in the enclosing
+ // data environment are redundant for the current base declaration.
+ // If they are, the maps completely overlap, which is legal.
+ for (; SI != SE; ++SI) {
+ QualType Type;
+ if (auto *ASE =
+ dyn_cast<ArraySubscriptExpr>(SI->getAssociatedExpression())) {
+ Type = ASE->getBase()->IgnoreParenImpCasts()->getType();
+ } else if (auto *OASE =
+ dyn_cast<OMPArraySectionExpr>(SI->getAssociatedExpression())) {
+ auto *E = OASE->getBase()->IgnoreParenImpCasts();
+ Type =
+ OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType();
+ }
+ if (Type.isNull() || Type->isAnyPointerType() ||
+ CheckArrayExpressionDoesNotReferToWholeSize(
+ SemaRef, SI->getAssociatedExpression(), Type))
+ break;
+ }
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.4]
+ // List items of map clauses in the same construct must not share
+ // original storage.
+ //
+ // If the expressions are exactly the same or one is a subset of the
+ // other, it means they are sharing storage.
+ if (CI == CE && SI == SE) {
+ if (CurrentRegionOnly) {
+ if (CKind == OMPC_map)
+ SemaRef.Diag(ELoc, diag::err_omp_map_shared_storage) << ERange;
+ else {
+ assert(CKind == OMPC_to || CKind == OMPC_from);
+ SemaRef.Diag(ELoc, diag::err_omp_once_referenced_in_target_update)
+ << ERange;
+ }
+ SemaRef.Diag(RE->getExprLoc(), diag::note_used_here)
+ << RE->getSourceRange();
+ return true;
+ } else {
+ // If we find the same expression in the enclosing data environment,
+ // that is legal.
+ IsEnclosedByDataEnvironmentExpr = true;
+ return false;
+ }
+ }
+
+ QualType DerivedType =
+ std::prev(CI)->getAssociatedDeclaration()->getType();
+ SourceLocation DerivedLoc =
+ std::prev(CI)->getAssociatedExpression()->getExprLoc();
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1]
+ // If the type of a list item is a reference to a type T then the type
+ // will be considered to be T for all purposes of this clause.
+ DerivedType = DerivedType.getNonReferenceType();
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C/C++, p.1]
+ // A variable for which the type is pointer and an array section
+ // derived from that variable must not appear as list items of map
+ // clauses of the same construct.
+ //
+ // Also, cover one of the cases in:
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.5]
+ // If any part of the original storage of a list item has corresponding
+ // storage in the device data environment, all of the original storage
+ // must have corresponding storage in the device data environment.
+ //
+ if (DerivedType->isAnyPointerType()) {
+ if (CI == CE || SI == SE) {
+ SemaRef.Diag(
+ DerivedLoc,
+ diag::err_omp_pointer_mapped_along_with_derived_section)
+ << DerivedLoc;
+ } else {
+ assert(CI != CE && SI != SE);
+ SemaRef.Diag(DerivedLoc, diag::err_omp_same_pointer_derreferenced)
+ << DerivedLoc;
+ }
+ SemaRef.Diag(RE->getExprLoc(), diag::note_used_here)
+ << RE->getSourceRange();
+ return true;
+ }
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.4]
+ // List items of map clauses in the same construct must not share
+ // original storage.
+ //
+ // An expression is a subset of the other.
+ if (CurrentRegionOnly && (CI == CE || SI == SE)) {
+ if (CKind == OMPC_map)
+ SemaRef.Diag(ELoc, diag::err_omp_map_shared_storage) << ERange;
+ else {
+ assert(CKind == OMPC_to || CKind == OMPC_from);
+ SemaRef.Diag(ELoc, diag::err_omp_once_referenced_in_target_update)
+ << ERange;
+ }
+ SemaRef.Diag(RE->getExprLoc(), diag::note_used_here)
+ << RE->getSourceRange();
+ return true;
+ }
+
+        // The current expression uses the same base as another expression in
+        // the data environment but does not contain it completely.
+ if (!CurrentRegionOnly && SI != SE)
+ EnclosingExpr = RE;
+
+ // The current expression is a subset of the expression in the data
+ // environment.
+ IsEnclosedByDataEnvironmentExpr |=
+ (!CurrentRegionOnly && CI != CE && SI == SE);
+
+ return false;
+ });
+
+ if (CurrentRegionOnly)
+ return FoundError;
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.5]
+ // If any part of the original storage of a list item has corresponding
+ // storage in the device data environment, all of the original storage must
+ // have corresponding storage in the device data environment.
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.6]
+ // If a list item is an element of a structure, and a different element of
+ // the structure has a corresponding list item in the device data environment
+ // prior to a task encountering the construct associated with the map clause,
+ // then the list item must also have a corresponding list item in the device
+ // data environment prior to the task encountering the construct.
+ //
+ if (EnclosingExpr && !IsEnclosedByDataEnvironmentExpr) {
+ SemaRef.Diag(ELoc,
+ diag::err_omp_original_storage_is_shared_and_does_not_contain)
+ << ERange;
+ SemaRef.Diag(EnclosingExpr->getExprLoc(), diag::note_used_here)
+ << EnclosingExpr->getSourceRange();
+ return true;
+ }
+
+ return FoundError;
+}
+
+namespace {
+// Utility struct that gathers all the related lists associated with a mappable
+// expression.
+struct MappableVarListInfo final {
+ // The list of expressions.
+ ArrayRef<Expr *> VarList;
+ // The list of processed expressions.
+ SmallVector<Expr *, 16> ProcessedVarList;
+  // The mappable components for each expression.
+ OMPClauseMappableExprCommon::MappableExprComponentLists VarComponents;
+  // The base declarations of the variables.
+ SmallVector<ValueDecl *, 16> VarBaseDeclarations;
+
+ MappableVarListInfo(ArrayRef<Expr *> VarList) : VarList(VarList) {
+ // We have a list of components and base declarations for each entry in the
+ // variable list.
+ VarComponents.reserve(VarList.size());
+ VarBaseDeclarations.reserve(VarList.size());
+ }
+};
+}
+
+// Check the validity of the provided variable list for the provided clause kind
+// \a CKind. In the check process the valid expressions, and mappable expression
+// components and variables are extracted and used to fill \a Vars,
+// \a ClauseComponents, and \a ClauseBaseDeclarations. \a MapType and
+// \a IsMapTypeImplicit are expected to be valid if the clause kind is 'map'.
+static void
+checkMappableExpressionList(Sema &SemaRef, DSAStackTy *DSAS,
+ OpenMPClauseKind CKind, MappableVarListInfo &MVLI,
+ SourceLocation StartLoc,
+ OpenMPMapClauseKind MapType = OMPC_MAP_unknown,
+ bool IsMapTypeImplicit = false) {
+ // We only expect mappable expressions in 'to', 'from', and 'map' clauses.
+ assert((CKind == OMPC_map || CKind == OMPC_to || CKind == OMPC_from) &&
+ "Unexpected clause kind with mappable expressions!");
+
+ // Keep track of the mappable components and base declarations in this clause.
+ // Each entry in the list is going to have a list of components associated. We
+ // record each set of the components so that we can build the clause later on.
+  // In the end we should have the same number of declarations and component
+  // lists.
+
+ for (auto &RE : MVLI.VarList) {
+ assert(RE && "Null expr in omp to/from/map clause");
SourceLocation ELoc = RE->getExprLoc();
- // OpenMP [2.14.5, Restrictions]
- // A variable that is part of another variable (such as field of a
- // structure) but is not an array element or an array section cannot appear
- // in a map clause.
auto *VE = RE->IgnoreParenLValueCasts();
if (VE->isValueDependent() || VE->isTypeDependent() ||
VE->isInstantiationDependent() ||
VE->containsUnexpandedParameterPack()) {
- // It will be analyzed later.
- Vars.push_back(RE);
+ // We can only analyze this information once the missing information is
+ // resolved.
+ MVLI.ProcessedVarList.push_back(RE);
continue;
}
auto *SimpleExpr = RE->IgnoreParenCasts();
- auto *DE = dyn_cast<DeclRefExpr>(SimpleExpr);
- auto *ASE = dyn_cast<ArraySubscriptExpr>(SimpleExpr);
- auto *OASE = dyn_cast<OMPArraySectionExpr>(SimpleExpr);
-
- if (!RE->IgnoreParenImpCasts()->isLValue() ||
- (!OASE && !ASE && !DE) ||
- (DE && !isa<VarDecl>(DE->getDecl())) ||
- (ASE && !ASE->getBase()->getType()->isAnyPointerType() &&
- !ASE->getBase()->getType()->isArrayType())) {
- Diag(ELoc, diag::err_omp_expected_var_name_or_array_item)
- << RE->getSourceRange();
+
+ if (!RE->IgnoreParenImpCasts()->isLValue()) {
+ SemaRef.Diag(ELoc,
+ diag::err_omp_expected_named_var_member_or_array_expression)
+ << RE->getSourceRange();
continue;
}
- Decl *D = nullptr;
- if (DE) {
- D = DE->getDecl();
- } else if (ASE) {
- auto *B = ASE->getBase()->IgnoreParenCasts();
- D = dyn_cast<DeclRefExpr>(B)->getDecl();
- } else if (OASE) {
- auto *B = OASE->getBase();
- D = dyn_cast<DeclRefExpr>(B)->getDecl();
- }
- assert(D && "Null decl on map clause.");
- auto *VD = cast<VarDecl>(D);
+ OMPClauseMappableExprCommon::MappableExprComponentList CurComponents;
+ ValueDecl *CurDeclaration = nullptr;
- // OpenMP [2.14.5, Restrictions, p.8]
+ // Obtain the array or member expression bases if required. Also, fill the
+ // components array with all the components identified in the process.
+ auto *BE =
+ CheckMapClauseExpressionBase(SemaRef, SimpleExpr, CurComponents, CKind);
+ if (!BE)
+ continue;
+
+ assert(!CurComponents.empty() &&
+ "Invalid mappable expression information.");
+
+ // For the following checks, we rely on the base declaration which is
+ // expected to be associated with the last component. The declaration is
+ // expected to be a variable or a field (if 'this' is being mapped).
+ CurDeclaration = CurComponents.back().getAssociatedDeclaration();
+ assert(CurDeclaration && "Null decl on map clause.");
+ assert(
+ CurDeclaration->isCanonicalDecl() &&
+ "Expecting components to have associated only canonical declarations.");
+
+ auto *VD = dyn_cast<VarDecl>(CurDeclaration);
+ auto *FD = dyn_cast<FieldDecl>(CurDeclaration);
+
+ assert((VD || FD) && "Only variables or fields are expected here!");
+ (void)FD;
+
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.10]
// threadprivate variables cannot appear in a map clause.
- if (DSAStack->isThreadPrivate(VD)) {
- auto DVar = DSAStack->getTopDSA(VD, false);
- Diag(ELoc, diag::err_omp_threadprivate_in_map);
- ReportOriginalDSA(*this, DSAStack, VD, DVar);
+ // OpenMP 4.5 [2.10.5, target update Construct]
+ // threadprivate variables cannot appear in a from clause.
+ if (VD && DSAS->isThreadPrivate(VD)) {
+ auto DVar = DSAS->getTopDSA(VD, false);
+ SemaRef.Diag(ELoc, diag::err_omp_threadprivate_in_clause)
+ << getOpenMPClauseName(CKind);
+ ReportOriginalDSA(SemaRef, DSAS, VD, DVar);
continue;
}
- // OpenMP [2.14.5, Restrictions, p.2]
- // At most one list item can be an array item derived from a given variable
- // in map clauses of the same construct.
- // OpenMP [2.14.5, Restrictions, p.3]
- // List items of map clauses in the same construct must not share original
- // storage.
- // OpenMP [2.14.5, Restrictions, C/C++, p.2]
- // A variable for which the type is pointer, reference to array, or
- // reference to pointer and an array section derived from that variable
- // must not appear as list items of map clauses of the same construct.
- DSAStackTy::MapInfo MI = DSAStack->IsMappedInCurrentRegion(VD);
- if (MI.RefExpr) {
- Diag(ELoc, diag::err_omp_map_shared_storage) << ELoc;
- Diag(MI.RefExpr->getExprLoc(), diag::note_used_here)
- << MI.RefExpr->getSourceRange();
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.9]
+ // A list item cannot appear in both a map clause and a data-sharing
+ // attribute clause on the same construct.
+
+ // Check conflicts with other map clause expressions. We check the conflicts
+ // with the current construct separately from the enclosing data
+ // environment, because the restrictions are different. We only have to
+ // check conflicts across regions for the map clauses.
+ if (CheckMapConflicts(SemaRef, DSAS, CurDeclaration, SimpleExpr,
+ /*CurrentRegionOnly=*/true, CurComponents, CKind))
+ break;
+ if (CKind == OMPC_map &&
+ CheckMapConflicts(SemaRef, DSAS, CurDeclaration, SimpleExpr,
+ /*CurrentRegionOnly=*/false, CurComponents, CKind))
+ break;
+
+ // OpenMP 4.5 [2.10.5, target update Construct]
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, C++, p.1]
+ // If the type of a list item is a reference to a type T then the type will
+ // be considered to be T for all purposes of this clause.
+ QualType Type = CurDeclaration->getType().getNonReferenceType();
+
+ // OpenMP 4.5 [2.10.5, target update Construct, Restrictions, p.4]
+ // A list item in a to or from clause must have a mappable type.
+ // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.9]
+ // A list item must have a mappable type.
+ if (!CheckTypeMappable(VE->getExprLoc(), VE->getSourceRange(), SemaRef,
+ DSAS, Type))
continue;
+
+ if (CKind == OMPC_map) {
+ // target enter data
+ // OpenMP [2.10.2, Restrictions, p. 99]
+ // A map-type must be specified in all map clauses and must be either
+ // to or alloc.
+ OpenMPDirectiveKind DKind = DSAS->getCurrentDirective();
+ if (DKind == OMPD_target_enter_data &&
+ !(MapType == OMPC_MAP_to || MapType == OMPC_MAP_alloc)) {
+ SemaRef.Diag(StartLoc, diag::err_omp_invalid_map_type_for_directive)
+ << (IsMapTypeImplicit ? 1 : 0)
+ << getOpenMPSimpleClauseTypeName(OMPC_map, MapType)
+ << getOpenMPDirectiveName(DKind);
+ continue;
+ }
+
+ // target exit_data
+ // OpenMP [2.10.3, Restrictions, p. 102]
+ // A map-type must be specified in all map clauses and must be either
+ // from, release, or delete.
+ if (DKind == OMPD_target_exit_data &&
+ !(MapType == OMPC_MAP_from || MapType == OMPC_MAP_release ||
+ MapType == OMPC_MAP_delete)) {
+ SemaRef.Diag(StartLoc, diag::err_omp_invalid_map_type_for_directive)
+ << (IsMapTypeImplicit ? 1 : 0)
+ << getOpenMPSimpleClauseTypeName(OMPC_map, MapType)
+ << getOpenMPDirectiveName(DKind);
+ continue;
+ }
+
+ // OpenMP 4.5 [2.15.5.1, Restrictions, p.3]
+ // A list item cannot appear in both a map clause and a data-sharing
+ // attribute clause on the same construct
+ if (DKind == OMPD_target && VD) {
+ auto DVar = DSAS->getTopDSA(VD, false);
+ if (isOpenMPPrivate(DVar.CKind)) {
+ SemaRef.Diag(ELoc, diag::err_omp_variable_in_map_and_dsa)
+ << getOpenMPClauseName(DVar.CKind)
+ << getOpenMPDirectiveName(DSAS->getCurrentDirective());
+ ReportOriginalDSA(SemaRef, DSAS, CurDeclaration, DVar);
+ continue;
+ }
+ }
}
- // OpenMP [2.14.5, Restrictions, C/C++, p.3,4]
- // A variable for which the type is pointer, reference to array, or
- // reference to pointer must not appear as a list item if the enclosing
- // device data environment already contains an array section derived from
- // that variable.
- // An array section derived from a variable for which the type is pointer,
- // reference to array, or reference to pointer must not appear as a list
- // item if the enclosing device data environment already contains that
- // variable.
- QualType Type = VD->getType();
- MI = DSAStack->getMapInfoForVar(VD);
- if (MI.RefExpr && (isa<DeclRefExpr>(MI.RefExpr->IgnoreParenLValueCasts()) !=
- isa<DeclRefExpr>(VE)) &&
- (Type->isPointerType() || Type->isReferenceType())) {
- Diag(ELoc, diag::err_omp_map_shared_storage) << ELoc;
- Diag(MI.RefExpr->getExprLoc(), diag::note_used_here)
- << MI.RefExpr->getSourceRange();
- continue;
+ // Save the current expression.
+ MVLI.ProcessedVarList.push_back(RE);
+
+ // Store the components in the stack so that they can be used to check
+ // against other clauses later on.
+ DSAS->addMappableExpressionComponents(CurDeclaration, CurComponents);
+
+ // Save the components and declaration to create the clause. For purposes of
+    // the clause creation, any component list that has base 'this' uses
+ // null as base declaration.
+ MVLI.VarComponents.resize(MVLI.VarComponents.size() + 1);
+ MVLI.VarComponents.back().append(CurComponents.begin(),
+ CurComponents.end());
+ MVLI.VarBaseDeclarations.push_back(isa<MemberExpr>(BE) ? nullptr
+ : CurDeclaration);
+ }
+}
+
+OMPClause *
+Sema::ActOnOpenMPMapClause(OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
+ SourceLocation MapLoc, SourceLocation ColonLoc,
+ ArrayRef<Expr *> VarList, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc) {
+ MappableVarListInfo MVLI(VarList);
+ checkMappableExpressionList(*this, DSAStack, OMPC_map, MVLI, StartLoc,
+ MapType, IsMapTypeImplicit);
+
+ // We need to produce a map clause even if we don't have variables so that
+  // other diagnostics related to missing map clauses are accurate.
+ return OMPMapClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+ MVLI.ProcessedVarList, MVLI.VarBaseDeclarations,
+ MVLI.VarComponents, MapTypeModifier, MapType,
+ IsMapTypeImplicit, MapLoc);
+}
+
+QualType Sema::ActOnOpenMPDeclareReductionType(SourceLocation TyLoc,
+ TypeResult ParsedType) {
+ assert(ParsedType.isUsable());
+
+ QualType ReductionType = GetTypeFromParser(ParsedType.get());
+ if (ReductionType.isNull())
+ return QualType();
+
+  // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions, C/C++
+ // A type name in a declare reduction directive cannot be a function type, an
+ // array type, a reference type, or a type qualified with const, volatile or
+ // restrict.
+ if (ReductionType.hasQualifiers()) {
+ Diag(TyLoc, diag::err_omp_reduction_wrong_type) << 0;
+ return QualType();
+ }
+
+ if (ReductionType->isFunctionType()) {
+ Diag(TyLoc, diag::err_omp_reduction_wrong_type) << 1;
+ return QualType();
+ }
+ if (ReductionType->isReferenceType()) {
+ Diag(TyLoc, diag::err_omp_reduction_wrong_type) << 2;
+ return QualType();
+ }
+ if (ReductionType->isArrayType()) {
+ Diag(TyLoc, diag::err_omp_reduction_wrong_type) << 3;
+ return QualType();
+ }
+ return ReductionType;
+}
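// A minimal sketch of the type restrictions enforced above (combiners kept
// trivial; the rejected forms are shown as comments):
#pragma omp declare reduction(add : int, float : omp_out += omp_in)  // OK
// const int  -> rejected: qualified with const, volatile or restrict
// void (int) -> rejected: function type
// int &      -> rejected: reference type
// int [10]   -> rejected: array type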
+
+Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveStart(
+ Scope *S, DeclContext *DC, DeclarationName Name,
+ ArrayRef<std::pair<QualType, SourceLocation>> ReductionTypes,
+ AccessSpecifier AS, Decl *PrevDeclInScope) {
+ SmallVector<Decl *, 8> Decls;
+ Decls.reserve(ReductionTypes.size());
+
+ LookupResult Lookup(*this, Name, SourceLocation(), LookupOMPReductionName,
+ ForRedeclaration);
+ // [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions
+ // A reduction-identifier may not be re-declared in the current scope for the
+ // same type or for a type that is compatible according to the base language
+ // rules.
+ llvm::DenseMap<QualType, SourceLocation> PreviousRedeclTypes;
+ OMPDeclareReductionDecl *PrevDRD = nullptr;
+ bool InCompoundScope = true;
+ if (S != nullptr) {
+ // Find the previous declaration with the same name that is not referenced
+ // in other declarations.
+ FunctionScopeInfo *ParentFn = getEnclosingFunction();
+ InCompoundScope =
+ (ParentFn != nullptr) && !ParentFn->CompoundScopes.empty();
+ LookupName(Lookup, S);
+ FilterLookupForScope(Lookup, DC, S, /*ConsiderLinkage=*/false,
+ /*AllowInlineNamespace=*/false);
+ llvm::DenseMap<OMPDeclareReductionDecl *, bool> UsedAsPrevious;
+ auto Filter = Lookup.makeFilter();
+ while (Filter.hasNext()) {
+ auto *PrevDecl = cast<OMPDeclareReductionDecl>(Filter.next());
+ if (InCompoundScope) {
+ auto I = UsedAsPrevious.find(PrevDecl);
+ if (I == UsedAsPrevious.end())
+ UsedAsPrevious[PrevDecl] = false;
+ if (auto *D = PrevDecl->getPrevDeclInScope())
+ UsedAsPrevious[D] = true;
+ }
+ PreviousRedeclTypes[PrevDecl->getType().getCanonicalType()] =
+ PrevDecl->getLocation();
+ }
+ Filter.done();
+ if (InCompoundScope) {
+ for (auto &PrevData : UsedAsPrevious) {
+ if (!PrevData.second) {
+ PrevDRD = PrevData.first;
+ break;
+ }
+ }
}
+ } else if (PrevDeclInScope != nullptr) {
+ auto *PrevDRDInScope = PrevDRD =
+ cast<OMPDeclareReductionDecl>(PrevDeclInScope);
+ do {
+ PreviousRedeclTypes[PrevDRDInScope->getType().getCanonicalType()] =
+ PrevDRDInScope->getLocation();
+ PrevDRDInScope = PrevDRDInScope->getPrevDeclInScope();
+ } while (PrevDRDInScope != nullptr);
+ }
+ for (auto &TyData : ReductionTypes) {
+ auto I = PreviousRedeclTypes.find(TyData.first.getCanonicalType());
+ bool Invalid = false;
+ if (I != PreviousRedeclTypes.end()) {
+ Diag(TyData.second, diag::err_omp_declare_reduction_redefinition)
+ << TyData.first;
+ Diag(I->second, diag::note_previous_definition);
+ Invalid = true;
+ }
+ PreviousRedeclTypes[TyData.first.getCanonicalType()] = TyData.second;
+ auto *DRD = OMPDeclareReductionDecl::Create(Context, DC, TyData.second,
+ Name, TyData.first, PrevDRD);
+ DC->addDecl(DRD);
+ DRD->setAccess(AS);
+ Decls.push_back(DRD);
+ if (Invalid)
+ DRD->setInvalidDecl();
+ else
+ PrevDRD = DRD;
+ }
- // OpenMP [2.14.5, Restrictions, C/C++, p.7]
- // A list item must have a mappable type.
- if (!CheckTypeMappable(VE->getExprLoc(), VE->getSourceRange(), *this,
- DSAStack, Type))
- continue;
+ return DeclGroupPtrTy::make(
+ DeclGroupRef::Create(Context, Decls.begin(), Decls.size()));
+}
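// Sketch of the redeclaration rule checked above, assuming file scope:
#pragma omp declare reduction(myop : int : omp_out += omp_in)
#pragma omp declare reduction(myop : double : omp_out += omp_in) // OK: new type
// #pragma omp declare reduction(myop : int : omp_out *= omp_in)
// error: redefinition of 'myop' for type 'int' (previous definition noted)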
+
+void Sema::ActOnOpenMPDeclareReductionCombinerStart(Scope *S, Decl *D) {
+ auto *DRD = cast<OMPDeclareReductionDecl>(D);
+
+ // Enter new function scope.
+ PushFunctionScope();
+ getCurFunction()->setHasBranchProtectedScope();
+ getCurFunction()->setHasOMPDeclareReductionCombiner();
+
+ if (S != nullptr)
+ PushDeclContext(S, DRD);
+ else
+ CurContext = DRD;
+
+ PushExpressionEvaluationContext(PotentiallyEvaluated);
- Vars.push_back(RE);
- MI.RefExpr = RE;
- DSAStack->addMapInfoForVar(VD, MI);
+ QualType ReductionType = DRD->getType();
+ // Create 'T* omp_parm;T omp_in;'. All references to 'omp_in' will
+ // be replaced by '*omp_parm' during codegen. This is required because
+ // 'omp_in' has by-value argument semantics, but it must actually be passed
+ // by reference. C does not support references, so all parameters are
+ // passed as pointers instead.
+ // Create 'T omp_in;' variable.
+ auto *OmpInParm =
+ buildVarDecl(*this, D->getLocation(), ReductionType, "omp_in");
+ // Create 'T* omp_parm;T omp_out;'. All references to 'omp_out' will
+ // be replaced by '*omp_parm' during codegen. This is required because
+ // 'omp_out' has by-value argument semantics, but it must actually be passed
+ // by reference. C does not support references, so all parameters are
+ // passed as pointers instead.
+ // Create 'T omp_out;' variable.
+ auto *OmpOutParm =
+ buildVarDecl(*this, D->getLocation(), ReductionType, "omp_out");
+ if (S != nullptr) {
+ PushOnScopeChains(OmpInParm, S);
+ PushOnScopeChains(OmpOutParm, S);
+ } else {
+ DRD->addDecl(OmpInParm);
+ DRD->addDecl(OmpOutParm);
}
- if (Vars.empty())
- return nullptr;
+}
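// Sketch of a combiner using the 'omp_in'/'omp_out' variables created above;
// codegen later rewrites both into '*omp_parm'-style pointer accesses:
struct Vec { double x, y; };
#pragma omp declare reduction(vadd : Vec : \
        omp_out.x += omp_in.x, omp_out.y += omp_in.y)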
+
+void Sema::ActOnOpenMPDeclareReductionCombinerEnd(Decl *D, Expr *Combiner) {
+ auto *DRD = cast<OMPDeclareReductionDecl>(D);
+ DiscardCleanupsInEvaluationContext();
+ PopExpressionEvaluationContext();
+
+ PopDeclContext();
+ PopFunctionScopeInfo();
+
+ if (Combiner != nullptr)
+ DRD->setCombiner(Combiner);
+ else
+ DRD->setInvalidDecl();
+}
+
+void Sema::ActOnOpenMPDeclareReductionInitializerStart(Scope *S, Decl *D) {
+ auto *DRD = cast<OMPDeclareReductionDecl>(D);
+
+ // Enter new function scope.
+ PushFunctionScope();
+ getCurFunction()->setHasBranchProtectedScope();
+
+ if (S != nullptr)
+ PushDeclContext(S, DRD);
+ else
+ CurContext = DRD;
+
+ PushExpressionEvaluationContext(PotentiallyEvaluated);
+
+ QualType ReductionType = DRD->getType();
+ // Create 'T* omp_parm;T omp_priv;'. All references to 'omp_priv' will
+ // be replaced by '*omp_parm' during codegen. This is required because
+ // 'omp_priv' has by-value argument semantics, but it must actually be passed
+ // by reference. C does not support references, so all parameters are
+ // passed as pointers instead.
+ // Create 'T omp_priv;' variable.
+ auto *OmpPrivParm =
+ buildVarDecl(*this, D->getLocation(), ReductionType, "omp_priv");
+ // Create 'T* omp_parm;T omp_orig;'. All references to 'omp_orig' will
+ // be replaced by '*omp_parm' during codegen. This is required because
+ // 'omp_orig' has by-value argument semantics, but it must actually be passed
+ // by reference. C does not support references, so all parameters are
+ // passed as pointers instead.
+ // Create 'T omp_orig;' variable.
+ auto *OmpOrigParm =
+ buildVarDecl(*this, D->getLocation(), ReductionType, "omp_orig");
+ if (S != nullptr) {
+ PushOnScopeChains(OmpPrivParm, S);
+ PushOnScopeChains(OmpOrigParm, S);
+ } else {
+ DRD->addDecl(OmpPrivParm);
+ DRD->addDecl(OmpOrigParm);
+ }
+}
+
+void Sema::ActOnOpenMPDeclareReductionInitializerEnd(Decl *D,
+ Expr *Initializer) {
+ auto *DRD = cast<OMPDeclareReductionDecl>(D);
+ DiscardCleanupsInEvaluationContext();
+ PopExpressionEvaluationContext();
- return OMPMapClause::Create(Context, StartLoc, LParenLoc, EndLoc, Vars,
- MapTypeModifier, MapType, MapLoc);
+ PopDeclContext();
+ PopFunctionScopeInfo();
+
+ if (Initializer != nullptr)
+ DRD->setInitializer(Initializer);
+ else
+ DRD->setInvalidDecl();
+}
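// Sketch of an initializer clause using the 'omp_priv'/'omp_orig' variables
// set up above ('omp_priv' is the private copy, 'omp_orig' the original item):
#pragma omp declare reduction(mymin : int : \
        omp_out = omp_in < omp_out ? omp_in : omp_out) \
    initializer(omp_priv = omp_orig)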
+
+Sema::DeclGroupPtrTy Sema::ActOnOpenMPDeclareReductionDirectiveEnd(
+ Scope *S, DeclGroupPtrTy DeclReductions, bool IsValid) {
+ for (auto *D : DeclReductions.get()) {
+ if (IsValid) {
+ auto *DRD = cast<OMPDeclareReductionDecl>(D);
+ if (S != nullptr)
+ PushOnScopeChains(DRD, S, /*AddToContext=*/false);
+ } else
+ D->setInvalidDecl();
+ }
+ return DeclReductions;
}
OMPClause *Sema::ActOnOpenMPNumTeamsClause(Expr *NumTeams,
@@ -8578,3 +11365,377 @@ OMPClause *Sema::ActOnOpenMPHintClause(Expr *Hint, SourceLocation StartLoc,
OMPHintClause(HintExpr.get(), StartLoc, LParenLoc, EndLoc);
}
+OMPClause *Sema::ActOnOpenMPDistScheduleClause(
+ OpenMPDistScheduleClauseKind Kind, Expr *ChunkSize, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation KindLoc, SourceLocation CommaLoc,
+ SourceLocation EndLoc) {
+ if (Kind == OMPC_DIST_SCHEDULE_unknown) {
+ std::string Values;
+ Values += "'";
+ Values += getOpenMPSimpleClauseTypeName(OMPC_dist_schedule, 0);
+ Values += "'";
+ Diag(KindLoc, diag::err_omp_unexpected_clause_value)
+ << Values << getOpenMPClauseName(OMPC_dist_schedule);
+ return nullptr;
+ }
+ Expr *ValExpr = ChunkSize;
+ Stmt *HelperValStmt = nullptr;
+ if (ChunkSize) {
+ if (!ChunkSize->isValueDependent() && !ChunkSize->isTypeDependent() &&
+ !ChunkSize->isInstantiationDependent() &&
+ !ChunkSize->containsUnexpandedParameterPack()) {
+ SourceLocation ChunkSizeLoc = ChunkSize->getLocStart();
+ ExprResult Val =
+ PerformOpenMPImplicitIntegerConversion(ChunkSizeLoc, ChunkSize);
+ if (Val.isInvalid())
+ return nullptr;
+
+ ValExpr = Val.get();
+
+ // OpenMP [2.7.1, Restrictions]
+ // chunk_size must be a loop invariant integer expression with a positive
+ // value.
+ llvm::APSInt Result;
+ if (ValExpr->isIntegerConstantExpr(Result, Context)) {
+ if (Result.isSigned() && !Result.isStrictlyPositive()) {
+ Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause)
+ << "dist_schedule" << ChunkSize->getSourceRange();
+ return nullptr;
+ }
+ } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective()) &&
+ !CurContext->isDependentContext()) {
+ llvm::MapVector<Expr *, DeclRefExpr *> Captures;
+ ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
+ HelperValStmt = buildPreInits(Context, Captures);
+ }
+ }
+ }
+
+ return new (Context)
+ OMPDistScheduleClause(StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc,
+ Kind, ValExpr, HelperValStmt);
+}
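// Sketch of the chunk-size restriction above (a non-constant chunk inside a
// parallel or task region is instead captured into a pre-init statement):
void distribute_chunks() {
  #pragma omp target teams
  #pragma omp distribute dist_schedule(static, 16) // OK: positive chunk
  for (int i = 0; i < 100; ++i) { }
  // dist_schedule(static, 0) would be diagnosed: the chunk size must be a
  // positive loop-invariant integer expression.
}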
+
+OMPClause *Sema::ActOnOpenMPDefaultmapClause(
+ OpenMPDefaultmapClauseModifier M, OpenMPDefaultmapClauseKind Kind,
+ SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation MLoc,
+ SourceLocation KindLoc, SourceLocation EndLoc) {
+ // OpenMP 4.5 only supports 'defaultmap(tofrom: scalar)'
+ if (M != OMPC_DEFAULTMAP_MODIFIER_tofrom ||
+ Kind != OMPC_DEFAULTMAP_scalar) {
+ std::string Value;
+ SourceLocation Loc;
+ Value += "'";
+ if (M != OMPC_DEFAULTMAP_MODIFIER_tofrom) {
+ Value += getOpenMPSimpleClauseTypeName(OMPC_defaultmap,
+ OMPC_DEFAULTMAP_MODIFIER_tofrom);
+ Loc = MLoc;
+ } else {
+ Value += getOpenMPSimpleClauseTypeName(OMPC_defaultmap,
+ OMPC_DEFAULTMAP_scalar);
+ Loc = KindLoc;
+ }
+ Value += "'";
+ Diag(Loc, diag::err_omp_unexpected_clause_value)
+ << Value << getOpenMPClauseName(OMPC_defaultmap);
+ return nullptr;
+ }
+
+ return new (Context)
+ OMPDefaultmapClause(StartLoc, LParenLoc, MLoc, KindLoc, EndLoc, Kind, M);
+}
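// Sketch: OpenMP 4.5 accepts exactly one modifier/kind pair here.
void defaultmap_forms() {
  int s = 0;
  #pragma omp target defaultmap(tofrom: scalar) // OK: the only 4.5 form
  { s = 1; }
  // Any other modifier/kind combination would hit the
  // err_omp_unexpected_clause_value diagnostic above.
}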
+
+bool Sema::ActOnStartOpenMPDeclareTargetDirective(SourceLocation Loc) {
+ DeclContext *CurLexicalContext = getCurLexicalContext();
+ if (!CurLexicalContext->isFileContext() &&
+ !CurLexicalContext->isExternCContext() &&
+ !CurLexicalContext->isExternCXXContext()) {
+ Diag(Loc, diag::err_omp_region_not_file_context);
+ return false;
+ }
+ if (IsInOpenMPDeclareTargetContext) {
+ Diag(Loc, diag::err_omp_enclosed_declare_target);
+ return false;
+ }
+
+ IsInOpenMPDeclareTargetContext = true;
+ return true;
+}
+
+void Sema::ActOnFinishOpenMPDeclareTargetDirective() {
+ assert(IsInOpenMPDeclareTargetContext &&
+ "Unexpected ActOnFinishOpenMPDeclareTargetDirective");
+
+ IsInOpenMPDeclareTargetContext = false;
+}
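// Sketch of a declare target region; nesting one region inside another is
// rejected by the check above:
#pragma omp declare target
int dev_counter;                 // implicitly marked declare target
void dev_fn() { ++dev_counter; } // likewise
// #pragma omp declare target   // error: nested declare target region
#pragma omp end declare target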
+
+void
+Sema::ActOnOpenMPDeclareTargetName(Scope *CurScope, CXXScopeSpec &ScopeSpec,
+ const DeclarationNameInfo &Id,
+ OMPDeclareTargetDeclAttr::MapTypeTy MT,
+ NamedDeclSetType &SameDirectiveDecls) {
+ LookupResult Lookup(*this, Id, LookupOrdinaryName);
+ LookupParsedName(Lookup, CurScope, &ScopeSpec, true);
+
+ if (Lookup.isAmbiguous())
+ return;
+ Lookup.suppressDiagnostics();
+
+ if (!Lookup.isSingleResult()) {
+ if (TypoCorrection Corrected =
+ CorrectTypo(Id, LookupOrdinaryName, CurScope, nullptr,
+ llvm::make_unique<VarOrFuncDeclFilterCCC>(*this),
+ CTK_ErrorRecovery)) {
+ diagnoseTypo(Corrected, PDiag(diag::err_undeclared_var_use_suggest)
+ << Id.getName());
+ checkDeclIsAllowedInOpenMPTarget(nullptr, Corrected.getCorrectionDecl());
+ return;
+ }
+
+ Diag(Id.getLoc(), diag::err_undeclared_var_use) << Id.getName();
+ return;
+ }
+
+ NamedDecl *ND = Lookup.getAsSingle<NamedDecl>();
+ if (isa<VarDecl>(ND) || isa<FunctionDecl>(ND)) {
+ if (!SameDirectiveDecls.insert(cast<NamedDecl>(ND->getCanonicalDecl())))
+ Diag(Id.getLoc(), diag::err_omp_declare_target_multiple) << Id.getName();
+
+ if (!ND->hasAttr<OMPDeclareTargetDeclAttr>()) {
+ Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(Context, MT);
+ ND->addAttr(A);
+ if (ASTMutationListener *ML = Context.getASTMutationListener())
+ ML->DeclarationMarkedOpenMPDeclareTarget(ND, A);
+ checkDeclIsAllowedInOpenMPTarget(nullptr, ND);
+ } else if (ND->getAttr<OMPDeclareTargetDeclAttr>()->getMapType() != MT) {
+ Diag(Id.getLoc(), diag::err_omp_declare_target_to_and_link)
+ << Id.getName();
+ }
+ } else
+ Diag(Id.getLoc(), diag::err_omp_invalid_target_decl) << Id.getName();
+}
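// Sketch of the named form handled above, including the to/link conflict
// (diagnostic wording assumed):
int x, y;
#pragma omp declare target to(x)
#pragma omp declare target link(y)  // OK: distinct list items
// #pragma omp declare target link(x)
// error: 'x' must not appear in both 'to' and 'link' clauses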
+
+static void checkDeclInTargetContext(SourceLocation SL, SourceRange SR,
+ Sema &SemaRef, Decl *D) {
+ if (!D)
+ return;
+ Decl *LD = nullptr;
+ if (isa<TagDecl>(D)) {
+ LD = cast<TagDecl>(D)->getDefinition();
+ } else if (isa<VarDecl>(D)) {
+ LD = cast<VarDecl>(D)->getDefinition();
+
+ // If this is an implicit variable, that is legal and we do not need to do
+ // anything.
+ if (cast<VarDecl>(D)->isImplicit()) {
+ Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
+ SemaRef.Context, OMPDeclareTargetDeclAttr::MT_To);
+ D->addAttr(A);
+ if (ASTMutationListener *ML = SemaRef.Context.getASTMutationListener())
+ ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+ return;
+ }
+
+ } else if (isa<FunctionDecl>(D)) {
+ const FunctionDecl *FD = nullptr;
+ if (cast<FunctionDecl>(D)->hasBody(FD))
+ LD = const_cast<FunctionDecl *>(FD);
+
+ // If the definition is associated with the current declaration in the
+ // target region (it can be, e.g., a lambda), that is legal and we do not
+ // need to do anything else.
+ if (LD == D) {
+ Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
+ SemaRef.Context, OMPDeclareTargetDeclAttr::MT_To);
+ D->addAttr(A);
+ if (ASTMutationListener *ML = SemaRef.Context.getASTMutationListener())
+ ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+ return;
+ }
+ }
+ if (!LD)
+ LD = D;
+ if (LD && !LD->hasAttr<OMPDeclareTargetDeclAttr>() &&
+ (isa<VarDecl>(LD) || isa<FunctionDecl>(LD))) {
+ // The outlined declaration is not marked as declare target.
+ if (LD->isOutOfLine()) {
+ SemaRef.Diag(LD->getLocation(), diag::warn_omp_not_in_target_context);
+ SemaRef.Diag(SL, diag::note_used_here) << SR;
+ } else {
+ DeclContext *DC = LD->getDeclContext();
+ while (DC) {
+ if (isa<FunctionDecl>(DC) &&
+ cast<FunctionDecl>(DC)->hasAttr<OMPDeclareTargetDeclAttr>())
+ break;
+ DC = DC->getParent();
+ }
+ if (DC)
+ return;
+
+ // The declaration is not in a declare target context.
+ SemaRef.Diag(LD->getLocation(), diag::warn_omp_not_in_target_context);
+ SemaRef.Diag(SL, diag::note_used_here) << SR;
+ }
+ // Mark the decl as declare target to prevent further diagnostics.
+ Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
+ SemaRef.Context, OMPDeclareTargetDeclAttr::MT_To);
+ D->addAttr(A);
+ if (ASTMutationListener *ML = SemaRef.Context.getASTMutationListener())
+ ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+ }
+}
+
+static bool checkValueDeclInTarget(SourceLocation SL, SourceRange SR,
+ Sema &SemaRef, DSAStackTy *Stack,
+ ValueDecl *VD) {
+ if (VD->hasAttr<OMPDeclareTargetDeclAttr>())
+ return true;
+ if (!CheckTypeMappable(SL, SR, SemaRef, Stack, VD->getType()))
+ return false;
+ return true;
+}
+
+void Sema::checkDeclIsAllowedInOpenMPTarget(Expr *E, Decl *D) {
+ if (!D || D->isInvalidDecl())
+ return;
+ SourceRange SR = E ? E->getSourceRange() : D->getSourceRange();
+ SourceLocation SL = E ? E->getLocStart() : D->getLocation();
+ // 2.10.6: a threadprivate variable cannot appear in a declare target directive.
+ if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
+ if (DSAStack->isThreadPrivate(VD)) {
+ Diag(SL, diag::err_omp_threadprivate_in_target);
+ ReportOriginalDSA(*this, DSAStack, VD, DSAStack->getTopDSA(VD, false));
+ return;
+ }
+ }
+ if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) {
+ // Problems, if any, with a variable declared with an incomplete type will
+ // be reported as usual, so there is no need to check that here.
+ if ((E || !VD->getType()->isIncompleteType()) &&
+ !checkValueDeclInTarget(SL, SR, *this, DSAStack, VD)) {
+ // Mark the decl as declare target to prevent further diagnostics.
+ if (isa<VarDecl>(VD) || isa<FunctionDecl>(VD)) {
+ Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
+ Context, OMPDeclareTargetDeclAttr::MT_To);
+ VD->addAttr(A);
+ if (ASTMutationListener *ML = Context.getASTMutationListener())
+ ML->DeclarationMarkedOpenMPDeclareTarget(VD, A);
+ }
+ return;
+ }
+ }
+ if (!E) {
+ // Checking declaration inside declare target region.
+ if (!D->hasAttr<OMPDeclareTargetDeclAttr>() &&
+ (isa<VarDecl>(D) || isa<FunctionDecl>(D))) {
+ Attr *A = OMPDeclareTargetDeclAttr::CreateImplicit(
+ Context, OMPDeclareTargetDeclAttr::MT_To);
+ D->addAttr(A);
+ if (ASTMutationListener *ML = Context.getASTMutationListener())
+ ML->DeclarationMarkedOpenMPDeclareTarget(D, A);
+ }
+ return;
+ }
+ checkDeclInTargetContext(E->getExprLoc(), E->getSourceRange(), *this, D);
+}
+
+OMPClause *Sema::ActOnOpenMPToClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ MappableVarListInfo MVLI(VarList);
+ checkMappableExpressionList(*this, DSAStack, OMPC_to, MVLI, StartLoc);
+ if (MVLI.ProcessedVarList.empty())
+ return nullptr;
+
+ return OMPToClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+ MVLI.ProcessedVarList, MVLI.VarBaseDeclarations,
+ MVLI.VarComponents);
+}
+
+OMPClause *Sema::ActOnOpenMPFromClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ MappableVarListInfo MVLI(VarList);
+ checkMappableExpressionList(*this, DSAStack, OMPC_from, MVLI, StartLoc);
+ if (MVLI.ProcessedVarList.empty())
+ return nullptr;
+
+ return OMPFromClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+ MVLI.ProcessedVarList, MVLI.VarBaseDeclarations,
+ MVLI.VarComponents);
+}
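// Sketch of the motion clauses these two entry points handle; both run their
// list items through checkMappableExpressionList above:
void update() {
  static int a[100], b[100];
  #pragma omp target update to(a)    // host -> device copy of 'a'
  #pragma omp target update from(b)  // device -> host copy of 'b'
}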
+
+OMPClause *Sema::ActOnOpenMPUseDevicePtrClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ SmallVector<Expr *, 8> Vars;
+ for (auto &RefExpr : VarList) {
+ assert(RefExpr && "NULL expr in OpenMP use_device_ptr clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second) {
+ // It will be analyzed later.
+ Vars.push_back(RefExpr);
+ }
+ ValueDecl *D = Res.first;
+ if (!D)
+ continue;
+
+ QualType Type = D->getType();
+ // The list item must be a pointer or a reference to a pointer.
+ if (!Type.getNonReferenceType()->isPointerType()) {
+ Diag(ELoc, diag::err_omp_usedeviceptr_not_a_pointer)
+ << 0 << RefExpr->getSourceRange();
+ continue;
+ }
+ Vars.push_back(RefExpr->IgnoreParens());
+ }
+
+ if (Vars.empty())
+ return nullptr;
+
+ return OMPUseDevicePtrClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+ Vars);
+}
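// Sketch of the pointer-type requirement checked above:
void dev_ptr(int *p, int x) {
  #pragma omp target data map(to: p[0:8]) use_device_ptr(p) // OK: pointer
  { /* inside the region, 'p' holds the corresponding device address */ }
  // use_device_ptr(x) would be rejected: the list item must have pointer
  // type (or be a reference to a pointer).
}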
+
+OMPClause *Sema::ActOnOpenMPIsDevicePtrClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ SmallVector<Expr *, 8> Vars;
+ for (auto &RefExpr : VarList) {
+ assert(RefExpr && "NULL expr in OpenMP use_device_ptr clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (Res.second) {
+ // It will be analyzed later.
+ Vars.push_back(RefExpr);
+ }
+ ValueDecl *D = Res.first;
+ if (!D)
+ continue;
+
+ QualType Type = D->getType();
+ // The list item must be a pointer or an array, or a reference to a pointer
+ // or an array.
+ if (!Type.getNonReferenceType()->isPointerType() &&
+ !Type.getNonReferenceType()->isArrayType()) {
+ Diag(ELoc, diag::err_omp_argument_type_isdeviceptr)
+ << 0 << RefExpr->getSourceRange();
+ continue;
+ }
+ Vars.push_back(RefExpr->IgnoreParens());
+ }
+
+ if (Vars.empty())
+ return nullptr;
+
+ return OMPIsDevicePtrClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+ Vars);
+}
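// Sketch: unlike use_device_ptr, arrays qualify here as well:
void is_dev_ptr(int *dp, int arr[8]) {
  #pragma omp target is_device_ptr(dp)  // OK: pointer
  { dp[0] = 1; }
  #pragma omp target is_device_ptr(arr) // OK: array parameter
  { arr[0] = 1; }
}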
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp
index d6a0ff7dc3d1..40d6e910f1fb 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp
@@ -39,8 +39,8 @@ using namespace clang;
using namespace sema;
static bool functionHasPassObjectSizeParams(const FunctionDecl *FD) {
- return std::any_of(FD->param_begin(), FD->param_end(),
- std::mem_fn(&ParmVarDecl::hasAttr<PassObjectSizeAttr>));
+ return llvm::any_of(FD->parameters(),
+ std::mem_fn(&ParmVarDecl::hasAttr<PassObjectSizeAttr>));
}
/// A convenience routine for creating a decayed reference to a function.
@@ -293,6 +293,13 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx,
// A narrowing conversion is an implicit conversion ...
QualType FromType = getToType(0);
QualType ToType = getToType(1);
+
+ // A conversion to an enumeration type is narrowing if the conversion to
+ // the underlying type is narrowing. This only arises for expressions of
+ // the form 'Enum{init}'.
+ if (auto *ET = ToType->getAs<EnumType>())
+ ToType = ET->getDecl()->getIntegerType();
+
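// A minimal sketch of the 'Enum{init}' case the new check covers:
enum class E : char {};
E ok = E{42};      // fine: 42 fits in the underlying type 'char'
// E bad = E{1000}; // ill-formed: narrows when converted to 'char'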
switch (Second) {
// 'bool' is an integral type; dispatch to the right place to handle it.
case ICK_Boolean_Conversion:
@@ -433,7 +440,7 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx,
/// dump - Print this standard conversion sequence to standard
/// error. Useful for debugging overloading issues.
-void StandardConversionSequence::dump() const {
+LLVM_DUMP_METHOD void StandardConversionSequence::dump() const {
raw_ostream &OS = llvm::errs();
bool PrintedSomething = false;
if (First != ICK_Identity) {
@@ -985,7 +992,7 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old,
}
bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
- bool UseUsingDeclRules) {
+ bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs) {
// C++ [basic.start.main]p2: This function shall not be overloaded.
if (New->isMain())
return false;
@@ -1041,7 +1048,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
//
// However, we don't consider either of these when deciding whether
// a member introduced by a shadow declaration is hidden.
- if (!UseUsingDeclRules && NewTemplate &&
+ if (!UseMemberUsingDeclRules && NewTemplate &&
(!TemplateParameterListsAreEqual(NewTemplate->getTemplateParameters(),
OldTemplate->getTemplateParameters(),
false, TPL_TemplateMatch) ||
@@ -1061,7 +1068,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
if (OldMethod && NewMethod &&
!OldMethod->isStatic() && !NewMethod->isStatic()) {
if (OldMethod->getRefQualifier() != NewMethod->getRefQualifier()) {
- if (!UseUsingDeclRules &&
+ if (!UseMemberUsingDeclRules &&
(OldMethod->getRefQualifier() == RQ_None ||
NewMethod->getRefQualifier() == RQ_None)) {
// C++0x [over.load]p2:
@@ -1118,7 +1125,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
return true;
}
- if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
+ if (getLangOpts().CUDA && ConsiderCudaAttrs) {
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
OldTarget = IdentifyCUDATarget(Old);
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
@@ -1129,7 +1136,10 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
// Don't allow mixing of HD with other kinds. This guarantees that
// we have only one viable function with this signature on any
// side of CUDA compilation.
- if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice))
+ // __global__ functions can't be overloaded based on attribute
+ // difference because, like HD, they also exist on both sides.
+ if ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+ (NewTarget == CFT_Global) || (OldTarget == CFT_Global))
return false;
// Allow overloading of functions with same signature, but
@@ -1147,7 +1157,16 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
/// \returns true if \arg FD is unavailable and current context is inside
/// an available function, false otherwise.
bool Sema::isFunctionConsideredUnavailable(FunctionDecl *FD) {
- return FD->isUnavailable() && !cast<Decl>(CurContext)->isUnavailable();
+ if (!FD->isUnavailable())
+ return false;
+
+ // Walk up the context of the caller.
+ Decl *C = cast<Decl>(CurContext);
+ do {
+ if (C->isUnavailable())
+ return false;
+ } while ((C = cast_or_null<Decl>(C->getDeclContext())));
+ return true;
}
/// \brief Tries a user-defined conversion from From to ToType.
@@ -1199,11 +1218,13 @@ TryUserDefinedConversion(Sema &S, Expr *From, QualType ToType,
S.IsDerivedFrom(From->getLocStart(), FromCanon, ToCanon))) {
// Turn this into a "standard" conversion sequence, so that it
// gets ranked with standard conversion sequences.
+ DeclAccessPair Found = ICS.UserDefined.FoundConversionFunction;
ICS.setStandard();
ICS.Standard.setAsIdentityConversion();
ICS.Standard.setFromType(From->getType());
ICS.Standard.setAllToTypes(ToType);
ICS.Standard.CopyConstructor = Constructor;
+ ICS.Standard.FoundCopyConstructor = Found;
if (ToCanon != FromCanon)
ICS.Standard.Second = ICK_Derived_To_Base;
}
@@ -1217,7 +1238,7 @@ TryUserDefinedConversion(Sema &S, Expr *From, QualType ToType,
for (OverloadCandidateSet::iterator Cand = Conversions.begin();
Cand != Conversions.end(); ++Cand)
if (Cand->Viable)
- ICS.Ambiguous.addConversion(Cand->Function);
+ ICS.Ambiguous.addConversion(Cand->FoundDecl, Cand->Function);
break;
// Fall through.
@@ -1652,6 +1673,20 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
SCS.Second = ICK_Complex_Real;
FromType = ToType.getUnqualifiedType();
} else if (FromType->isRealFloatingType() && ToType->isRealFloatingType()) {
+ // FIXME: disable conversions between long double and __float128 if
+ // their representation is different until there is back end support
+ // We of course allow this conversion if long double is really double.
+ if (&S.Context.getFloatTypeSemantics(FromType) !=
+ &S.Context.getFloatTypeSemantics(ToType)) {
+ bool Float128AndLongDouble = ((FromType == S.Context.Float128Ty &&
+ ToType == S.Context.LongDoubleTy) ||
+ (FromType == S.Context.LongDoubleTy &&
+ ToType == S.Context.Float128Ty));
+ if (Float128AndLongDouble &&
+ (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) !=
+ &llvm::APFloat::IEEEdouble))
+ return false;
+ }
// Floating point conversions (C++ 4.8).
SCS.Second = ICK_Floating_Conversion;
FromType = ToType.getUnqualifiedType();
@@ -1809,8 +1844,7 @@ bool Sema::IsIntegralPromotion(Expr *From, QualType FromType, QualType ToType) {
(FromType->isSignedIntegerType() ||
// We can promote any unsigned integer type whose size is
// less than int to an int.
- (!FromType->isSignedIntegerType() &&
- Context.getTypeSize(FromType) < Context.getTypeSize(ToType)))) {
+ Context.getTypeSize(FromType) < Context.getTypeSize(ToType))) {
return To->getKind() == BuiltinType::Int;
}
@@ -1955,7 +1989,8 @@ bool Sema::IsFloatingPointPromotion(QualType FromType, QualType ToType) {
if (!getLangOpts().CPlusPlus &&
(FromBuiltin->getKind() == BuiltinType::Float ||
FromBuiltin->getKind() == BuiltinType::Double) &&
- (ToBuiltin->getKind() == BuiltinType::LongDouble))
+ (ToBuiltin->getKind() == BuiltinType::LongDouble ||
+ ToBuiltin->getKind() == BuiltinType::Float128))
return true;
// Half can be promoted to float.
@@ -2538,9 +2573,8 @@ bool Sema::IsBlockPointerConversion(QualType FromType, QualType ToType,
// Argument types are too different. Abort.
return false;
}
- if (LangOpts.ObjCAutoRefCount &&
- !Context.FunctionTypesMatchOnNSConsumedAttrs(FromFunctionType,
- ToFunctionType))
+ if (!Context.doFunctionTypesMatchOnExtParameterInfos(FromFunctionType,
+ ToFunctionType))
return false;
ConvertedType = ToType;
@@ -2919,6 +2953,10 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType,
Qualifiers FromQuals = FromType.getQualifiers();
Qualifiers ToQuals = ToType.getQualifiers();
+
+ // Ignore __unaligned qualifier if this type is void.
+ if (ToType.getUnqualifiedType()->isVoidType())
+ FromQuals.removeUnaligned();
// Objective-C ARC:
// Check Objective-C lifetime conversions.
@@ -3015,39 +3053,26 @@ IsInitializerListConstructorConversion(Sema &S, Expr *From, QualType ToType,
UserDefinedConversionSequence &User,
OverloadCandidateSet &CandidateSet,
bool AllowExplicit) {
- DeclContext::lookup_result R = S.LookupConstructors(To);
- for (DeclContext::lookup_iterator Con = R.begin(), ConEnd = R.end();
- Con != ConEnd; ++Con) {
- NamedDecl *D = *Con;
- DeclAccessPair FoundDecl = DeclAccessPair::make(D, D->getAccess());
-
- // Find the constructor (which may be a template).
- CXXConstructorDecl *Constructor = nullptr;
- FunctionTemplateDecl *ConstructorTmpl
- = dyn_cast<FunctionTemplateDecl>(D);
- if (ConstructorTmpl)
- Constructor
- = cast<CXXConstructorDecl>(ConstructorTmpl->getTemplatedDecl());
- else
- Constructor = cast<CXXConstructorDecl>(D);
+ for (auto *D : S.LookupConstructors(To)) {
+ auto Info = getConstructorInfo(D);
+ if (!Info)
+ continue;
- bool Usable = !Constructor->isInvalidDecl() &&
- S.isInitListConstructor(Constructor) &&
- (AllowExplicit || !Constructor->isExplicit());
+ bool Usable = !Info.Constructor->isInvalidDecl() &&
+ S.isInitListConstructor(Info.Constructor) &&
+ (AllowExplicit || !Info.Constructor->isExplicit());
if (Usable) {
// If the first argument is (a reference to) the target type,
// suppress conversions.
- bool SuppressUserConversions =
- isFirstArgumentCompatibleWithType(S.Context, Constructor, ToType);
- if (ConstructorTmpl)
- S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl,
- /*ExplicitArgs*/ nullptr,
- From, CandidateSet,
- SuppressUserConversions);
+ bool SuppressUserConversions = isFirstArgumentCompatibleWithType(
+ S.Context, Info.Constructor, ToType);
+ if (Info.ConstructorTmpl)
+ S.AddTemplateOverloadCandidate(Info.ConstructorTmpl, Info.FoundDecl,
+ /*ExplicitArgs*/ nullptr, From,
+ CandidateSet, SuppressUserConversions);
else
- S.AddOverloadCandidate(Constructor, FoundDecl,
- From, CandidateSet,
- SuppressUserConversions);
+ S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl, From,
+ CandidateSet, SuppressUserConversions);
}
}
@@ -3147,27 +3172,17 @@ IsUserDefinedConversion(Sema &S, Expr *From, QualType ToType,
ListInitializing = true;
}
- DeclContext::lookup_result R = S.LookupConstructors(ToRecordDecl);
- for (DeclContext::lookup_iterator Con = R.begin(), ConEnd = R.end();
- Con != ConEnd; ++Con) {
- NamedDecl *D = *Con;
- DeclAccessPair FoundDecl = DeclAccessPair::make(D, D->getAccess());
-
- // Find the constructor (which may be a template).
- CXXConstructorDecl *Constructor = nullptr;
- FunctionTemplateDecl *ConstructorTmpl
- = dyn_cast<FunctionTemplateDecl>(D);
- if (ConstructorTmpl)
- Constructor
- = cast<CXXConstructorDecl>(ConstructorTmpl->getTemplatedDecl());
- else
- Constructor = cast<CXXConstructorDecl>(D);
+ for (auto *D : S.LookupConstructors(ToRecordDecl)) {
+ auto Info = getConstructorInfo(D);
+ if (!Info)
+ continue;
- bool Usable = !Constructor->isInvalidDecl();
+ bool Usable = !Info.Constructor->isInvalidDecl();
if (ListInitializing)
- Usable = Usable && (AllowExplicit || !Constructor->isExplicit());
+ Usable = Usable && (AllowExplicit || !Info.Constructor->isExplicit());
else
- Usable = Usable &&Constructor->isConvertingConstructor(AllowExplicit);
+ Usable = Usable &&
+ Info.Constructor->isConvertingConstructor(AllowExplicit);
if (Usable) {
bool SuppressUserConversions = !ConstructorsOnly;
if (SuppressUserConversions && ListInitializing) {
@@ -3176,18 +3191,18 @@ IsUserDefinedConversion(Sema &S, Expr *From, QualType ToType,
// If the first argument is (a reference to) the target type,
// suppress conversions.
SuppressUserConversions = isFirstArgumentCompatibleWithType(
- S.Context, Constructor, ToType);
+ S.Context, Info.Constructor, ToType);
}
}
- if (ConstructorTmpl)
- S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl,
- /*ExplicitArgs*/ nullptr,
- llvm::makeArrayRef(Args, NumArgs),
- CandidateSet, SuppressUserConversions);
+ if (Info.ConstructorTmpl)
+ S.AddTemplateOverloadCandidate(
+ Info.ConstructorTmpl, Info.FoundDecl,
+ /*ExplicitArgs*/ nullptr, llvm::makeArrayRef(Args, NumArgs),
+ CandidateSet, SuppressUserConversions);
else
// Allow one user-defined conversion when user specifies a
// From->ToType conversion via a static cast (c-style, etc).
- S.AddOverloadCandidate(Constructor, FoundDecl,
+ S.AddOverloadCandidate(Info.Constructor, Info.FoundDecl,
llvm::makeArrayRef(Args, NumArgs),
CandidateSet, SuppressUserConversions);
}
@@ -4127,6 +4142,10 @@ Sema::CompareReferenceRelationship(SourceLocation Loc,
T2Quals.removeObjCLifetime();
}
+ // MS compiler ignores __unaligned qualifier for references; do the same.
+ T1Quals.removeUnaligned();
+ T2Quals.removeUnaligned();
+
if (T1Quals == T2Quals)
return Ref_Compatible;
else if (T1Quals.compatiblyIncludes(T2Quals))
@@ -4248,7 +4267,7 @@ FindConversionForRefInit(Sema &S, ImplicitConversionSequence &ICS,
for (OverloadCandidateSet::iterator Cand = CandidateSet.begin();
Cand != CandidateSet.end(); ++Cand)
if (Cand->Viable)
- ICS.Ambiguous.addConversion(Cand->Function);
+ ICS.Ambiguous.addConversion(Cand->FoundDecl, Cand->Function);
return true;
case OR_No_Viable_Function:
@@ -4448,13 +4467,16 @@ TryReferenceInit(Sema &S, Expr *Init, QualType DeclType,
// initialization fails.
//
// Note that we only want to check address spaces and cvr-qualifiers here.
- // ObjC GC and lifetime qualifiers aren't important.
+ // ObjC GC, lifetime and unaligned qualifiers aren't important.
Qualifiers T1Quals = T1.getQualifiers();
Qualifiers T2Quals = T2.getQualifiers();
T1Quals.removeObjCGCAttr();
T1Quals.removeObjCLifetime();
T2Quals.removeObjCGCAttr();
T2Quals.removeObjCLifetime();
+ // MS compiler ignores __unaligned qualifier for references; do the same.
+ T1Quals.removeUnaligned();
+ T2Quals.removeUnaligned();
if (!T1Quals.compatiblyIncludes(T2Quals))
return ICS;
}
@@ -5838,12 +5860,12 @@ Sema::AddOverloadCandidate(FunctionDecl *Function,
}
}
-ObjCMethodDecl *Sema::SelectBestMethod(Selector Sel, MultiExprArg Args,
- bool IsInstance) {
- SmallVector<ObjCMethodDecl*, 4> Methods;
- if (!CollectMultipleMethodsInGlobalPool(Sel, Methods, IsInstance))
+ObjCMethodDecl *
+Sema::SelectBestMethod(Selector Sel, MultiExprArg Args, bool IsInstance,
+ SmallVectorImpl<ObjCMethodDecl *> &Methods) {
+ if (Methods.size() <= 1)
return nullptr;
-
+
for (unsigned b = 0, e = Methods.size(); b < e; b++) {
bool Match = true;
ObjCMethodDecl *Method = Methods[b];
@@ -5952,37 +5974,32 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
SFINAETrap Trap(*this);
SmallVector<Expr *, 16> ConvertedArgs;
bool InitializationFailed = false;
- bool ContainsValueDependentExpr = false;
+
+ // Ignore any variadic parameters. Converting them is pointless, since the
+ // user can't refer to them in the enable_if condition.
+ unsigned ArgSizeNoVarargs = std::min(Function->param_size(), Args.size());
// Convert the arguments.
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (i == 0 && !MissingImplicitThis && isa<CXXMethodDecl>(Function) &&
+ for (unsigned I = 0; I != ArgSizeNoVarargs; ++I) {
+ ExprResult R;
+ if (I == 0 && !MissingImplicitThis && isa<CXXMethodDecl>(Function) &&
!cast<CXXMethodDecl>(Function)->isStatic() &&
!isa<CXXConstructorDecl>(Function)) {
CXXMethodDecl *Method = cast<CXXMethodDecl>(Function);
- ExprResult R =
- PerformObjectArgumentInitialization(Args[0], /*Qualifier=*/nullptr,
- Method, Method);
- if (R.isInvalid()) {
- InitializationFailed = true;
- break;
- }
- ContainsValueDependentExpr |= R.get()->isValueDependent();
- ConvertedArgs.push_back(R.get());
+ R = PerformObjectArgumentInitialization(Args[0], /*Qualifier=*/nullptr,
+ Method, Method);
} else {
- ExprResult R =
- PerformCopyInitialization(InitializedEntity::InitializeParameter(
- Context,
- Function->getParamDecl(i)),
- SourceLocation(),
- Args[i]);
- if (R.isInvalid()) {
- InitializationFailed = true;
- break;
- }
- ContainsValueDependentExpr |= R.get()->isValueDependent();
- ConvertedArgs.push_back(R.get());
+ R = PerformCopyInitialization(InitializedEntity::InitializeParameter(
+ Context, Function->getParamDecl(I)),
+ SourceLocation(), Args[I]);
+ }
+
+ if (R.isInvalid()) {
+ InitializationFailed = true;
+ break;
}
+
+ ConvertedArgs.push_back(R.get());
}
if (InitializationFailed || Trap.hasErrorOccurred())
@@ -6002,7 +6019,6 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
InitializationFailed = true;
break;
}
- ContainsValueDependentExpr |= R.get()->isValueDependent();
ConvertedArgs.push_back(R.get());
}
@@ -6012,18 +6028,14 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
for (auto *EIA : EnableIfAttrs) {
APValue Result;
- if (EIA->getCond()->isValueDependent()) {
- // Don't even try now, we'll examine it after instantiation.
- continue;
- }
-
+ // FIXME: This doesn't consider value-dependent cases, because doing so is
+ // very difficult. Ideally, we should handle them more gracefully.
if (!EIA->getCond()->EvaluateWithSubstitution(
- Result, Context, Function, llvm::makeArrayRef(ConvertedArgs))) {
- if (!ContainsValueDependentExpr)
- return EIA;
- } else if (!Result.isInt() || !Result.getInt().getBoolValue()) {
+ Result, Context, Function, llvm::makeArrayRef(ConvertedArgs)))
+ return EIA;
+
+ if (!Result.isInt() || !Result.getInt().getBoolValue())
return EIA;
- }
}
return nullptr;
}
@@ -6814,7 +6826,8 @@ namespace {
/// enumeration types.
class BuiltinCandidateTypeSet {
/// TypeSet - A set of types.
- typedef llvm::SmallPtrSet<QualType, 8> TypeSet;
+ typedef llvm::SetVector<QualType, SmallVector<QualType, 8>,
+ llvm::SmallPtrSet<QualType, 8>> TypeSet;
/// PointerTypes - The set of pointer types that will be used in the
/// built-in candidates.
@@ -6913,7 +6926,7 @@ BuiltinCandidateTypeSet::AddPointerWithMoreQualifiedTypeVariants(QualType Ty,
const Qualifiers &VisibleQuals) {
// Insert this type.
- if (!PointerTypes.insert(Ty).second)
+ if (!PointerTypes.insert(Ty))
return false;
QualType PointeeTy;
@@ -6981,7 +6994,7 @@ bool
BuiltinCandidateTypeSet::AddMemberPointerWithMoreQualifiedTypeVariants(
QualType Ty) {
// Insert this type.
- if (!MemberPointerTypes.insert(Ty).second)
+ if (!MemberPointerTypes.insert(Ty))
return false;
const MemberPointerType *PointerTy = Ty->getAs<MemberPointerType>();
@@ -7187,13 +7200,13 @@ class BuiltinOperatorOverloadBuilder {
// provided via the getArithmeticType() method below.
// The "promoted arithmetic types" are the arithmetic
// types that are preserved by promotion (C++ [over.built]p2).
- static const unsigned FirstIntegralType = 3;
- static const unsigned LastIntegralType = 20;
- static const unsigned FirstPromotedIntegralType = 3,
- LastPromotedIntegralType = 11;
+ static const unsigned FirstIntegralType = 4;
+ static const unsigned LastIntegralType = 21;
+ static const unsigned FirstPromotedIntegralType = 4,
+ LastPromotedIntegralType = 12;
static const unsigned FirstPromotedArithmeticType = 0,
- LastPromotedArithmeticType = 11;
- static const unsigned NumArithmeticTypes = 20;
+ LastPromotedArithmeticType = 12;
+ static const unsigned NumArithmeticTypes = 21;
/// \brief Get the canonical type for a given arithmetic type index.
CanQualType getArithmeticType(unsigned index) {
@@ -7204,6 +7217,7 @@ class BuiltinOperatorOverloadBuilder {
&ASTContext::FloatTy,
&ASTContext::DoubleTy,
&ASTContext::LongDoubleTy,
+ &ASTContext::Float128Ty,
// Start of integral types.
&ASTContext::IntTy,
@@ -7246,7 +7260,7 @@ class BuiltinOperatorOverloadBuilder {
// (we could precompute SLL x UI for all known platforms, but it's
// better not to make any assumptions).
// We assume that int128 has a higher rank than long long on all platforms.
- enum PromotedType {
+ enum PromotedType : int8_t {
Dep=-1,
Flt, Dbl, LDbl, SI, SL, SLL, S128, UI, UL, ULL, U128
};
@@ -8476,16 +8490,31 @@ Sema::AddArgumentDependentLookupCandidates(DeclarationName Name,
}
}
-// Determines whether Cand1 is "better" in terms of its enable_if attrs than
-// Cand2 for overloading. This function assumes that all of the enable_if attrs
-// on Cand1 and Cand2 have conditions that evaluate to true.
-//
-// Cand1's set of enable_if attributes are said to be "better" than Cand2's iff
-// Cand1's first N enable_if attributes have precisely the same conditions as
-// Cand2's first N enable_if attributes (where N = the number of enable_if
-// attributes on Cand2), and Cand1 has more than N enable_if attributes.
-static bool hasBetterEnableIfAttrs(Sema &S, const FunctionDecl *Cand1,
- const FunctionDecl *Cand2) {
+namespace {
+enum class Comparison { Equal, Better, Worse };
+}
+
+/// Compares the enable_if attributes of two FunctionDecls, for the purposes of
+/// overload resolution.
+///
+/// Cand1's set of enable_if attributes are said to be "better" than Cand2's iff
+/// Cand1's first N enable_if attributes have precisely the same conditions as
+/// Cand2's first N enable_if attributes (where N = the number of enable_if
+/// attributes on Cand2), and Cand1 has more than N enable_if attributes.
+///
+/// Note that you can have a pair of candidates such that Cand1's enable_if
+/// attributes are worse than Cand2's, and Cand2's enable_if attributes are
+/// worse than Cand1's.
+static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
+ const FunctionDecl *Cand2) {
+ // Common case: One (or both) decls don't have enable_if attrs.
+ bool Cand1Attr = Cand1->hasAttr<EnableIfAttr>();
+ bool Cand2Attr = Cand2->hasAttr<EnableIfAttr>();
+ if (!Cand1Attr || !Cand2Attr) {
+ if (Cand1Attr == Cand2Attr)
+ return Comparison::Equal;
+ return Cand1Attr ? Comparison::Better : Comparison::Worse;
+ }
// FIXME: The next several lines are just
// specific_attr_iterator<EnableIfAttr> but going in declaration order,
@@ -8493,10 +8522,10 @@ static bool hasBetterEnableIfAttrs(Sema &S, const FunctionDecl *Cand1,
auto Cand1Attrs = getOrderedEnableIfAttrs(Cand1);
auto Cand2Attrs = getOrderedEnableIfAttrs(Cand2);
- // Candidate 1 is better if it has strictly more attributes and
- // the common sequence is identical.
- if (Cand1Attrs.size() <= Cand2Attrs.size())
- return false;
+ // It's impossible for Cand1 to be better than (or equal to) Cand2 if Cand1
+ // has fewer enable_if attributes than Cand2.
+ if (Cand1Attrs.size() < Cand2Attrs.size())
+ return Comparison::Worse;
auto Cand1I = Cand1Attrs.begin();
llvm::FoldingSetNodeID Cand1ID, Cand2ID;
@@ -8508,10 +8537,10 @@ static bool hasBetterEnableIfAttrs(Sema &S, const FunctionDecl *Cand1,
Cand1A->getCond()->Profile(Cand1ID, S.getASTContext(), true);
Cand2A->getCond()->Profile(Cand2ID, S.getASTContext(), true);
if (Cand1ID != Cand2ID)
- return false;
+ return Comparison::Worse;
}
- return true;
+ return Cand1I == Cand1Attrs.end() ? Comparison::Equal : Comparison::Better;
}
/// isBetterOverloadCandidate - Determines whether the first overload
@@ -8621,14 +8650,33 @@ bool clang::isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
return BetterTemplate == Cand1.Function->getPrimaryTemplate();
}
+ // FIXME: Work around a defect in the C++17 inheriting constructor wording.
+ // A derived-class constructor beats an (inherited) base class constructor.
+ bool Cand1IsInherited =
+ dyn_cast_or_null<ConstructorUsingShadowDecl>(Cand1.FoundDecl.getDecl());
+ bool Cand2IsInherited =
+ dyn_cast_or_null<ConstructorUsingShadowDecl>(Cand2.FoundDecl.getDecl());
+ if (Cand1IsInherited != Cand2IsInherited)
+ return Cand2IsInherited;
+ else if (Cand1IsInherited) {
+ assert(Cand2IsInherited);
+ auto *Cand1Class = cast<CXXRecordDecl>(Cand1.Function->getDeclContext());
+ auto *Cand2Class = cast<CXXRecordDecl>(Cand2.Function->getDeclContext());
+ if (Cand1Class->isDerivedFrom(Cand2Class))
+ return true;
+ if (Cand2Class->isDerivedFrom(Cand1Class))
+ return false;
+ // Inherited from sibling base classes: still ambiguous.
+ }
+
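// Sketch of the tiebreak, assuming both conversions have the same rank:
struct A { A(short); };
struct B : A { using A::A; B(long); };
B b(0);  // int->short and int->long tie on rank, but B's own constructor
         // now beats the one inherited from A; two constructors inherited
         // from sibling bases would still be ambiguous.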
// Check for enable_if value-based overload resolution.
- if (Cand1.Function && Cand2.Function &&
- (Cand1.Function->hasAttr<EnableIfAttr>() ||
- Cand2.Function->hasAttr<EnableIfAttr>()))
- return hasBetterEnableIfAttrs(S, Cand1.Function, Cand2.Function);
+ if (Cand1.Function && Cand2.Function) {
+ Comparison Cmp = compareEnableIfAttrs(S, Cand1.Function, Cand2.Function);
+ if (Cmp != Comparison::Equal)
+ return Cmp == Comparison::Better;
+ }
- if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
- Cand1.Function && Cand2.Function) {
+ if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
S.IdentifyCUDAPreference(Caller, Cand2.Function);
@@ -8722,14 +8770,44 @@ OverloadingResult
OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
iterator &Best,
bool UserDefinedConversion) {
+ llvm::SmallVector<OverloadCandidate *, 16> Candidates;
+ std::transform(begin(), end(), std::back_inserter(Candidates),
+ [](OverloadCandidate &Cand) { return &Cand; });
+
+ // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA
+ // but accepted by both clang and NVCC. However during a particular
+ // compilation mode only one call variant is viable. We need to
+ // exclude non-viable overload candidates from consideration based
+ // only on their host/device attributes. Specifically, if one
+ // candidate call is WrongSide and the other is SameSide, we ignore
+ // the WrongSide candidate.
+ if (S.getLangOpts().CUDA) {
+ const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
+ bool ContainsSameSideCandidate =
+ llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
+ return Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_SameSide;
+ });
+ if (ContainsSameSideCandidate) {
+ auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
+ return Cand->Function &&
+ S.IdentifyCUDAPreference(Caller, Cand->Function) ==
+ Sema::CFP_WrongSide;
+ };
+ Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(),
+ IsWrongSideCandidate),
+ Candidates.end());
+ }
+ }
+
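// Sketch under CUDA attributes, compiling for the host:
__host__ __device__ void caller();  // HD context
__host__ void callee(int);          // SameSide in host compilation
__device__ void callee(long);       // WrongSide in host compilation
// With a SameSide candidate present, the WrongSide one is dropped before
// ranking, so caller() resolving callee(0) picks the __host__ overload.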
// Find the best viable function.
Best = end();
- for (iterator Cand = begin(); Cand != end(); ++Cand) {
+ for (auto *Cand : Candidates)
if (Cand->Viable)
if (Best == end() || isBetterOverloadCandidate(S, *Cand, *Best, Loc,
UserDefinedConversion))
Best = Cand;
- }
// If we didn't find any viable functions, abort.
if (Best == end())
@@ -8739,7 +8817,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
// Make sure that this function is better than every other viable
// function. If not, we have an ambiguity.
- for (iterator Cand = begin(); Cand != end(); ++Cand) {
+ for (auto *Cand : Candidates) {
if (Cand->Viable &&
Cand != Best &&
!isBetterOverloadCandidate(S, *Best, *Cand, Loc,
@@ -8782,10 +8860,12 @@ enum OverloadCandidateKind {
oc_implicit_move_constructor,
oc_implicit_copy_assignment,
oc_implicit_move_assignment,
- oc_implicit_inherited_constructor
+ oc_inherited_constructor,
+ oc_inherited_constructor_template
};
OverloadCandidateKind ClassifyOverloadCandidate(Sema &S,
+ NamedDecl *Found,
FunctionDecl *Fn,
std::string &Description) {
bool isTemplate = false;
@@ -8797,11 +8877,13 @@ OverloadCandidateKind ClassifyOverloadCandidate(Sema &S,
}
if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Fn)) {
- if (!Ctor->isImplicit())
- return isTemplate ? oc_constructor_template : oc_constructor;
-
- if (Ctor->getInheritedConstructor())
- return oc_implicit_inherited_constructor;
+ if (!Ctor->isImplicit()) {
+ if (isa<ConstructorUsingShadowDecl>(Found))
+ return isTemplate ? oc_inherited_constructor_template
+ : oc_inherited_constructor;
+ else
+ return isTemplate ? oc_constructor_template : oc_constructor;
+ }
if (Ctor->isDefaultConstructor())
return oc_implicit_default_constructor;
@@ -8833,14 +8915,13 @@ OverloadCandidateKind ClassifyOverloadCandidate(Sema &S,
return isTemplate ? oc_function_template : oc_function;
}
-void MaybeEmitInheritedConstructorNote(Sema &S, Decl *Fn) {
- const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Fn);
- if (!Ctor) return;
-
- Ctor = Ctor->getInheritedConstructor();
- if (!Ctor) return;
-
- S.Diag(Ctor->getLocation(), diag::note_ovl_candidate_inherited_constructor);
+void MaybeEmitInheritedConstructorNote(Sema &S, Decl *FoundDecl) {
+ // FIXME: It'd be nice to only emit a note once per using-decl per overload
+ // set.
+ if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>(FoundDecl))
+ S.Diag(FoundDecl->getLocation(),
+ diag::note_ovl_candidate_inherited_constructor)
+ << Shadow->getNominatedBaseClass();
}
} // end anonymous namespace
@@ -8879,8 +8960,8 @@ static bool checkAddressOfFunctionIsAvailable(Sema &S, const FunctionDecl *FD,
return false;
}
- auto I = std::find_if(FD->param_begin(), FD->param_end(),
- std::mem_fn(&ParmVarDecl::hasAttr<PassObjectSizeAttr>));
+ auto I = llvm::find_if(
+ FD->parameters(), std::mem_fn(&ParmVarDecl::hasAttr<PassObjectSizeAttr>));
if (I == FD->param_end())
return true;
@@ -8914,19 +8995,19 @@ bool Sema::checkAddressOfFunctionIsAvailable(const FunctionDecl *Function,
}
// Notes the location of an overload candidate.
-void Sema::NoteOverloadCandidate(FunctionDecl *Fn, QualType DestType,
- bool TakingAddress) {
+void Sema::NoteOverloadCandidate(NamedDecl *Found, FunctionDecl *Fn,
+ QualType DestType, bool TakingAddress) {
if (TakingAddress && !checkAddressOfCandidateIsAvailable(*this, Fn))
return;
std::string FnDesc;
- OverloadCandidateKind K = ClassifyOverloadCandidate(*this, Fn, FnDesc);
+ OverloadCandidateKind K = ClassifyOverloadCandidate(*this, Found, Fn, FnDesc);
PartialDiagnostic PD = PDiag(diag::note_ovl_candidate)
<< (unsigned) K << FnDesc;
HandleFunctionTypeMismatch(PD, Fn->getType(), DestType);
Diag(Fn->getLocation(), PD);
- MaybeEmitInheritedConstructorNote(*this, Fn);
+ MaybeEmitInheritedConstructorNote(*this, Found);
}
// Notes the location of all overload candidates designated through
@@ -8943,11 +9024,11 @@ void Sema::NoteAllOverloadCandidates(Expr *OverloadedExpr, QualType DestType,
I != IEnd; ++I) {
if (FunctionTemplateDecl *FunTmpl =
dyn_cast<FunctionTemplateDecl>((*I)->getUnderlyingDecl()) ) {
- NoteOverloadCandidate(FunTmpl->getTemplatedDecl(), DestType,
+ NoteOverloadCandidate(*I, FunTmpl->getTemplatedDecl(), DestType,
TakingAddress);
} else if (FunctionDecl *Fun
= dyn_cast<FunctionDecl>((*I)->getUnderlyingDecl()) ) {
- NoteOverloadCandidate(Fun, DestType, TakingAddress);
+ NoteOverloadCandidate(*I, Fun, DestType, TakingAddress);
}
}
}
@@ -8971,7 +9052,7 @@ void ImplicitConversionSequence::DiagnoseAmbiguousConversion(
if (CandsShown >= 4 && ShowOverloads == Ovl_Best)
break;
++CandsShown;
- S.NoteOverloadCandidate(*I);
+ S.NoteOverloadCandidate(I->first, I->second);
}
if (I != E)
S.Diag(SourceLocation(), diag::note_ovl_too_many_candidates) << int(E - I);
@@ -8996,7 +9077,8 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
}
std::string FnDesc;
- OverloadCandidateKind FnKind = ClassifyOverloadCandidate(S, Fn, FnDesc);
+ OverloadCandidateKind FnKind =
+ ClassifyOverloadCandidate(S, Cand->FoundDecl, Fn, FnDesc);
Expr *FromExpr = Conv.Bad.FromExpr;
QualType FromTy = Conv.Bad.getFromType();
@@ -9013,7 +9095,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< (unsigned) FnKind << FnDesc
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
<< ToTy << Name << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9026,8 +9108,10 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
else {
// TODO: detect and diagnose the full richness of const mismatches.
if (CanQual<PointerType> FromPT = CFromTy->getAs<PointerType>())
- if (CanQual<PointerType> ToPT = CToTy->getAs<PointerType>())
- CFromTy = FromPT->getPointeeType(), CToTy = ToPT->getPointeeType();
+ if (CanQual<PointerType> ToPT = CToTy->getAs<PointerType>()) {
+ CFromTy = FromPT->getPointeeType();
+ CToTy = ToPT->getPointeeType();
+ }
}
if (CToTy.getUnqualifiedType() == CFromTy.getUnqualifiedType() &&
@@ -9042,7 +9126,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< FromTy
<< FromQs.getAddressSpace() << ToQs.getAddressSpace()
<< (unsigned) isObjectArgument << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9053,7 +9137,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< FromTy
<< FromQs.getObjCLifetime() << ToQs.getObjCLifetime()
<< (unsigned) isObjectArgument << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9064,7 +9148,16 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< FromTy
<< FromQs.getObjCGCAttr() << ToQs.getObjCGCAttr()
<< (unsigned) isObjectArgument << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
+ return;
+ }
+
+ if (FromQs.hasUnaligned() != ToQs.hasUnaligned()) {
+ S.Diag(Fn->getLocation(), diag::note_ovl_candidate_bad_unaligned)
+ << (unsigned) FnKind << FnDesc
+ << (FromExpr ? FromExpr->getSourceRange() : SourceRange())
+ << FromTy << FromQs.hasUnaligned() << I+1;
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9082,7 +9175,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
<< FromTy << (CVR - 1) << I+1;
}
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9093,7 +9186,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< (unsigned) FnKind << FnDesc
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
<< FromTy << ToTy << (unsigned) isObjectArgument << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9104,11 +9197,14 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
if (const PointerType *PTy = TempFromTy->getAs<PointerType>())
TempFromTy = PTy->getPointeeType();
if (TempFromTy->isIncompleteType()) {
+ // Emit the generic diagnostic and, optionally, add the hints to it.
S.Diag(Fn->getLocation(), diag::note_ovl_candidate_bad_conv_incomplete)
<< (unsigned) FnKind << FnDesc
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
- << FromTy << ToTy << (unsigned) isObjectArgument << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ << FromTy << ToTy << (unsigned) isObjectArgument << I+1
+ << (unsigned) (Cand->Fix.Kind);
+
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9147,7 +9243,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< (unsigned) FnKind << FnDesc
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
<< (unsigned) isObjectArgument << I + 1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
}
@@ -9159,7 +9255,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
<< (BaseToDerivedConversion - 1)
<< FromTy << ToTy << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -9172,7 +9268,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
<< (unsigned) FnKind << FnDesc
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange())
<< FromTy << ToTy << (unsigned) isObjectArgument << I+1;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
}
@@ -9194,7 +9290,7 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
FDiag << *HI;
S.Diag(Fn->getLocation(), FDiag);
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
}
/// Additional arity mismatch diagnosis specific to a function overload
@@ -9228,7 +9324,8 @@ static bool CheckArityMismatch(Sema &S, OverloadCandidate *Cand,
}
/// General arity mismatch diagnosis over a candidate in a candidate set.
-static void DiagnoseArityMismatch(Sema &S, Decl *D, unsigned NumFormalArgs) {
+static void DiagnoseArityMismatch(Sema &S, NamedDecl *Found, Decl *D,
+ unsigned NumFormalArgs) {
assert(isa<FunctionDecl>(D) &&
"The templated declaration should at least be a function"
" when diagnosing bad template argument deduction due to too many"
@@ -9258,7 +9355,8 @@ static void DiagnoseArityMismatch(Sema &S, Decl *D, unsigned NumFormalArgs) {
}
std::string Description;
- OverloadCandidateKind FnKind = ClassifyOverloadCandidate(S, Fn, Description);
+ OverloadCandidateKind FnKind =
+ ClassifyOverloadCandidate(S, Found, Fn, Description);
if (modeCount == 1 && Fn->getParamDecl(0)->getDeclName())
S.Diag(Fn->getLocation(), diag::note_ovl_candidate_arity_one)
@@ -9268,28 +9366,25 @@ static void DiagnoseArityMismatch(Sema &S, Decl *D, unsigned NumFormalArgs) {
S.Diag(Fn->getLocation(), diag::note_ovl_candidate_arity)
<< (unsigned) FnKind << (Fn->getDescribedFunctionTemplate() != nullptr)
<< mode << modeCount << NumFormalArgs;
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Found);
}
/// Arity mismatch diagnosis specific to a function overload candidate.
static void DiagnoseArityMismatch(Sema &S, OverloadCandidate *Cand,
unsigned NumFormalArgs) {
if (!CheckArityMismatch(S, Cand, NumFormalArgs))
- DiagnoseArityMismatch(S, Cand->Function, NumFormalArgs);
+ DiagnoseArityMismatch(S, Cand->FoundDecl, Cand->Function, NumFormalArgs);
}
static TemplateDecl *getDescribedTemplate(Decl *Templated) {
- if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Templated))
- return FD->getDescribedFunctionTemplate();
- else if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Templated))
- return RD->getDescribedClassTemplate();
-
+ if (TemplateDecl *TD = Templated->getDescribedTemplate())
+ return TD;
llvm_unreachable("Unsupported: Getting the described template declaration"
" for bad deduction diagnosis");
}
/// Diagnose a failed template-argument deduction.
-static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
+static void DiagnoseBadDeduction(Sema &S, NamedDecl *Found, Decl *Templated,
DeductionFailureInfo &DeductionFailure,
unsigned NumArgs,
bool TakingCandidateAddress) {
@@ -9307,7 +9402,7 @@ static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
S.Diag(Templated->getLocation(),
diag::note_ovl_candidate_incomplete_deduction)
<< ParamD->getDeclName();
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
}
@@ -9332,7 +9427,7 @@ static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
S.Diag(Templated->getLocation(), diag::note_ovl_candidate_underqualified)
<< ParamD->getDeclName() << Arg << NonCanonParam;
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
}
@@ -9351,7 +9446,7 @@ static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
diag::note_ovl_candidate_inconsistent_deduction)
<< which << ParamD->getDeclName() << *DeductionFailure.getFirstArg()
<< *DeductionFailure.getSecondArg();
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
}
@@ -9374,18 +9469,18 @@ static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
diag::note_ovl_candidate_explicit_arg_mismatch_unnamed)
<< (index + 1);
}
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
case Sema::TDK_TooManyArguments:
case Sema::TDK_TooFewArguments:
- DiagnoseArityMismatch(S, Templated, NumArgs);
+ DiagnoseArityMismatch(S, Found, Templated, NumArgs);
return;
case Sema::TDK_InstantiationDepth:
S.Diag(Templated->getLocation(),
diag::note_ovl_candidate_instantiation_depth);
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
case Sema::TDK_SubstitutionFailure: {
@@ -9423,7 +9518,7 @@ static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
S.Diag(Templated->getLocation(),
diag::note_ovl_candidate_substitution_failure)
<< TemplateArgString << SFINAEArgString << R;
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
}
@@ -9495,7 +9590,7 @@ static void DiagnoseBadDeduction(Sema &S, Decl *Templated,
// note_ovl_candidate_bad_deduction, which is uselessly vague.
case Sema::TDK_MiscellaneousDeductionFailure:
S.Diag(Templated->getLocation(), diag::note_ovl_candidate_bad_deduction);
- MaybeEmitInheritedConstructorNote(S, Templated);
+ MaybeEmitInheritedConstructorNote(S, Found);
return;
}
}
@@ -9509,7 +9604,7 @@ static void DiagnoseBadDeduction(Sema &S, OverloadCandidate *Cand,
if (CheckArityMismatch(S, Cand, NumArgs))
return;
}
- DiagnoseBadDeduction(S, Cand->Function, // pattern
+ DiagnoseBadDeduction(S, Cand->FoundDecl, Cand->Function, // pattern
Cand->DeductionFailure, NumArgs, TakingCandidateAddress);
}
@@ -9522,7 +9617,8 @@ static void DiagnoseBadTarget(Sema &S, OverloadCandidate *Cand) {
CalleeTarget = S.IdentifyCUDATarget(Callee);
std::string FnDesc;
- OverloadCandidateKind FnKind = ClassifyOverloadCandidate(S, Callee, FnDesc);
+ OverloadCandidateKind FnKind =
+ ClassifyOverloadCandidate(S, Cand->FoundDecl, Callee, FnDesc);
S.Diag(Callee->getLocation(), diag::note_ovl_candidate_bad_target)
<< (unsigned)FnKind << CalleeTarget << CallerTarget;
@@ -9599,18 +9695,19 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
if (Cand->Viable && (Fn->isDeleted() ||
S.isFunctionConsideredUnavailable(Fn))) {
std::string FnDesc;
- OverloadCandidateKind FnKind = ClassifyOverloadCandidate(S, Fn, FnDesc);
+ OverloadCandidateKind FnKind =
+ ClassifyOverloadCandidate(S, Cand->FoundDecl, Fn, FnDesc);
S.Diag(Fn->getLocation(), diag::note_ovl_candidate_deleted)
<< FnKind << FnDesc
<< (Fn->isDeleted() ? (Fn->isDeletedAsWritten() ? 1 : 2) : 0);
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
// We don't really have anything else to say about viable candidates.
if (Cand->Viable) {
- S.NoteOverloadCandidate(Fn);
+ S.NoteOverloadCandidate(Cand->FoundDecl, Fn);
return;
}
@@ -9620,19 +9717,20 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
return DiagnoseArityMismatch(S, Cand, NumArgs);
case ovl_fail_bad_deduction:
- return DiagnoseBadDeduction(S, Cand, NumArgs, TakingCandidateAddress);
+ return DiagnoseBadDeduction(S, Cand, NumArgs,
+ TakingCandidateAddress);
case ovl_fail_illegal_constructor: {
S.Diag(Fn->getLocation(), diag::note_ovl_candidate_illegal_constructor)
<< (Fn->getPrimaryTemplate() ? 1 : 0);
- MaybeEmitInheritedConstructorNote(S, Fn);
+ MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
case ovl_fail_trivial_conversion:
case ovl_fail_bad_final_conversion:
case ovl_fail_final_conversion_not_exact:
- return S.NoteOverloadCandidate(Fn);
+ return S.NoteOverloadCandidate(Cand->FoundDecl, Fn);
case ovl_fail_bad_conversion: {
unsigned I = (Cand->IgnoreObjectArgument ? 1 : 0);
@@ -9643,7 +9741,7 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand,
// FIXME: this currently happens when we're called from SemaInit
// when user-conversion overload fails. Figure out how to handle
// those conditions and diagnose them well.
- return S.NoteOverloadCandidate(Fn);
+ return S.NoteOverloadCandidate(Cand->FoundDecl, Fn);
}
case ovl_fail_bad_target:
@@ -9691,7 +9789,6 @@ static void NoteSurrogateCandidate(Sema &S, OverloadCandidate *Cand) {
S.Diag(Cand->Surrogate->getLocation(), diag::note_ovl_surrogate_cand)
<< FnType;
- MaybeEmitInheritedConstructorNote(S, Cand->Surrogate);
}
static void NoteBuiltinOperatorCandidate(Sema &S, StringRef Opc,
@@ -9721,8 +9818,8 @@ static void NoteAmbiguousUserConversions(Sema &S, SourceLocation OpLoc,
if (ICS.isBad()) break; // all meaningless after first invalid
if (!ICS.isAmbiguous()) continue;
- ICS.DiagnoseAmbiguousConversion(S, OpLoc,
- S.PDiag(diag::note_ambiguous_type_conversion));
+ ICS.DiagnoseAmbiguousConversion(
+ S, OpLoc, S.PDiag(diag::note_ambiguous_type_conversion));
}
}
@@ -10089,7 +10186,7 @@ struct CompareTemplateSpecCandidatesForDisplay {
/// deductions.
void TemplateSpecCandidate::NoteDeductionFailure(Sema &S,
bool ForTakingAddress) {
- DiagnoseBadDeduction(S, Specialization, // pattern
+ DiagnoseBadDeduction(S, FoundDecl, Specialization, // pattern
DeductionFailure, /*NumArgs=*/0, ForTakingAddress);
}
@@ -10252,21 +10349,32 @@ public:
}
}
- if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
- Matches.size() > 1)
+ if (S.getLangOpts().CUDA && Matches.size() > 1)
EliminateSuboptimalCudaMatches();
}
bool hasComplained() const { return HasComplained; }
private:
- // Is A considered a better overload candidate for the desired type than B?
+ bool candidateHasExactlyCorrectType(const FunctionDecl *FD) {
+ QualType Discard;
+ return Context.hasSameUnqualifiedType(TargetFunctionType, FD->getType()) ||
+ S.IsNoReturnConversion(FD->getType(), TargetFunctionType, Discard);
+ }
+
+ /// \return true if A is considered a better overload candidate for the
+ /// desired type than B.
bool isBetterCandidate(const FunctionDecl *A, const FunctionDecl *B) {
- return hasBetterEnableIfAttrs(S, A, B);
+ // If A doesn't have exactly the correct type, we don't want to classify it
+ // as "better" than anything else. This way, the user is required to
+ // disambiguate for us if there are multiple candidates and no exact match.
+ return candidateHasExactlyCorrectType(A) &&
+ (!candidateHasExactlyCorrectType(B) ||
+ compareEnableIfAttrs(S, A, B) == Comparison::Better);
}
- // Returns true if we've eliminated any (read: all but one) candidates, false
- // otherwise.
+ /// \return true if we were able to eliminate all but one overload candidate,
+ /// false otherwise.
bool eliminiateSuboptimalOverloadCandidates() {
// Same algorithm as overload resolution -- one pass to pick the "best",
// another pass to be sure that nothing is better than the best.
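A sketch of the disambiguation rule the comment above describes, with illustrative overloads:

    int  f(int);
    long f(long);

    int (*p)(int) = f;    // exactly one candidate has the target type: chosen
    // void (*q)() = f;   // no exact match: the user must disambiguate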
@@ -10331,7 +10439,7 @@ private:
Info, /*InOverloadResolution=*/true)) {
// Make a note of the failed deduction for diagnostics.
FailedCandidates.addCandidate()
- .set(FunctionTemplate->getTemplatedDecl(),
+ .set(CurAccessFunPair, FunctionTemplate->getTemplatedDecl(),
MakeDeductionFailureInfo(Context, Result, Info));
return false;
}
@@ -10339,7 +10447,6 @@ private:
// Template argument deduction ensures that we have an exact match or
// compatible pointer-to-function arguments that would be adjusted by ICS.
// This function template specialization works.
- Specialization = cast<FunctionDecl>(Specialization->getCanonicalDecl());
assert(S.isSameOrCompatibleFunctionType(
Context.getCanonicalType(Specialization->getType()),
Context.getCanonicalType(TargetFunctionType)));
@@ -10380,12 +10487,9 @@ private:
if (!S.checkAddressOfFunctionIsAvailable(FunDecl))
return false;
- QualType ResultTy;
- if (Context.hasSameUnqualifiedType(TargetFunctionType,
- FunDecl->getType()) ||
- S.IsNoReturnConversion(FunDecl->getType(), TargetFunctionType,
- ResultTy) ||
- (!S.getLangOpts().CPlusPlus && TargetType->isVoidPointerType())) {
+ // If we're in C, we need to support types that aren't exactly identical.
+ if (!S.getLangOpts().CPlusPlus ||
+ candidateHasExactlyCorrectType(FunDecl)) {
Matches.push_back(std::make_pair(
CurAccessFunPair, cast<FunctionDecl>(FunDecl->getCanonicalDecl())));
FoundNonTemplateFunction = true;
@@ -10451,9 +10555,10 @@ private:
UnresolvedSetIterator Result = S.getMostSpecialized(
MatchesCopy.begin(), MatchesCopy.end(), FailedCandidates,
SourceExpr->getLocStart(), S.PDiag(),
- S.PDiag(diag::err_addr_ovl_ambiguous) << Matches[0]
- .second->getDeclName(),
- S.PDiag(diag::note_ovl_candidate) << (unsigned)oc_function_template,
+ S.PDiag(diag::err_addr_ovl_ambiguous)
+ << Matches[0].second->getDeclName(),
+ S.PDiag(diag::note_ovl_candidate)
+ << (unsigned)oc_function_template,
Complain, TargetFunctionType);
if (Result != MatchesCopy.end()) {
@@ -10501,7 +10606,7 @@ public:
if (FunctionDecl *Fun =
dyn_cast<FunctionDecl>((*I)->getUnderlyingDecl()))
if (!functionHasPassObjectSizeParams(Fun))
- S.NoteOverloadCandidate(Fun, TargetFunctionType,
+ S.NoteOverloadCandidate(*I, Fun, TargetFunctionType,
/*TakingAddress=*/true);
FailedCandidates.NoteCandidates(S, OvlExpr->getLocStart());
}
@@ -10614,6 +10719,72 @@ Sema::ResolveAddressOfOverloadedFunction(Expr *AddressOfExpr,
}
/// \brief Given an expression that refers to an overloaded function, try to
+/// resolve that function to a single function that can have its address taken.
+/// This will modify `Pair` iff it returns non-null.
+///
+/// This routine can only realistically succeed if all but one candidate in the
+/// overload set for SrcExpr cannot have its address taken.
+FunctionDecl *
+Sema::resolveAddressOfOnlyViableOverloadCandidate(Expr *E,
+ DeclAccessPair &Pair) {
+ OverloadExpr::FindResult R = OverloadExpr::find(E);
+ OverloadExpr *Ovl = R.Expression;
+ FunctionDecl *Result = nullptr;
+ DeclAccessPair DAP;
+ // Don't use the AddressOfResolver because we're specifically looking for
+ // cases where we have one overload candidate that lacks
+ // enable_if/pass_object_size/...
+ for (auto I = Ovl->decls_begin(), E = Ovl->decls_end(); I != E; ++I) {
+ auto *FD = dyn_cast<FunctionDecl>(I->getUnderlyingDecl());
+ if (!FD)
+ return nullptr;
+
+ if (!checkAddressOfFunctionIsAvailable(FD))
+ continue;
+
+ // We have more than one result; quit.
+ if (Result)
+ return nullptr;
+ DAP = I.getPair();
+ Result = FD;
+ }
+
+ if (Result)
+ Pair = DAP;
+ return Result;
+}
+
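A sketch of the situation this resolver targets, using Clang's documented enable_if attribute (names and behavior here are illustrative, not taken from the patch):

    int g(int n) __attribute__((enable_if(false, "never addressable")));
    int g(long n);

    auto *fp = g;         // no target type to pick from; only g(long) can
                          // have its address taken, so it is the one result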
+/// \brief Given an overloaded function, tries to turn it into a non-overloaded
+/// function reference using resolveAddressOfOnlyViableOverloadCandidate. This
+/// will perform access checks, diagnose the use of the resultant decl, and, if
+/// necessary, perform a function-to-pointer decay.
+///
+/// Returns false if resolveAddressOfOnlyViableOverloadCandidate fails;
+/// otherwise, returns true. Diagnostics may be emitted even on success.
+bool Sema::resolveAndFixAddressOfOnlyViableOverloadCandidate(
+ ExprResult &SrcExpr) {
+ Expr *E = SrcExpr.get();
+ assert(E->getType() == Context.OverloadTy && "SrcExpr must be an overload");
+
+ DeclAccessPair DAP;
+ FunctionDecl *Found = resolveAddressOfOnlyViableOverloadCandidate(E, DAP);
+ if (!Found)
+ return false;
+
+ // Emitting multiple diagnostics for a function that is both inaccessible and
+ // unavailable is consistent with our behavior elsewhere. So, always check
+ // for both.
+ DiagnoseUseOfDecl(Found, E->getExprLoc());
+ CheckAddressOfMemberAccess(E, DAP);
+ Expr *Fixed = FixOverloadedFunctionReference(E, DAP, Found);
+ if (Fixed->getType()->isFunctionType())
+ SrcExpr = DefaultFunctionArrayConversion(Fixed, /*Diagnose=*/false);
+ else
+ SrcExpr = Fixed;
+ return true;
+}
+
+/// \brief Given an expression that refers to an overloaded function, try to
/// resolve that overloaded function expression down to a single function.
///
/// This routine can only resolve template-ids that refer to a single function
@@ -10671,7 +10842,7 @@ Sema::ResolveSingleFunctionTemplateSpecialization(OverloadExpr *ovl,
// Make a note of the failed deduction for diagnostics.
// TODO: Actually use the failed-deduction info?
FailedCandidates.addCandidate()
- .set(FunctionTemplate->getTemplatedDecl(),
+ .set(I.getPair(), FunctionTemplate->getTemplatedDecl(),
MakeDeductionFailureInfo(Context, Result, Info));
continue;
}
@@ -12236,6 +12407,16 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
<< MD->getDeclName();
}
}
+
+ if (CXXDestructorDecl *DD =
+ dyn_cast<CXXDestructorDecl>(TheCall->getMethodDecl())) {
+ // a->A::f() doesn't go through the vtable, except in AppleKext mode.
+ bool CallCanBeVirtual = !MemExpr->hasQualifier() || getLangOpts().AppleKext;
+ CheckVirtualDtorCall(DD, MemExpr->getLocStart(), /*IsDelete=*/false,
+ CallCanBeVirtual, /*WarnOnNonAbstractTypes=*/true,
+ MemExpr->getMemberLoc());
+ }
+
return MaybeBindToTemporary(TheCall);
}
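The shape of the calls the new check covers, with illustrative types:

    struct A { virtual ~A(); };
    struct B : A { };

    void f(B *b) {
      b->A::~A();   // qualified: bypasses the vtable (except in AppleKext mode)
      b->~B();      // unqualified: may dispatch virtually
    }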
@@ -12843,6 +13024,9 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
= Context.getTypeDeclType(cast<RecordDecl>(Method->getDeclContext()));
QualType MemPtrType
= Context.getMemberPointerType(Fn->getType(), ClassType.getTypePtr());
+ // Under the MS ABI, lock down the inheritance model now.
+ if (Context.getTargetInfo().getCXXABI().isMicrosoft())
+ (void)isCompleteType(UnOp->getOperatorLoc(), MemPtrType);
return new (Context) UnaryOperator(SubExpr, UO_AddrOf, MemPtrType,
VK_RValue, OK_Ordinary,
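Why the model must be pinned here, sketched: under the Microsoft ABI the representation of a pointer-to-member depends on the class's inheritance shape, and completing the type fixes that shape.

    struct S { void f(); };
    void (S::*pmf)() = &S::f;   // forming &S::f under the MS ABI requires S to
                                // be complete so its inheritance model is known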
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaPseudoObject.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaPseudoObject.cpp
index e5d51f173caa..c93d800f96d1 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaPseudoObject.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaPseudoObject.cpp
@@ -578,7 +578,7 @@ bool ObjCPropertyOpBuilder::isWeakProperty() const {
if (RefExpr->isExplicitProperty()) {
const ObjCPropertyDecl *Prop = RefExpr->getExplicitProperty();
if (Prop->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_weak)
- return !Prop->hasAttr<IBOutletAttr>();
+ return true;
T = Prop->getType();
} else if (Getter) {
@@ -658,7 +658,8 @@ bool ObjCPropertyOpBuilder::findSetter(bool warn) {
SmallString<100> PropertyName = thisPropertyName;
PropertyName[0] = front;
IdentifierInfo *AltMember = &S.PP.getIdentifierTable().get(PropertyName);
- if (ObjCPropertyDecl *prop1 = IFace->FindPropertyDeclaration(AltMember))
+ if (ObjCPropertyDecl *prop1 = IFace->FindPropertyDeclaration(
+ AltMember, prop->getQueryKind()))
if (prop != prop1 && (prop1->getSetterMethodDecl() == setter)) {
S.Diag(RefExpr->getExprLoc(), diag::error_property_setter_ambiguous_use)
<< prop << prop1 << setter->getSelector();
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp
index e1b1a47e182b..8e8104e581b2 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmt.cpp
@@ -37,6 +37,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+
using namespace clang;
using namespace sema;
@@ -249,10 +250,10 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) {
// is written in a macro body, only warn if it has the warn_unused_result
// attribute.
if (const Decl *FD = CE->getCalleeDecl()) {
- const FunctionDecl *Func = dyn_cast<FunctionDecl>(FD);
- if (Func ? Func->hasUnusedResultAttr()
- : FD->hasAttr<WarnUnusedResultAttr>()) {
- Diag(Loc, diag::warn_unused_result) << R1 << R2;
+ if (const Attr *A = isa<FunctionDecl>(FD)
+ ? cast<FunctionDecl>(FD)->getUnusedResultAttr()
+ : FD->getAttr<WarnUnusedResultAttr>()) {
+ Diag(Loc, diag::warn_unused_result) << A << R1 << R2;
return;
}
if (ShouldSuppress)
@@ -276,8 +277,8 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) {
}
const ObjCMethodDecl *MD = ME->getMethodDecl();
if (MD) {
- if (MD->hasAttr<WarnUnusedResultAttr>()) {
- Diag(Loc, diag::warn_unused_result) << R1 << R2;
+ if (const auto *A = MD->getAttr<WarnUnusedResultAttr>()) {
+ Diag(Loc, diag::warn_unused_result) << A << R1 << R2;
return;
}
}
@@ -488,36 +489,62 @@ StmtResult Sema::ActOnAttributedStmt(SourceLocation AttrLoc,
return LS;
}
+namespace {
+class CommaVisitor : public EvaluatedExprVisitor<CommaVisitor> {
+ typedef EvaluatedExprVisitor<CommaVisitor> Inherited;
+ Sema &SemaRef;
+public:
+ CommaVisitor(Sema &SemaRef) : Inherited(SemaRef.Context), SemaRef(SemaRef) {}
+ void VisitBinaryOperator(BinaryOperator *E) {
+ if (E->getOpcode() == BO_Comma)
+ SemaRef.DiagnoseCommaOperator(E->getLHS(), E->getExprLoc());
+ EvaluatedExprVisitor<CommaVisitor>::VisitBinaryOperator(E);
+ }
+};
+}
+
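An illustrative condition the visitor flags (the diagnostic name is taken from the code above):

    void f(int a, int b) {
      if (a = 1, b)        // warn_comma_operator: the left operand's result
        ;                  // is discarded, which often hides a typo for &&
    }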
StmtResult
-Sema::ActOnIfStmt(SourceLocation IfLoc, FullExprArg CondVal, Decl *CondVar,
+Sema::ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr, Stmt *InitStmt,
+ ConditionResult Cond,
Stmt *thenStmt, SourceLocation ElseLoc,
Stmt *elseStmt) {
- ExprResult CondResult(CondVal.release());
-
- VarDecl *ConditionVar = nullptr;
- if (CondVar) {
- ConditionVar = cast<VarDecl>(CondVar);
- CondResult = CheckConditionVariable(ConditionVar, IfLoc, true);
- CondResult = ActOnFinishFullExpr(CondResult.get(), IfLoc);
- }
- Expr *ConditionExpr = CondResult.getAs<Expr>();
- if (ConditionExpr) {
- DiagnoseUnusedExprResult(thenStmt);
+ if (Cond.isInvalid())
+ Cond = ConditionResult(
+ *this, nullptr,
+ MakeFullExpr(new (Context) OpaqueValueExpr(SourceLocation(),
+ Context.BoolTy, VK_RValue),
+ IfLoc),
+ false);
+
+ Expr *CondExpr = Cond.get().second;
+ if (!Diags.isIgnored(diag::warn_comma_operator,
+ CondExpr->getExprLoc()))
+ CommaVisitor(*this).Visit(CondExpr);
+
+ if (!elseStmt)
+ DiagnoseEmptyStmtBody(CondExpr->getLocEnd(), thenStmt,
+ diag::warn_empty_if_body);
+
+ return BuildIfStmt(IfLoc, IsConstexpr, InitStmt, Cond, thenStmt, ElseLoc,
+ elseStmt);
+}
+
+StmtResult Sema::BuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+ Stmt *InitStmt, ConditionResult Cond,
+ Stmt *thenStmt, SourceLocation ElseLoc,
+ Stmt *elseStmt) {
+ if (Cond.isInvalid())
+ return StmtError();
- if (!elseStmt) {
- DiagnoseEmptyStmtBody(ConditionExpr->getLocEnd(), thenStmt,
- diag::warn_empty_if_body);
- }
+ if (IsConstexpr)
+ getCurFunction()->setHasBranchProtectedScope();
- DiagnoseUnusedExprResult(elseStmt);
- } else {
- // Create a dummy Expr for the condition for error recovery
- ConditionExpr = new (Context) OpaqueValueExpr(SourceLocation(),
- Context.BoolTy, VK_RValue);
- }
+ DiagnoseUnusedExprResult(thenStmt);
+ DiagnoseUnusedExprResult(elseStmt);
- return new (Context) IfStmt(Context, IfLoc, ConditionVar, ConditionExpr,
- thenStmt, ElseLoc, elseStmt);
+ return new (Context)
+ IfStmt(Context, IfLoc, IsConstexpr, InitStmt, Cond.get().first,
+ Cond.get().second, thenStmt, ElseLoc, elseStmt);
}
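The new signature carries both C++1z additions to 'if'. A sketch of the source forms it now builds:

    #include <map>
    #include <string>

    int lookup(const std::map<std::string, int> &m, const std::string &k) {
      if (auto it = m.find(k); it != m.end())  // InitStmt plus condition
        return it->second;
      if constexpr (sizeof(void *) == 8)       // IsConstexpr == true
        return 64;
      else
        return 32;
    }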
namespace {
@@ -579,24 +606,7 @@ static QualType GetTypeBeforeIntegralPromotion(Expr *&expr) {
return expr->getType();
}
-StmtResult
-Sema::ActOnStartOfSwitchStmt(SourceLocation SwitchLoc, Expr *Cond,
- Decl *CondVar) {
- ExprResult CondResult;
-
- VarDecl *ConditionVar = nullptr;
- if (CondVar) {
- ConditionVar = cast<VarDecl>(CondVar);
- CondResult = CheckConditionVariable(ConditionVar, SourceLocation(), false);
- if (CondResult.isInvalid())
- return StmtError();
-
- Cond = CondResult.get();
- }
-
- if (!Cond)
- return StmtError();
-
+ExprResult Sema::CheckSwitchCondition(SourceLocation SwitchLoc, Expr *Cond) {
class SwitchConvertDiagnoser : public ICEConvertDiagnoser {
Expr *Cond;
@@ -644,24 +654,24 @@ Sema::ActOnStartOfSwitchStmt(SourceLocation SwitchLoc, Expr *Cond,
}
} SwitchDiagnoser(Cond);
- CondResult =
+ ExprResult CondResult =
PerformContextualImplicitConversion(SwitchLoc, Cond, SwitchDiagnoser);
- if (CondResult.isInvalid()) return StmtError();
- Cond = CondResult.get();
+ if (CondResult.isInvalid())
+ return ExprError();
// C99 6.8.4.2p5 - Integer promotions are performed on the controlling expr.
- CondResult = UsualUnaryConversions(Cond);
- if (CondResult.isInvalid()) return StmtError();
- Cond = CondResult.get();
+ return UsualUnaryConversions(CondResult.get());
+}
- CondResult = ActOnFinishFullExpr(Cond, SwitchLoc);
- if (CondResult.isInvalid())
+StmtResult Sema::ActOnStartOfSwitchStmt(SourceLocation SwitchLoc,
+ Stmt *InitStmt, ConditionResult Cond) {
+ if (Cond.isInvalid())
return StmtError();
- Cond = CondResult.get();
getCurFunction()->setHasBranchIntoScope();
- SwitchStmt *SS = new (Context) SwitchStmt(Context, ConditionVar, Cond);
+ SwitchStmt *SS = new (Context)
+ SwitchStmt(Context, InitStmt, Cond.get().first, Cond.get().second);
getCurFunction()->SwitchStack.push_back(SS);
return SS;
}
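A sketch of the two conversions CheckSwitchCondition now owns, with illustrative types:

    struct Wrapped {
      operator int() const;            // contextual implicit conversion target
    };

    void f(Wrapped w, short s) {
      switch (w) { default: break; }   // converted via operator int()
      switch (s) { default: break; }   // integer-promoted per C99 6.8.4.2p5
    }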
@@ -980,7 +990,8 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch,
<< SourceRange(CR->getLHS()->getLocStart(),
Hi->getLocEnd());
CaseRanges.erase(CaseRanges.begin()+i);
- --i, --e;
+ --i;
+ --e;
continue;
}
@@ -1221,23 +1232,17 @@ Sema::DiagnoseAssignmentEnum(QualType DstType, QualType SrcType,
}
}
-StmtResult
-Sema::ActOnWhileStmt(SourceLocation WhileLoc, FullExprArg Cond,
- Decl *CondVar, Stmt *Body) {
- ExprResult CondResult(Cond.release());
-
- VarDecl *ConditionVar = nullptr;
- if (CondVar) {
- ConditionVar = cast<VarDecl>(CondVar);
- CondResult = CheckConditionVariable(ConditionVar, WhileLoc, true);
- CondResult = ActOnFinishFullExpr(CondResult.get(), WhileLoc);
- if (CondResult.isInvalid())
- return StmtError();
- }
- Expr *ConditionExpr = CondResult.get();
- if (!ConditionExpr)
+StmtResult Sema::ActOnWhileStmt(SourceLocation WhileLoc, ConditionResult Cond,
+ Stmt *Body) {
+ if (Cond.isInvalid())
return StmtError();
- CheckBreakContinueBinding(ConditionExpr);
+
+ auto CondVal = Cond.get();
+ CheckBreakContinueBinding(CondVal.second);
+
+ if (CondVal.second &&
+ !Diags.isIgnored(diag::warn_comma_operator, CondVal.second->getExprLoc()))
+ CommaVisitor(*this).Visit(CondVal.second);
DiagnoseUnusedExprResult(Body);
@@ -1245,7 +1250,7 @@ Sema::ActOnWhileStmt(SourceLocation WhileLoc, FullExprArg Cond,
getCurCompoundScope().setHasEmptyLoopBodies();
return new (Context)
- WhileStmt(Context, ConditionVar, ConditionExpr, Body, WhileLoc);
+ WhileStmt(Context, CondVal.first, CondVal.second, Body, WhileLoc);
}
StmtResult
@@ -1255,7 +1260,7 @@ Sema::ActOnDoStmt(SourceLocation DoLoc, Stmt *Body,
assert(Cond && "ActOnDoStmt(): missing expression");
CheckBreakContinueBinding(Cond);
- ExprResult CondResult = CheckBooleanCondition(Cond, DoLoc);
+ ExprResult CondResult = CheckBooleanCondition(DoLoc, Cond);
if (CondResult.isInvalid())
return StmtError();
Cond = CondResult.get();
@@ -1416,6 +1421,18 @@ namespace {
FoundDecl = true;
}
+ void VisitPseudoObjectExpr(PseudoObjectExpr *POE) {
+      // Only need to visit the semantics for the POE;
+      // the SyntacticForm doesn't really use the Decl.
+ for (auto *S : POE->semantics()) {
+ if (auto *OVE = dyn_cast<OpaqueValueExpr>(S))
+ // Look past the OVE into the expression it binds.
+ Visit(OVE->getSourceExpr());
+ else
+ Visit(S);
+ }
+ }
+
bool FoundDeclInUse() { return FoundDecl; }
}; // end class DeclMatcher
@@ -1481,6 +1498,10 @@ namespace {
// variables Increment and DRE.
bool ProcessIterationStmt(Sema &S, Stmt* Statement, bool &Increment,
DeclRefExpr *&DRE) {
+ if (auto Cleanups = dyn_cast<ExprWithCleanups>(Statement))
+ if (!Cleanups->cleanupsHaveSideEffects())
+ Statement = Cleanups->getSubExpr();
+
if (UnaryOperator *UO = dyn_cast<UnaryOperator>(Statement)) {
switch (UO->getOpcode()) {
default: return false;
@@ -1603,11 +1624,13 @@ void Sema::CheckBreakContinueBinding(Expr *E) {
}
}
-StmtResult
-Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
- Stmt *First, FullExprArg second, Decl *secondVar,
- FullExprArg third,
- SourceLocation RParenLoc, Stmt *Body) {
+StmtResult Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
+ Stmt *First, ConditionResult Second,
+ FullExprArg third, SourceLocation RParenLoc,
+ Stmt *Body) {
+ if (Second.isInvalid())
+ return StmtError();
+
if (!getLangOpts().CPlusPlus) {
if (DeclStmt *DS = dyn_cast_or_null<DeclStmt>(First)) {
// C99 6.8.5p3: The declaration part of a 'for' statement shall only
@@ -1625,21 +1648,18 @@ Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
}
}
- CheckBreakContinueBinding(second.get());
+ CheckBreakContinueBinding(Second.get().second);
CheckBreakContinueBinding(third.get());
- CheckForLoopConditionalStatement(*this, second.get(), third.get(), Body);
+ if (!Second.get().first)
+ CheckForLoopConditionalStatement(*this, Second.get().second, third.get(),
+ Body);
CheckForRedundantIteration(*this, third.get(), Body);
- ExprResult SecondResult(second.release());
- VarDecl *ConditionVar = nullptr;
- if (secondVar) {
- ConditionVar = cast<VarDecl>(secondVar);
- SecondResult = CheckConditionVariable(ConditionVar, ForLoc, true);
- SecondResult = ActOnFinishFullExpr(SecondResult.get(), ForLoc);
- if (SecondResult.isInvalid())
- return StmtError();
- }
+ if (Second.get().second &&
+ !Diags.isIgnored(diag::warn_comma_operator,
+ Second.get().second->getExprLoc()))
+ CommaVisitor(*this).Visit(Second.get().second);
Expr *Third = third.release().getAs<Expr>();
@@ -1650,8 +1670,9 @@ Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
if (isa<NullStmt>(Body))
getCurCompoundScope().setHasEmptyLoopBodies();
- return new (Context) ForStmt(Context, First, SecondResult.get(), ConditionVar,
- Third, Body, ForLoc, LParenLoc, RParenLoc);
+ return new (Context)
+ ForStmt(Context, First, Second.get().second, Second.get().first, Third,
+ Body, ForLoc, LParenLoc, RParenLoc);
}
/// In an Objective C collection iteration statement:
@@ -1992,8 +2013,9 @@ StmtResult Sema::ActOnCXXForRangeStmt(Scope *S, SourceLocation ForLoc,
}
return BuildCXXForRangeStmt(ForLoc, CoawaitLoc, ColonLoc, RangeDecl.get(),
- /*BeginEndDecl=*/nullptr, /*Cond=*/nullptr,
- /*Inc=*/nullptr, DS, RParenLoc, Kind);
+ /*BeginStmt=*/nullptr, /*EndStmt=*/nullptr,
+ /*Cond=*/nullptr, /*Inc=*/nullptr,
+ DS, RParenLoc, Kind);
}
/// \brief Create the initialization, compare, and increment steps for
@@ -2143,8 +2165,8 @@ struct InvalidateOnErrorScope {
/// BuildCXXForRangeStmt - Build or instantiate a C++11 for-range statement.
StmtResult
Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
- SourceLocation ColonLoc,
- Stmt *RangeDecl, Stmt *BeginEnd, Expr *Cond,
+ SourceLocation ColonLoc, Stmt *RangeDecl,
+ Stmt *Begin, Stmt *End, Expr *Cond,
Expr *Inc, Stmt *LoopVarDecl,
SourceLocation RParenLoc, BuildForRangeKind Kind) {
// FIXME: This should not be used during template instantiation. We should
@@ -2170,7 +2192,8 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
InvalidateOnErrorScope Invalidate(*this, LoopVar,
LoopVar->getType()->isUndeducedType());
- StmtResult BeginEndDecl = BeginEnd;
+ StmtResult BeginDeclStmt = Begin;
+ StmtResult EndDeclStmt = End;
ExprResult NotEqExpr = Cond, IncrExpr = Inc;
if (RangeVarType->isDependentType()) {
@@ -2181,7 +2204,7 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
// them in properly when we instantiate the loop.
if (!LoopVar->isInvalidDecl() && Kind != BFRK_Check)
LoopVar->setType(SubstAutoType(LoopVar->getType(), Context.DependentTy));
- } else if (!BeginEndDecl.get()) {
+ } else if (!BeginDeclStmt.get()) {
SourceLocation RangeLoc = RangeVar->getLocation();
const QualType RangeVarNonRefType = RangeVarType.getNonReferenceType();
@@ -2306,20 +2329,21 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
"invalid range expression in for loop");
// C++11 [dcl.spec.auto]p7: BeginType and EndType must be the same.
+ // C++1z removes this restriction.
QualType BeginType = BeginVar->getType(), EndType = EndVar->getType();
if (!Context.hasSameType(BeginType, EndType)) {
- Diag(RangeLoc, diag::err_for_range_begin_end_types_differ)
- << BeginType << EndType;
+ Diag(RangeLoc, getLangOpts().CPlusPlus1z
+ ? diag::warn_for_range_begin_end_types_differ
+ : diag::ext_for_range_begin_end_types_differ)
+ << BeginType << EndType;
NoteForRangeBeginEndFunction(*this, BeginExpr.get(), BEF_begin);
NoteForRangeBeginEndFunction(*this, EndExpr.get(), BEF_end);
}
- Decl *BeginEndDecls[] = { BeginVar, EndVar };
- // Claim the type doesn't contain auto: we've already done the checking.
- DeclGroupPtrTy BeginEndGroup =
- BuildDeclaratorGroup(MutableArrayRef<Decl *>(BeginEndDecls, 2),
- /*TypeMayContainAuto=*/ false);
- BeginEndDecl = ActOnDeclStmt(BeginEndGroup, ColonLoc, ColonLoc);
+ BeginDeclStmt =
+ ActOnDeclStmt(ConvertDeclToDeclGroup(BeginVar), ColonLoc, ColonLoc);
+ EndDeclStmt =
+ ActOnDeclStmt(ConvertDeclToDeclGroup(EndVar), ColonLoc, ColonLoc);
const QualType BeginRefNonRefType = BeginType.getNonReferenceType();
ExprResult BeginRef = BuildDeclRefExpr(BeginVar, BeginRefNonRefType,
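What the relaxed C++1z rule permits, sketched with an assumed sentinel-style range:

    struct Sentinel {};
    struct Iter {
      int operator*() const;
      Iter &operator++();
      bool operator!=(Sentinel) const;   // compared against a distinct type
    };
    struct Range {
      Iter begin() const;
      Sentinel end() const;              // EndType differs from BeginType
    };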
@@ -2335,8 +2359,10 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
// Build and check __begin != __end expression.
NotEqExpr = ActOnBinOp(S, ColonLoc, tok::exclaimequal,
BeginRef.get(), EndRef.get());
- NotEqExpr = ActOnBooleanCondition(S, ColonLoc, NotEqExpr.get());
- NotEqExpr = ActOnFinishFullExpr(NotEqExpr.get());
+ if (!NotEqExpr.isInvalid())
+ NotEqExpr = CheckBooleanCondition(ColonLoc, NotEqExpr.get());
+ if (!NotEqExpr.isInvalid())
+ NotEqExpr = ActOnFinishFullExpr(NotEqExpr.get());
if (NotEqExpr.isInvalid()) {
Diag(RangeLoc, diag::note_for_range_invalid_iterator)
<< RangeLoc << 0 << BeginRangeRef.get()->getType();
@@ -2394,7 +2420,8 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation CoawaitLoc,
return StmtResult();
return new (Context) CXXForRangeStmt(
- RangeDS, cast_or_null<DeclStmt>(BeginEndDecl.get()), NotEqExpr.get(),
+ RangeDS, cast_or_null<DeclStmt>(BeginDeclStmt.get()),
+ cast_or_null<DeclStmt>(EndDeclStmt.get()), NotEqExpr.get(),
IncrExpr.get(), LoopVarDS, /*Body=*/nullptr, ForLoc, CoawaitLoc,
ColonLoc, RParenLoc);
}
@@ -2426,6 +2453,10 @@ static void DiagnoseForRangeReferenceVariableCopies(Sema &SemaRef,
QualType VariableType = VD->getType();
+ if (auto Cleanups = dyn_cast<ExprWithCleanups>(InitExpr))
+ if (!Cleanups->cleanupsHaveSideEffects())
+ InitExpr = Cleanups->getSubExpr();
+
const MaterializeTemporaryExpr *MTE =
dyn_cast<MaterializeTemporaryExpr>(InitExpr);
@@ -2663,16 +2694,16 @@ Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) {
/// \param E The expression being returned from the function or block, or
/// being thrown.
///
-/// \param AllowFunctionParameter Whether we allow function parameters to
-/// be considered NRVO candidates. C++ prohibits this for NRVO itself, but
-/// we re-use this logic to determine whether we should try to move as part of
-/// a return or throw (which does allow function parameters).
+/// \param AllowParamOrMoveConstructible Whether we allow function parameters or
+/// id-expressions that could be moved out of the function to be considered NRVO
+/// candidates. C++ prohibits these for NRVO itself, but we re-use this logic to
+/// determine whether we should try to move as part of a return or throw (which
+/// does allow function parameters).
///
/// \returns The NRVO candidate variable, if the return statement may use the
/// NRVO, or NULL if there is no such candidate.
-VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType,
- Expr *E,
- bool AllowFunctionParameter) {
+VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType, Expr *E,
+ bool AllowParamOrMoveConstructible) {
if (!getLangOpts().CPlusPlus)
return nullptr;
@@ -2685,13 +2716,13 @@ VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType,
if (!VD)
return nullptr;
- if (isCopyElisionCandidate(ReturnType, VD, AllowFunctionParameter))
+ if (isCopyElisionCandidate(ReturnType, VD, AllowParamOrMoveConstructible))
return VD;
return nullptr;
}
bool Sema::isCopyElisionCandidate(QualType ReturnType, const VarDecl *VD,
- bool AllowFunctionParameter) {
+ bool AllowParamOrMoveConstructible) {
QualType VDType = VD->getType();
// - in a return statement in a function with ...
// ... a class return type ...
@@ -2699,20 +2730,24 @@ bool Sema::isCopyElisionCandidate(QualType ReturnType, const VarDecl *VD,
if (!ReturnType->isRecordType())
return false;
// ... the same cv-unqualified type as the function return type ...
- if (!VDType->isDependentType() &&
+ // When considering moving this expression out, allow dissimilar types.
+ if (!AllowParamOrMoveConstructible && !VDType->isDependentType() &&
!Context.hasSameUnqualifiedType(ReturnType, VDType))
return false;
}
// ...object (other than a function or catch-clause parameter)...
if (VD->getKind() != Decl::Var &&
- !(AllowFunctionParameter && VD->getKind() == Decl::ParmVar))
+ !(AllowParamOrMoveConstructible && VD->getKind() == Decl::ParmVar))
return false;
if (VD->isExceptionVariable()) return false;
// ...automatic...
if (!VD->hasLocalStorage()) return false;
+ if (AllowParamOrMoveConstructible)
+ return true;
+
// ...non-volatile...
if (VD->getType().isVolatileQualified()) return false;
@@ -2731,7 +2766,7 @@ bool Sema::isCopyElisionCandidate(QualType ReturnType, const VarDecl *VD,
/// \brief Perform the initialization of a potentially-movable value, which
/// is the result of return value.
///
-/// This routine implements C++0x [class.copy]p33, which attempts to treat
+/// This routine implements C++14 [class.copy]p32, which attempts to treat
/// returned lvalues as rvalues in certain cases (to prefer move construction),
/// then falls back to treating them as lvalues if that failed.
ExprResult
@@ -2740,52 +2775,59 @@ Sema::PerformMoveOrCopyInitialization(const InitializedEntity &Entity,
QualType ResultType,
Expr *Value,
bool AllowNRVO) {
- // C++0x [class.copy]p33:
- // When the criteria for elision of a copy operation are met or would
- // be met save for the fact that the source object is a function
- // parameter, and the object to be copied is designated by an lvalue,
- // overload resolution to select the constructor for the copy is first
- // performed as if the object were designated by an rvalue.
+ // C++14 [class.copy]p32:
+ // When the criteria for elision of a copy/move operation are met, but not for
+ // an exception-declaration, and the object to be copied is designated by an
+ // lvalue, or when the expression in a return statement is a (possibly
+ // parenthesized) id-expression that names an object with automatic storage
+ // duration declared in the body or parameter-declaration-clause of the
+ // innermost enclosing function or lambda-expression, overload resolution to
+ // select the constructor for the copy is first performed as if the object
+ // were designated by an rvalue.
ExprResult Res = ExprError();
- if (AllowNRVO &&
- (NRVOCandidate || getCopyElisionCandidate(ResultType, Value, true))) {
- ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack,
- Value->getType(), CK_NoOp, Value, VK_XValue);
+
+ if (AllowNRVO && !NRVOCandidate)
+ NRVOCandidate = getCopyElisionCandidate(ResultType, Value, true);
+
+ if (AllowNRVO && NRVOCandidate) {
+ ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(),
+ CK_NoOp, Value, VK_XValue);
Expr *InitExpr = &AsRvalue;
- InitializationKind Kind
- = InitializationKind::CreateCopy(Value->getLocStart(),
- Value->getLocStart());
- InitializationSequence Seq(*this, Entity, Kind, InitExpr);
- // [...] If overload resolution fails, or if the type of the first
- // parameter of the selected constructor is not an rvalue reference
- // to the object's type (possibly cv-qualified), overload resolution
- // is performed again, considering the object as an lvalue.
+ InitializationKind Kind = InitializationKind::CreateCopy(
+ Value->getLocStart(), Value->getLocStart());
+
+ InitializationSequence Seq(*this, Entity, Kind, InitExpr);
if (Seq) {
- for (InitializationSequence::step_iterator Step = Seq.step_begin(),
- StepEnd = Seq.step_end();
- Step != StepEnd; ++Step) {
- if (Step->Kind != InitializationSequence::SK_ConstructorInitialization)
+ for (const InitializationSequence::Step &Step : Seq.steps()) {
+ if (!(Step.Kind ==
+ InitializationSequence::SK_ConstructorInitialization ||
+ (Step.Kind == InitializationSequence::SK_UserConversion &&
+ isa<CXXConstructorDecl>(Step.Function.Function))))
continue;
- CXXConstructorDecl *Constructor
- = cast<CXXConstructorDecl>(Step->Function.Function);
+ CXXConstructorDecl *Constructor =
+ cast<CXXConstructorDecl>(Step.Function.Function);
const RValueReferenceType *RRefType
= Constructor->getParamDecl(0)->getType()
->getAs<RValueReferenceType>();
- // If we don't meet the criteria, break out now.
+ // [...] If the first overload resolution fails or was not performed, or
+ // if the type of the first parameter of the selected constructor is not
+ // an rvalue reference to the object’s type (possibly cv-qualified),
+ // overload resolution is performed again, considering the object as an
+ // lvalue.
if (!RRefType ||
!Context.hasSameUnqualifiedType(RRefType->getPointeeType(),
- Context.getTypeDeclType(Constructor->getParent())))
+ NRVOCandidate->getType()))
break;
// Promote "AsRvalue" to the heap, since we now need this
// expression node to persist.
- Value = ImplicitCastExpr::Create(Context, Value->getType(),
- CK_NoOp, Value, nullptr, VK_XValue);
+ Value = ImplicitCastExpr::Create(Context, Value->getType(), CK_NoOp,
+ Value, nullptr, VK_XValue);
// Complete type-checking the initialization of the return type
// using the constructor we found.
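The p32 behavior this implements, sketched: a returned parameter is never an NRVO candidate, but overload resolution still treats it as an rvalue first, so a move-only type compiles.

    #include <memory>

    std::unique_ptr<int> pass(std::unique_ptr<int> p) {
      return p;   // moved, not copied, even though NRVO cannot apply
    }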
@@ -2821,8 +2863,21 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
CapturingScopeInfo *CurCap = cast<CapturingScopeInfo>(getCurFunction());
QualType FnRetType = CurCap->ReturnType;
LambdaScopeInfo *CurLambda = dyn_cast<LambdaScopeInfo>(CurCap);
+ bool HasDeducedReturnType =
+ CurLambda && hasDeducedReturnType(CurLambda->CallOperator);
+
+ if (ExprEvalContexts.back().Context == DiscardedStatement &&
+ (HasDeducedReturnType || CurCap->HasImplicitReturnType)) {
+ if (RetValExp) {
+ ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc);
+ if (ER.isInvalid())
+ return StmtError();
+ RetValExp = ER.get();
+ }
+ return new (Context) ReturnStmt(ReturnLoc, RetValExp, nullptr);
+ }
- if (CurLambda && hasDeducedReturnType(CurLambda->CallOperator)) {
+ if (HasDeducedReturnType) {
// In C++1y, the return type may involve 'auto'.
// FIXME: Blocks might have a return type of 'auto' explicitly specified.
FunctionDecl *FD = CurLambda->CallOperator;
@@ -3066,22 +3121,28 @@ bool Sema::DeduceFunctionTypeFromReturnExpr(FunctionDecl *FD,
// has multiple return statements, the return type is deduced for each return
// statement. [...] if the type deduced is not the same in each deduction,
// the program is ill-formed.
- if (AT->isDeduced() && !FD->isInvalidDecl()) {
+ QualType DeducedT = AT->getDeducedType();
+ if (!DeducedT.isNull() && !FD->isInvalidDecl()) {
AutoType *NewAT = Deduced->getContainedAutoType();
+    // It is possible for NewAT->getDeducedType() to be null. When that happens,
+    // we should not crash; instead, we ignore this deduction.
+ if (NewAT->getDeducedType().isNull())
+ return false;
+
CanQualType OldDeducedType = Context.getCanonicalFunctionResultType(
- AT->getDeducedType());
+ DeducedT);
CanQualType NewDeducedType = Context.getCanonicalFunctionResultType(
NewAT->getDeducedType());
if (!FD->isDependentContext() && OldDeducedType != NewDeducedType) {
const LambdaScopeInfo *LambdaSI = getCurLambda();
if (LambdaSI && LambdaSI->HasImplicitReturnType) {
Diag(ReturnLoc, diag::err_typecheck_missing_return_type_incompatible)
- << NewAT->getDeducedType() << AT->getDeducedType()
+ << NewAT->getDeducedType() << DeducedT
<< true /*IsLambda*/;
} else {
Diag(ReturnLoc, diag::err_auto_fn_different_deductions)
<< (AT->isDecltypeAuto() ? 1 : 0)
- << NewAT->getDeducedType() << AT->getDeducedType();
+ << NewAT->getDeducedType() << DeducedT;
}
return true;
}
@@ -3097,9 +3158,8 @@ StmtResult
Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,
Scope *CurScope) {
StmtResult R = BuildReturnStmt(ReturnLoc, RetValExp);
- if (R.isInvalid()) {
+ if (R.isInvalid() || ExprEvalContexts.back().Context == DiscardedStatement)
return R;
- }
if (VarDecl *VD =
const_cast<VarDecl*>(cast<ReturnStmt>(R.get())->getNRVOCandidate())) {
@@ -3148,6 +3208,19 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
} else // If we don't have a function/method context, bail.
return StmtError();
+ // C++1z: discarded return statements are not considered when deducing a
+ // return type.
+ if (ExprEvalContexts.back().Context == DiscardedStatement &&
+ FnRetType->getContainedAutoType()) {
+ if (RetValExp) {
+ ExprResult ER = ActOnFinishFullExpr(RetValExp, ReturnLoc);
+ if (ER.isInvalid())
+ return StmtError();
+ RetValExp = ER.get();
+ }
+ return new (Context) ReturnStmt(ReturnLoc, RetValExp, nullptr);
+ }
+
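What the C++1z rule enables, sketched: returns in the discarded branch of 'if constexpr' no longer participate in deduction, so the branches may deduce different types.

    template <bool B> auto pick() {
      if constexpr (B)
        return 1;      // deduces int when B is true
      else
        return 2.0;    // deduces double otherwise; the other return is discarded
    }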
// FIXME: Add a flag to the ScopeInfo to indicate whether we're performing
// deduction.
if (getLangOpts().CPlusPlus14) {
@@ -3525,11 +3598,6 @@ template <> struct DenseMapInfo<CatchHandlerType> {
return LHS == RHS;
}
};
-
-// It's OK to treat CatchHandlerType as a POD type.
-template <> struct isPodLike<CatchHandlerType> {
- static const bool value = true;
-};
}
namespace {
@@ -3554,7 +3622,7 @@ public:
bool operator()(const CXXBaseSpecifier *S, CXXBasePath &) {
if (S->getAccessSpecifier() == AccessSpecifier::AS_public) {
CatchHandlerType Check(S->getType(), CheckAgainstPointer);
- auto M = TypesToCheck;
+ const auto &M = TypesToCheck;
auto I = M.find(Check);
if (I != M.end()) {
FoundHandler = I->second;
@@ -3916,9 +3984,9 @@ StmtResult Sema::ActOnCapturedRegionEnd(Stmt *S) {
CapturedDecl *CD = RSI->TheCapturedDecl;
RecordDecl *RD = RSI->TheRecordDecl;
- CapturedStmt *Res = CapturedStmt::Create(getASTContext(), S,
- RSI->CapRegionKind, Captures,
- CaptureInits, CD, RD);
+ CapturedStmt *Res = CapturedStmt::Create(
+ getASTContext(), S, static_cast<CapturedRegionKind>(RSI->CapRegionKind),
+ Captures, CaptureInits, CD, RD);
CD->setBody(Res->getCapturedStmt());
RD->completeDefinition();
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp
index 11a4f8bfa85c..cd4269cd7eae 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp
@@ -623,16 +623,12 @@ bool Sema::LookupInlineAsmField(StringRef Base, StringRef Member,
if (!LookupName(BaseResult, getCurScope()))
return true;
-
- LookupResult CurrBaseResult(BaseResult);
-
+
+  if (!BaseResult.isSingleResult())
+ return true;
+ NamedDecl *FoundDecl = BaseResult.getFoundDecl();
for (StringRef NextMember : Members) {
-
- if (!CurrBaseResult.isSingleResult())
- return true;
-
const RecordType *RT = nullptr;
- NamedDecl *FoundDecl = CurrBaseResult.getFoundDecl();
if (VarDecl *VD = dyn_cast<VarDecl>(FoundDecl))
RT = VD->getType()->getAs<RecordType>();
else if (TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(FoundDecl)) {
@@ -655,13 +651,15 @@ bool Sema::LookupInlineAsmField(StringRef Base, StringRef Member,
if (!LookupQualifiedName(FieldResult, RT->getDecl()))
return true;
+ if (!FieldResult.isSingleResult())
+ return true;
+ FoundDecl = FieldResult.getFoundDecl();
+
// FIXME: Handle IndirectFieldDecl?
- FieldDecl *FD = dyn_cast<FieldDecl>(FieldResult.getFoundDecl());
+ FieldDecl *FD = dyn_cast<FieldDecl>(FoundDecl);
if (!FD)
return true;
- CurrBaseResult = FieldResult;
-
const ASTRecordLayout &RL = Context.getASTRecordLayout(RT->getDecl());
unsigned i = FD->getFieldIndex();
CharUnits Result = Context.toCharUnitsFromBits(RL.getFieldOffset(i));
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp
index 984bd078fa03..87fd88939572 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAttr.cpp
@@ -25,9 +25,11 @@ using namespace sema;
static Attr *handleFallThroughAttr(Sema &S, Stmt *St, const AttributeList &A,
SourceRange Range) {
+ FallThroughAttr Attr(A.getRange(), S.Context,
+ A.getAttributeSpellingListIndex());
if (!isa<NullStmt>(St)) {
S.Diag(A.getRange().getBegin(), diag::err_fallthrough_attr_wrong_target)
- << St->getLocStart();
+ << Attr.getSpelling() << St->getLocStart();
if (isa<SwitchCase>(St)) {
SourceLocation L = S.getLocForEndOfToken(Range.getEnd());
S.Diag(L, diag::note_fallthrough_insert_semi_fixit)
@@ -35,12 +37,20 @@ static Attr *handleFallThroughAttr(Sema &S, Stmt *St, const AttributeList &A,
}
return nullptr;
}
- if (S.getCurFunction()->SwitchStack.empty()) {
+ auto *FnScope = S.getCurFunction();
+ if (FnScope->SwitchStack.empty()) {
S.Diag(A.getRange().getBegin(), diag::err_fallthrough_attr_outside_switch);
return nullptr;
}
- return ::new (S.Context) FallThroughAttr(A.getRange(), S.Context,
- A.getAttributeSpellingListIndex());
+
+ // If this is spelled as the standard C++1z attribute, but not in C++1z, warn
+ // about using it as an extension.
+ if (!S.getLangOpts().CPlusPlus1z && A.isCXX11Attribute() &&
+ !A.getScopeName())
+ S.Diag(A.getLoc(), diag::ext_cxx1z_attr) << A.getName();
+
+ FnScope->setHasFallthroughStmt();
+ return ::new (S.Context) auto(Attr);
}
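The attribute this handler accepts in its standard C++1z spelling, sketched:

    int classify(int n) {
      switch (n) {
      case 0:
        ++n;
        [[fallthrough]];   // must annotate a null statement inside a switch
      case 1:
        return n;
      default:
        return 0;
      }
    }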
static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const AttributeList &A,
@@ -97,6 +107,7 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const AttributeList &A,
.Case("interleave_count", LoopHintAttr::InterleaveCount)
.Case("unroll", LoopHintAttr::Unroll)
.Case("unroll_count", LoopHintAttr::UnrollCount)
+ .Case("distribute", LoopHintAttr::Distribute)
.Default(LoopHintAttr::Vectorize);
if (Option == LoopHintAttr::VectorizeWidth ||
Option == LoopHintAttr::InterleaveCount ||
@@ -107,7 +118,8 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const AttributeList &A,
State = LoopHintAttr::Numeric;
} else if (Option == LoopHintAttr::Vectorize ||
Option == LoopHintAttr::Interleave ||
- Option == LoopHintAttr::Unroll) {
+ Option == LoopHintAttr::Unroll ||
+ Option == LoopHintAttr::Distribute) {
assert(StateLoc && StateLoc->Ident && "Loop hint must have an argument");
if (StateLoc->Ident->isStr("disable"))
State = LoopHintAttr::Disable;
@@ -130,18 +142,21 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const AttributeList &A,
static void
CheckForIncompatibleAttributes(Sema &S,
const SmallVectorImpl<const Attr *> &Attrs) {
- // There are 3 categories of loop hints attributes: vectorize, interleave,
- // and unroll. Each comes in two variants: a state form and a numeric form.
- // The state form selectively defaults/enables/disables the transformation
- // for the loop (for unroll, default indicates full unrolling rather than
- // enabling the transformation). The numeric form form provides an integer
- // hint (for example, unroll count) to the transformer. The following array
- // accumulates the hints encountered while iterating through the attributes
- // to check for compatibility.
+ // There are 4 categories of loop hints attributes: vectorize, interleave,
+ // unroll and distribute. Except for distribute they come in two variants: a
+ // state form and a numeric form. The state form selectively
+ // defaults/enables/disables the transformation for the loop (for unroll,
+ // default indicates full unrolling rather than enabling the transformation).
+  // The numeric form provides an integer hint (for example, unroll count)
+ // to the transformer. The following array accumulates the hints encountered
+ // while iterating through the attributes to check for compatibility.
struct {
const LoopHintAttr *StateAttr;
const LoopHintAttr *NumericAttr;
- } HintAttrs[] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}};
+ } HintAttrs[] = {{nullptr, nullptr},
+ {nullptr, nullptr},
+ {nullptr, nullptr},
+ {nullptr, nullptr}};
for (const auto *I : Attrs) {
const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(I);
@@ -151,7 +166,7 @@ CheckForIncompatibleAttributes(Sema &S,
continue;
LoopHintAttr::OptionType Option = LH->getOption();
- enum { Vectorize, Interleave, Unroll } Category;
+ enum { Vectorize, Interleave, Unroll, Distribute } Category;
switch (Option) {
case LoopHintAttr::Vectorize:
case LoopHintAttr::VectorizeWidth:
@@ -165,12 +180,17 @@ CheckForIncompatibleAttributes(Sema &S,
case LoopHintAttr::UnrollCount:
Category = Unroll;
break;
+ case LoopHintAttr::Distribute:
+ // Perform the check for duplicated 'distribute' hints.
+ Category = Distribute;
+ break;
};
auto &CategoryState = HintAttrs[Category];
const LoopHintAttr *PrevAttr;
if (Option == LoopHintAttr::Vectorize ||
- Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll) {
+ Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll ||
+ Option == LoopHintAttr::Distribute) {
// Enable|Disable|AssumeSafety hint. For example, vectorize(enable).
PrevAttr = CategoryState.StateAttr;
CategoryState.StateAttr = LH;
@@ -203,6 +223,52 @@ CheckForIncompatibleAttributes(Sema &S,
}
}
+static Attr *handleOpenCLUnrollHint(Sema &S, Stmt *St, const AttributeList &A,
+ SourceRange Range) {
+ // OpenCL v2.0 s6.11.5 - opencl_unroll_hint can have 0 arguments (compiler
+ // determines unrolling factor) or 1 argument (the unroll factor provided
+ // by the user).
+
+ if (S.getLangOpts().OpenCLVersion < 200) {
+ S.Diag(A.getLoc(), diag::err_attribute_requires_opencl_version)
+ << A.getName() << "2.0" << 1;
+ return nullptr;
+ }
+
+ unsigned NumArgs = A.getNumArgs();
+
+ if (NumArgs > 1) {
+ S.Diag(A.getLoc(), diag::err_attribute_too_many_arguments) << A.getName()
+ << 1;
+ return nullptr;
+ }
+
+ unsigned UnrollFactor = 0;
+
+ if (NumArgs == 1) {
+ Expr *E = A.getArgAsExpr(0);
+ llvm::APSInt ArgVal(32);
+
+ if (!E->isIntegerConstantExpr(ArgVal, S.Context)) {
+ S.Diag(A.getLoc(), diag::err_attribute_argument_type)
+ << A.getName() << AANT_ArgumentIntegerConstant << E->getSourceRange();
+ return nullptr;
+ }
+
+ int Val = ArgVal.getSExtValue();
+
+ if (Val <= 0) {
+ S.Diag(A.getRange().getBegin(),
+ diag::err_attribute_requires_positive_integer)
+ << A.getName();
+ return nullptr;
+ }
+ UnrollFactor = Val;
+ }
+
+ return OpenCLUnrollHintAttr::CreateImplicit(S.Context, UnrollFactor);
+}
+
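Illustrative OpenCL C usage the handler validates (per the OpenCL v2.0 s6.11.5 rule cited above):

    __kernel void scale(__global float *a, float s) {
      __attribute__((opencl_unroll_hint(4)))   // factor must be a positive ICE
      for (int i = 0; i < 16; ++i)
        a[i] *= s;
    }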
static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const AttributeList &A,
SourceRange Range) {
switch (A.getKind()) {
@@ -215,10 +281,12 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const AttributeList &A,
return handleFallThroughAttr(S, St, A, Range);
case AttributeList::AT_LoopHint:
return handleLoopHintAttr(S, St, A, Range);
+ case AttributeList::AT_OpenCLUnrollHint:
+ return handleOpenCLUnrollHint(S, St, A, Range);
default:
// if we're here, then we parsed a known attribute, but didn't recognize
// it as a statement attribute => it is a declaration attribute
- S.Diag(A.getRange().getBegin(), diag::err_attribute_invalid_on_stmt)
+ S.Diag(A.getRange().getBegin(), diag::err_decl_attribute_invalid_on_stmt)
<< A.getName() << St->getLocStart();
return nullptr;
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp
index 138cee0b9424..72e499342f8f 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp
@@ -1,13 +1,13 @@
-//===------- SemaTemplate.cpp - Semantic Analysis for C++ Templates -------===/
+//===------- SemaTemplate.cpp - Semantic Analysis for C++ Templates -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===/
+//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for C++ templates.
-//===----------------------------------------------------------------------===/
+//===----------------------------------------------------------------------===//
#include "TreeTransform.h"
#include "clang/AST/ASTConsumer.h"
@@ -32,6 +32,8 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+
+#include <iterator>
using namespace clang;
using namespace sema;
@@ -413,9 +415,22 @@ Sema::ActOnDependentIdExpression(const CXXScopeSpec &SS,
const TemplateArgumentListInfo *TemplateArgs) {
DeclContext *DC = getFunctionLevelDeclContext();
- if (!isAddressOfOperand &&
- isa<CXXMethodDecl>(DC) &&
- cast<CXXMethodDecl>(DC)->isInstance()) {
+ // C++11 [expr.prim.general]p12:
+ // An id-expression that denotes a non-static data member or non-static
+ // member function of a class can only be used:
+ // (...)
+ // - if that id-expression denotes a non-static data member and it
+ // appears in an unevaluated operand.
+ //
+ // If this might be the case, form a DependentScopeDeclRefExpr instead of a
+ // CXXDependentScopeMemberExpr. The former can instantiate to either
+ // DeclRefExpr or MemberExpr depending on lookup results, while the latter is
+ // always a MemberExpr.
+ bool MightBeCxx11UnevalField =
+ getLangOpts().CPlusPlus11 && isUnevaluatedContext();
+
+ if (!MightBeCxx11UnevalField && !isAddressOfOperand &&
+ isa<CXXMethodDecl>(DC) && cast<CXXMethodDecl>(DC)->isInstance()) {
QualType ThisType = cast<CXXMethodDecl>(DC)->getThisType(Context);
// Since the 'this' expression is synthesized, we don't need to
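The motivating case for the new DependentScopeDeclRefExpr path, sketched: a non-static data member named inside an unevaluated operand in a dependent context.

    template <typename T> struct X {
      T member;
      static constexpr unsigned bytes() {
        return sizeof(member);   // unevaluated use of a non-static member,
      }                          // valid per C++11 [expr.prim.general]p12
    };
    static_assert(X<int>::bytes() == sizeof(int), "");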
@@ -458,7 +473,6 @@ void Sema::DiagnoseTemplateParameterShadow(SourceLocation Loc, Decl *PrevDecl) {
Diag(Loc, diag::err_template_param_shadow)
<< cast<NamedDecl>(PrevDecl)->getDeclName();
Diag(PrevDecl->getLocation(), diag::note_template_param_here);
- return;
}
/// AdjustDeclIfTemplate - If the given decl happens to be a template, reset
@@ -555,7 +569,6 @@ Decl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
ParsedType DefaultArg) {
assert(S->isTemplateParamScope() &&
"Template type parameter not in template parameter scope!");
- bool Invalid = false;
SourceLocation Loc = ParamNameLoc;
if (!ParamName)
@@ -567,8 +580,6 @@ Decl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
KeyLoc, Loc, Depth, Position, ParamName,
Typename, IsParameterPack);
Param->setAccess(AS_public);
- if (Invalid)
- Param->setInvalidDecl();
if (ParamName) {
maybeDiagnoseTemplateParameterShadow(*this, S, ParamNameLoc, ParamName);
@@ -583,7 +594,7 @@ Decl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
// template-parameter that is not a template parameter pack.
if (DefaultArg && IsParameterPack) {
Diag(EqualLoc, diag::err_template_param_pack_default_arg);
- DefaultArg = ParsedType();
+ DefaultArg = nullptr;
}
// Handle the default argument, if provided.
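The rejected construct, for reference (the dropped default argument is now
represented by nullptr instead of a default-constructed ParsedType):

    template <typename... Ts = int>  // error: template parameter pack
    struct S;                        // cannot have a default argument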
@@ -790,7 +801,7 @@ Decl *Sema::ActOnTemplateTemplateParameter(Scope* S,
// However, it isn't worth doing.
TemplateArgumentLoc DefaultArg = translateTemplateArgument(*this, Default);
if (DefaultArg.getArgument().getAsTemplate().isNull()) {
- Diag(DefaultArg.getLocation(), diag::err_template_arg_not_class_template)
+ Diag(DefaultArg.getLocation(), diag::err_template_arg_not_valid_template)
<< DefaultArg.getSourceRange();
return Param;
}
@@ -807,18 +818,21 @@ Decl *Sema::ActOnTemplateTemplateParameter(Scope* S,
return Param;
}
-/// ActOnTemplateParameterList - Builds a TemplateParameterList that
-/// contains the template parameters in Params/NumParams.
+/// ActOnTemplateParameterList - Builds a TemplateParameterList, optionally
+/// constrained by RequiresClause, that contains the template parameters in
+/// Params.
TemplateParameterList *
Sema::ActOnTemplateParameterList(unsigned Depth,
SourceLocation ExportLoc,
SourceLocation TemplateLoc,
SourceLocation LAngleLoc,
ArrayRef<Decl *> Params,
- SourceLocation RAngleLoc) {
+ SourceLocation RAngleLoc,
+ Expr *RequiresClause) {
if (ExportLoc.isValid())
Diag(ExportLoc, diag::warn_template_export_unsupported);
+ // FIXME: store RequiresClause
return TemplateParameterList::Create(
Context, TemplateLoc, LAngleLoc,
llvm::makeArrayRef((NamedDecl *const *)Params.data(), Params.size()),
@@ -916,6 +930,13 @@ Sema::CheckClassTemplate(Scope *S, unsigned TagSpec, TagUseKind TUK,
if (Previous.begin() != Previous.end())
PrevDecl = (*Previous.begin())->getUnderlyingDecl();
+ if (PrevDecl && PrevDecl->isTemplateParameter()) {
+ // Maybe we will complain about the shadowed template parameter.
+ DiagnoseTemplateParameterShadow(NameLoc, PrevDecl);
+ // Just pretend that we didn't see the previous declaration.
+ PrevDecl = nullptr;
+ }
+
// If there is a previous declaration with the same name, check
// whether this is a valid redeclaration.
ClassTemplateDecl *PrevClassTemplate
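A sketch of the shadowing case the hoisted check now reports before any
redeclaration checking, after which the parameter is ignored as a previous
declaration:

    template <typename T> struct X {
      template <typename U> class T;  // error: 'T' shadows the enclosing
                                      // template parameter
    };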
@@ -1041,12 +1062,7 @@ Sema::CheckClassTemplate(Scope *S, unsigned TagSpec, TagUseKind TUK,
// definition, as part of error recovery?
return true;
}
- }
- } else if (PrevDecl && PrevDecl->isTemplateParameter()) {
- // Maybe we will complain about the shadowed template parameter.
- DiagnoseTemplateParameterShadow(NameLoc, PrevDecl);
- // Just pretend that we didn't see the previous declaration.
- PrevDecl = nullptr;
+ }
} else if (PrevDecl) {
// C++ [temp]p5:
// A class template shall not have the same name as any other
@@ -1577,7 +1593,7 @@ struct DependencyChecker : RecursiveASTVisitor<DependencyChecker> {
return TraverseType(T->getInjectedSpecializationType());
}
};
-}
+} // end anonymous namespace
/// Determines whether a given type depends on the given parameter
/// list.
@@ -2027,7 +2043,7 @@ checkBuiltinTemplateIdType(Sema &SemaRef, BuiltinTemplateDecl *BTD,
TemplateArgumentListInfo &TemplateArgs) {
ASTContext &Context = SemaRef.getASTContext();
switch (BTD->getBuiltinTemplateKind()) {
- case BTK__make_integer_seq:
+ case BTK__make_integer_seq: {
// Specializations of __make_integer_seq<S, T, N> are treated like
// S<T, 0, ..., N-1>.
@@ -2069,6 +2085,29 @@ checkBuiltinTemplateIdType(Sema &SemaRef, BuiltinTemplateDecl *BTD,
return SemaRef.CheckTemplateIdType(Converted[0].getAsTemplate(),
TemplateLoc, SyntheticTemplateArgs);
}
+
+ case BTK__type_pack_element:
+ // Specializations of
+ // __type_pack_element<Index, T_1, ..., T_N>
+ // are treated like T_Index.
+ assert(Converted.size() == 2 &&
+ "__type_pack_element should be given an index and a parameter pack");
+
+ // If the Index is out of bounds, the program is ill-formed.
+ TemplateArgument IndexArg = Converted[0], Ts = Converted[1];
+ llvm::APSInt Index = IndexArg.getAsIntegral();
+ assert(Index >= 0 && "the index used with __type_pack_element should be of "
+ "type std::size_t, and hence be non-negative");
+ if (Index >= Ts.pack_size()) {
+ SemaRef.Diag(TemplateArgs[0].getLocation(),
+ diag::err_type_pack_element_out_of_bounds);
+ return QualType();
+ }
+
+ // We simply return the type at index `Index`.
+ auto Nth = std::next(Ts.pack_begin(), Index.getExtValue());
+ return Nth->getAsType();
+ }
llvm_unreachable("unexpected BuiltinTemplateDecl!");
}
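A sketch of the new builtin's behavior (aliases hypothetical); the
out-of-bounds case is now a diagnostic, not an assertion failure:

    using E1 = __type_pack_element<1, char, short, int>;   // short
    static_assert(__is_same(E1, short), "selects the element at the index");
    using E3 = __type_pack_element<3, char, short, int>;   // error: index
                                                           // out of bounds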
@@ -2119,7 +2158,7 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
return QualType();
TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ Converted);
// Only substitute for the innermost template argument list.
MultiLevelTemplateArgumentList TemplateArgLists;
@@ -2150,8 +2189,7 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
// template<typename T, typename U = T> struct A;
TemplateName CanonName = Context.getCanonicalTemplateName(Name);
CanonType = Context.getTemplateSpecializationType(CanonName,
- Converted.data(),
- Converted.size());
+ Converted);
// FIXME: CanonType is not actually the canonical type, and unfortunately
// it is a TemplateSpecializationType that we will never use again.
@@ -2213,8 +2251,7 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
ClassTemplate->getTemplatedDecl()->getLocStart(),
ClassTemplate->getLocation(),
ClassTemplate,
- Converted.data(),
- Converted.size(), nullptr);
+ Converted, nullptr);
ClassTemplate->AddSpecialization(Decl, InsertPos);
if (ClassTemplate->isOutOfLine())
Decl->setLexicalDeclContext(ClassTemplate->getLexicalDeclContext());
@@ -2538,7 +2575,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
bool InstantiationDependent;
if (!Name.isDependent() &&
!TemplateSpecializationType::anyDependentTemplateArguments(
- TemplateArgs.getArgumentArray(), TemplateArgs.size(),
+ TemplateArgs.arguments(),
InstantiationDependent)) {
Diag(TemplateNameLoc, diag::err_partial_spec_fully_specialized)
<< VarTemplate->getDeclName();
@@ -2595,7 +2632,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
VarTemplatePartialSpecializationDecl::Create(
Context, VarTemplate->getDeclContext(), TemplateKWLoc,
TemplateNameLoc, TemplateParams, VarTemplate, DI->getType(), DI, SC,
- Converted.data(), Converted.size(), TemplateArgs);
+ Converted, TemplateArgs);
if (!PrevPartial)
VarTemplate->AddPartialSpecialization(Partial, InsertPos);
@@ -2637,7 +2674,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization(
// this explicit specialization or friend declaration.
Specialization = VarTemplateSpecializationDecl::Create(
Context, VarTemplate->getDeclContext(), TemplateKWLoc, TemplateNameLoc,
- VarTemplate, DI->getType(), DI, SC, Converted.data(), Converted.size());
+ VarTemplate, DI->getType(), DI, SC, Converted);
Specialization->setTemplateArgsInfo(TemplateArgs);
if (!PrevDecl)
@@ -2713,7 +2750,7 @@ struct PartialSpecMatchResult {
VarTemplatePartialSpecializationDecl *Partial;
TemplateArgumentList *Args;
};
-}
+} // end anonymous namespace
DeclResult
Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
@@ -2733,9 +2770,11 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
// corresponds to these arguments.
void *InsertPos = nullptr;
if (VarTemplateSpecializationDecl *Spec = Template->findSpecialization(
- Converted, InsertPos))
+ Converted, InsertPos)) {
+ checkSpecializationVisibility(TemplateNameLoc, Spec);
// If we already have a variable template specialization, return it.
return Spec;
+ }
// This is the first time we have referenced this variable template
// specialization. Create the canonical declaration and add it to
@@ -2743,7 +2782,7 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
// that it represents. That is,
VarDecl *InstantiationPattern = Template->getTemplatedDecl();
TemplateArgumentList TemplateArgList(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ Converted);
TemplateArgumentList *InstantiationArgs = &TemplateArgList;
bool AmbiguousPartialSpec = false;
typedef PartialSpecMatchResult MatchResult;
@@ -2776,8 +2815,9 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
DeduceTemplateArguments(Partial, TemplateArgList, Info)) {
// Store the failed-deduction information for use in diagnostics, later.
// TODO: Actually use the failed-deduction info?
- FailedCandidates.addCandidate()
- .set(Partial, MakeDeductionFailureInfo(Context, Result, Info));
+ FailedCandidates.addCandidate().set(
+ DeclAccessPair::make(Template, AS_public), Partial,
+ MakeDeductionFailureInfo(Context, Result, Info));
(void)Result;
} else {
Matched.push_back(PartialSpecMatchResult());
@@ -2834,8 +2874,8 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
}
// 2. Create the canonical declaration.
- // Note that we do not instantiate the variable just yet, since
- // instantiation is handled in DoMarkVarDeclReferenced().
+ // Note that we do not instantiate a definition until we see an odr-use
+ // in DoMarkVarDeclReferenced().
// FIXME: LateAttrs et al.?
VarTemplateSpecializationDecl *Decl = BuildVarTemplateInstantiation(
Template, InstantiationPattern, *InstantiationArgs, TemplateArgs,
@@ -2863,6 +2903,8 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc,
dyn_cast<VarTemplatePartialSpecializationDecl>(InstantiationPattern))
Decl->setInstantiationOf(D, InstantiationArgs);
+ checkSpecializationVisibility(TemplateNameLoc, Decl);
+
assert(Decl && "No variable template specialization?");
return Decl;
}
@@ -3214,13 +3256,12 @@ SubstDefaultTemplateArgument(Sema &SemaRef,
// on the previously-computed template arguments.
if (ArgType->getType()->isDependentType()) {
Sema::InstantiatingTemplate Inst(SemaRef, TemplateLoc,
- Template, Converted,
+ Param, Template, Converted,
SourceRange(TemplateLoc, RAngleLoc));
if (Inst.isInvalid())
return nullptr;
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack, Converted);
// Only substitute for the innermost template argument list.
MultiLevelTemplateArgumentList TemplateArgLists;
@@ -3267,13 +3308,12 @@ SubstDefaultTemplateArgument(Sema &SemaRef,
NonTypeTemplateParmDecl *Param,
SmallVectorImpl<TemplateArgument> &Converted) {
Sema::InstantiatingTemplate Inst(SemaRef, TemplateLoc,
- Template, Converted,
+ Param, Template, Converted,
SourceRange(TemplateLoc, RAngleLoc));
if (Inst.isInvalid())
return ExprError();
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack, Converted);
// Only substitute for the innermost template argument list.
MultiLevelTemplateArgumentList TemplateArgLists;
@@ -3319,13 +3359,13 @@ SubstDefaultTemplateArgument(Sema &SemaRef,
TemplateTemplateParmDecl *Param,
SmallVectorImpl<TemplateArgument> &Converted,
NestedNameSpecifierLoc &QualifierLoc) {
- Sema::InstantiatingTemplate Inst(SemaRef, TemplateLoc, Template, Converted,
- SourceRange(TemplateLoc, RAngleLoc));
+ Sema::InstantiatingTemplate Inst(
+ SemaRef, TemplateLoc, TemplateParameter(Param), Template, Converted,
+ SourceRange(TemplateLoc, RAngleLoc));
if (Inst.isInvalid())
return TemplateName();
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack, Converted);
// Only substitute for the innermost template argument list.
MultiLevelTemplateArgumentList TemplateArgLists;
@@ -3476,7 +3516,7 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param,
return true;
TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ Converted);
NTTPType = SubstType(NTTPType,
MultiLevelTemplateArgumentList(TemplateArgs),
NTTP->getLocation(),
@@ -3616,8 +3656,7 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param,
if (Inst.isInvalid())
return true;
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Converted.data(), Converted.size());
+ TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack, Converted);
TempParm = cast_or_null<TemplateTemplateParmDecl>(
SubstDecl(TempParm, CurContext,
MultiLevelTemplateArgumentList(TemplateArgs)));
@@ -3728,7 +3767,7 @@ static bool diagnoseMissingArgument(Sema &S, SourceLocation Loc,
S.diagnoseMissingImport(Loc, cast<NamedDecl>(TD),
D->getDefaultArgumentLoc(), Modules,
Sema::MissingImportKind::DefaultArgument,
- /*Recover*/ true);
+ /*Recover*/true);
return true;
}
@@ -3943,7 +3982,9 @@ bool Sema::CheckTemplateArgumentList(TemplateDecl *Template,
}
// Introduce an instantiation record that describes where we are using
- // the default template argument.
+ // the default template argument. We're not actually instantiating a
+ // template here; we just create this object to put a note into the
+ // context stack.
InstantiatingTemplate Inst(*this, RAngleLoc, Template, *Param, Converted,
SourceRange(TemplateLoc, RAngleLoc));
if (Inst.isInvalid())
@@ -4014,7 +4055,7 @@ namespace {
bool VisitTagDecl(const TagDecl *Tag);
bool VisitNestedNameSpecifier(NestedNameSpecifier *NNS);
};
-}
+} // end anonymous namespace
bool UnnamedLocalNoLinkageFinder::VisitBuiltinType(const BuiltinType*) {
return false;
@@ -4229,7 +4270,6 @@ bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier(
llvm_unreachable("Invalid NestedNameSpecifier::Kind!");
}
-
/// \brief Check a template argument against its corresponding
/// template type parameter.
///
@@ -5340,10 +5380,11 @@ bool Sema::CheckTemplateArgument(TemplateTemplateParmDecl *Param,
// partial specializations.
if (!isa<ClassTemplateDecl>(Template) &&
!isa<TemplateTemplateParmDecl>(Template) &&
- !isa<TypeAliasTemplateDecl>(Template)) {
+ !isa<TypeAliasTemplateDecl>(Template) &&
+ !isa<BuiltinTemplateDecl>(Template)) {
assert(isa<FunctionTemplateDecl>(Template) &&
"Only function templates are possible here");
- Diag(Arg.getLocation(), diag::err_template_arg_not_class_template);
+ Diag(Arg.getLocation(), diag::err_template_arg_not_valid_template);
Diag(Template->getLocation(), diag::note_template_arg_refers_here_func)
<< Template;
}
@@ -6281,9 +6322,7 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
bool InstantiationDependent;
if (!Name.isDependent() &&
!TemplateSpecializationType::anyDependentTemplateArguments(
- TemplateArgs.getArgumentArray(),
- TemplateArgs.size(),
- InstantiationDependent)) {
+ TemplateArgs.arguments(), InstantiationDependent)) {
Diag(TemplateNameLoc, diag::err_partial_spec_fully_specialized)
<< ClassTemplate->getDeclName();
isPartialSpecialization = false;
@@ -6316,8 +6355,7 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
// arguments of the class template partial specialization.
TemplateName CanonTemplate = Context.getCanonicalTemplateName(Name);
CanonType = Context.getTemplateSpecializationType(CanonTemplate,
- Converted.data(),
- Converted.size());
+ Converted);
if (Context.hasSameType(CanonType,
ClassTemplate->getInjectedClassNameSpecialization())) {
@@ -6348,8 +6386,7 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
KWLoc, TemplateNameLoc,
TemplateParams,
ClassTemplate,
- Converted.data(),
- Converted.size(),
+ Converted,
TemplateArgs,
CanonType,
PrevPartial);
@@ -6404,8 +6441,7 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
ClassTemplate->getDeclContext(),
KWLoc, TemplateNameLoc,
ClassTemplate,
- Converted.data(),
- Converted.size(),
+ Converted,
PrevDecl);
SetNestedNameSpecifier(Specialization, SS);
if (TemplateParameterLists.size() > 0) {
@@ -6423,7 +6459,7 @@ Sema::ActOnClassTemplateSpecialization(Scope *S, unsigned TagSpec,
"Only possible with -fms-extensions!");
TemplateName CanonTemplate = Context.getCanonicalTemplateName(Name);
CanonType = Context.getTemplateSpecializationType(
- CanonTemplate, Converted.data(), Converted.size());
+ CanonTemplate, Converted);
} else {
CanonType = Context.getTypeDeclType(Specialization);
}
@@ -6879,12 +6915,13 @@ bool Sema::CheckFunctionTemplateSpecialization(
FunctionDecl *Specialization = nullptr;
if (TemplateDeductionResult TDK = DeduceTemplateArguments(
cast<FunctionTemplateDecl>(FunTmpl->getFirstDecl()),
- ExplicitTemplateArgs ? &Args : nullptr, FT, Specialization, Info)) {
+ ExplicitTemplateArgs ? &Args : nullptr, FT, Specialization,
+ Info)) {
// Template argument deduction failed; record why it failed, so
// that we can provide nifty diagnostics.
- FailedCandidates.addCandidate()
- .set(FunTmpl->getTemplatedDecl(),
- MakeDeductionFailureInfo(Context, TDK, Info));
+ FailedCandidates.addCandidate().set(
+ I.getPair(), FunTmpl->getTemplatedDecl(),
+ MakeDeductionFailureInfo(Context, TDK, Info));
(void)TDK;
continue;
}
@@ -6911,6 +6948,15 @@ bool Sema::CheckFunctionTemplateSpecialization(
// Ignore access information; it doesn't figure into redeclaration checking.
FunctionDecl *Specialization = cast<FunctionDecl>(*Result);
+ // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare [...]
+ // an explicit specialization (14.8.3) [...] of a concept definition.
+ if (Specialization->getPrimaryTemplate()->isConcept()) {
+ Diag(FD->getLocation(), diag::err_concept_specialized)
+ << 0 /*function*/ << 1 /*explicitly specialized*/;
+ Diag(Specialization->getLocation(), diag::note_previous_declaration);
+ return true;
+ }
+
FunctionTemplateSpecializationInfo *SpecInfo
= Specialization->getTemplateSpecializationInfo();
assert(SpecInfo && "Function template specialization info missing?");
@@ -6960,6 +7006,21 @@ bool Sema::CheckFunctionTemplateSpecialization(
// Mark the prior declaration as an explicit specialization, so that later
// clients know that this is an explicit specialization.
if (!isFriend) {
+ // Explicit specializations do not inherit '=delete' from their primary
+ // function template. So if the 'specialization' that was implicitly
+ // generated (during template argument deduction for partial ordering)
+ // from the most specialized of the function templates that 'FD' could
+ // be specializing has a 'deleted' definition, first confirm that it
+ // really was implicitly generated (by making sure it was never
+ // referenced), and then reset the deleted flag to not-deleted so we
+ // can inherit that information from 'FD'.
+ if (Specialization->isDeleted() && !SpecInfo->isExplicitSpecialization() &&
+ !Specialization->getCanonicalDecl()->isReferenced()) {
+ assert(
+ Specialization->getCanonicalDecl() == Specialization &&
+ "This must be the only existing declaration of this specialization");
+ Specialization->setDeletedAsWritten(false);
+ }
SpecInfo->setTemplateSpecializationKind(TSK_ExplicitSpecialization);
MarkUnusedFileScopedDecl(Specialization);
}
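A sketch (names hypothetical) of the scenario the deleted-flag reset
handles:

    template <typename T> T *addr(T &) = delete;

    // The declaration implicitly generated while matching this explicit
    // specialization starts out deleted; since it was never referenced,
    // the flag is reset so the specialization itself is not deleted.
    template <> int *addr(int &);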
@@ -7001,6 +7062,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
assert(!isa<TemplateDecl>(Member) && "Only for non-template members");
// Try to find the member we are instantiating.
+ NamedDecl *FoundInstantiation = nullptr;
NamedDecl *Instantiation = nullptr;
NamedDecl *InstantiatedFrom = nullptr;
MemberSpecializationInfo *MSInfo = nullptr;
@@ -7016,6 +7078,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
if (!hasExplicitCallingConv(Adjusted))
Adjusted = adjustCCAndNoReturn(Adjusted, Method->getType());
if (Context.hasSameType(Adjusted, Method->getType())) {
+ FoundInstantiation = *I;
Instantiation = Method;
InstantiatedFrom = Method->getInstantiatedFromMemberFunction();
MSInfo = Method->getMemberSpecializationInfo();
@@ -7028,6 +7091,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
if (Previous.isSingleResult() &&
(PrevVar = dyn_cast<VarDecl>(Previous.getFoundDecl())))
if (PrevVar->isStaticDataMember()) {
+ FoundInstantiation = Previous.getRepresentativeDecl();
Instantiation = PrevVar;
InstantiatedFrom = PrevVar->getInstantiatedFromStaticDataMember();
MSInfo = PrevVar->getMemberSpecializationInfo();
@@ -7036,6 +7100,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
CXXRecordDecl *PrevRecord;
if (Previous.isSingleResult() &&
(PrevRecord = dyn_cast<CXXRecordDecl>(Previous.getFoundDecl()))) {
+ FoundInstantiation = Previous.getRepresentativeDecl();
Instantiation = PrevRecord;
InstantiatedFrom = PrevRecord->getInstantiatedFromMemberClass();
MSInfo = PrevRecord->getMemberSpecializationInfo();
@@ -7044,6 +7109,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
EnumDecl *PrevEnum;
if (Previous.isSingleResult() &&
(PrevEnum = dyn_cast<EnumDecl>(Previous.getFoundDecl()))) {
+ FoundInstantiation = Previous.getRepresentativeDecl();
Instantiation = PrevEnum;
InstantiatedFrom = PrevEnum->getInstantiatedFromMemberEnum();
MSInfo = PrevEnum->getMemberSpecializationInfo();
@@ -7072,7 +7138,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
}
Previous.clear();
- Previous.addDecl(Instantiation);
+ Previous.addDecl(FoundInstantiation);
return false;
}
@@ -7119,6 +7185,13 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
InstantiationFunction->setTemplateSpecializationKind(
TSK_ExplicitSpecialization);
InstantiationFunction->setLocation(Member->getLocation());
+ // Explicit specializations of member functions of class templates do not
+ // inherit '=delete' from the member function they are specializing.
+ if (InstantiationFunction->isDeleted()) {
+ assert(InstantiationFunction->getCanonicalDecl() ==
+ InstantiationFunction);
+ InstantiationFunction->setDeletedAsWritten(false);
+ }
}
cast<FunctionDecl>(Member)->setInstantiationOfMemberFunction(
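The member-function analogue, as a sketch:

    template <typename T> struct A { void f() = delete; };

    // The explicit specialization does not inherit '=delete' from A<T>::f.
    template <> void A<int>::f() {}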
@@ -7166,7 +7239,7 @@ Sema::CheckMemberSpecialization(NamedDecl *Member, LookupResult &Previous) {
// Save the caller the trouble of having to figure out which declaration
// this specialization matches.
Previous.clear();
- Previous.addDecl(Instantiation);
+ Previous.addDecl(FoundInstantiation);
return false;
}
@@ -7268,15 +7341,21 @@ Sema::ActOnExplicitInstantiation(Scope *S,
assert(Kind != TTK_Enum &&
"Invalid enum tag in class template explicit instantiation!");
- if (isa<TypeAliasTemplateDecl>(TD)) {
- Diag(KWLoc, diag::err_tag_reference_non_tag) << Kind;
- Diag(TD->getTemplatedDecl()->getLocation(),
- diag::note_previous_use);
+ ClassTemplateDecl *ClassTemplate = dyn_cast<ClassTemplateDecl>(TD);
+
+ if (!ClassTemplate) {
+ unsigned ErrorKind = 0;
+ if (isa<TypeAliasTemplateDecl>(TD)) {
+ ErrorKind = 4;
+ } else if (isa<TemplateTemplateParmDecl>(TD)) {
+ ErrorKind = 5;
+ }
+
+ Diag(TemplateNameLoc, diag::err_tag_reference_non_tag) << ErrorKind;
+ Diag(TD->getLocation(), diag::note_previous_use);
return true;
}
- ClassTemplateDecl *ClassTemplate = cast<ClassTemplateDecl>(TD);
-
if (!isAcceptableTagRedeclaration(ClassTemplate->getTemplatedDecl(),
Kind, /*isDefinition*/false, KWLoc,
ClassTemplate->getIdentifier())) {
@@ -7315,6 +7394,29 @@ Sema::ActOnExplicitInstantiation(Scope *S,
}
}
+ // In MSVC mode, dllimported explicit instantiation definitions are treated as
+ // instantiation declarations for most purposes.
+ bool DLLImportExplicitInstantiationDef = false;
+ if (TSK == TSK_ExplicitInstantiationDefinition &&
+ Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+ // Check for dllimport class template instantiation definitions.
+ bool DLLImport =
+ ClassTemplate->getTemplatedDecl()->getAttr<DLLImportAttr>();
+ for (AttributeList *A = Attr; A; A = A->getNext()) {
+ if (A->getKind() == AttributeList::AT_DLLImport)
+ DLLImport = true;
+ if (A->getKind() == AttributeList::AT_DLLExport) {
+ // dllexport trumps dllimport here.
+ DLLImport = false;
+ break;
+ }
+ }
+ if (DLLImport) {
+ TSK = TSK_ExplicitInstantiationDeclaration;
+ DLLImportExplicitInstantiationDef = true;
+ }
+ }
+
// Translate the parser's template argument list in our AST format.
TemplateArgumentListInfo TemplateArgs(LAngleLoc, RAngleLoc);
translateTemplateArguments(TemplateArgsIn, TemplateArgs);
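A sketch (names hypothetical) of the MSVC-mode behavior:

    template <typename T> struct __declspec(dllimport) S { void f() {} };

    // On a Microsoft ABI target, this explicit instantiation definition is
    // downgraded to TSK_ExplicitInstantiationDeclaration, much as if it had
    // been written 'extern template struct S<int>;'.
    template struct S<int>;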
@@ -7368,6 +7470,12 @@ Sema::ActOnExplicitInstantiation(Scope *S,
Specialization->setLocation(TemplateNameLoc);
PrevDecl = nullptr;
}
+
+ if (PrevDecl_TSK == TSK_ExplicitInstantiationDeclaration &&
+ DLLImportExplicitInstantiationDef) {
+ // The new specialization might add a dllimport attribute.
+ HasNoEffect = false;
+ }
}
if (!Specialization) {
@@ -7378,8 +7486,7 @@ Sema::ActOnExplicitInstantiation(Scope *S,
ClassTemplate->getDeclContext(),
KWLoc, TemplateNameLoc,
ClassTemplate,
- Converted.data(),
- Converted.size(),
+ Converted,
PrevDecl);
SetNestedNameSpecifier(Specialization, SS);
@@ -7405,7 +7512,7 @@ Sema::ActOnExplicitInstantiation(Scope *S,
// Set source locations for keywords.
Specialization->setExternLoc(ExternLoc);
Specialization->setTemplateKeywordLoc(TemplateLoc);
- Specialization->setRBraceLoc(SourceLocation());
+ Specialization->setBraceRange(SourceRange());
if (Attr)
ProcessDeclAttributeList(S, Specialization, Attr);
@@ -7445,11 +7552,11 @@ Sema::ActOnExplicitInstantiation(Scope *S,
Specialization->getDefinition());
if (Def) {
TemplateSpecializationKind Old_TSK = Def->getTemplateSpecializationKind();
-
// Fix a TSK_ExplicitInstantiationDeclaration followed by a
// TSK_ExplicitInstantiationDefinition
if (Old_TSK == TSK_ExplicitInstantiationDeclaration &&
- TSK == TSK_ExplicitInstantiationDefinition) {
+ (TSK == TSK_ExplicitInstantiationDefinition ||
+ DLLImportExplicitInstantiationDef)) {
// FIXME: Need to notify the ASTMutationListener that we did this.
Def->setTemplateSpecializationKind(TSK);
@@ -7462,7 +7569,13 @@ Sema::ActOnExplicitInstantiation(Scope *S,
getDLLAttr(Specialization)->clone(getASTContext()));
A->setInherited(true);
Def->addAttr(A);
+
+ // We reject explicit instantiations in class scope, so there should
+ // never be any delayed exported classes to worry about.
+ assert(DelayedDllExportClasses.empty() &&
+ "delayed exports present at explicit instantiation");
checkClassLevelDLLAttribute(Def);
+ referenceDLLExportedClassMethods();
// Propagate attribute to base class templates.
for (auto &B : Def->bases()) {
@@ -7673,6 +7786,15 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
Diag(D.getDeclSpec().getConstexprSpecLoc(),
diag::err_explicit_instantiation_constexpr);
+ // C++ Concepts TS [dcl.spec.concept]p1: The concept specifier shall be
+ // applied only to the definition of a function template or variable template,
+ // declared in namespace scope.
+ if (D.getDeclSpec().isConceptSpecified()) {
+ Diag(D.getDeclSpec().getConceptSpecLoc(),
+ diag::err_concept_specified_specialization) << 0;
+ return true;
+ }
+
// C++0x [temp.explicit]p2:
// There are two forms of explicit instantiation: an explicit instantiation
// definition and an explicit instantiation declaration. An explicit
@@ -7744,6 +7866,15 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
return true;
}
+ // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare an
+ // explicit instantiation (14.8.2) [...] of a concept definition.
+ if (PrevTemplate->isConcept()) {
+ Diag(D.getIdentifierLoc(), diag::err_concept_specialized)
+ << 1 /*variable*/ << 0 /*explicitly instantiated*/;
+ Diag(PrevTemplate->getLocation(), diag::note_previous_declaration);
+ return true;
+ }
+
// Translate the parser's template argument list into our AST format.
TemplateArgumentListInfo TemplateArgs =
makeTemplateArgumentListInfo(*this, *D.getName().TemplateId);
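For reference, the Concepts TS construct this rejects (assuming a variable
concept and -fconcepts-ts):

    template <typename T> concept bool Small = sizeof(T) <= sizeof(int);

    template bool Small<char>;  // error: a concept definition cannot be
                                // explicitly instantiated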
@@ -7856,7 +7987,7 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
R, Specialization, Info)) {
// Keep track of almost-matches.
FailedCandidates.addCandidate()
- .set(FunTmpl->getTemplatedDecl(),
+ .set(P.getPair(), FunTmpl->getTemplatedDecl(),
MakeDeductionFailureInfo(Context, TDK, Info));
(void)TDK;
continue;
@@ -7958,6 +8089,16 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
diag::ext_explicit_instantiation_without_qualified_id)
<< Specialization << D.getCXXScopeSpec().getRange();
+ // C++ Concepts TS [dcl.spec.concept]p7: A program shall not declare an
+ // explicit instantiation (14.8.2) [...] of a concept definition.
+ if (FunTmpl && FunTmpl->isConcept() &&
+ !D.getDeclSpec().isConceptSpecified()) {
+ Diag(D.getIdentifierLoc(), diag::err_concept_specialized)
+ << 0 /*function*/ << 0 /*explicitly instantiated*/;
+ Diag(FunTmpl->getLocation(), diag::note_previous_declaration);
+ return true;
+ }
+
CheckExplicitInstantiationScope(*this,
FunTmpl? (NamedDecl *)FunTmpl
: Specialization->getInstantiatedFromMemberFunction(),
@@ -8294,7 +8435,7 @@ namespace {
return E;
}
};
-}
+} // end anonymous namespace
/// \brief Rebuilds a type within the context of the current instantiation.
///
@@ -8469,3 +8610,149 @@ bool Sema::IsInsideALocalClassWithinATemplateFunction() {
}
return false;
}
+
+/// \brief Walk the path from which a declaration was instantiated, and check
+/// that every explicit specialization along that path is visible. This enforces
+/// C++ [temp.expl.spec]/6:
+///
+/// If a template, a member template or a member of a class template is
+/// explicitly specialized then that specialization shall be declared before
+/// the first use of that specialization that would cause an implicit
+/// instantiation to take place, in every translation unit in which such a
+/// use occurs; no diagnostic is required.
+///
+/// and also C++ [temp.class.spec]/1:
+///
+/// A partial specialization shall be declared before the first use of a
+/// class template specialization that would make use of the partial
+/// specialization as the result of an implicit or explicit instantiation
+/// in every translation unit in which such a use occurs; no diagnostic is
+/// required.
+class ExplicitSpecializationVisibilityChecker {
+ Sema &S;
+ SourceLocation Loc;
+ llvm::SmallVector<Module *, 8> Modules;
+
+public:
+ ExplicitSpecializationVisibilityChecker(Sema &S, SourceLocation Loc)
+ : S(S), Loc(Loc) {}
+
+ void check(NamedDecl *ND) {
+ if (auto *FD = dyn_cast<FunctionDecl>(ND))
+ return checkImpl(FD);
+ if (auto *RD = dyn_cast<CXXRecordDecl>(ND))
+ return checkImpl(RD);
+ if (auto *VD = dyn_cast<VarDecl>(ND))
+ return checkImpl(VD);
+ if (auto *ED = dyn_cast<EnumDecl>(ND))
+ return checkImpl(ED);
+ }
+
+private:
+ void diagnose(NamedDecl *D, bool IsPartialSpec) {
+ auto Kind = IsPartialSpec ? Sema::MissingImportKind::PartialSpecialization
+ : Sema::MissingImportKind::ExplicitSpecialization;
+ const bool Recover = true;
+
+ // If we got a custom set of modules (because only a subset of the
+ // declarations is interesting), use them; otherwise let
+ // diagnoseMissingImport intelligently pick some.
+ if (Modules.empty())
+ S.diagnoseMissingImport(Loc, D, Kind, Recover);
+ else
+ S.diagnoseMissingImport(Loc, D, D->getLocation(), Modules, Kind, Recover);
+ }
+
+ // Check a specific declaration. There are three problematic cases:
+ //
+ // 1) The declaration is an explicit specialization of a template
+ // specialization.
+ // 2) The declaration is an explicit specialization of a member of a
+ // templated class.
+ // 3) The declaration is an instantiation of a template, and that template
+ // is an explicit specialization of a member of a templated class.
+ //
+ // We don't need to go any deeper than that, as the instantiation of the
+ // surrounding class and so on is not triggered by whatever triggered this
+ // instantiation, and thus should be checked elsewhere.
+ template<typename SpecDecl>
+ void checkImpl(SpecDecl *Spec) {
+ bool IsHiddenExplicitSpecialization = false;
+ if (Spec->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) {
+ IsHiddenExplicitSpecialization =
+ Spec->getMemberSpecializationInfo()
+ ? !S.hasVisibleMemberSpecialization(Spec, &Modules)
+ : !S.hasVisibleDeclaration(Spec);
+ } else {
+ checkInstantiated(Spec);
+ }
+
+ if (IsHiddenExplicitSpecialization)
+ diagnose(Spec->getMostRecentDecl(), false);
+ }
+
+ void checkInstantiated(FunctionDecl *FD) {
+ if (auto *TD = FD->getPrimaryTemplate())
+ checkTemplate(TD);
+ }
+
+ void checkInstantiated(CXXRecordDecl *RD) {
+ auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD);
+ if (!SD)
+ return;
+
+ auto From = SD->getSpecializedTemplateOrPartial();
+ if (auto *TD = From.dyn_cast<ClassTemplateDecl *>())
+ checkTemplate(TD);
+ else if (auto *TD =
+ From.dyn_cast<ClassTemplatePartialSpecializationDecl *>()) {
+ if (!S.hasVisibleDeclaration(TD))
+ diagnose(TD, true);
+ checkTemplate(TD);
+ }
+ }
+
+ void checkInstantiated(VarDecl *VD) {
+ auto *SD = dyn_cast<VarTemplateSpecializationDecl>(VD);
+ if (!SD)
+ return;
+
+ auto From = SD->getSpecializedTemplateOrPartial();
+ if (auto *TD = From.dyn_cast<VarTemplateDecl *>())
+ checkTemplate(TD);
+ else if (auto *TD =
+ From.dyn_cast<VarTemplatePartialSpecializationDecl *>()) {
+ if (!S.hasVisibleDeclaration(TD))
+ diagnose(TD, true);
+ checkTemplate(TD);
+ }
+ }
+
+ void checkInstantiated(EnumDecl *ED) {}
+
+ template<typename TemplDecl>
+ void checkTemplate(TemplDecl *TD) {
+ if (TD->isMemberSpecialization()) {
+ if (!S.hasVisibleMemberSpecialization(TD, &Modules))
+ diagnose(TD->getMostRecentDecl(), false);
+ }
+ }
+};
+
+void Sema::checkSpecializationVisibility(SourceLocation Loc, NamedDecl *Spec) {
+ if (!getLangOpts().Modules)
+ return;
+
+ ExplicitSpecializationVisibilityChecker(*this, Loc).check(Spec);
+}
+
+/// \brief Check whether a template partial specialization that we've discovered
+/// is hidden, and produce suitable diagnostics if so.
+void Sema::checkPartialSpecializationVisibility(SourceLocation Loc,
+ NamedDecl *Spec) {
+ llvm::SmallVector<Module *, 8> Modules;
+ if (!hasVisibleDeclaration(Spec, &Modules))
+ diagnoseMissingImport(Loc, Spec, Spec->getLocation(), Modules,
+ MissingImportKind::PartialSpecialization,
+ /*Recover*/true);
+}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp
index 71faafc6bc12..5740bc712e86 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -103,12 +103,12 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
bool PartialOrdering = false);
static Sema::TemplateDeductionResult
-DeduceTemplateArguments(Sema &S,
- TemplateParameterList *TemplateParams,
+DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
const TemplateArgument *Params, unsigned NumParams,
const TemplateArgument *Args, unsigned NumArgs,
TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced);
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+ bool NumberOfArgumentsMustMatch);
/// \brief If the given expression is of a form that permits the deduction
/// of a non-type template parameter, return the declaration of that
@@ -286,13 +286,10 @@ checkDeducedTemplateArguments(ASTContext &Context,
/// \brief Deduce the value of the given non-type template parameter
/// from the given constant.
-static Sema::TemplateDeductionResult
-DeduceNonTypeTemplateArgument(Sema &S,
- NonTypeTemplateParmDecl *NTTP,
- llvm::APSInt Value, QualType ValueType,
- bool DeducedFromArrayBound,
- TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
+static Sema::TemplateDeductionResult DeduceNonTypeTemplateArgument(
+ Sema &S, NonTypeTemplateParmDecl *NTTP, const llvm::APSInt &Value,
+ QualType ValueType, bool DeducedFromArrayBound, TemplateDeductionInfo &Info,
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
assert(NTTP->getDepth() == 0 &&
"Cannot deduce non-type template argument with depth > 0");
@@ -456,10 +453,10 @@ DeduceTemplateArguments(Sema &S,
// Perform template argument deduction on each template
// argument. Ignore any missing/extra arguments, since they could be
// filled in by default arguments.
- return DeduceTemplateArguments(S, TemplateParams,
- Param->getArgs(), Param->getNumArgs(),
- SpecArg->getArgs(), SpecArg->getNumArgs(),
- Info, Deduced);
+ return DeduceTemplateArguments(S, TemplateParams, Param->getArgs(),
+ Param->getNumArgs(), SpecArg->getArgs(),
+ SpecArg->getNumArgs(), Info, Deduced,
+ /*NumberOfArgumentsMustMatch=*/false);
}
// If the argument type is a class template specialization, we
@@ -490,11 +487,10 @@ DeduceTemplateArguments(Sema &S,
return Result;
// Perform template argument deduction for the template arguments.
- return DeduceTemplateArguments(S, TemplateParams,
- Param->getArgs(), Param->getNumArgs(),
- SpecArg->getTemplateArgs().data(),
- SpecArg->getTemplateArgs().size(),
- Info, Deduced);
+ return DeduceTemplateArguments(
+ S, TemplateParams, Param->getArgs(), Param->getNumArgs(),
+ SpecArg->getTemplateArgs().data(), SpecArg->getTemplateArgs().size(),
+ Info, Deduced, /*NumberOfArgumentsMustMatch=*/true);
}
/// \brief Determines whether the given type is an opaque type that
@@ -1418,85 +1414,101 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// TT<i>
// TT<>
case Type::TemplateSpecialization: {
- const TemplateSpecializationType *SpecParam
- = cast<TemplateSpecializationType>(Param);
-
- // Try to deduce template arguments from the template-id.
- Sema::TemplateDeductionResult Result
- = DeduceTemplateArguments(S, TemplateParams, SpecParam, Arg,
- Info, Deduced);
-
- if (Result && (TDF & TDF_DerivedClass)) {
- // C++ [temp.deduct.call]p3b3:
- // If P is a class, and P has the form template-id, then A can be a
- // derived class of the deduced A. Likewise, if P is a pointer to a
- // class of the form template-id, A can be a pointer to a derived
- // class pointed to by the deduced A.
- //
- // More importantly:
- // These alternatives are considered only if type deduction would
- // otherwise fail.
- if (const RecordType *RecordT = Arg->getAs<RecordType>()) {
- // We cannot inspect base classes as part of deduction when the type
- // is incomplete, so either instantiate any templates necessary to
- // complete the type, or skip over it if it cannot be completed.
- if (!S.isCompleteType(Info.getLocation(), Arg))
- return Result;
-
- // Use data recursion to crawl through the list of base classes.
- // Visited contains the set of nodes we have already visited, while
- // ToVisit is our stack of records that we still need to visit.
- llvm::SmallPtrSet<const RecordType *, 8> Visited;
- SmallVector<const RecordType *, 8> ToVisit;
- ToVisit.push_back(RecordT);
- bool Successful = false;
- SmallVector<DeducedTemplateArgument, 8> DeducedOrig(Deduced.begin(),
- Deduced.end());
- while (!ToVisit.empty()) {
- // Retrieve the next class in the inheritance hierarchy.
- const RecordType *NextT = ToVisit.pop_back_val();
-
- // If we have already seen this type, skip it.
- if (!Visited.insert(NextT).second)
- continue;
-
- // If this is a base class, try to perform template argument
- // deduction from it.
- if (NextT != RecordT) {
- TemplateDeductionInfo BaseInfo(Info.getLocation());
- Sema::TemplateDeductionResult BaseResult
- = DeduceTemplateArguments(S, TemplateParams, SpecParam,
- QualType(NextT, 0), BaseInfo,
- Deduced);
-
- // If template argument deduction for this base was successful,
- // note that we had some success. Otherwise, ignore any deductions
- // from this base class.
- if (BaseResult == Sema::TDK_Success) {
- Successful = true;
- DeducedOrig.clear();
- DeducedOrig.append(Deduced.begin(), Deduced.end());
- Info.Param = BaseInfo.Param;
- Info.FirstArg = BaseInfo.FirstArg;
- Info.SecondArg = BaseInfo.SecondArg;
- }
- else
- Deduced = DeducedOrig;
- }
-
- // Visit base classes
- CXXRecordDecl *Next = cast<CXXRecordDecl>(NextT->getDecl());
- for (const auto &Base : Next->bases()) {
- assert(Base.getType()->isRecordType() &&
- "Base class that isn't a record?");
- ToVisit.push_back(Base.getType()->getAs<RecordType>());
- }
+ const TemplateSpecializationType *SpecParam =
+ cast<TemplateSpecializationType>(Param);
+
+ // When Arg cannot be a derived class, we can just try to deduce template
+ // arguments from the template-id.
+ const RecordType *RecordT = Arg->getAs<RecordType>();
+ if (!(TDF & TDF_DerivedClass) || !RecordT)
+ return DeduceTemplateArguments(S, TemplateParams, SpecParam, Arg, Info,
+ Deduced);
+
+ SmallVector<DeducedTemplateArgument, 8> DeducedOrig(Deduced.begin(),
+ Deduced.end());
+
+ Sema::TemplateDeductionResult Result = DeduceTemplateArguments(
+ S, TemplateParams, SpecParam, Arg, Info, Deduced);
+
+ if (Result == Sema::TDK_Success)
+ return Result;
+
+ // We cannot inspect base classes as part of deduction when the type
+ // is incomplete, so either instantiate any templates necessary to
+ // complete the type, or skip over it if it cannot be completed.
+ if (!S.isCompleteType(Info.getLocation(), Arg))
+ return Result;
+
+ // C++14 [temp.deduct.call] p4b3:
+ // If P is a class and P has the form simple-template-id, then the
+ // transformed A can be a derived class of the deduced A. Likewise if
+ // P is a pointer to a class of the form simple-template-id, the
+ // transformed A can be a pointer to a derived class pointed to by the
+ // deduced A.
+ //
+ // These alternatives are considered only if type deduction would
+ // otherwise fail. If they yield more than one possible deduced A, the
+ // type deduction fails.
+
+ // Reset the incorrectly deduced argument from above.
+ Deduced = DeducedOrig;
+
+ // Use data recursion to crawl through the list of base classes.
+ // Visited contains the set of nodes we have already visited, while
+ // ToVisit is our stack of records that we still need to visit.
+ llvm::SmallPtrSet<const RecordType *, 8> Visited;
+ SmallVector<const RecordType *, 8> ToVisit;
+ ToVisit.push_back(RecordT);
+ bool Successful = false;
+ SmallVector<DeducedTemplateArgument, 8> SuccessfulDeduced;
+ while (!ToVisit.empty()) {
+ // Retrieve the next class in the inheritance hierarchy.
+ const RecordType *NextT = ToVisit.pop_back_val();
+
+ // If we have already seen this type, skip it.
+ if (!Visited.insert(NextT).second)
+ continue;
+
+ // If this is a base class, try to perform template argument
+ // deduction from it.
+ if (NextT != RecordT) {
+ TemplateDeductionInfo BaseInfo(Info.getLocation());
+ Sema::TemplateDeductionResult BaseResult =
+ DeduceTemplateArguments(S, TemplateParams, SpecParam,
+ QualType(NextT, 0), BaseInfo, Deduced);
+
+ // If template argument deduction for this base was successful,
+ // note that we had some success. Otherwise, ignore any deductions
+ // from this base class.
+ if (BaseResult == Sema::TDK_Success) {
+ // If we've already seen some success, then deduction fails due to
+ // an ambiguity (temp.deduct.call p5).
+ if (Successful)
+ return Sema::TDK_MiscellaneousDeductionFailure;
+
+ Successful = true;
+ std::swap(SuccessfulDeduced, Deduced);
+
+ Info.Param = BaseInfo.Param;
+ Info.FirstArg = BaseInfo.FirstArg;
+ Info.SecondArg = BaseInfo.SecondArg;
}
- if (Successful)
- return Sema::TDK_Success;
+ Deduced = DeducedOrig;
}
+ // Visit base classes
+ CXXRecordDecl *Next = cast<CXXRecordDecl>(NextT->getDecl());
+ for (const auto &Base : Next->bases()) {
+ assert(Base.getType()->isRecordType() &&
+ "Base class that isn't a record?");
+ ToVisit.push_back(Base.getType()->getAs<RecordType>());
+ }
+ }
+
+ if (Successful) {
+ std::swap(SuccessfulDeduced, Deduced);
+ return Sema::TDK_Success;
}
return Result;
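A sketch of the ambiguity the restructured loop now detects, per
[temp.deduct.call]p5:

    template <typename T> struct B {};
    struct D : B<int>, B<char> {};

    template <typename T> void f(B<T>);

    // Deduction against the derived class succeeds for both B<int> and
    // B<char>; rather than silently keeping whichever base was visited
    // last, deduction now fails with TDK_MiscellaneousDeductionFailure.
    void g(D d) { f(d); }  // error: no matching function for call to 'f'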
@@ -1821,12 +1833,12 @@ static bool hasPackExpansionBeforeEnd(const TemplateArgument *Args,
}
static Sema::TemplateDeductionResult
-DeduceTemplateArguments(Sema &S,
- TemplateParameterList *TemplateParams,
+DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
const TemplateArgument *Params, unsigned NumParams,
const TemplateArgument *Args, unsigned NumArgs,
TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+ bool NumberOfArgumentsMustMatch) {
// C++0x [temp.deduct.type]p9:
// If the template argument list of P contains a pack expansion that is not
// the last template argument, the entire template argument list is a
@@ -1846,7 +1858,8 @@ DeduceTemplateArguments(Sema &S,
// Check whether we have enough arguments.
if (!hasTemplateArgumentForDeduction(Args, ArgIdx, NumArgs))
- return Sema::TDK_Success;
+ return NumberOfArgumentsMustMatch ? Sema::TDK_TooFewArguments
+ : Sema::TDK_Success;
if (Args[ArgIdx].isPackExpansion()) {
// FIXME: We follow the logic of C++0x [temp.deduct.type]p22 here,
@@ -1917,7 +1930,7 @@ DeduceTemplateArguments(Sema &S,
return DeduceTemplateArguments(S, TemplateParams,
ParamList.data(), ParamList.size(),
ArgList.data(), ArgList.size(),
- Info, Deduced);
+ Info, Deduced, false);
}
/// \brief Determine whether two template arguments are the same.
@@ -2060,11 +2073,45 @@ static bool
ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param,
DeducedTemplateArgument Arg,
NamedDecl *Template,
- QualType NTTPType,
- unsigned ArgumentPackIndex,
TemplateDeductionInfo &Info,
bool InFunctionTemplate,
SmallVectorImpl<TemplateArgument> &Output) {
+ // First, for a non-type template parameter type that is
+ // initialized by a declaration, we need the type of the
+ // corresponding non-type template parameter.
+ QualType NTTPType;
+ if (NonTypeTemplateParmDecl *NTTP =
+ dyn_cast<NonTypeTemplateParmDecl>(Param)) {
+ NTTPType = NTTP->getType();
+ if (NTTPType->isDependentType()) {
+ TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack, Output);
+ NTTPType = S.SubstType(NTTPType,
+ MultiLevelTemplateArgumentList(TemplateArgs),
+ NTTP->getLocation(),
+ NTTP->getDeclName());
+ if (NTTPType.isNull())
+ return true;
+ }
+ }
+
+ auto ConvertArg = [&](DeducedTemplateArgument Arg,
+ unsigned ArgumentPackIndex) {
+ // Convert the deduced template argument into a template
+ // argument that we can check, almost as if the user had written
+ // the template argument explicitly.
+ TemplateArgumentLoc ArgLoc =
+ getTrivialTemplateArgumentLoc(S, Arg, NTTPType, Info.getLocation());
+
+ // Check the template argument, converting it as necessary.
+ return S.CheckTemplateArgument(
+ Param, ArgLoc, Template, Template->getLocation(),
+ Template->getSourceRange().getEnd(), ArgumentPackIndex, Output,
+ InFunctionTemplate
+ ? (Arg.wasDeducedFromArrayBound() ? Sema::CTAK_DeducedFromArrayBound
+ : Sema::CTAK_Deduced)
+ : Sema::CTAK_Specified);
+ };
+
if (Arg.getKind() == TemplateArgument::Pack) {
// This is a template argument pack, so check each of its arguments against
// the template parameter.
@@ -2075,39 +2122,41 @@ ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param,
// checking logic has all of the prior template arguments available.
DeducedTemplateArgument InnerArg(P);
InnerArg.setDeducedFromArrayBound(Arg.wasDeducedFromArrayBound());
- if (ConvertDeducedTemplateArgument(S, Param, InnerArg, Template,
- NTTPType, PackedArgsBuilder.size(),
- Info, InFunctionTemplate, Output))
+ assert(InnerArg.getKind() != TemplateArgument::Pack &&
+ "deduced nested pack");
+ if (ConvertArg(InnerArg, PackedArgsBuilder.size()))
return true;
// Move the converted template argument into our argument pack.
PackedArgsBuilder.push_back(Output.pop_back_val());
}
+ // If the pack is empty, we still need to substitute into the parameter
+ // itself, in case that substitution fails. For non-type parameters, we did
+ // this above. For type parameters, no substitution is ever required.
+ auto *TTP = dyn_cast<TemplateTemplateParmDecl>(Param);
+ if (TTP && PackedArgsBuilder.empty()) {
+ // Set up a template instantiation context.
+ LocalInstantiationScope Scope(S);
+ Sema::InstantiatingTemplate Inst(S, Template->getLocation(), Template,
+ TTP, Output,
+ Template->getSourceRange());
+ if (Inst.isInvalid())
+ return true;
+
+ TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack, Output);
+ if (!S.SubstDecl(TTP, S.CurContext,
+ MultiLevelTemplateArgumentList(TemplateArgs)))
+ return true;
+ }
+
// Create the resulting argument pack.
Output.push_back(
TemplateArgument::CreatePackCopy(S.Context, PackedArgsBuilder));
return false;
}
- // Convert the deduced template argument into a template
- // argument that we can check, almost as if the user had written
- // the template argument explicitly.
- TemplateArgumentLoc ArgLoc = getTrivialTemplateArgumentLoc(S, Arg, NTTPType,
- Info.getLocation());
-
- // Check the template argument, converting it as necessary.
- return S.CheckTemplateArgument(Param, ArgLoc,
- Template,
- Template->getLocation(),
- Template->getSourceRange().getEnd(),
- ArgumentPackIndex,
- Output,
- InFunctionTemplate
- ? (Arg.wasDeducedFromArrayBound()
- ? Sema::CTAK_DeducedFromArrayBound
- : Sema::CTAK_Deduced)
- : Sema::CTAK_Specified);
+ return ConvertArg(Arg, 0);
}
/// Complete template argument deduction for a class template partial
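A hypothetical sketch of why an empty pack still requires substituting into
a template template parameter pack itself:

    template <typename T, template <T> class... TT>
    void f(T);

    void use() {
      // T deduces as double and TT deduces as an empty pack, but
      // substituting double into 'template <T> class' must still be
      // checked: a non-type template parameter cannot have floating-point
      // type, so deduction fails instead of silently succeeding.
      f(3.14);
    }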
@@ -2138,47 +2187,19 @@ FinishTemplateArgumentDeduction(Sema &S,
// We have deduced this argument, so it still needs to be
// checked and converted.
-
- // First, for a non-type template parameter type that is
- // initialized by a declaration, we need the type of the
- // corresponding non-type template parameter.
- QualType NTTPType;
- if (NonTypeTemplateParmDecl *NTTP
- = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
- NTTPType = NTTP->getType();
- if (NTTPType->isDependentType()) {
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Builder.data(), Builder.size());
- NTTPType = S.SubstType(NTTPType,
- MultiLevelTemplateArgumentList(TemplateArgs),
- NTTP->getLocation(),
- NTTP->getDeclName());
- if (NTTPType.isNull()) {
- Info.Param = makeTemplateParameter(Param);
- // FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(S.Context,
- Builder.data(),
- Builder.size()));
- return Sema::TDK_SubstitutionFailure;
- }
- }
- }
-
if (ConvertDeducedTemplateArgument(S, Param, Deduced[I],
- Partial, NTTPType, 0, Info, false,
+ Partial, Info, false,
Builder)) {
Info.Param = makeTemplateParameter(Param);
// FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(S.Context, Builder.data(),
- Builder.size()));
+ Info.reset(TemplateArgumentList::CreateCopy(S.Context, Builder));
return Sema::TDK_SubstitutionFailure;
}
}
// Form the template argument list from the deduced template arguments.
TemplateArgumentList *DeducedArgumentList
- = TemplateArgumentList::CreateCopy(S.Context, Builder.data(),
- Builder.size());
+ = TemplateArgumentList::CreateCopy(S.Context, Builder);
Info.reset(DeducedArgumentList);
@@ -2306,43 +2327,18 @@ static Sema::TemplateDeductionResult FinishTemplateArgumentDeduction(
// We have deduced this argument, so it still needs to be
// checked and converted.
-
- // First, for a non-type template parameter type that is
- // initialized by a declaration, we need the type of the
- // corresponding non-type template parameter.
- QualType NTTPType;
- if (NonTypeTemplateParmDecl *NTTP =
- dyn_cast<NonTypeTemplateParmDecl>(Param)) {
- NTTPType = NTTP->getType();
- if (NTTPType->isDependentType()) {
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Builder.data(), Builder.size());
- NTTPType =
- S.SubstType(NTTPType, MultiLevelTemplateArgumentList(TemplateArgs),
- NTTP->getLocation(), NTTP->getDeclName());
- if (NTTPType.isNull()) {
- Info.Param = makeTemplateParameter(Param);
- // FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(S.Context, Builder.data(),
- Builder.size()));
- return Sema::TDK_SubstitutionFailure;
- }
- }
- }
-
- if (ConvertDeducedTemplateArgument(S, Param, Deduced[I], Partial, NTTPType,
- 0, Info, false, Builder)) {
+ if (ConvertDeducedTemplateArgument(S, Param, Deduced[I], Partial,
+ Info, false, Builder)) {
Info.Param = makeTemplateParameter(Param);
// FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(S.Context, Builder.data(),
- Builder.size()));
+ Info.reset(TemplateArgumentList::CreateCopy(S.Context, Builder));
return Sema::TDK_SubstitutionFailure;
}
}
// Form the template argument list from the deduced template arguments.
TemplateArgumentList *DeducedArgumentList = TemplateArgumentList::CreateCopy(
- S.Context, Builder.data(), Builder.size());
+ S.Context, Builder);
Info.reset(DeducedArgumentList);
@@ -2488,7 +2484,7 @@ Sema::SubstituteExplicitTemplateArguments(
if (ExplicitTemplateArgs.size() == 0) {
// No arguments to substitute; just copy over the parameter types and
// fill in the function type.
- for (auto P : Function->params())
+ for (auto P : Function->parameters())
ParamTypes.push_back(P->getType());
if (FunctionType)
@@ -2533,7 +2529,7 @@ Sema::SubstituteExplicitTemplateArguments(
// Form the template argument list from the explicitly-specified
// template arguments.
TemplateArgumentList *ExplicitArgumentList
- = TemplateArgumentList::CreateCopy(Context, Builder.data(), Builder.size());
+ = TemplateArgumentList::CreateCopy(Context, Builder);
Info.reset(ExplicitArgumentList);
// Template argument deduction and the final substitution should be
@@ -2564,15 +2560,17 @@ Sema::SubstituteExplicitTemplateArguments(
// Isolate our substituted parameters from our caller.
LocalInstantiationScope InstScope(*this, /*MergeWithOuterScope*/true);
+ ExtParameterInfoBuilder ExtParamInfos;
+
// Instantiate the types of each of the function parameters given the
// explicitly-specified template arguments. If the function has a trailing
// return type, substitute it after the arguments to ensure we substitute
// in lexical order.
if (Proto->hasTrailingReturn()) {
- if (SubstParmTypes(Function->getLocation(),
- Function->param_begin(), Function->getNumParams(),
+ if (SubstParmTypes(Function->getLocation(), Function->parameters(),
+ Proto->getExtParameterInfosOrNull(),
MultiLevelTemplateArgumentList(*ExplicitArgumentList),
- ParamTypes))
+ ParamTypes, /*params*/ nullptr, ExtParamInfos))
return TDK_SubstitutionFailure;
}
@@ -2602,21 +2600,23 @@ Sema::SubstituteExplicitTemplateArguments(
if (ResultType.isNull() || Trap.hasErrorOccurred())
return TDK_SubstitutionFailure;
}
-
+
// Instantiate the types of each of the function parameters given the
// explicitly-specified template arguments if we didn't do so earlier.
if (!Proto->hasTrailingReturn() &&
- SubstParmTypes(Function->getLocation(),
- Function->param_begin(), Function->getNumParams(),
+ SubstParmTypes(Function->getLocation(), Function->parameters(),
+ Proto->getExtParameterInfosOrNull(),
MultiLevelTemplateArgumentList(*ExplicitArgumentList),
- ParamTypes))
+ ParamTypes, /*params*/ nullptr, ExtParamInfos))
return TDK_SubstitutionFailure;
if (FunctionType) {
+ auto EPI = Proto->getExtProtoInfo();
+ EPI.ExtParameterInfos = ExtParamInfos.getPointerOrNull(ParamTypes.size());
*FunctionType = BuildFunctionType(ResultType, ParamTypes,
Function->getLocation(),
Function->getDeclName(),
- Proto->getExtProtoInfo());
+ EPI);
if (FunctionType->isNull() || Trap.hasErrorOccurred())
return TDK_SubstitutionFailure;
}
@@ -2804,41 +2804,15 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
}
continue;
}
+
// We have deduced this argument, so it still needs to be
// checked and converted.
-
- // First, for a non-type template parameter type that is
- // initialized by a declaration, we need the type of the
- // corresponding non-type template parameter.
- QualType NTTPType;
- if (NonTypeTemplateParmDecl *NTTP
- = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
- NTTPType = NTTP->getType();
- if (NTTPType->isDependentType()) {
- TemplateArgumentList TemplateArgs(TemplateArgumentList::OnStack,
- Builder.data(), Builder.size());
- NTTPType = SubstType(NTTPType,
- MultiLevelTemplateArgumentList(TemplateArgs),
- NTTP->getLocation(),
- NTTP->getDeclName());
- if (NTTPType.isNull()) {
- Info.Param = makeTemplateParameter(Param);
- // FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(Context,
- Builder.data(),
- Builder.size()));
- return TDK_SubstitutionFailure;
- }
- }
- }
-
if (ConvertDeducedTemplateArgument(*this, Param, Deduced[I],
- FunctionTemplate, NTTPType, 0, Info,
+ FunctionTemplate, Info,
true, Builder)) {
Info.Param = makeTemplateParameter(Param);
// FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(Context, Builder.data(),
- Builder.size()));
+ Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
return TDK_SubstitutionFailure;
}
@@ -2862,11 +2836,21 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
Builder.push_back(TemplateArgument(
llvm::makeArrayRef(ExplicitArgs, NumExplicitArgs)));
- // Forget the partially-substituted pack; it's substitution is now
+ // Forget the partially-substituted pack; its substitution is now
// complete.
CurrentInstantiationScope->ResetPartiallySubstitutedPack();
} else {
- Builder.push_back(TemplateArgument::getEmptyPack());
+ // Go through the motions of checking the empty argument pack against
+ // the parameter pack.
+ DeducedTemplateArgument DeducedPack(TemplateArgument::getEmptyPack());
+ if (ConvertDeducedTemplateArgument(*this, Param, DeducedPack,
+ FunctionTemplate, Info, true,
+ Builder)) {
+ Info.Param = makeTemplateParameter(Param);
+ // FIXME: These template arguments are temporary. Free them!
+ Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
+ return TDK_SubstitutionFailure;
+ }
}
continue;
}
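
An empty pack previously bypassed checking entirely; now it takes the same conversion path as a non-empty one. The minimal case that reaches this code:

  template <typename... Ts> void f(Ts...);
  void g() { f(); }   // Ts deduced as the empty pack, still checked/converted
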
@@ -2884,8 +2868,7 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
if (DefArg.getArgument().isNull()) {
Info.Param = makeTemplateParameter(
const_cast<NamedDecl *>(TemplateParams->getParam(I)));
- Info.reset(TemplateArgumentList::CreateCopy(Context, Builder.data(),
- Builder.size()));
+ Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
if (PartialOverloading) break;
return HasDefaultArg ? TDK_SubstitutionFailure : TDK_Incomplete;
@@ -2901,8 +2884,7 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
Info.Param = makeTemplateParameter(
const_cast<NamedDecl *>(TemplateParams->getParam(I)));
// FIXME: These template arguments are temporary. Free them!
- Info.reset(TemplateArgumentList::CreateCopy(Context, Builder.data(),
- Builder.size()));
+ Info.reset(TemplateArgumentList::CreateCopy(Context, Builder));
return TDK_SubstitutionFailure;
}
@@ -2911,7 +2893,7 @@ Sema::FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
// Form the template argument list from the deduced template arguments.
TemplateArgumentList *DeducedArgumentList
- = TemplateArgumentList::CreateCopy(Context, Builder.data(), Builder.size());
+ = TemplateArgumentList::CreateCopy(Context, Builder);
Info.reset(DeducedArgumentList);
// Substitute the deduced template arguments into the function template
@@ -3036,6 +3018,11 @@ ResolveOverloadForDeduction(Sema &S, TemplateParameterList *TemplateParams,
return GetTypeOfFunction(S, R, ExplicitSpec);
}
+ DeclAccessPair DAP;
+ if (FunctionDecl *Viable =
+ S.resolveAddressOfOnlyViableOverloadCandidate(Arg, DAP))
+ return GetTypeOfFunction(S, R, Viable);
+
return QualType();
}
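
resolveAddressOfOnlyViableOverloadCandidate lets deduction see through an overload set when exactly one candidate survives, e.g. when the others are disabled by enable_if; a sketch:

  void h(int) __attribute__((enable_if(false, "never viable")));
  void h(long);
  template <typename T> void take(T);
  void use() { take(&h); }   // only h(long) is viable, so T = void (*)(long)
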
@@ -4609,11 +4596,9 @@ Sema::getMoreSpecializedPartialSpecialization(
TemplateName Name(PS1->getSpecializedTemplate());
TemplateName CanonTemplate = Context.getCanonicalTemplateName(Name);
QualType PT1 = Context.getTemplateSpecializationType(
- CanonTemplate, PS1->getTemplateArgs().data(),
- PS1->getTemplateArgs().size());
+ CanonTemplate, PS1->getTemplateArgs().asArray());
QualType PT2 = Context.getTemplateSpecializationType(
- CanonTemplate, PS2->getTemplateArgs().data(),
- PS2->getTemplateArgs().size());
+ CanonTemplate, PS2->getTemplateArgs().asArray());
// Determine whether PS1 is at least as specialized as PS2
Deduced.resize(PS2->getTemplateParameters()->size());
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp
index fb7fc109d2e9..65a5633bf0d5 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -21,6 +21,7 @@
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
+#include "clang/Sema/PrettyDeclStackTrace.h"
#include "clang/Sema/Template.h"
#include "clang/Sema/TemplateDeduction.h"
@@ -224,6 +225,10 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
Inst.NumTemplateArgs = TemplateArgs.size();
Inst.DeductionInfo = DeductionInfo;
Inst.InstantiationRange = InstantiationRange;
+ AlreadyInstantiating =
+ !SemaRef.InstantiatingSpecializations
+ .insert(std::make_pair(Inst.Entity->getCanonicalDecl(), Inst.Kind))
+ .second;
SemaRef.InNonInstantiationSFINAEContext = false;
SemaRef.ActiveTemplateInstantiations.push_back(Inst);
if (!Inst.isInstantiationRecord())
@@ -246,13 +251,14 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
PointOfInstantiation, InstantiationRange, Entity) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
- Sema &SemaRef, SourceLocation PointOfInstantiation, TemplateDecl *Template,
- ArrayRef<TemplateArgument> TemplateArgs, SourceRange InstantiationRange)
+ Sema &SemaRef, SourceLocation PointOfInstantiation, TemplateParameter Param,
+ TemplateDecl *Template, ArrayRef<TemplateArgument> TemplateArgs,
+ SourceRange InstantiationRange)
: InstantiatingTemplate(
SemaRef,
ActiveTemplateInstantiation::DefaultTemplateArgumentInstantiation,
- PointOfInstantiation, InstantiationRange, Template, nullptr,
- TemplateArgs) {}
+ PointOfInstantiation, InstantiationRange, getAsNamedDecl(Param),
+ Template, TemplateArgs) {}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
@@ -262,7 +268,11 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
sema::TemplateDeductionInfo &DeductionInfo, SourceRange InstantiationRange)
: InstantiatingTemplate(SemaRef, Kind, PointOfInstantiation,
InstantiationRange, FunctionTemplate, nullptr,
- TemplateArgs, &DeductionInfo) {}
+ TemplateArgs, &DeductionInfo) {
+ assert(
+ Kind == ActiveTemplateInstantiation::ExplicitTemplateArgumentSubstitution ||
+ Kind == ActiveTemplateInstantiation::DeducedTemplateArgumentSubstitution);
+}
Sema::InstantiatingTemplate::InstantiatingTemplate(
Sema &SemaRef, SourceLocation PointOfInstantiation,
@@ -326,7 +336,8 @@ Sema::InstantiatingTemplate::InstantiatingTemplate(
void Sema::InstantiatingTemplate::Clear() {
if (!Invalid) {
- if (!SemaRef.ActiveTemplateInstantiations.back().isInstantiationRecord()) {
+ auto &Active = SemaRef.ActiveTemplateInstantiations.back();
+ if (!Active.isInstantiationRecord()) {
assert(SemaRef.NonInstantiationEntries > 0);
--SemaRef.NonInstantiationEntries;
}
@@ -344,6 +355,10 @@ void Sema::InstantiatingTemplate::Clear() {
SemaRef.ActiveTemplateInstantiationLookupModules.pop_back();
}
+ if (!AlreadyInstantiating)
+ SemaRef.InstantiatingSpecializations.erase(
+ std::make_pair(Active.Entity, Active.Kind));
+
SemaRef.ActiveTemplateInstantiations.pop_back();
Invalid = true;
}
@@ -442,14 +457,12 @@ void Sema::PrintInstantiationStack() {
}
case ActiveTemplateInstantiation::DefaultTemplateArgumentInstantiation: {
- TemplateDecl *Template = cast<TemplateDecl>(Active->Entity);
+ TemplateDecl *Template = cast<TemplateDecl>(Active->Template);
SmallVector<char, 128> TemplateArgsStr;
llvm::raw_svector_ostream OS(TemplateArgsStr);
Template->printName(OS);
- TemplateSpecializationType::PrintTemplateArgumentList(OS,
- Active->TemplateArgs,
- Active->NumTemplateArgs,
- getPrintingPolicy());
+ TemplateSpecializationType::PrintTemplateArgumentList(
+ OS, Active->template_arguments(), getPrintingPolicy());
Diags.Report(Active->PointOfInstantiation,
diag::note_default_arg_instantiation_here)
<< OS.str()
@@ -500,10 +513,8 @@ void Sema::PrintInstantiationStack() {
SmallVector<char, 128> TemplateArgsStr;
llvm::raw_svector_ostream OS(TemplateArgsStr);
FD->printName(OS);
- TemplateSpecializationType::PrintTemplateArgumentList(OS,
- Active->TemplateArgs,
- Active->NumTemplateArgs,
- getPrintingPolicy());
+ TemplateSpecializationType::PrintTemplateArgumentList(
+ OS, Active->template_arguments(), getPrintingPolicy());
Diags.Report(Active->PointOfInstantiation,
diag::note_default_function_arg_instantiation_here)
<< OS.str()
@@ -729,6 +740,11 @@ namespace {
}
SemaRef.CurrentInstantiationScope->InstantiatedLocal(Old, New);
+
+ // We recreated a local declaration, but not by instantiating it. There
+ // may be pending dependent diagnostics to produce.
+ if (auto *DC = dyn_cast<DeclContext>(Old))
+ SemaRef.PerformDependentDiagnostics(DC, TemplateArgs);
}
/// \brief Transform the definition of the given declaration by
@@ -1512,7 +1528,7 @@ QualType Sema::SubstType(QualType T,
}
static bool NeedsInstantiationAsFunctionType(TypeSourceInfo *T) {
- if (T->getType()->isInstantiationDependentType() ||
+ if (T->getType()->isInstantiationDependentType() ||
T->getType()->isVariablyModifiedType())
return true;
@@ -1521,23 +1537,13 @@ static bool NeedsInstantiationAsFunctionType(TypeSourceInfo *T) {
return false;
FunctionProtoTypeLoc FP = TL.castAs<FunctionProtoTypeLoc>();
- for (unsigned I = 0, E = FP.getNumParams(); I != E; ++I) {
- ParmVarDecl *P = FP.getParam(I);
-
+ for (ParmVarDecl *P : FP.getParams()) {
// This must be synthesized from a typedef.
if (!P) continue;
- // The parameter's type as written might be dependent even if the
- // decayed type was not dependent.
- if (TypeSourceInfo *TSInfo = P->getTypeSourceInfo())
- if (TSInfo->getType()->isInstantiationDependentType())
- return true;
-
- // TODO: currently we always rebuild expressions. When we
- // properly get lazier about this, we should use the same
- // logic to avoid rebuilding prototypes here.
- if (P->hasDefaultArg())
- return true;
+ // If there are any parameters, a new TypeSourceInfo that refers to the
+ // instantiated parameters must be built.
+ return true;
}
return false;
@@ -1556,7 +1562,7 @@ TypeSourceInfo *Sema::SubstFunctionDeclType(TypeSourceInfo *T,
assert(!ActiveTemplateInstantiations.empty() &&
"Cannot perform an instantiation without some context on the "
"instantiation stack");
-
+
if (!NeedsInstantiationAsFunctionType(T))
return T;
@@ -1718,20 +1724,21 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
/// \brief Substitute the given template arguments into the given set of
/// parameters, producing the set of parameter types that would be generated
/// from such a substitution.
-bool Sema::SubstParmTypes(SourceLocation Loc,
- ParmVarDecl **Params, unsigned NumParams,
- const MultiLevelTemplateArgumentList &TemplateArgs,
- SmallVectorImpl<QualType> &ParamTypes,
- SmallVectorImpl<ParmVarDecl *> *OutParams) {
+bool Sema::SubstParmTypes(
+ SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
+ const FunctionProtoType::ExtParameterInfo *ExtParamInfos,
+ const MultiLevelTemplateArgumentList &TemplateArgs,
+ SmallVectorImpl<QualType> &ParamTypes,
+ SmallVectorImpl<ParmVarDecl *> *OutParams,
+ ExtParameterInfoBuilder &ParamInfos) {
assert(!ActiveTemplateInstantiations.empty() &&
"Cannot perform an instantiation without some context on the "
"instantiation stack");
TemplateInstantiator Instantiator(*this, TemplateArgs, Loc,
DeclarationName());
- return Instantiator.TransformFunctionTypeParams(Loc, Params, NumParams,
- nullptr, ParamTypes,
- OutParams);
+ return Instantiator.TransformFunctionTypeParams(
+ Loc, Params, nullptr, ExtParamInfos, ParamTypes, OutParams, ParamInfos);
}
/// \brief Perform substitution on the base class specifiers of the
@@ -1861,8 +1868,19 @@ static bool DiagnoseUninstantiableTemplate(Sema &S,
TagDecl *PatternDef,
TemplateSpecializationKind TSK,
bool Complain = true) {
- if (PatternDef && !PatternDef->isBeingDefined())
+ if (PatternDef && !PatternDef->isBeingDefined()) {
+ NamedDecl *SuggestedDef = nullptr;
+ if (!S.hasVisibleDefinition(PatternDef, &SuggestedDef,
+ /*OnlyNeedComplete*/false)) {
+ // If we're allowed to diagnose this and recover, do so.
+ bool Recover = Complain && !S.isSFINAEContext();
+ if (Complain)
+ S.diagnoseMissingImport(PointOfInstantiation, SuggestedDef,
+ Sema::MissingImportKind::Definition, Recover);
+ return !Recover;
+ }
return false;
+ }
if (!Complain || (PatternDef && PatternDef->isInvalidDecl())) {
// Say nothing
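
The visibility check matters for C++ modules builds: the pattern may have a definition that is simply not visible in this translation unit, which deserves a missing-import diagnostic rather than a bogus incomplete-type error. Assumed scenario:

  // In module M (not imported here):
  //   template <typename T> struct Box { T value; };
  // In this TU, only a declaration of Box is visible:
  Box<int> b;   // now: diagnoseMissingImport pointing at M's definition
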
@@ -1946,6 +1964,9 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
InstantiatingTemplate Inst(*this, PointOfInstantiation, Instantiation);
if (Inst.isInvalid())
return true;
+ assert(!Inst.isAlreadyInstantiating() && "should have been caught by caller");
+ PrettyDeclStackTraceEntry CrashInfo(*this, Instantiation, SourceLocation(),
+ "instantiating class definition");
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
@@ -1959,6 +1980,13 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
bool MergeWithParentScope = !Instantiation->isDefinedOutsideFunctionOrMethod();
LocalInstantiationScope Scope(*this, MergeWithParentScope);
+ // All dllexported classes created during instantiation should be fully
+ // emitted after instantiation completes. We may not be ready to emit any
+ // delayed classes already on the stack, so save them away and put them back
+ // later.
+ decltype(DelayedDllExportClasses) ExportedClasses;
+ std::swap(ExportedClasses, DelayedDllExportClasses);
+
// Pull attributes from the pattern onto the instantiation.
InstantiateAttrs(TemplateArgs, Pattern, Instantiation);
@@ -2044,6 +2072,9 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
// default arg exprs for default constructors if necessary now.
ActOnFinishCXXNonNestedClass(Instantiation);
+ // Put back the delayed exported classes that we moved out of the way.
+ std::swap(ExportedClasses, DelayedDllExportClasses);
+
// Instantiate late parsed attributes, and attach them to their decls.
// See Sema::InstantiateAttrs
for (LateInstantiatedAttrVec::iterator I = LateAttrs.begin(),
@@ -2074,7 +2105,7 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation,
if (TSK == TSK_ImplicitInstantiation) {
Instantiation->setLocation(Pattern->getLocation());
Instantiation->setLocStart(Pattern->getInnerLocStart());
- Instantiation->setRBraceLoc(Pattern->getRBraceLoc());
+ Instantiation->setBraceRange(Pattern->getBraceRange());
}
if (!Instantiation->isInvalidDecl()) {
@@ -2159,6 +2190,10 @@ bool Sema::InstantiateEnum(SourceLocation PointOfInstantiation,
InstantiatingTemplate Inst(*this, PointOfInstantiation, Instantiation);
if (Inst.isInvalid())
return true;
+ if (Inst.isAlreadyInstantiating())
+ return false;
+ PrettyDeclStackTraceEntry CrashInfo(*this, Instantiation, SourceLocation(),
+ "instantiating enum definition");
// The instantiation is visible here, even if it was first declared in an
// unimported module.
@@ -2231,6 +2266,14 @@ bool Sema::InstantiateInClassInitializer(
InstantiatingTemplate Inst(*this, PointOfInstantiation, Instantiation);
if (Inst.isInvalid())
return true;
+ if (Inst.isAlreadyInstantiating()) {
+ // Error out if we hit an instantiation cycle for this initializer.
+ Diag(PointOfInstantiation, diag::err_in_class_initializer_cycle)
+ << Instantiation;
+ return true;
+ }
+ PrettyDeclStackTraceEntry CrashInfo(*this, Instantiation, SourceLocation(),
+ "instantiating default member init");
// Enter the scope of this instantiation. We don't use PushDeclContext because
// we don't have a scope.
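
A default member initializer can require its own instantiation; the new isAlreadyInstantiating check turns that from unbounded recursion into a diagnostic. The classic cycle:

  template <typename T> struct A { int x = A().x; };
  int k = A<int>().x;   // instantiating x's initializer needs x's initializer
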
@@ -2302,8 +2345,9 @@ bool Sema::InstantiateClassTemplateSpecialization(
Info)) {
// Store the failed-deduction information for use in diagnostics, later.
// TODO: Actually use the failed-deduction info?
- FailedCandidates.addCandidate()
- .set(Partial, MakeDeductionFailureInfo(Context, Result, Info));
+ FailedCandidates.addCandidate().set(
+ DeclAccessPair::make(Template, AS_public), Partial,
+ MakeDeductionFailureInfo(Context, Result, Info));
(void)Result;
} else {
Matched.push_back(PartialSpecMatchResult());
@@ -2495,8 +2539,7 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation,
// specialization and is only an explicit instantiation definition
// of members whose definition is visible at the point of
// instantiation.
- if (!Var->getInstantiatedFromStaticDataMember()
- ->getOutOfLineDefinition())
+ if (!Var->getInstantiatedFromStaticDataMember()->getDefinition())
continue;
Var->setTemplateSpecializationKind(TSK, PointOfInstantiation);
@@ -2522,6 +2565,13 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation,
== TSK_ExplicitSpecialization)
continue;
+ if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+ TSK == TSK_ExplicitInstantiationDeclaration) {
+ // In MSVC mode, explicit instantiation decl of the outer class doesn't
+ // affect the inner class.
+ continue;
+ }
+
if (CheckSpecializationInstantiationRedecl(PointOfInstantiation, TSK,
Record,
MSInfo->getTemplateSpecializationKind(),
@@ -2583,7 +2633,7 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation,
if (Enum->getDefinition())
continue;
- EnumDecl *Pattern = Enum->getInstantiatedFromMemberEnum();
+ EnumDecl *Pattern = Enum->getTemplateInstantiationPattern();
assert(Pattern && "Missing instantiated-from-template information");
if (TSK == TSK_ExplicitInstantiationDefinition) {
@@ -2603,8 +2653,7 @@ Sema::InstantiateClassMembers(SourceLocation PointOfInstantiation,
Instantiation->getTemplateInstantiationPattern();
DeclContext::lookup_result Lookup =
ClassPattern->lookup(Field->getDeclName());
- assert(Lookup.size() == 1);
- FieldDecl *Pattern = cast<FieldDecl>(Lookup[0]);
+ FieldDecl *Pattern = cast<FieldDecl>(Lookup.front());
InstantiateInClassInitializer(PointOfInstantiation, Field, Pattern,
TemplateArgs);
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 7a452af77839..dd3748fb5337 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -227,6 +227,86 @@ static void instantiateDependentCUDALaunchBoundsAttr(
Attr.getSpellingListIndex());
}
+static void
+instantiateDependentModeAttr(Sema &S,
+ const MultiLevelTemplateArgumentList &TemplateArgs,
+ const ModeAttr &Attr, Decl *New) {
+ S.AddModeAttr(Attr.getRange(), New, Attr.getMode(),
+ Attr.getSpellingListIndex(), /*InInstantiation=*/true);
+}
+
+/// Instantiation of 'declare simd' attribute and its arguments.
+static void instantiateOMPDeclareSimdDeclAttr(
+ Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
+ const OMPDeclareSimdDeclAttr &Attr, Decl *New) {
+ // Allow 'this' in clauses with varlists.
+ if (auto *FTD = dyn_cast<FunctionTemplateDecl>(New))
+ New = FTD->getTemplatedDecl();
+ auto *FD = cast<FunctionDecl>(New);
+ auto *ThisContext = dyn_cast_or_null<CXXRecordDecl>(FD->getDeclContext());
+ SmallVector<Expr *, 4> Uniforms, Aligneds, Alignments, Linears, Steps;
+ SmallVector<unsigned, 4> LinModifiers;
+
+ auto &&Subst = [&](Expr *E) -> ExprResult {
+ if (auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()))
+ if (auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
+ Sema::ContextRAII SavedContext(S, FD);
+ LocalInstantiationScope Local(S);
+ if (FD->getNumParams() > PVD->getFunctionScopeIndex())
+ Local.InstantiatedLocal(
+ PVD, FD->getParamDecl(PVD->getFunctionScopeIndex()));
+ return S.SubstExpr(E, TemplateArgs);
+ }
+ Sema::CXXThisScopeRAII ThisScope(S, ThisContext, /*TypeQuals=*/0,
+ FD->isCXXInstanceMember());
+ return S.SubstExpr(E, TemplateArgs);
+ };
+
+ ExprResult Simdlen;
+ if (auto *E = Attr.getSimdlen())
+ Simdlen = Subst(E);
+
+ if (Attr.uniforms_size() > 0) {
+    for (auto *E : Attr.uniforms()) {
+ ExprResult Inst = Subst(E);
+ if (Inst.isInvalid())
+ continue;
+ Uniforms.push_back(Inst.get());
+ }
+ }
+
+ auto AI = Attr.alignments_begin();
+ for (auto *E : Attr.aligneds()) {
+ ExprResult Inst = Subst(E);
+ if (Inst.isInvalid())
+ continue;
+ Aligneds.push_back(Inst.get());
+ Inst = ExprEmpty();
+ if (*AI)
+ Inst = S.SubstExpr(*AI, TemplateArgs);
+ Alignments.push_back(Inst.get());
+ ++AI;
+ }
+
+ auto SI = Attr.steps_begin();
+ for (auto *E : Attr.linears()) {
+ ExprResult Inst = Subst(E);
+ if (Inst.isInvalid())
+ continue;
+ Linears.push_back(Inst.get());
+ Inst = ExprEmpty();
+ if (*SI)
+ Inst = S.SubstExpr(*SI, TemplateArgs);
+ Steps.push_back(Inst.get());
+ ++SI;
+ }
+ LinModifiers.append(Attr.modifiers_begin(), Attr.modifiers_end());
+ (void)S.ActOnOpenMPDeclareSimdDirective(
+ S.ConvertDeclToDeclGroup(New), Attr.getBranchState(), Simdlen.get(),
+ Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps,
+ Attr.getRange());
+}
+
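
The clauses rebuilt here come from a 'declare simd' written on a template; each clause expression is re-substituted against the instantiation's arguments. For example (sketch):

  #pragma omp declare simd simdlen(8) uniform(p) linear(i : 1) aligned(p : 16)
  template <typename T> void axpy(T *p, int i);
  // Instantiating axpy<float> substitutes into the uniform/linear/aligned
  // variable lists and the simdlen expression via the Subst lambda above.
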
void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
const Decl *Tmpl, Decl *New,
LateInstantiatedAttrVec *LateAttrs,
@@ -265,6 +345,16 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}
+ if (const ModeAttr *Mode = dyn_cast<ModeAttr>(TmplAttr)) {
+ instantiateDependentModeAttr(*this, TemplateArgs, *Mode, New);
+ continue;
+ }
+
+ if (const auto *OMPAttr = dyn_cast<OMPDeclareSimdDeclAttr>(TmplAttr)) {
+ instantiateOMPDeclareSimdDeclAttr(*this, TemplateArgs, *OMPAttr, New);
+ continue;
+ }
+
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
@@ -273,6 +363,20 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
}
}
+ if (auto ABIAttr = dyn_cast<ParameterABIAttr>(TmplAttr)) {
+ AddParameterABIAttr(ABIAttr->getRange(), New, ABIAttr->getABI(),
+ ABIAttr->getSpellingListIndex());
+ continue;
+ }
+
+ if (isa<NSConsumedAttr>(TmplAttr) || isa<CFConsumedAttr>(TmplAttr)) {
+ AddNSConsumedAttr(TmplAttr->getRange(), New,
+ TmplAttr->getSpellingListIndex(),
+ isa<NSConsumedAttr>(TmplAttr),
+ /*template instantiation*/ true);
+ continue;
+ }
+
assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
@@ -321,6 +425,16 @@ TemplateDeclInstantiator::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
}
Decl *
+TemplateDeclInstantiator::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
+ llvm_unreachable("pragma comment cannot be instantiated");
+}
+
+Decl *TemplateDeclInstantiator::VisitPragmaDetectMismatchDecl(
+ PragmaDetectMismatchDecl *D) {
+  llvm_unreachable("pragma detect_mismatch cannot be instantiated");
+}
+
+Decl *
TemplateDeclInstantiator::VisitExternCContextDecl(ExternCContextDecl *D) {
llvm_unreachable("extern \"C\" context cannot be instantiated");
}
@@ -491,13 +605,6 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D) {
Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
bool InstantiatingVarTemplate) {
- // If this is the variable for an anonymous struct or union,
- // instantiate the anonymous struct/union type first.
- if (const RecordType *RecordTy = D->getType()->getAs<RecordType>())
- if (RecordTy->getDecl()->isAnonymousStructOrUnion())
- if (!VisitCXXRecordDecl(cast<CXXRecordDecl>(RecordTy->getDecl())))
- return nullptr;
-
// Do substitution on the type of the declaration
TypeSourceInfo *DI = SemaRef.SubstType(D->getTypeSourceInfo(),
TemplateArgs,
@@ -696,7 +803,7 @@ Decl *TemplateDeclInstantiator::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
QualType T = cast<FieldDecl>(NamedChain[i-1])->getType();
IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create(
SemaRef.Context, Owner, D->getLocation(), D->getIdentifier(), T,
- NamedChain, D->getChainingSize());
+ {NamedChain, D->getChainingSize()});
for (const auto *Attr : D->attrs())
IndirectField->addAttr(Attr->clone(SemaRef.Context));
@@ -911,9 +1018,7 @@ void TemplateDeclInstantiator::InstantiateEnumDefinition(
}
}
- // FIXME: Fixup LBraceLoc
- SemaRef.ActOnEnumBody(Enum->getLocation(), SourceLocation(),
- Enum->getRBraceLoc(), Enum,
+ SemaRef.ActOnEnumBody(Enum->getLocation(), Enum->getBraceRange(), Enum,
Enumerators,
nullptr, nullptr);
}
@@ -1499,8 +1604,7 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D,
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Function->setFunctionTemplateSpecialization(FunctionTemplate,
TemplateArgumentList::CreateCopy(SemaRef.Context,
- Innermost.begin(),
- Innermost.size()),
+ Innermost),
/*InsertPos=*/nullptr);
} else if (isFriend) {
// Note, we need this connection even if the friend doesn't have a body.
@@ -1736,36 +1840,6 @@ TemplateDeclInstantiator::VisitCXXMethodDecl(CXXMethodDecl *D,
Constructor->isExplicit(),
Constructor->isInlineSpecified(),
false, Constructor->isConstexpr());
-
- // Claim that the instantiation of a constructor or constructor template
- // inherits the same constructor that the template does.
- if (CXXConstructorDecl *Inh = const_cast<CXXConstructorDecl *>(
- Constructor->getInheritedConstructor())) {
- // If we're instantiating a specialization of a function template, our
- // "inherited constructor" will actually itself be a function template.
- // Instantiate a declaration of it, too.
- if (FunctionTemplate) {
- assert(!TemplateParams && Inh->getDescribedFunctionTemplate() &&
- !Inh->getParent()->isDependentContext() &&
- "inheriting constructor template in dependent context?");
- Sema::InstantiatingTemplate Inst(SemaRef, Constructor->getLocation(),
- Inh);
- if (Inst.isInvalid())
- return nullptr;
- Sema::ContextRAII SavedContext(SemaRef, Inh->getDeclContext());
- LocalInstantiationScope LocalScope(SemaRef);
-
- // Use the same template arguments that we deduced for the inheriting
- // constructor. There's no way they could be deduced differently.
- MultiLevelTemplateArgumentList InheritedArgs;
- InheritedArgs.addOuterTemplateArguments(TemplateArgs.getInnermost());
- Inh = cast_or_null<CXXConstructorDecl>(
- SemaRef.SubstDecl(Inh, Inh->getDeclContext(), InheritedArgs));
- if (!Inh)
- return nullptr;
- }
- cast<CXXConstructorDecl>(Method)->setInheritedConstructor(Inh);
- }
} else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(D)) {
Method = CXXDestructorDecl::Create(SemaRef.Context, Record,
StartLoc, NameInfo, T, TInfo,
@@ -1821,8 +1895,7 @@ TemplateDeclInstantiator::VisitCXXMethodDecl(CXXMethodDecl *D,
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
Method->setFunctionTemplateSpecialization(FunctionTemplate,
TemplateArgumentList::CreateCopy(SemaRef.Context,
- Innermost.begin(),
- Innermost.size()),
+ Innermost),
/*InsertPos=*/nullptr);
} else if (!isFriend) {
// Record that this is an instantiation of a member function.
@@ -2080,16 +2153,11 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
NonTypeTemplateParmDecl *Param;
if (IsExpandedParameterPack)
- Param = NonTypeTemplateParmDecl::Create(SemaRef.Context, Owner,
- D->getInnerLocStart(),
- D->getLocation(),
- D->getDepth() - TemplateArgs.getNumLevels(),
- D->getPosition(),
- D->getIdentifier(), T,
- DI,
- ExpandedParameterPackTypes.data(),
- ExpandedParameterPackTypes.size(),
- ExpandedParameterPackTypesAsWritten.data());
+ Param = NonTypeTemplateParmDecl::Create(
+ SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
+ D->getDepth() - TemplateArgs.getNumLevels(), D->getPosition(),
+ D->getIdentifier(), T, DI, ExpandedParameterPackTypes,
+ ExpandedParameterPackTypesAsWritten);
else
Param = NonTypeTemplateParmDecl::Create(SemaRef.Context, Owner,
D->getInnerLocStart(),
@@ -2104,6 +2172,8 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl(
Param->setInvalidDecl();
if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) {
+ EnterExpressionEvaluationContext ConstantEvaluated(SemaRef,
+ Sema::ConstantEvaluated);
ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs);
if (!Value.isInvalid())
Param->setDefaultArgument(Value.get());
@@ -2289,9 +2359,14 @@ Decl *TemplateDeclInstantiator::VisitUsingDecl(UsingDecl *D) {
if (!QualifierLoc)
return nullptr;
- // The name info is non-dependent, so no transformation
- // is required.
+ // For an inheriting constructor declaration, the name of the using
+ // declaration is the name of a constructor in this class, not in the
+ // base class.
DeclarationNameInfo NameInfo = D->getNameInfo();
+ if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
+ if (auto *RD = dyn_cast<CXXRecordDecl>(SemaRef.CurContext))
+ NameInfo.setName(SemaRef.Context.DeclarationNames.getCXXConstructorName(
+ SemaRef.Context.getCanonicalType(SemaRef.Context.getRecordType(RD))));
// We only need to do redeclaration lookups if we're in a class
// scope (in fact, it's not really even possible in non-class
@@ -2334,18 +2409,23 @@ Decl *TemplateDeclInstantiator::VisitUsingDecl(UsingDecl *D) {
if (NewUD->isInvalidDecl())
return NewUD;
- if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName) {
+ if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
SemaRef.CheckInheritingConstructorUsingDecl(NewUD);
- return NewUD;
- }
bool isFunctionScope = Owner->isFunctionOrMethod();
// Process the shadow decls.
for (auto *Shadow : D->shadows()) {
+ // FIXME: UsingShadowDecl doesn't preserve its immediate target, so
+ // reconstruct it in the case where it matters.
+ NamedDecl *OldTarget = Shadow->getTargetDecl();
+ if (auto *CUSD = dyn_cast<ConstructorUsingShadowDecl>(Shadow))
+ if (auto *BaseShadow = CUSD->getNominatedBaseClassShadowDecl())
+ OldTarget = BaseShadow;
+
NamedDecl *InstTarget =
cast_or_null<NamedDecl>(SemaRef.FindInstantiatedDecl(
- Shadow->getLocation(), Shadow->getTargetDecl(), TemplateArgs));
+ Shadow->getLocation(), OldTarget, TemplateArgs));
if (!InstTarget)
return nullptr;
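
Both halves of this change serve inheriting constructors: the using-declaration must name the derived class's own constructors, and each shadow's original base-class target must be recovered before it can be mapped into the instantiation. The driving case:

  struct X { X(int); };
  template <typename B> struct D : B {
    using B::B;   // in D<X>, names D's constructors, targets X(int)
  };
  D<X> d(42);
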
@@ -2376,6 +2456,12 @@ Decl *TemplateDeclInstantiator::VisitUsingShadowDecl(UsingShadowDecl *D) {
return nullptr;
}
+Decl *TemplateDeclInstantiator::VisitConstructorUsingShadowDecl(
+ ConstructorUsingShadowDecl *D) {
+ // Ignore these; we handle them in bulk when processing the UsingDecl.
+ return nullptr;
+}
+
Decl * TemplateDeclInstantiator
::VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D) {
NestedNameSpecifierLoc QualifierLoc
@@ -2477,6 +2563,86 @@ Decl *TemplateDeclInstantiator::VisitOMPThreadPrivateDecl(
return TD;
}
+Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl(
+ OMPDeclareReductionDecl *D) {
+ // Instantiate type and check if it is allowed.
+ QualType SubstReductionType = SemaRef.ActOnOpenMPDeclareReductionType(
+ D->getLocation(),
+ ParsedType::make(SemaRef.SubstType(D->getType(), TemplateArgs,
+ D->getLocation(), DeclarationName())));
+ if (SubstReductionType.isNull())
+ return nullptr;
+ bool IsCorrect = !SubstReductionType.isNull();
+ // Create instantiated copy.
+ std::pair<QualType, SourceLocation> ReductionTypes[] = {
+ std::make_pair(SubstReductionType, D->getLocation())};
+ auto *PrevDeclInScope = D->getPrevDeclInScope();
+ if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) {
+ PrevDeclInScope = cast<OMPDeclareReductionDecl>(
+ SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope)
+ ->get<Decl *>());
+ }
+ auto DRD = SemaRef.ActOnOpenMPDeclareReductionDirectiveStart(
+ /*S=*/nullptr, Owner, D->getDeclName(), ReductionTypes, D->getAccess(),
+ PrevDeclInScope);
+ auto *NewDRD = cast<OMPDeclareReductionDecl>(DRD.get().getSingleDecl());
+ if (isDeclWithinFunction(NewDRD))
+ SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD);
+ Expr *SubstCombiner = nullptr;
+ Expr *SubstInitializer = nullptr;
+  // Combiner instantiation sequence.
+ if (D->getCombiner()) {
+ SemaRef.ActOnOpenMPDeclareReductionCombinerStart(
+ /*S=*/nullptr, NewDRD);
+ const char *Names[] = {"omp_in", "omp_out"};
+ for (auto &Name : Names) {
+ DeclarationName DN(&SemaRef.Context.Idents.get(Name));
+ auto OldLookup = D->lookup(DN);
+ auto Lookup = NewDRD->lookup(DN);
+ if (!OldLookup.empty() && !Lookup.empty()) {
+ assert(Lookup.size() == 1 && OldLookup.size() == 1);
+ SemaRef.CurrentInstantiationScope->InstantiatedLocal(OldLookup.front(),
+ Lookup.front());
+ }
+ }
+ SubstCombiner = SemaRef.SubstExpr(D->getCombiner(), TemplateArgs).get();
+ SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner);
+    // Initializer instantiation sequence.
+ if (D->getInitializer()) {
+ SemaRef.ActOnOpenMPDeclareReductionInitializerStart(
+ /*S=*/nullptr, NewDRD);
+ const char *Names[] = {"omp_orig", "omp_priv"};
+ for (auto &Name : Names) {
+ DeclarationName DN(&SemaRef.Context.Idents.get(Name));
+ auto OldLookup = D->lookup(DN);
+ auto Lookup = NewDRD->lookup(DN);
+ if (!OldLookup.empty() && !Lookup.empty()) {
+ assert(Lookup.size() == 1 && OldLookup.size() == 1);
+ SemaRef.CurrentInstantiationScope->InstantiatedLocal(
+ OldLookup.front(), Lookup.front());
+ }
+ }
+ SubstInitializer =
+ SemaRef.SubstExpr(D->getInitializer(), TemplateArgs).get();
+ SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD,
+ SubstInitializer);
+ }
+ IsCorrect = IsCorrect && SubstCombiner &&
+ (!D->getInitializer() || SubstInitializer);
+ } else
+ IsCorrect = false;
+
+ (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd(/*S=*/nullptr, DRD,
+ IsCorrect);
+
+ return NewDRD;
+}
+
+Decl *TemplateDeclInstantiator::VisitOMPCapturedExprDecl(
+ OMPCapturedExprDecl * /*D*/) {
+  llvm_unreachable("Should not be encountered in templates");
+}
+
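
The name-based remapping above (omp_in/omp_out, omp_orig/omp_priv) is what lets a templated user-defined reduction be re-substituted; a sketch of the source form driving this path:

  template <class T> struct S {
  #pragma omp declare reduction(merge : T : omp_out = omp_out + omp_in) \
      initializer(omp_priv = T())
    // Instantiating S<int> substitutes T into the combiner and initializer,
    // pairing each special variable in the pattern with its instantiated
    // counterpart by name.
  };
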
Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D) {
return VisitFunctionDecl(D, nullptr);
}
@@ -2580,8 +2746,7 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
D->getLocStart(),
D->getLocation(),
InstClassTemplate,
- Converted.data(),
- Converted.size(),
+ Converted,
PrevDecl);
// Add this partial specialization to the set of class template partial
@@ -2596,7 +2761,7 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
// Build the canonical type that describes the converted template
// arguments of the class template explicit specialization.
QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
- TemplateName(InstClassTemplate), Converted.data(), Converted.size(),
+ TemplateName(InstClassTemplate), Converted,
SemaRef.Context.getRecordType(InstD));
// Build the fully-sugared type for this class template
@@ -2673,13 +2838,6 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
const TemplateArgumentListInfo &TemplateArgsInfo,
ArrayRef<TemplateArgument> Converted) {
- // If this is the variable for an anonymous struct or union,
- // instantiate the anonymous struct/union type first.
- if (const RecordType *RecordTy = D->getType()->getAs<RecordType>())
- if (RecordTy->getDecl()->isAnonymousStructOrUnion())
- if (!VisitCXXRecordDecl(cast<CXXRecordDecl>(RecordTy->getDecl())))
- return nullptr;
-
// Do substitution on the type of the declaration
TypeSourceInfo *DI =
SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs,
@@ -2696,8 +2854,7 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
// Build the instantiated declaration
VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create(
SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(),
- VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted.data(),
- Converted.size());
+ VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted);
Var->setTemplateArgsInfo(TemplateArgsInfo);
if (InsertPos)
VarTemplate->AddSpecialization(Var, InsertPos);
@@ -2830,8 +2987,7 @@ TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
// arguments of the class template partial specialization.
QualType CanonType
= SemaRef.Context.getTemplateSpecializationType(TemplateName(ClassTemplate),
- Converted.data(),
- Converted.size());
+ Converted);
// Build the fully-sugared type for this class template
// specialization as the user wrote in the specialization
@@ -2880,8 +3036,7 @@ TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
PartialSpec->getLocation(),
InstParams,
ClassTemplate,
- Converted.data(),
- Converted.size(),
+ Converted,
InstTemplateArgs,
CanonType,
nullptr);
@@ -2953,7 +3108,7 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
// Build the canonical type that describes the converted template
// arguments of the variable template partial specialization.
QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
- TemplateName(VarTemplate), Converted.data(), Converted.size());
+ TemplateName(VarTemplate), Converted);
// Build the fully-sugared type for this variable template
// specialization as the user wrote in the specialization
@@ -3009,8 +3164,7 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
VarTemplatePartialSpecializationDecl::Create(
SemaRef.Context, Owner, PartialSpec->getInnerLocStart(),
PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(),
- DI, PartialSpec->getStorageClass(), Converted.data(),
- Converted.size(), InstTemplateArgs);
+ DI, PartialSpec->getStorageClass(), Converted, InstTemplateArgs);
// Substitute the nested name specifier, if any.
if (SubstQualifier(PartialSpec, InstPartialSpec))
@@ -3118,9 +3272,10 @@ TemplateDeclInstantiator::SubstFunctionType(FunctionDecl *D,
// In this case, we'll just go instantiate the ParmVarDecls that we
// synthesized in the method declaration.
SmallVector<QualType, 4> ParamTypes;
- if (SemaRef.SubstParmTypes(D->getLocation(), D->param_begin(),
- D->getNumParams(), TemplateArgs, ParamTypes,
- &Params))
+ Sema::ExtParameterInfoBuilder ExtParamInfos;
+ if (SemaRef.SubstParmTypes(D->getLocation(), D->parameters(), nullptr,
+ TemplateArgs, ParamTypes, &Params,
+ ExtParamInfos))
return nullptr;
}
@@ -3205,6 +3360,13 @@ void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation,
UpdateExceptionSpec(Decl, EST_None);
return;
}
+ if (Inst.isAlreadyInstantiating()) {
+ // This exception specification indirectly depends on itself. Reject.
+ // FIXME: Corresponding rule in the standard?
+ Diag(PointOfInstantiation, diag::err_exception_spec_cycle) << Decl;
+ UpdateExceptionSpec(Decl, EST_None);
+ return;
+ }
// Enter the scope of this instantiation. We don't use
// PushDeclContext because we don't have a scope.
@@ -3347,7 +3509,8 @@ TemplateDeclInstantiator::InitMethodInstantiation(CXXMethodDecl *New,
void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
FunctionDecl *Function,
bool Recursive,
- bool DefinitionRequired) {
+ bool DefinitionRequired,
+ bool AtEndOfTU) {
if (Function->isInvalidDecl() || Function->isDefined())
return;
@@ -3401,6 +3564,10 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
Pattern = PatternDecl->getBody(PatternDecl);
}
+ // FIXME: Check that the definition is visible before trying to instantiate
+ // it. This requires us to track the instantiation stack in order to know
+ // which definitions should be visible.
+
if (!Pattern && !PatternDecl->isDefaulted()) {
if (DefinitionRequired) {
if (Function->getPrimaryTemplate())
@@ -3421,6 +3588,16 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
assert(!Recursive);
PendingInstantiations.push_back(
std::make_pair(Function, PointOfInstantiation));
+ } else if (Function->getTemplateSpecializationKind()
+ == TSK_ImplicitInstantiation) {
+ if (AtEndOfTU && !getDiagnostics().hasErrorOccurred()) {
+ Diag(PointOfInstantiation, diag::warn_func_template_missing)
+ << Function;
+ Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
+ if (getLangOpts().CPlusPlus11)
+ Diag(PointOfInstantiation, diag::note_inst_declaration_hint)
+ << Function;
+ }
}
return;
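
The new diagnostic fires at end of TU for an implicit instantiation whose pattern never received a body; e.g.:

  template <typename T> void f();   // declared but never defined
  void g() { f<int>(); }
  // At end of TU: warn_func_template_missing, plus (in C++11 mode) a hint
  // to add an explicit instantiation declaration if f<int> is explicitly
  // instantiated in another translation unit.
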
@@ -3449,8 +3626,10 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation,
}
InstantiatingTemplate Inst(*this, PointOfInstantiation, Function);
- if (Inst.isInvalid())
+ if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
+ PrettyDeclStackTraceEntry CrashInfo(*this, Function, SourceLocation(),
+ "instantiating function definition");
// Copy the inner loc start from the pattern.
Function->setInnerLocStart(PatternDecl->getInnerLocStart());
@@ -3681,11 +3860,12 @@ void Sema::BuildVariableInstantiation(
Context.setManglingNumber(NewVar, Context.getManglingNumber(OldVar));
Context.setStaticLocalNumber(NewVar, Context.getStaticLocalNumber(OldVar));
- // Delay instantiation of the initializer for variable templates until a
- // definition of the variable is needed. We need it right away if the type
- // contains 'auto'.
+ // Delay instantiation of the initializer for variable templates or inline
+ // static data members until a definition of the variable is needed. We need
+ // it right away if the type contains 'auto'.
if ((!isa<VarTemplateSpecializationDecl>(NewVar) &&
- !InstantiatingVarTemplate) ||
+ !InstantiatingVarTemplate &&
+ !(OldVar->isInline() && OldVar->isThisDeclarationADefinition())) ||
NewVar->getType()->isUndeducedType())
InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs);
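
The extra condition carves out inline static data members defined in-class (C++1z inline variables), whose initializers should, like variable templates, be instantiated only on demand; a sketch ('init' is a hypothetical function):

  template <typename T> struct S {
    static inline T cache = init<T>();   // not instantiated until needed
  };
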
@@ -3701,10 +3881,13 @@ void Sema::BuildVariableInstantiation(
void Sema::InstantiateVariableInitializer(
VarDecl *Var, VarDecl *OldVar,
const MultiLevelTemplateArgumentList &TemplateArgs) {
-
- if (Var->getAnyInitializer())
- // We already have an initializer in the class.
- return;
+ // We propagate the 'inline' flag with the initializer, because it
+ // would otherwise imply that the variable is a definition for a
+ // non-static data member.
+ if (OldVar->isInlineSpecified())
+ Var->setInlineSpecified();
+ else if (OldVar->isInline())
+ Var->setImplicitlyInline();
if (OldVar->getInit()) {
if (Var->isStaticDataMember() && !OldVar->isOutOfLine())
@@ -3713,9 +3896,14 @@ void Sema::InstantiateVariableInitializer(
PushExpressionEvaluationContext(Sema::PotentiallyEvaluated, OldVar);
// Instantiate the initializer.
- ExprResult Init =
- SubstInitializer(OldVar->getInit(), TemplateArgs,
- OldVar->getInitStyle() == VarDecl::CallInit);
+ ExprResult Init;
+
+ {
+ ContextRAII SwitchContext(*this, Var->getDeclContext());
+ Init = SubstInitializer(OldVar->getInit(), TemplateArgs,
+ OldVar->getInitStyle() == VarDecl::CallInit);
+ }
+
if (!Init.isInvalid()) {
bool TypeMayContainAuto = true;
Expr *InitExpr = Init.get();
@@ -3736,9 +3924,23 @@ void Sema::InstantiateVariableInitializer(
}
PopExpressionEvaluationContext();
- } else if ((!Var->isStaticDataMember() || Var->isOutOfLine()) &&
- !Var->isCXXForRangeDecl())
+ } else {
+ if (Var->isStaticDataMember()) {
+ if (!Var->isOutOfLine())
+ return;
+
+ // If the declaration inside the class had an initializer, don't add
+ // another one to the out-of-line definition.
+ if (OldVar->getFirstDecl()->hasInit())
+ return;
+ }
+
+ // We'll add an initializer to a for-range declaration later.
+ if (Var->isCXXForRangeDecl())
+ return;
+
ActOnUninitializedDecl(Var, false);
+ }
}
/// \brief Instantiate the definition of the given variable from its
@@ -3768,7 +3970,7 @@ void Sema::InstantiateStaticDataMemberDefinition(
void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
VarDecl *Var, bool Recursive,
- bool DefinitionRequired) {
+ bool DefinitionRequired, bool AtEndOfTU) {
if (Var->isInvalidDecl())
return;
@@ -3828,8 +4030,10 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
// FIXME: Factor out the duplicated instantiation context setup/tear down
// code here.
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
- if (Inst.isInvalid())
+ if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
+ PrettyDeclStackTraceEntry CrashInfo(*this, Var, SourceLocation(),
+ "instantiating variable initializer");
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate
@@ -3876,9 +4080,13 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
assert(PatternDecl && "data member was not instantiated from a template?");
assert(PatternDecl->isStaticDataMember() && "not a static data member?");
- Def = PatternDecl->getOutOfLineDefinition();
+ Def = PatternDecl->getDefinition();
}
+ // FIXME: Check that the definition is visible before trying to instantiate
+ // it. This requires us to track the instantiation stack in order to know
+ // which definitions should be visible.
+
// If we don't have a definition of the variable template, we won't perform
// any instantiation. Rather, we rely on the user to instantiate this
// definition (or provide a specialization for it) in another translation
@@ -3900,6 +4108,16 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
== TSK_ExplicitInstantiationDefinition) {
PendingInstantiations.push_back(
std::make_pair(Var, PointOfInstantiation));
+ } else if (Var->getTemplateSpecializationKind()
+ == TSK_ImplicitInstantiation) {
+ // Warn about missing definition at the end of translation unit.
+ if (AtEndOfTU && !getDiagnostics().hasErrorOccurred()) {
+ Diag(PointOfInstantiation, diag::warn_var_template_missing)
+ << Var;
+ Diag(PatternDecl->getLocation(), diag::note_forward_template_decl);
+ if (getLangOpts().CPlusPlus11)
+ Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Var;
+ }
}
return;
@@ -3941,8 +4159,10 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
}
InstantiatingTemplate Inst(*this, PointOfInstantiation, Var);
- if (Inst.isInvalid())
+ if (Inst.isInvalid() || Inst.isAlreadyInstantiating())
return;
+ PrettyDeclStackTraceEntry CrashInfo(*this, Var, SourceLocation(),
+ "instantiating variable definition");
// If we're performing recursive template instantiation, create our own
// queue of pending implicit instantiations that we will instantiate later,
@@ -3958,11 +4178,16 @@ void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation,
LocalInstantiationScope Local(*this);
VarDecl *OldVar = Var;
- if (!VarSpec)
+ if (Def->isStaticDataMember() && !Def->isOutOfLine()) {
+ // We're instantiating an inline static data member whose definition was
+ // provided inside the class.
+ // FIXME: Update record?
+ InstantiateVariableInitializer(Var, Def, TemplateArgs);
+ } else if (!VarSpec) {
Var = cast_or_null<VarDecl>(SubstDecl(Def, Var->getDeclContext(),
TemplateArgs));
- else if (Var->isStaticDataMember() &&
- Var->getLexicalDeclContext()->isRecord()) {
+ } else if (Var->isStaticDataMember() &&
+ Var->getLexicalDeclContext()->isRecord()) {
// We need to instantiate the definition of a static data member template,
// and all we have is the in-class declaration of it. Instantiate a separate
// declaration of the definition.
@@ -4664,12 +4889,10 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) {
// Instantiate function definitions
if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
- PrettyDeclStackTraceEntry CrashInfo(*this, Function, SourceLocation(),
- "instantiating function definition");
bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
TSK_ExplicitInstantiationDefinition;
InstantiateFunctionDefinition(/*FIXME:*/Inst.second, Function, true,
- DefinitionRequired);
+ DefinitionRequired, true);
continue;
}
@@ -4710,7 +4933,7 @@ void Sema::PerformPendingInstantiations(bool LocalOnly) {
// Instantiate static data member definitions or variable template
// specializations.
InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true,
- DefinitionRequired);
+ DefinitionRequired, true);
}
}
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp
index cb67d71f9e59..06afe87f515e 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -604,7 +604,7 @@ bool Sema::CheckParameterPacksForExpansion(
// Template argument deduction can extend the sequence of template
// arguments corresponding to a template parameter pack, even when the
// sequence contains explicitly specified template arguments.
- if (!IsFunctionParameterPack) {
+ if (!IsFunctionParameterPack && CurrentInstantiationScope) {
if (NamedDecl *PartialPack
= CurrentInstantiationScope->getPartiallySubstitutedPack()){
unsigned PartialDepth, PartialIndex;
@@ -727,6 +727,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) {
case TST_half:
case TST_float:
case TST_double:
+ case TST_float128:
case TST_bool:
case TST_decimal32:
case TST_decimal64:
@@ -739,6 +740,8 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) {
case TST_auto:
case TST_auto_type:
case TST_decltype_auto:
+#define GENERIC_IMAGE_TYPE(ImgType, Id) case TST_##ImgType##_t:
+#include "clang/Basic/OpenCLImageTypes.def"
case TST_unknown_anytype:
case TST_error:
break;
@@ -996,10 +999,6 @@ ExprResult Sema::BuildEmptyCXXFoldExpr(SourceLocation EllipsisLoc,
BinaryOperatorKind Operator) {
// [temp.variadic]p9:
// If N is zero for a unary fold-expression, the value of the expression is
- // * -> 1
- // + -> int()
- // & -> -1
- // | -> int()
// && -> true
// || -> false
// , -> void()
@@ -1009,17 +1008,6 @@ ExprResult Sema::BuildEmptyCXXFoldExpr(SourceLocation EllipsisLoc,
// prevent the result from being a null pointer constant.
QualType ScalarType;
switch (Operator) {
- case BO_Add:
- ScalarType = Context.IntTy;
- break;
- case BO_Mul:
- return ActOnIntegerConstant(EllipsisLoc, 1);
- case BO_Or:
- ScalarType = Context.IntTy;
- break;
- case BO_And:
- return CreateBuiltinUnaryOp(EllipsisLoc, UO_Minus,
- ActOnIntegerConstant(EllipsisLoc, 1).get());
case BO_LOr:
return ActOnCXXBoolLiteral(EllipsisLoc, tok::kw_false);
case BO_LAnd:
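
This tracks the adopted form of P0036R0: only &&, ||, and the comma operator have a value for an empty pack. Consequently:

  template <typename... Ts> bool all(Ts... xs) { return (xs && ...); }
  template <typename... Ts> auto sum(Ts... xs) { return (xs + ...); }
  bool b = all();    // OK: an empty && fold yields true
  // auto s = sum(); // now ill-formed: '+' no longer has an empty-pack value
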
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp
index f6ad132cde83..f3747eaa5cb5 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaType.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Sema/SemaInternal.h"
#include "TypeLocBuilder.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
@@ -22,7 +21,6 @@
#include "clang/AST/Expr.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
-#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
@@ -30,9 +28,11 @@
#include "clang/Sema/DelayedDiagnostic.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
@@ -100,20 +100,27 @@ static void diagnoseBadTypeAttribute(Sema &S, const AttributeList &attr,
case AttributeList::AT_ObjCGC: \
case AttributeList::AT_ObjCOwnership
-// Function type attributes.
-#define FUNCTION_TYPE_ATTRS_CASELIST \
- case AttributeList::AT_NoReturn: \
+// Calling convention attributes.
+#define CALLING_CONV_ATTRS_CASELIST \
case AttributeList::AT_CDecl: \
case AttributeList::AT_FastCall: \
case AttributeList::AT_StdCall: \
case AttributeList::AT_ThisCall: \
case AttributeList::AT_Pascal: \
+ case AttributeList::AT_SwiftCall: \
case AttributeList::AT_VectorCall: \
case AttributeList::AT_MSABI: \
case AttributeList::AT_SysVABI: \
- case AttributeList::AT_Regparm: \
case AttributeList::AT_Pcs: \
- case AttributeList::AT_IntelOclBicc
+ case AttributeList::AT_IntelOclBicc: \
+ case AttributeList::AT_PreserveMost: \
+ case AttributeList::AT_PreserveAll
+
+// Function type attributes.
+#define FUNCTION_TYPE_ATTRS_CASELIST \
+ case AttributeList::AT_NoReturn: \
+ case AttributeList::AT_Regparm: \
+ CALLING_CONV_ATTRS_CASELIST
// Microsoft-specific type qualifiers.
#define MS_TYPE_ATTRS_CASELIST \
@@ -239,7 +246,7 @@ namespace {
savedAttrs.back()->setNext(nullptr);
}
};
-}
+} // end anonymous namespace
static void spliceAttrIntoList(AttributeList &attr, AttributeList *&head) {
attr.setNext(head);
@@ -727,6 +734,7 @@ static void diagnoseAndRemoveTypeQualifiers(Sema &S, const DeclSpec &DS,
// it; they probably didn't mean to specify a redundant qualifier.
typedef std::pair<DeclSpec::TQ, SourceLocation> QualLoc;
for (QualLoc Qual : {QualLoc(DeclSpec::TQ_const, DS.getConstSpecLoc()),
+ QualLoc(DeclSpec::TQ_restrict, DS.getRestrictSpecLoc()),
QualLoc(DeclSpec::TQ_volatile, DS.getVolatileSpecLoc()),
QualLoc(DeclSpec::TQ_atomic, DS.getAtomicSpecLoc())}) {
if (!(RemoveTQs & Qual.first))
@@ -743,6 +751,47 @@ static void diagnoseAndRemoveTypeQualifiers(Sema &S, const DeclSpec &DS,
}
}
+/// Return true if this is an omitted block return type. Also check type
+/// attributes and type qualifiers when returning true.
+static bool checkOmittedBlockReturnType(Sema &S, Declarator &declarator,
+ QualType Result) {
+ if (!isOmittedBlockReturnType(declarator))
+ return false;
+
+ // Warn if we see type attributes for omitted return type on a block literal.
+ AttributeList *&attrs =
+ declarator.getMutableDeclSpec().getAttributes().getListRef();
+ AttributeList *prev = nullptr;
+ for (AttributeList *cur = attrs; cur; cur = cur->getNext()) {
+ AttributeList &attr = *cur;
+ // Skip attributes that were marked to be invalid or non-type
+ // attributes.
+ if (attr.isInvalid() || !attr.isTypeAttr()) {
+ prev = cur;
+ continue;
+ }
+ S.Diag(attr.getLoc(),
+ diag::warn_block_literal_attributes_on_omitted_return_type)
+ << attr.getName();
+    // Remove cur from the list; prev stays on the last kept attribute so
+    // consecutive removals unlink correctly.
+    if (prev)
+      prev->setNext(cur->getNext());
+    else
+      attrs = cur->getNext();
+ }
+
+ // Warn if we see type qualifiers for omitted return type on a block literal.
+ const DeclSpec &DS = declarator.getDeclSpec();
+ unsigned TypeQuals = DS.getTypeQualifiers();
+ diagnoseAndRemoveTypeQualifiers(S, DS, TypeQuals, Result, (unsigned)-1,
+ diag::warn_block_literal_qualifiers_on_omitted_return_type);
+ declarator.getMutableDeclSpec().ClearTypeQualifiers();
+
+ return true;
+}
+
/// Apply Objective-C type arguments to the given type.
static QualType applyObjCTypeArgs(Sema &S, SourceLocation loc, QualType type,
ArrayRef<TypeSourceInfo *> typeArgs,
@@ -1171,6 +1220,21 @@ TypeResult Sema::actOnObjCTypeArgsAndProtocolQualifiers(
return CreateParsedType(Result, ResultTInfo);
}
+static StringRef getImageAccessAttrStr(AttributeList *attrs) {
+ if (attrs) {
+    AttributeList *Next;
+    do {
+      AttributeList &Attr = *attrs;
+      Next = Attr.getNext();
+      if (Attr.getKind() == AttributeList::AT_OpenCLAccess) {
+        return Attr.getName()->getName();
+      }
+      // Advance to the next attribute; otherwise the loop never terminates
+      // when the access attribute is absent.
+      attrs = Next;
+    } while (Next);
+ }
+ return "";
+}
+
/// \brief Convert the specified declspec to the appropriate type
/// object.
/// \param state Specifies the declarator containing the declaration specifier
@@ -1244,7 +1308,8 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
Result = Context.getAutoDeductType();
break;
} else if (declarator.getContext() == Declarator::LambdaExprContext ||
- isOmittedBlockReturnType(declarator)) {
+ checkOmittedBlockReturnType(S, declarator,
+ Context.DependentTy)) {
Result = Context.DependentTy;
break;
}
@@ -1332,7 +1397,8 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
}
case DeclSpec::TST_int128:
if (!S.Context.getTargetInfo().hasInt128Type())
- S.Diag(DS.getTypeSpecTypeLoc(), diag::err_int128_unsupported);
+ S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
+ << "__int128";
if (DS.getTypeSpecSign() == DeclSpec::TSS_unsigned)
Result = Context.UnsignedInt128Ty;
else
@@ -1354,7 +1420,14 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
declarator.setInvalidType(true);
}
break;
+ case DeclSpec::TST_float128:
+ if (!S.Context.getTargetInfo().hasFloat128Type())
+ S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
+ << "__float128";
+ Result = Context.Float128Ty;
+ break;
case DeclSpec::TST_bool: Result = Context.BoolTy; break; // _Bool or bool
case DeclSpec::TST_decimal32: // _Decimal32
case DeclSpec::TST_decimal64: // _Decimal64
case DeclSpec::TST_decimal128: // _Decimal128
@@ -1423,9 +1496,18 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
declarator.setInvalidType(true);
}
} else if (!S.getOpenCLOptions().cl_khr_gl_msaa_sharing &&
- (Result->isImage2dMSAAT() || Result->isImage2dArrayMSAAT() ||
- Result->isImage2dArrayMSAATDepth() ||
- Result->isImage2dMSAATDepth())) {
+ (Result->isOCLImage2dArrayMSAADepthROType() ||
+ Result->isOCLImage2dArrayMSAADepthWOType() ||
+ Result->isOCLImage2dArrayMSAADepthRWType() ||
+ Result->isOCLImage2dArrayMSAAROType() ||
+ Result->isOCLImage2dArrayMSAARWType() ||
+ Result->isOCLImage2dArrayMSAAWOType() ||
+ Result->isOCLImage2dMSAADepthROType() ||
+ Result->isOCLImage2dMSAADepthRWType() ||
+ Result->isOCLImage2dMSAADepthWOType() ||
+ Result->isOCLImage2dMSAAROType() ||
+ Result->isOCLImage2dMSAARWType() ||
+ Result->isOCLImage2dMSAAWOType())) {
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_requires_extension)
<< Result << "cl_khr_gl_msaa_sharing";
declarator.setInvalidType(true);
@@ -1539,6 +1621,16 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
}
break;
+#define GENERIC_IMAGE_TYPE(ImgType, Id) \
+ case DeclSpec::TST_##ImgType##_t: \
+ Result = llvm::StringSwitch<QualType>( \
+ getImageAccessAttrStr(DS.getAttributes().getList())) \
+ .Cases("write_only", "__write_only", Context.Id##WOTy) \
+ .Cases("read_write", "__read_write", Context.Id##RWTy) \
+ .Default(Context.Id##ROTy); \
+ break;
+#include "clang/Basic/OpenCLImageTypes.def"
+
case DeclSpec::TST_error:
Result = Context.IntTy;
declarator.setInvalidType(true);
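The GENERIC_IMAGE_TYPE expansion above selects among three access-qualified image types by string-matching the qualifier spelling. A sketch of that dispatch, using plain comparisons in place of llvm::StringSwitch so it runs without LLVM headers; ImageAccess is an invented stand-in for the Context.Id##ROTy/WOTy/RWTy types:

#include <iostream>
#include <string>

enum class ImageAccess { ReadOnly, WriteOnly, ReadWrite };

static ImageAccess classify(const std::string &Qual) {
  if (Qual == "write_only" || Qual == "__write_only")
    return ImageAccess::WriteOnly;
  if (Qual == "read_write" || Qual == "__read_write")
    return ImageAccess::ReadWrite;
  return ImageAccess::ReadOnly;   // default, matching .Default(...ROTy)
}

int main() {
  std::cout << (classify("__read_write") == ImageAccess::ReadWrite) << "\n"; // 1
  std::cout << (classify("") == ImageAccess::ReadOnly) << "\n";              // 1
}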
@@ -1693,12 +1785,13 @@ QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
}
QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
- unsigned CVRA, const DeclSpec *DS) {
+ unsigned CVRAU, const DeclSpec *DS) {
if (T.isNull())
return QualType();
- // Convert from DeclSpec::TQ to Qualifiers::TQ by just dropping TQ_atomic.
- unsigned CVR = CVRA & ~DeclSpec::TQ_atomic;
+ // Convert from DeclSpec::TQ to Qualifiers::TQ by just dropping TQ_atomic and
+  // TQ_unaligned.
+ unsigned CVR = CVRAU & ~(DeclSpec::TQ_atomic | DeclSpec::TQ_unaligned);
// C11 6.7.3/5:
// If the same qualifier appears more than once in the same
@@ -1708,7 +1801,7 @@ QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
// It's not specified what happens when the _Atomic qualifier is applied to
// a type specified with the _Atomic specifier, but we assume that this
// should be treated as if the _Atomic qualifier appeared multiple times.
- if (CVRA & DeclSpec::TQ_atomic && !T->isAtomicType()) {
+ if (CVRAU & DeclSpec::TQ_atomic && !T->isAtomicType()) {
// C11 6.7.3/5:
// If other qualifiers appear along with the _Atomic qualifier in a
// specifier-qualifier-list, the resulting type is the so-qualified
@@ -1725,7 +1818,9 @@ QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
return BuildQualifiedType(T, Loc, Split.Quals);
}
- return BuildQualifiedType(T, Loc, Qualifiers::fromCVRMask(CVR), DS);
+ Qualifiers Q = Qualifiers::fromCVRMask(CVR);
+ Q.setUnaligned(CVRAU & DeclSpec::TQ_unaligned);
+ return BuildQualifiedType(T, Loc, Q, DS);
}
/// \brief Build a paren type including \p T.
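The qualifier-mask arithmetic in BuildQualifiedType is compact, so a worked example may help: the CVR bits pass through unchanged, while TQ_atomic and TQ_unaligned are stripped from the mask and handled separately. The TQ_* constants below restate DeclSpec::TQ for the sketch and are assumptions, not clang's guaranteed bit assignments:

#include <cassert>

enum TQ {
  TQ_const = 1, TQ_restrict = 2, TQ_volatile = 4,
  TQ_atomic = 8, TQ_unaligned = 16
};

int main() {
  unsigned CVRAU = TQ_const | TQ_atomic | TQ_unaligned;
  unsigned CVR = CVRAU & ~(TQ_atomic | TQ_unaligned);
  assert(CVR == TQ_const);                  // only the CVR bits remain
  bool Unaligned = CVRAU & TQ_unaligned;    // fed to Q.setUnaligned(...)
  bool Atomic = CVRAU & TQ_atomic;          // triggers the _Atomic path
  assert(Unaligned && Atomic);
}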
@@ -1821,7 +1916,7 @@ namespace {
///
/// The values of this enum are used in diagnostics.
enum QualifiedFunctionKind { QFK_BlockPointer, QFK_Pointer, QFK_Reference };
-}
+} // end anonymous namespace
/// Check whether the type T is a qualified function type, and if it is,
/// diagnose that it cannot be contained within the given kind of declarator.
@@ -1968,10 +2063,10 @@ static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) {
} Diagnoser;
return S.VerifyIntegerConstantExpression(ArraySize, &SizeVal, Diagnoser,
- S.LangOpts.GNUMode).isInvalid();
+ S.LangOpts.GNUMode ||
+ S.LangOpts.OpenCL).isInvalid();
}
-
/// \brief Build an array type.
///
/// \param T The type of each element in the array.
@@ -2150,15 +2245,8 @@ QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM,
// If this is not C99, extwarn about VLA's and C99 array size modifiers.
if (!getLangOpts().C99) {
if (T->isVariableArrayType()) {
- // Prohibit the use of non-POD types in VLAs.
- QualType BaseT = Context.getBaseElementType(T);
- if (!T->isDependentType() && isCompleteType(Loc, BaseT) &&
- !BaseT.isPODType(Context) && !BaseT->isObjCLifetimeType()) {
- Diag(Loc, diag::err_vla_non_pod) << BaseT;
- return QualType();
- }
// Prohibit the use of VLAs during template argument deduction.
- else if (isSFINAEContext()) {
+ if (isSFINAEContext()) {
Diag(Loc, diag::err_vla_in_sfinae);
return QualType();
}
@@ -2176,6 +2264,18 @@ QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM,
Diag(Loc, diag::warn_vla_used);
}
+ // OpenCL v2.0 s6.12.5 - Arrays of blocks are not supported.
+ // OpenCL v2.0 s6.16.13.1 - Arrays of pipe type are not supported.
+ // OpenCL v2.0 s6.9.b - Arrays of image/sampler type are not supported.
+ if (getLangOpts().OpenCL) {
+ const QualType ArrType = Context.getBaseElementType(T);
+ if (ArrType->isBlockPointerType() || ArrType->isPipeType() ||
+ ArrType->isSamplerT() || ArrType->isImageType()) {
+ Diag(Loc, diag::err_opencl_invalid_type_array) << ArrType;
+ return QualType();
+ }
+ }
+
return T;
}
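The check above rejects arrays whose ultimate element type is an image, sampler, pipe, or block pointer, no matter how deeply the arrays nest. A toy restatement of the base-element walk; the Type struct is an invented stand-in for clang's type nodes:

#include <cassert>

enum Kind { K_Int, K_Image, K_Sampler, K_Pipe, K_BlockPointer, K_Array };

struct Type {
  Kind K;
  const Type *Element = nullptr;   // set when K == K_Array
};

static const Type *getBaseElementType(const Type *T) {
  while (T->K == K_Array)          // peel nested arrays
    T = T->Element;
  return T;
}

static bool isInvalidOpenCLArray(const Type *T) {
  Kind K = getBaseElementType(T)->K;
  return K == K_Image || K == K_Sampler || K == K_Pipe || K == K_BlockPointer;
}

int main() {
  Type Img{K_Image};
  Type Arr{K_Array, &Img};
  Type Arr2{K_Array, &Arr};        // e.g. image2d_t a[4][4];
  assert(isInvalidOpenCLArray(&Arr2));
  Type I{K_Int};
  Type OkArr{K_Array, &I};
  assert(!isInvalidOpenCLArray(&OkArr));
}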
@@ -2184,10 +2284,16 @@ QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM,
/// Run the required checks for the extended vector type.
QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize,
SourceLocation AttrLoc) {
- // unlike gcc's vector_size attribute, we do not allow vectors to be defined
+ // Unlike gcc's vector_size attribute, we do not allow vectors to be defined
// in conjunction with complex types (pointers, arrays, functions, etc.).
- if (!T->isDependentType() &&
- !T->isIntegerType() && !T->isRealFloatingType()) {
+ //
+ // Additionally, OpenCL prohibits vectors of booleans (they're considered a
+ // reserved data type under OpenCL v2.0 s6.1.4), we don't support selects
+ // on bitvectors, and we have no well-defined ABI for bitvectors, so vectors
+ // of bool aren't allowed.
+ if ((!T->isDependentType() && !T->isIntegerType() &&
+ !T->isRealFloatingType()) ||
+ T->isBooleanType()) {
Diag(AttrLoc, diag::err_attribute_invalid_vector_type) << T;
return QualType();
}
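From the source side, the new restriction behaves as follows under clang (illustrative only; the rejected line is left commented out so the file compiles):

// ext_vector_type of integer or floating element types is accepted, while
// bool elements hit err_attribute_invalid_vector_type.
typedef float float4 __attribute__((ext_vector_type(4)));   // OK
typedef int   int4   __attribute__((ext_vector_type(4)));   // OK
// typedef bool bool4 __attribute__((ext_vector_type(4)));  // error: invalid
// vector element type 'bool'

int main() {
  float4 v = {1, 2, 3, 4};
  return (int)v.x - 1;   // ext vectors support .x/.y/.z/.w element access
}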
@@ -2201,7 +2307,7 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize,
return QualType();
}
- // unlike gcc's vector_size attribute, the size is specified as the
+ // Unlike gcc's vector_size attribute, the size is specified as the
// number of elements, not the number of bytes.
unsigned vectorSize = static_cast<unsigned>(vecSize.getZExtValue());
@@ -2247,6 +2353,74 @@ bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
return false;
}
+/// Check the extended parameter information. Most of the necessary
+/// checking should occur when applying the parameter attribute; the
+/// only other checks required are positional restrictions.
+static void checkExtParameterInfos(Sema &S, ArrayRef<QualType> paramTypes,
+ const FunctionProtoType::ExtProtoInfo &EPI,
+ llvm::function_ref<SourceLocation(unsigned)> getParamLoc) {
+ assert(EPI.ExtParameterInfos && "shouldn't get here without param infos");
+
+ bool hasCheckedSwiftCall = false;
+ auto checkForSwiftCC = [&](unsigned paramIndex) {
+ // Only do this once.
+ if (hasCheckedSwiftCall) return;
+ hasCheckedSwiftCall = true;
+ if (EPI.ExtInfo.getCC() == CC_Swift) return;
+ S.Diag(getParamLoc(paramIndex), diag::err_swift_param_attr_not_swiftcall)
+ << getParameterABISpelling(EPI.ExtParameterInfos[paramIndex].getABI());
+ };
+
+ for (size_t paramIndex = 0, numParams = paramTypes.size();
+ paramIndex != numParams; ++paramIndex) {
+ switch (EPI.ExtParameterInfos[paramIndex].getABI()) {
+    // Nothing interesting to check for ordinary-ABI parameters.
+ case ParameterABI::Ordinary:
+ continue;
+
+ // swift_indirect_result parameters must be a prefix of the function
+ // arguments.
+ case ParameterABI::SwiftIndirectResult:
+ checkForSwiftCC(paramIndex);
+ if (paramIndex != 0 &&
+ EPI.ExtParameterInfos[paramIndex - 1].getABI()
+ != ParameterABI::SwiftIndirectResult) {
+ S.Diag(getParamLoc(paramIndex),
+ diag::err_swift_indirect_result_not_first);
+ }
+ continue;
+
+ // swift_context parameters must be the last parameter except for
+ // a possible swift_error parameter.
+ case ParameterABI::SwiftContext:
+ checkForSwiftCC(paramIndex);
+ if (!(paramIndex == numParams - 1 ||
+ (paramIndex == numParams - 2 &&
+ EPI.ExtParameterInfos[numParams - 1].getABI()
+ == ParameterABI::SwiftErrorResult))) {
+ S.Diag(getParamLoc(paramIndex),
+ diag::err_swift_context_not_before_swift_error_result);
+ }
+ continue;
+
+ // swift_error parameters must be the last parameter.
+ case ParameterABI::SwiftErrorResult:
+ checkForSwiftCC(paramIndex);
+ if (paramIndex != numParams - 1) {
+ S.Diag(getParamLoc(paramIndex),
+ diag::err_swift_error_result_not_last);
+ } else if (paramIndex == 0 ||
+ EPI.ExtParameterInfos[paramIndex - 1].getABI()
+ != ParameterABI::SwiftContext) {
+ S.Diag(getParamLoc(paramIndex),
+ diag::err_swift_error_result_not_after_swift_context);
+ }
+ continue;
+ }
+ llvm_unreachable("bad ABI kind");
+ }
+}
+
QualType Sema::BuildFunctionType(QualType T,
MutableArrayRef<QualType> ParamTypes,
SourceLocation Loc, DeclarationName Entity,
@@ -2271,6 +2445,11 @@ QualType Sema::BuildFunctionType(QualType T,
ParamTypes[Idx] = ParamType;
}
+ if (EPI.ExtParameterInfos) {
+ checkExtParameterInfos(*this, ParamTypes, EPI,
+ [=](unsigned i) { return Loc; });
+ }
+
if (Invalid)
return QualType();
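The positional rules enforced by checkExtParameterInfos can be restated as a freestanding validator: swift_indirect_result parameters must form a prefix, swift_error must be last, and swift_context must be last or immediately before a trailing swift_error. The ABI enum below mirrors clang's ParameterABI purely for the sketch:

#include <cassert>
#include <vector>

enum class ABI { Ordinary, SwiftIndirectResult, SwiftContext, SwiftErrorResult };

static bool isValidOrdering(const std::vector<ABI> &P) {
  size_t N = P.size();
  for (size_t I = 0; I != N; ++I) {
    switch (P[I]) {
    case ABI::Ordinary:
      break;
    case ABI::SwiftIndirectResult:
      if (I != 0 && P[I - 1] != ABI::SwiftIndirectResult)
        return false;                       // must be a prefix
      break;
    case ABI::SwiftContext:
      if (!(I == N - 1 ||
            (I == N - 2 && P[N - 1] == ABI::SwiftErrorResult)))
        return false;                       // last, or before swift_error
      break;
    case ABI::SwiftErrorResult:
      if (I != N - 1)
        return false;                       // must be last
      if (I == 0 || P[I - 1] != ABI::SwiftContext)
        return false;                       // must follow swift_context
      break;
    }
  }
  return true;
}

int main() {
  using A = ABI;
  assert(isValidOrdering({A::SwiftIndirectResult, A::Ordinary,
                          A::SwiftContext, A::SwiftErrorResult}));
  assert(!isValidOrdering({A::Ordinary, A::SwiftIndirectResult})); // not a prefix
  assert(!isValidOrdering({A::SwiftErrorResult, A::Ordinary}));    // not last
}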
@@ -2477,7 +2656,8 @@ void Sema::diagnoseIgnoredQualifiers(unsigned DiagID, unsigned Quals,
SourceLocation ConstQualLoc,
SourceLocation VolatileQualLoc,
SourceLocation RestrictQualLoc,
- SourceLocation AtomicQualLoc) {
+ SourceLocation AtomicQualLoc,
+ SourceLocation UnalignedQualLoc) {
if (!Quals)
return;
@@ -2485,26 +2665,27 @@ void Sema::diagnoseIgnoredQualifiers(unsigned DiagID, unsigned Quals,
const char *Name;
unsigned Mask;
SourceLocation Loc;
- } const QualKinds[4] = {
+ } const QualKinds[5] = {
{ "const", DeclSpec::TQ_const, ConstQualLoc },
{ "volatile", DeclSpec::TQ_volatile, VolatileQualLoc },
{ "restrict", DeclSpec::TQ_restrict, RestrictQualLoc },
+ { "__unaligned", DeclSpec::TQ_unaligned, UnalignedQualLoc },
{ "_Atomic", DeclSpec::TQ_atomic, AtomicQualLoc }
};
SmallString<32> QualStr;
unsigned NumQuals = 0;
SourceLocation Loc;
- FixItHint FixIts[4];
+ FixItHint FixIts[5];
// Build a string naming the redundant qualifiers.
- for (unsigned I = 0; I != 4; ++I) {
- if (Quals & QualKinds[I].Mask) {
+ for (auto &E : QualKinds) {
+ if (Quals & E.Mask) {
if (!QualStr.empty()) QualStr += ' ';
- QualStr += QualKinds[I].Name;
+ QualStr += E.Name;
// If we have a location for the qualifier, offer a fixit.
- SourceLocation QualLoc = QualKinds[I].Loc;
+ SourceLocation QualLoc = E.Loc;
if (QualLoc.isValid()) {
FixIts[NumQuals] = FixItHint::CreateRemoval(QualLoc);
if (Loc.isInvalid() ||
@@ -2550,7 +2731,8 @@ static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy,
SourceLocation::getFromRawEncoding(PTI.ConstQualLoc),
SourceLocation::getFromRawEncoding(PTI.VolatileQualLoc),
SourceLocation::getFromRawEncoding(PTI.RestrictQualLoc),
- SourceLocation::getFromRawEncoding(PTI.AtomicQualLoc));
+ SourceLocation::getFromRawEncoding(PTI.AtomicQualLoc),
+ SourceLocation::getFromRawEncoding(PTI.UnalignedQualLoc));
return;
}
@@ -2586,7 +2768,8 @@ static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy,
D.getDeclSpec().getConstSpecLoc(),
D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
- D.getDeclSpec().getAtomicSpecLoc());
+ D.getDeclSpec().getAtomicSpecLoc(),
+ D.getDeclSpec().getUnalignedSpecLoc());
}
static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
@@ -2700,6 +2883,7 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
case Declarator::FileContext:
case Declarator::BlockContext:
case Declarator::ForContext:
+ case Declarator::InitStmtContext:
case Declarator::ConditionContext:
break;
case Declarator::CXXNewContext:
@@ -2785,6 +2969,7 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
case Declarator::MemberContext:
case Declarator::BlockContext:
case Declarator::ForContext:
+ case Declarator::InitStmtContext:
case Declarator::BlockLiteralContext:
case Declarator::LambdaExprContext:
// C++11 [dcl.type]p3:
@@ -2940,6 +3125,26 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
unsigned ChunkIndex) {
assert(D.getTypeObject(ChunkIndex).Kind == DeclaratorChunk::Function);
+ // Check for an explicit CC attribute.
+ for (auto Attr = FTI.AttrList; Attr; Attr = Attr->getNext()) {
+ switch (Attr->getKind()) {
+ CALLING_CONV_ATTRS_CASELIST: {
+ // Ignore attributes that don't validate or can't apply to the
+ // function type. We'll diagnose the failure to apply them in
+ // handleFunctionTypeAttr.
+ CallingConv CC;
+ if (!S.CheckCallingConvAttr(*Attr, CC) &&
+ (!FTI.isVariadic || supportsVariadicCall(CC))) {
+ return CC;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
bool IsCXXInstanceMethod = false;
if (S.getLangOpts().CPlusPlus) {
@@ -2979,15 +3184,19 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
IsCXXInstanceMethod);
- // Attribute AT_OpenCLKernel affects the calling convention only on
- // the SPIR target, hence it cannot be treated as a calling
+ // Attribute AT_OpenCLKernel affects the calling convention for SPIR
+ // and AMDGPU targets, hence it cannot be treated as a calling
// convention attribute. This is the simplest place to infer
- // "spir_kernel" for OpenCL kernels on SPIR.
- if (CC == CC_SpirFunction) {
+ // calling convention for OpenCL kernels.
+ if (S.getLangOpts().OpenCL) {
for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
Attr; Attr = Attr->getNext()) {
if (Attr->getKind() == AttributeList::AT_OpenCLKernel) {
- CC = CC_SpirKernel;
+ llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch();
+ if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64 ||
+ arch == llvm::Triple::amdgcn) {
+ CC = CC_OpenCLKernel;
+ }
break;
}
}
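The triple dispatch above amounts to a small arch-to-calling-convention mapping. A sketch with invented Arch/CC enums standing in for llvm::Triple::ArchType and clang's CallingConv:

#include <iostream>

enum class Arch { X86_64, Spir, Spir64, AmdGcn };
enum class CC { Default, OpenCLKernel };

static CC kernelCC(Arch A, CC Current) {
  if (A == Arch::Spir || A == Arch::Spir64 || A == Arch::AmdGcn)
    return CC::OpenCLKernel;   // targets with a dedicated kernel CC
  return Current;              // otherwise leave the default CC alone
}

int main() {
  std::cout << (kernelCC(Arch::AmdGcn, CC::Default) == CC::OpenCLKernel) << "\n"; // 1
  std::cout << (kernelCC(Arch::X86_64, CC::Default) == CC::Default) << "\n";      // 1
}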
@@ -3004,7 +3213,7 @@ namespace {
BlockPointer,
MemberPointer,
};
-}
+} // end anonymous namespace
IdentifierInfo *Sema::getNullabilityKeyword(NullabilityKind nullability) {
switch (nullability) {
@@ -3064,7 +3273,7 @@ namespace {
// NSError**
NSErrorPointerPointer,
};
-}
+} // end anonymous namespace
/// Classify the given declarator, whose type-specified is \c type, based on
/// what kind of pointer it refers to.
@@ -3192,7 +3401,6 @@ static PointerDeclaratorKind classifyPointerDeclarator(Sema &S,
break;
} while (true);
-
switch (numNormalPointers) {
case 0:
return PointerDeclaratorKind::NonPointer;
@@ -3509,6 +3717,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
case Declarator::CXXCatchContext:
case Declarator::CXXNewContext:
case Declarator::ForContext:
+ case Declarator::InitStmtContext:
case Declarator::LambdaExprContext:
case Declarator::LambdaExprParameterContext:
case Declarator::ObjCCatchContext:
@@ -3609,15 +3818,20 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
case DeclaratorChunk::BlockPointer:
// If blocks are disabled, emit an error.
if (!LangOpts.Blocks)
- S.Diag(DeclType.Loc, diag::err_blocks_disable);
+ S.Diag(DeclType.Loc, diag::err_blocks_disable) << LangOpts.OpenCL;
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::BlockPointer,
DeclType.Loc, DeclType.getAttrListRef());
T = S.BuildBlockPointerType(T, D.getIdentifierLoc(), Name);
- if (DeclType.Cls.TypeQuals)
+ if (DeclType.Cls.TypeQuals || LangOpts.OpenCL) {
+ // OpenCL v2.0, s6.12.5 - Block variable declarations are implicitly
+ // qualified with const.
+ if (LangOpts.OpenCL)
+ DeclType.Cls.TypeQuals |= DeclSpec::TQ_const;
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Cls.TypeQuals);
+ }
break;
case DeclaratorChunk::Pointer:
// Verify that we're not building a pointer to pointer to function with
@@ -3638,10 +3852,21 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals);
break;
}
+
+ // OpenCL v2.0 s6.9b - Pointer to image/sampler cannot be used.
+ // OpenCL v2.0 s6.13.16.1 - Pointer to pipe cannot be used.
+ // OpenCL v2.0 s6.12.5 - Pointers to Blocks are not allowed.
+ if (LangOpts.OpenCL) {
+ if (T->isImageType() || T->isSamplerT() || T->isPipeType() ||
+ T->isBlockPointerType()) {
+ S.Diag(D.getIdentifierLoc(), diag::err_opencl_pointer_to_type) << T;
+ D.setInvalidType(true);
+ }
+ }
+
T = S.BuildPointerType(T, DeclType.Loc, Name);
if (DeclType.Ptr.TypeQuals)
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals);
-
break;
case DeclaratorChunk::Reference: {
// Verify that we're not building a reference to pointer to function with
@@ -3808,7 +4033,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
if (T->isHalfType()) {
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().cl_khr_fp16) {
- S.Diag(D.getIdentifierLoc(), diag::err_opencl_half_return) << T;
+ S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
+ << T << 0 /*pointer hint*/;
D.setInvalidType(true);
}
} else if (!S.getLangOpts().HalfArgsAndReturns) {
@@ -3818,6 +4044,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
}
}
+ // OpenCL v2.0 s6.12.5 - A block cannot be the return value of a
+ // function.
+ if (LangOpts.OpenCL && (T->isBlockPointerType() || T->isImageType() ||
+ T->isSamplerT() || T->isPipeType())) {
+ S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
+ << T << 1 /*hint off*/;
+ D.setInvalidType(true);
+ }
+
// Methods cannot return interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
@@ -3967,9 +4202,9 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
SmallVector<QualType, 16> ParamTys;
ParamTys.reserve(FTI.NumParams);
- SmallVector<bool, 16> ConsumedParameters;
- ConsumedParameters.reserve(FTI.NumParams);
- bool HasAnyConsumedParameters = false;
+ SmallVector<FunctionProtoType::ExtParameterInfo, 16>
+ ExtParameterInfos(FTI.NumParams);
+ bool HasAnyInterestingExtParameterInfos = false;
for (unsigned i = 0, e = FTI.NumParams; i != e; ++i) {
ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[i].Param);
@@ -4027,17 +4262,25 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
}
}
- if (LangOpts.ObjCAutoRefCount) {
- bool Consumed = Param->hasAttr<NSConsumedAttr>();
- ConsumedParameters.push_back(Consumed);
- HasAnyConsumedParameters |= Consumed;
+ if (LangOpts.ObjCAutoRefCount && Param->hasAttr<NSConsumedAttr>()) {
+ ExtParameterInfos[i] = ExtParameterInfos[i].withIsConsumed(true);
+ HasAnyInterestingExtParameterInfos = true;
+ }
+
+ if (auto attr = Param->getAttr<ParameterABIAttr>()) {
+ ExtParameterInfos[i] =
+ ExtParameterInfos[i].withABI(attr->getABI());
+ HasAnyInterestingExtParameterInfos = true;
}
ParamTys.push_back(ParamTy);
}
- if (HasAnyConsumedParameters)
- EPI.ConsumedParameters = ConsumedParameters.data();
+ if (HasAnyInterestingExtParameterInfos) {
+ EPI.ExtParameterInfos = ExtParameterInfos.data();
+ checkExtParameterInfos(S, ParamTys, EPI,
+ [&](unsigned i) { return FTI.Params[i].Param->getLocation(); });
+ }
SmallVector<QualType, 4> Exceptions;
SmallVector<ParsedType, 2> DynamicExceptions;
@@ -4068,7 +4311,6 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
T = Context.getFunctionType(T, ParamTys, EPI);
}
-
break;
}
case DeclaratorChunk::MemberPointer: {
@@ -4306,6 +4548,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
case Declarator::MemberContext:
case Declarator::BlockContext:
case Declarator::ForContext:
+ case Declarator::InitStmtContext:
case Declarator::ConditionContext:
case Declarator::CXXCatchContext:
case Declarator::ObjCCatchContext:
@@ -4497,6 +4740,8 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) {
return AttributeList::AT_ThisCall;
case AttributedType::attr_pascal:
return AttributeList::AT_Pascal;
+ case AttributedType::attr_swiftcall:
+ return AttributeList::AT_SwiftCall;
case AttributedType::attr_vectorcall:
return AttributeList::AT_VectorCall;
case AttributedType::attr_pcs:
@@ -4508,6 +4753,10 @@ static AttributeList::Kind getAttrListKind(AttributedType::Kind kind) {
return AttributeList::AT_MSABI;
case AttributedType::attr_sysv_abi:
return AttributeList::AT_SysVABI;
+ case AttributedType::attr_preserve_most:
+ return AttributeList::AT_PreserveMost;
+ case AttributedType::attr_preserve_all:
+ return AttributeList::AT_PreserveAll;
case AttributedType::attr_ptr32:
return AttributeList::AT_Ptr32;
case AttributedType::attr_ptr64:
@@ -4725,7 +4974,7 @@ namespace {
void VisitPipeTypeLoc(PipeTypeLoc TL) {
TL.setKWLoc(DS.getTypeSpecTypeLoc());
- TypeSourceInfo *TInfo = 0;
+ TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc());
}
@@ -4859,7 +5108,7 @@ namespace {
llvm_unreachable("unsupported TypeLoc kind in declarator!");
}
};
-}
+} // end anonymous namespace
static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) {
SourceLocation Loc;
@@ -4995,7 +5244,6 @@ ParsedType Sema::ActOnObjCInstanceType(SourceLocation Loc) {
return CreateParsedType(T, TInfo);
}
-
//===----------------------------------------------------------------------===//
// Type Attribute Processing
//===----------------------------------------------------------------------===//
@@ -5194,11 +5442,13 @@ static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state,
}
// Otherwise, if the qualifiers actually conflict, pull sugar off
- // until we reach a type that is directly qualified.
+ // and remove the ObjCLifetime qualifiers.
if (previousLifetime != lifetime) {
- // This should always terminate: the canonical type is
- // qualified, so some bit of sugar must be hiding it.
- while (!underlyingType.Quals.hasObjCLifetime()) {
+ // It's possible to have multiple local ObjCLifetime qualifiers. We
+ // can't stop after we reach a type that is directly qualified.
+ const Type *prevTy = nullptr;
+ while (!prevTy || prevTy != underlyingType.Ty) {
+ prevTy = underlyingType.Ty;
underlyingType = underlyingType.getSingleStepDesugaredType();
}
underlyingType.Quals.removeObjCLifetime();
@@ -5369,6 +5619,7 @@ namespace {
struct FunctionTypeUnwrapper {
enum WrapKind {
Desugar,
+ Attributed,
Parens,
Pointer,
BlockPointer,
@@ -5401,6 +5652,9 @@ namespace {
} else if (isa<ReferenceType>(Ty)) {
T = cast<ReferenceType>(Ty)->getPointeeType();
Stack.push_back(Reference);
+ } else if (isa<AttributedType>(Ty)) {
+ T = cast<AttributedType>(Ty)->getEquivalentType();
+ Stack.push_back(Attributed);
} else {
const Type *DTy = Ty->getUnqualifiedDesugaredType();
if (Ty == DTy) {
@@ -5449,6 +5703,9 @@ namespace {
// information.
return wrap(C, Old->getUnqualifiedDesugaredType(), I);
+ case Attributed:
+ return wrap(C, cast<AttributedType>(Old)->getEquivalentType(), I);
+
case Parens: {
QualType New = wrap(C, cast<ParenType>(Old)->getInnerType(), I);
return C.getParenType(New);
@@ -5483,7 +5740,7 @@ namespace {
llvm_unreachable("unknown wrapping kind");
}
};
-}
+} // end anonymous namespace
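The new Attributed case slots into FunctionTypeUnwrapper's general pattern: peel wrapper nodes while recording their kinds, edit the innermost type, then rebuild the wrappers in reverse order. Here is that pattern in miniature, with toy nodes rather than clang types:

#include <cassert>
#include <memory>
#include <vector>

struct Node {
  enum Kind { Core, Paren, Pointer } K;
  int Value = 0;                       // meaningful when K == Core
  std::shared_ptr<Node> Inner;         // meaningful for wrappers
};

static std::shared_ptr<Node> rewrite(std::shared_ptr<Node> N, int NewValue) {
  std::vector<Node::Kind> Stack;       // wrapping kinds, outermost first
  std::shared_ptr<Node> Cur = N;
  while (Cur->K != Node::Core) {       // unwrap, remembering each layer
    Stack.push_back(Cur->K);
    Cur = Cur->Inner;
  }
  auto Result = std::make_shared<Node>(Node{Node::Core, NewValue, nullptr});
  for (auto It = Stack.rbegin(); It != Stack.rend(); ++It)   // rewrap
    Result = std::make_shared<Node>(Node{*It, 0, Result});
  return Result;
}

int main() {
  auto Core = std::make_shared<Node>(Node{Node::Core, 1, nullptr});
  auto P = std::make_shared<Node>(Node{Node::Pointer, 0, Core});
  auto PP = std::make_shared<Node>(Node{Node::Paren, 0, P});
  auto New = rewrite(PP, 42);
  assert(New->K == Node::Paren && New->Inner->K == Node::Pointer);
  assert(New->Inner->Inner->Value == 42);  // core edited, shape preserved
}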
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &State,
AttributeList &Attr,
@@ -5672,10 +5929,11 @@ bool Sema::checkObjCKindOfType(QualType &type, SourceLocation loc) {
// Rebuild the "equivalent" type, which pushes __kindof down into
// the object type.
- QualType equivType = Context.getObjCObjectType(objType->getBaseType(),
- objType->getTypeArgsAsWritten(),
- objType->getProtocols(),
- /*isKindOf=*/true);
+ // There is no need to apply kindof on an unqualified id type.
+ QualType equivType = Context.getObjCObjectType(
+ objType->getBaseType(), objType->getTypeArgsAsWritten(),
+ objType->getProtocols(),
+ /*isKindOf=*/objType->isObjCUnqualifiedId() ? false : true);
// If we started with an object pointer type, rebuild it.
if (ptrType) {
@@ -5814,6 +6072,8 @@ static AttributedType::Kind getCCTypeAttrKind(AttributeList &Attr) {
return AttributedType::attr_thiscall;
case AttributeList::AT_Pascal:
return AttributedType::attr_pascal;
+ case AttributeList::AT_SwiftCall:
+ return AttributedType::attr_swiftcall;
case AttributeList::AT_VectorCall:
return AttributedType::attr_vectorcall;
case AttributeList::AT_Pcs: {
@@ -5835,6 +6095,10 @@ static AttributedType::Kind getCCTypeAttrKind(AttributeList &Attr) {
return AttributedType::attr_ms_abi;
case AttributeList::AT_SysVABI:
return AttributedType::attr_sysv_abi;
+ case AttributeList::AT_PreserveMost:
+ return AttributedType::attr_preserve_most;
+ case AttributeList::AT_PreserveAll:
+ return AttributedType::attr_preserve_all;
}
llvm_unreachable("unexpected attribute kind!");
}
@@ -5930,18 +6194,28 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state,
}
}
- // Diagnose use of callee-cleanup calling convention on variadic functions.
+ // Diagnose use of variadic functions with calling conventions that
+ // don't support them (e.g. because they're callee-cleanup).
+ // We delay warning about this on unprototyped function declarations
+ // until after redeclaration checking, just in case we pick up a
+ // prototype that way. And apparently we also "delay" warning about
+ // unprototyped function types in general, despite not necessarily having
+ // much ability to diagnose it later.
if (!supportsVariadicCall(CC)) {
const FunctionProtoType *FnP = dyn_cast<FunctionProtoType>(fn);
if (FnP && FnP->isVariadic()) {
unsigned DiagID = diag::err_cconv_varargs;
+
// stdcall and fastcall are ignored with a warning for GCC and MS
// compatibility.
- if (CC == CC_X86StdCall || CC == CC_X86FastCall)
+ bool IsInvalid = true;
+ if (CC == CC_X86StdCall || CC == CC_X86FastCall) {
DiagID = diag::warn_cconv_varargs;
+ IsInvalid = false;
+ }
S.Diag(attr.getLoc(), DiagID) << FunctionType::getNameForCallConv(CC);
- attr.setInvalid();
+ if (IsInvalid) attr.setInvalid();
return true;
}
}
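The control flow above reduces to a three-way decision table: accept, warn-and-ignore, or hard error. A restatement as a freestanding function, with sketch enums in place of clang's CallingConv and diagnostic IDs:

#include <iostream>

enum class CC { C, X86StdCall, X86FastCall, X86Pascal };
enum class Action { Accept, Warn, Error };

static bool supportsVariadic(CC C) { return C == CC::C; }

static Action checkVariadicCC(CC C, bool IsVariadic) {
  if (!IsVariadic || supportsVariadic(C))
    return Action::Accept;
  if (C == CC::X86StdCall || C == CC::X86FastCall)
    return Action::Warn;     // ignored with a warning for GCC/MS compat
  return Action::Error;      // attribute is marked invalid
}

int main() {
  std::cout << (checkVariadicCC(CC::X86StdCall, true) == Action::Warn) << "\n";  // 1
  std::cout << (checkVariadicCC(CC::X86Pascal, true) == Action::Error) << "\n";  // 1
}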
@@ -5957,9 +6231,14 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state,
// Modify the CC from the wrapped function type, wrap it all back, and then
// wrap the whole thing in an AttributedType as written. The modified type
// might have a different CC if we ignored the attribute.
- FunctionType::ExtInfo EI = unwrapped.get()->getExtInfo().withCallingConv(CC);
- QualType Equivalent =
+ QualType Equivalent;
+ if (CCOld == CC) {
+ Equivalent = type;
+ } else {
+ auto EI = unwrapped.get()->getExtInfo().withCallingConv(CC);
+ Equivalent =
unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
+ }
type = S.Context.getAttributedType(CCAttrKind, type, Equivalent);
return true;
}
@@ -6217,6 +6496,36 @@ static void HandleNeonVectorTypeAttr(QualType& CurType,
CurType = S.Context.getVectorType(CurType, numElts, VecKind);
}
+/// Handle OpenCL Access Qualifier Attribute.
+static void HandleOpenCLAccessAttr(QualType &CurType, const AttributeList &Attr,
+ Sema &S) {
+  // OpenCL v2.0 s6.6 - Access qualifiers can be used only with image and pipe types.
+ if (!(CurType->isImageType() || CurType->isPipeType())) {
+ S.Diag(Attr.getLoc(), diag::err_opencl_invalid_access_qualifier);
+ Attr.setInvalid();
+ return;
+ }
+
+ if (const TypedefType* TypedefTy = CurType->getAs<TypedefType>()) {
+ QualType PointeeTy = TypedefTy->desugar();
+ S.Diag(Attr.getLoc(), diag::err_opencl_multiple_access_qualifiers);
+
+ std::string PrevAccessQual;
+ switch (cast<BuiltinType>(PointeeTy.getTypePtr())->getKind()) {
+ #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ PrevAccessQual = #Access; \
+ break;
+ #include "clang/Basic/OpenCLImageTypes.def"
+ default:
+        llvm_unreachable("Unable to find corresponding image type.");
+ }
+
+ S.Diag(TypedefTy->getDecl()->getLocStart(),
+ diag::note_opencl_typedef_access_qualifier) << PrevAccessQual;
+ }
+}
+
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL, AttributeList *attrs) {
// Scan through and apply attributes to this type where it makes sense. Some
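Both this function and the GENERIC_IMAGE_TYPE case earlier rely on the .def-file "X macro" idiom: one entry list is expanded under different local macro definitions to generate enumerators and switch cases in lockstep. A self-contained demonstration, with an inline list standing in for a separate .def file:

#include <iostream>
#include <string>

#define IMAGE_TYPES(X) \
  X(Image1d, "read_only") \
  X(Image2d, "write_only")

enum ImageId {
#define MAKE_ENUM(Id, Access) Id,
  IMAGE_TYPES(MAKE_ENUM)
#undef MAKE_ENUM
};

static std::string defaultAccess(ImageId Id) {
  switch (Id) {
#define MAKE_CASE(Id, Access) case Id: return Access;
    IMAGE_TYPES(MAKE_CASE)
#undef MAKE_CASE
  }
  return "";
}

int main() {
  std::cout << defaultAccess(Image2d) << "\n"; // write_only
}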
@@ -6312,9 +6621,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
VectorType::NeonPolyVector);
attr.setUsedAsTypeAttr();
break;
- case AttributeList::AT_OpenCLImageAccess:
- // FIXME: there should be some type checking happening here, I would
- // imagine, but the original handler's checking was entirely superfluous.
+ case AttributeList::AT_OpenCLAccess:
+ HandleOpenCLAccessAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
@@ -6554,8 +6862,8 @@ bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested,
RD = Pattern;
D = RD->getDefinition();
} else if (auto *ED = dyn_cast<EnumDecl>(D)) {
- while (auto *NewED = ED->getInstantiatedFromMemberEnum())
- ED = NewED;
+ if (auto *Pattern = ED->getTemplateInstantiationPattern())
+ ED = Pattern;
if (OnlyNeedComplete && ED->isFixed()) {
// If the enum has a fixed underlying type, and we're only looking for a
// complete type (not a definition), any visible declaration of it will
@@ -6616,6 +6924,7 @@ static void assignInheritanceModel(Sema &S, CXXRecordDecl *RD) {
S.ImplicitMSInheritanceAttrLoc.isValid()
? S.ImplicitMSInheritanceAttrLoc
: RD->getSourceRange()));
+ S.Consumer.AssignInheritanceModel(RD);
}
}
@@ -6641,9 +6950,16 @@ bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
}
}
- // If we have a complete type, we're done.
NamedDecl *Def = nullptr;
- if (!T->isIncompleteType(&Def)) {
+ bool Incomplete = T->isIncompleteType(&Def);
+
+ // Check that any necessary explicit specializations are visible. For an
+ // enum, we just need the declaration, so don't check this.
+ if (Def && !isa<EnumDecl>(Def))
+ checkSpecializationVisibility(Loc, Def);
+
+ // If we have a complete type, we're done.
+ if (!Incomplete) {
// If we know about the definition but it is not visible, complain.
NamedDecl *SuggestedDef = nullptr;
if (Def &&
@@ -6652,7 +6968,7 @@ bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
// definition visible.
bool TreatAsComplete = Diagnoser && !isSFINAEContext();
if (Diagnoser)
- diagnoseMissingImport(Loc, SuggestedDef, /*NeedDefinition*/true,
+ diagnoseMissingImport(Loc, SuggestedDef, MissingImportKind::Definition,
/*Recover*/TreatAsComplete);
return !TreatAsComplete;
}
@@ -6745,15 +7061,11 @@ bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
}
}
- if (!Diagnoser)
- return true;
+ // FIXME: If we didn't instantiate a definition because of an explicit
+ // specialization declaration, check that it's visible.
- // We have an incomplete type. Produce a diagnostic.
- if (Ident___float128 &&
- T == Context.getTypeDeclType(Context.getFloat128StubType())) {
- Diag(Loc, diag::err_typecheck_decl_incomplete_type___float128);
+ if (!Diagnoser)
return true;
- }
Diagnoser->diagnose(*this, Loc, T);
diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h
index 935304fe4076..7224eef848de 100644
--- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h
+++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h
@@ -410,6 +410,14 @@ public:
return D;
}
+ /// \brief Transform the specified condition.
+ ///
+ /// By default, this transforms the variable and expression and rebuilds
+ /// the condition.
+ Sema::ConditionResult TransformCondition(SourceLocation Loc, VarDecl *Var,
+ Expr *Expr,
+ Sema::ConditionKind Kind);
+
/// \brief Transform the attributes associated with the given declaration and
/// place them on the new declaration.
///
@@ -604,11 +612,12 @@ public:
/// variables vector are acceptable.
///
/// Return true on error.
- bool TransformFunctionTypeParams(SourceLocation Loc,
- ParmVarDecl **Params, unsigned NumParams,
- const QualType *ParamTypes,
- SmallVectorImpl<QualType> &PTypes,
- SmallVectorImpl<ParmVarDecl*> *PVars);
+ bool TransformFunctionTypeParams(
+ SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
+ const QualType *ParamTypes,
+ const FunctionProtoType::ExtParameterInfo *ParamInfos,
+ SmallVectorImpl<QualType> &PTypes, SmallVectorImpl<ParmVarDecl *> *PVars,
+ Sema::ExtParameterInfoBuilder &PInfos);
/// \brief Transforms a single function-type parameter. Return null
/// on error.
@@ -1164,20 +1173,20 @@ public:
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
- StmtResult RebuildIfStmt(SourceLocation IfLoc, Sema::FullExprArg Cond,
- VarDecl *CondVar, Stmt *Then,
+ StmtResult RebuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+ Sema::ConditionResult Cond, Stmt *Init, Stmt *Then,
SourceLocation ElseLoc, Stmt *Else) {
- return getSema().ActOnIfStmt(IfLoc, Cond, CondVar, Then, ElseLoc, Else);
+ return getSema().ActOnIfStmt(IfLoc, IsConstexpr, Init, Cond, Then,
+ ElseLoc, Else);
}
/// \brief Start building a new switch statement.
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
- StmtResult RebuildSwitchStmtStart(SourceLocation SwitchLoc,
- Expr *Cond, VarDecl *CondVar) {
- return getSema().ActOnStartOfSwitchStmt(SwitchLoc, Cond,
- CondVar);
+ StmtResult RebuildSwitchStmtStart(SourceLocation SwitchLoc, Stmt *Init,
+ Sema::ConditionResult Cond) {
+ return getSema().ActOnStartOfSwitchStmt(SwitchLoc, Init, Cond);
}
/// \brief Attach the body to the switch statement.
@@ -1193,9 +1202,9 @@ public:
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
- StmtResult RebuildWhileStmt(SourceLocation WhileLoc, Sema::FullExprArg Cond,
- VarDecl *CondVar, Stmt *Body) {
- return getSema().ActOnWhileStmt(WhileLoc, Cond, CondVar, Body);
+ StmtResult RebuildWhileStmt(SourceLocation WhileLoc,
+ Sema::ConditionResult Cond, Stmt *Body) {
+ return getSema().ActOnWhileStmt(WhileLoc, Cond, Body);
}
/// \brief Build a new do-while statement.
@@ -1214,11 +1223,11 @@ public:
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
- Stmt *Init, Sema::FullExprArg Cond,
- VarDecl *CondVar, Sema::FullExprArg Inc,
- SourceLocation RParenLoc, Stmt *Body) {
+ Stmt *Init, Sema::ConditionResult Cond,
+ Sema::FullExprArg Inc, SourceLocation RParenLoc,
+ Stmt *Body) {
return getSema().ActOnForStmt(ForLoc, LParenLoc, Init, Cond,
- CondVar, Inc, RParenLoc, Body);
+ Inc, RParenLoc, Body);
}
/// \brief Build a new goto statement.
@@ -1559,10 +1568,11 @@ public:
SourceLocation ColonLoc,
SourceLocation EndLoc,
CXXScopeSpec &ReductionIdScopeSpec,
- const DeclarationNameInfo &ReductionId) {
+ const DeclarationNameInfo &ReductionId,
+ ArrayRef<Expr *> UnresolvedReductions) {
return getSema().ActOnOpenMPReductionClause(
VarList, StartLoc, LParenLoc, ColonLoc, EndLoc, ReductionIdScopeSpec,
- ReductionId);
+ ReductionId, UnresolvedReductions);
}
/// \brief Build a new OpenMP 'linear' clause.
@@ -1658,14 +1668,15 @@ public:
///
/// By default, performs semantic analysis to build the new OpenMP clause.
/// Subclasses may override this routine to provide different behavior.
- OMPClause *RebuildOMPMapClause(
- OpenMPMapClauseKind MapTypeModifier, OpenMPMapClauseKind MapType,
- SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
- SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc) {
- return getSema().ActOnOpenMPMapClause(MapTypeModifier, MapType, MapLoc,
- ColonLoc, VarList,StartLoc,
- LParenLoc, EndLoc);
+ OMPClause *
+ RebuildOMPMapClause(OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
+ SourceLocation MapLoc, SourceLocation ColonLoc,
+ ArrayRef<Expr *> VarList, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPMapClause(MapTypeModifier, MapType,
+ IsMapTypeImplicit, MapLoc, ColonLoc,
+ VarList, StartLoc, LParenLoc, EndLoc);
}
/// \brief Build a new OpenMP 'num_teams' clause.
@@ -1734,6 +1745,66 @@ public:
return getSema().ActOnOpenMPHintClause(Hint, StartLoc, LParenLoc, EndLoc);
}
+ /// \brief Build a new OpenMP 'dist_schedule' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *
+ RebuildOMPDistScheduleClause(OpenMPDistScheduleClauseKind Kind,
+ Expr *ChunkSize, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation KindLoc,
+ SourceLocation CommaLoc, SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPDistScheduleClause(
+ Kind, ChunkSize, StartLoc, LParenLoc, KindLoc, CommaLoc, EndLoc);
+ }
+
+ /// \brief Build a new OpenMP 'to' clause.
+ ///
+ /// By default, performs semantic analysis to build the new statement.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPToClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPToClause(VarList, StartLoc, LParenLoc, EndLoc);
+ }
+
+ /// \brief Build a new OpenMP 'from' clause.
+ ///
+ /// By default, performs semantic analysis to build the new statement.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPFromClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPFromClause(VarList, StartLoc, LParenLoc,
+ EndLoc);
+ }
+
+ /// Build a new OpenMP 'use_device_ptr' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPUseDevicePtrClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPUseDevicePtrClause(VarList, StartLoc, LParenLoc,
+ EndLoc);
+ }
+
+ /// Build a new OpenMP 'is_device_ptr' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPIsDevicePtrClause(ArrayRef<Expr *> VarList,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPIsDevicePtrClause(VarList, StartLoc, LParenLoc,
+ EndLoc);
+ }
+
/// \brief Rebuild the operand to an Objective-C \@synchronized statement.
///
/// By default, performs semantic analysis to build the new statement.
@@ -1823,7 +1894,7 @@ public:
StmtResult RebuildCXXForRangeStmt(SourceLocation ForLoc,
SourceLocation CoawaitLoc,
SourceLocation ColonLoc,
- Stmt *Range, Stmt *BeginEnd,
+ Stmt *Range, Stmt *Begin, Stmt *End,
Expr *Cond, Expr *Inc,
Stmt *LoopVar,
SourceLocation RParenLoc) {
@@ -1845,7 +1916,7 @@ public:
}
return getSema().BuildCXXForRangeStmt(ForLoc, CoawaitLoc, ColonLoc,
- Range, BeginEnd,
+ Range, Begin, End,
Cond, Inc, LoopVar, RParenLoc,
Sema::BFRK_Rebuild);
}
@@ -2634,7 +2705,8 @@ public:
ConvertedArgs))
return ExprError();
- return getSema().BuildCXXConstructExpr(Loc, T, Constructor, IsElidable,
+ return getSema().BuildCXXConstructExpr(Loc, T, Constructor,
+ IsElidable,
ConvertedArgs,
HadMultipleCandidates,
ListInitialization,
@@ -2643,6 +2715,16 @@ public:
ParenRange);
}
+ /// \brief Build a new implicit construction via inherited constructor
+ /// expression.
+ ExprResult RebuildCXXInheritedCtorInitExpr(QualType T, SourceLocation Loc,
+ CXXConstructorDecl *Constructor,
+ bool ConstructsVBase,
+ bool InheritedFromVBase) {
+ return new (getSema().Context) CXXInheritedCtorInitExpr(
+ Loc, T, Constructor, ConstructsVBase, InheritedFromVBase);
+ }
+
/// \brief Build a new object-construction expression.
///
/// By default, performs semantic analysis to build the new expression.
@@ -3269,8 +3351,6 @@ bool TreeTransform<Derived>::TransformExprs(Expr *const *Inputs,
if (Out.isInvalid())
return true;
- // FIXME: Can this happen? We should not try to expand the pack
- // in this case.
if (Out.get()->containsUnexpandedParameterPack()) {
Out = getDerived().RebuildPackExpansion(
Out.get(), Expansion->getEllipsisLoc(), OrigNumExpansions);
@@ -3316,6 +3396,31 @@ bool TreeTransform<Derived>::TransformExprs(Expr *const *Inputs,
return false;
}
+template <typename Derived>
+Sema::ConditionResult TreeTransform<Derived>::TransformCondition(
+ SourceLocation Loc, VarDecl *Var, Expr *Expr, Sema::ConditionKind Kind) {
+ if (Var) {
+ VarDecl *ConditionVar = cast_or_null<VarDecl>(
+ getDerived().TransformDefinition(Var->getLocation(), Var));
+
+ if (!ConditionVar)
+ return Sema::ConditionError();
+
+ return getSema().ActOnConditionVariable(ConditionVar, Loc, Kind);
+ }
+
+ if (Expr) {
+ ExprResult CondExpr = getDerived().TransformExpr(Expr);
+
+ if (CondExpr.isInvalid())
+ return Sema::ConditionError();
+
+ return getSema().ActOnCondition(nullptr, Loc, CondExpr.get(), Kind);
+ }
+
+ return Sema::ConditionResult();
+}
+
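TransformCondition funnels both condition forms through one entry point: a declared condition variable or a bare expression, never both. The shape of that API, with std::optional standing in for Sema::ConditionResult (the error path is elided for brevity, and the types are invented stand-ins for the Sema interfaces):

#include <cassert>
#include <optional>
#include <string>

struct Condition { std::string Desc; };
using ConditionResult = std::optional<Condition>;   // nullopt == no condition

static ConditionResult transformCondition(const std::string *Var,
                                          const std::string *Expr) {
  if (Var)   // declaration form: if (T x = ...)
    return Condition{"var:" + *Var};
  if (Expr)  // expression form: if (cond)
    return Condition{"expr:" + *Expr};
  return ConditionResult{};   // no condition at all, e.g. for (;;)
}

int main() {
  std::string V = "x", E = "x != 0";
  assert(transformCondition(&V, nullptr)->Desc == "var:x");
  assert(transformCondition(nullptr, &E)->Desc == "expr:x != 0");
  assert(!transformCondition(nullptr, nullptr).has_value());
}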
template<typename Derived>
NestedNameSpecifierLoc
TreeTransform<Derived>::TransformNestedNameSpecifierLoc(
@@ -4590,15 +4695,17 @@ ParmVarDecl *TreeTransform<Derived>::TransformFunctionTypeParam(
return newParm;
}
-template<typename Derived>
-bool TreeTransform<Derived>::
- TransformFunctionTypeParams(SourceLocation Loc,
- ParmVarDecl **Params, unsigned NumParams,
- const QualType *ParamTypes,
- SmallVectorImpl<QualType> &OutParamTypes,
- SmallVectorImpl<ParmVarDecl*> *PVars) {
+template <typename Derived>
+bool TreeTransform<Derived>::TransformFunctionTypeParams(
+ SourceLocation Loc, ArrayRef<ParmVarDecl *> Params,
+ const QualType *ParamTypes,
+ const FunctionProtoType::ExtParameterInfo *ParamInfos,
+ SmallVectorImpl<QualType> &OutParamTypes,
+ SmallVectorImpl<ParmVarDecl *> *PVars,
+ Sema::ExtParameterInfoBuilder &PInfos) {
int indexAdjustment = 0;
+ unsigned NumParams = Params.size();
for (unsigned i = 0; i != NumParams; ++i) {
if (ParmVarDecl *OldParm = Params[i]) {
assert(OldParm->getFunctionScopeIndex() == i);
@@ -4645,6 +4752,8 @@ bool TreeTransform<Derived>::
if (!NewParm)
return true;
+ if (ParamInfos)
+ PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
@@ -4662,6 +4771,8 @@ bool TreeTransform<Derived>::
if (!NewParm)
return true;
+ if (ParamInfos)
+ PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
@@ -4692,6 +4803,8 @@ bool TreeTransform<Derived>::
if (!NewParm)
return true;
+ if (ParamInfos)
+ PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewParm->getType());
if (PVars)
PVars->push_back(NewParm);
@@ -4731,6 +4844,16 @@ bool TreeTransform<Derived>::
if (NewType.isNull())
return true;
+ if (NewType->containsUnexpandedParameterPack()) {
+ NewType =
+ getSema().getASTContext().getPackExpansionType(NewType, None);
+
+ if (NewType.isNull())
+ return true;
+ }
+
+ if (ParamInfos)
+ PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
@@ -4748,6 +4871,8 @@ bool TreeTransform<Derived>::
if (NewType.isNull())
return true;
+ if (ParamInfos)
+ PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
@@ -4770,6 +4895,8 @@ bool TreeTransform<Derived>::
NewType = getSema().Context.getPackExpansionType(NewType,
NumExpansions);
+ if (ParamInfos)
+ PInfos.set(OutParamTypes.size(), ParamInfos[i]);
OutParamTypes.push_back(NewType);
if (PVars)
PVars->push_back(nullptr);
@@ -4804,6 +4931,7 @@ template<typename Derived> template<typename Fn>
QualType TreeTransform<Derived>::TransformFunctionProtoType(
TypeLocBuilder &TLB, FunctionProtoTypeLoc TL, CXXRecordDecl *ThisContext,
unsigned ThisTypeQuals, Fn TransformExceptionSpec) {
+
// Transform the parameters and return type.
//
// We are required to instantiate the params and return type in source order.
@@ -4813,14 +4941,17 @@ QualType TreeTransform<Derived>::TransformFunctionProtoType(
//
SmallVector<QualType, 4> ParamTypes;
SmallVector<ParmVarDecl*, 4> ParamDecls;
+ Sema::ExtParameterInfoBuilder ExtParamInfos;
const FunctionProtoType *T = TL.getTypePtr();
QualType ResultType;
if (T->hasTrailingReturn()) {
if (getDerived().TransformFunctionTypeParams(
- TL.getBeginLoc(), TL.getParmArray(), TL.getNumParams(),
- TL.getTypePtr()->param_type_begin(), ParamTypes, &ParamDecls))
+ TL.getBeginLoc(), TL.getParams(),
+ TL.getTypePtr()->param_type_begin(),
+ T->getExtParameterInfosOrNull(),
+ ParamTypes, &ParamDecls, ExtParamInfos))
return QualType();
{
@@ -4843,8 +4974,10 @@ QualType TreeTransform<Derived>::TransformFunctionProtoType(
return QualType();
if (getDerived().TransformFunctionTypeParams(
- TL.getBeginLoc(), TL.getParmArray(), TL.getNumParams(),
- TL.getTypePtr()->param_type_begin(), ParamTypes, &ParamDecls))
+ TL.getBeginLoc(), TL.getParams(),
+ TL.getTypePtr()->param_type_begin(),
+ T->getExtParameterInfosOrNull(),
+ ParamTypes, &ParamDecls, ExtParamInfos))
return QualType();
}
@@ -4854,8 +4987,19 @@ QualType TreeTransform<Derived>::TransformFunctionProtoType(
if (TransformExceptionSpec(EPI.ExceptionSpec, EPIChanged))
return QualType();
- // FIXME: Need to transform ConsumedParameters for variadic template
- // expansion.
+ // Handle extended parameter information.
+ if (auto NewExtParamInfos =
+ ExtParamInfos.getPointerOrNull(ParamTypes.size())) {
+ if (!EPI.ExtParameterInfos ||
+ llvm::makeArrayRef(EPI.ExtParameterInfos, TL.getNumParams())
+ != llvm::makeArrayRef(NewExtParamInfos, ParamTypes.size())) {
+ EPIChanged = true;
+ }
+ EPI.ExtParameterInfos = NewExtParamInfos;
+ } else if (EPI.ExtParameterInfos) {
+ EPIChanged = true;
+ EPI.ExtParameterInfos = nullptr;
+ }
QualType Result = TL.getType();
if (getDerived().AlwaysRebuild() || ResultType != T->getReturnType() ||
@@ -4890,8 +5034,8 @@ bool TreeTransform<Derived>::TransformExceptionSpec(
if (NoexceptExpr.isInvalid())
return true;
- NoexceptExpr = getSema().CheckBooleanCondition(
- NoexceptExpr.get(), NoexceptExpr.get()->getLocStart());
+ // FIXME: This is bogus, a noexcept expression is not a condition.
+ NoexceptExpr = getSema().CheckBooleanCondition(Loc, NoexceptExpr.get());
if (NoexceptExpr.isInvalid())
return true;
@@ -5918,7 +6062,6 @@ TreeTransform<Derived>::TransformObjCObjectType(TypeLocBuilder &TLB,
}
ObjCObjectTypeLoc NewT = TLB.push<ObjCObjectTypeLoc>(Result);
- assert(TL.hasBaseTypeAsWritten() && "Can't be dependent");
NewT.setHasBaseTypeAsWritten(true);
NewT.setTypeArgsLAngleLoc(TL.getTypeArgsLAngleLoc());
for (unsigned i = 0, n = TL.getNumTypeArgs(); i != n; ++i)
@@ -6123,85 +6266,73 @@ StmtResult TreeTransform<Derived>::TransformAttributedStmt(AttributedStmt *S) {
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformIfStmt(IfStmt *S) {
- // Transform the condition
- ExprResult Cond;
- VarDecl *ConditionVar = nullptr;
- if (S->getConditionVariable()) {
- ConditionVar
- = cast_or_null<VarDecl>(
- getDerived().TransformDefinition(
- S->getConditionVariable()->getLocation(),
- S->getConditionVariable()));
- if (!ConditionVar)
- return StmtError();
- } else {
- Cond = getDerived().TransformExpr(S->getCond());
-
- if (Cond.isInvalid())
- return StmtError();
-
- // Convert the condition to a boolean value.
- if (S->getCond()) {
- ExprResult CondE = getSema().ActOnBooleanCondition(nullptr, S->getIfLoc(),
- Cond.get());
- if (CondE.isInvalid())
- return StmtError();
-
- Cond = CondE.get();
- }
- }
+ // Transform the initialization statement
+ StmtResult Init = getDerived().TransformStmt(S->getInit());
+ if (Init.isInvalid())
+ return StmtError();
- Sema::FullExprArg FullCond(getSema().MakeFullExpr(Cond.get(), S->getIfLoc()));
- if (!S->getConditionVariable() && S->getCond() && !FullCond.get())
+ // Transform the condition
+ Sema::ConditionResult Cond = getDerived().TransformCondition(
+ S->getIfLoc(), S->getConditionVariable(), S->getCond(),
+ S->isConstexpr() ? Sema::ConditionKind::ConstexprIf
+ : Sema::ConditionKind::Boolean);
+ if (Cond.isInvalid())
return StmtError();
+ // If this is a constexpr if, determine which arm we should instantiate.
+ llvm::Optional<bool> ConstexprConditionValue;
+ if (S->isConstexpr())
+ ConstexprConditionValue = Cond.getKnownValue();
+
// Transform the "then" branch.
- StmtResult Then = getDerived().TransformStmt(S->getThen());
- if (Then.isInvalid())
- return StmtError();
+ StmtResult Then;
+ if (!ConstexprConditionValue || *ConstexprConditionValue) {
+ Then = getDerived().TransformStmt(S->getThen());
+ if (Then.isInvalid())
+ return StmtError();
+ } else {
+ Then = new (getSema().Context) NullStmt(S->getThen()->getLocStart());
+ }
// Transform the "else" branch.
- StmtResult Else = getDerived().TransformStmt(S->getElse());
- if (Else.isInvalid())
- return StmtError();
+ StmtResult Else;
+ if (!ConstexprConditionValue || !*ConstexprConditionValue) {
+ Else = getDerived().TransformStmt(S->getElse());
+ if (Else.isInvalid())
+ return StmtError();
+ }
if (!getDerived().AlwaysRebuild() &&
- FullCond.get() == S->getCond() &&
- ConditionVar == S->getConditionVariable() &&
+ Init.get() == S->getInit() &&
+ Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Then.get() == S->getThen() &&
Else.get() == S->getElse())
return S;
- return getDerived().RebuildIfStmt(S->getIfLoc(), FullCond, ConditionVar,
- Then.get(),
- S->getElseLoc(), Else.get());
+ return getDerived().RebuildIfStmt(S->getIfLoc(), S->isConstexpr(), Cond,
+ Init.get(), Then.get(), S->getElseLoc(),
+ Else.get());
}
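The user-visible consequence of this logic: in a template, only the selected arm of if constexpr is instantiated, so the discarded arm may contain code that would be ill-formed for the deduced type. A runnable C++17 illustration:

#include <iostream>
#include <type_traits>

template <typename T>
int describe(T v) {
  if constexpr (std::is_pointer_v<T>)
    return *v;        // never instantiated for non-pointers
  else
    return v + 1;     // never instantiated for pointers
}

int main() {
  int x = 41;
  std::cout << describe(x) << "\n";   // 42 (else-arm instantiated)
  std::cout << describe(&x) << "\n";  // 41 (then-arm instantiated)
}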
template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformSwitchStmt(SwitchStmt *S) {
- // Transform the condition.
- ExprResult Cond;
- VarDecl *ConditionVar = nullptr;
- if (S->getConditionVariable()) {
- ConditionVar
- = cast_or_null<VarDecl>(
- getDerived().TransformDefinition(
- S->getConditionVariable()->getLocation(),
- S->getConditionVariable()));
- if (!ConditionVar)
- return StmtError();
- } else {
- Cond = getDerived().TransformExpr(S->getCond());
+ // Transform the initialization statement
+ StmtResult Init = getDerived().TransformStmt(S->getInit());
+ if (Init.isInvalid())
+ return StmtError();
- if (Cond.isInvalid())
- return StmtError();
- }
+ // Transform the condition.
+ Sema::ConditionResult Cond = getDerived().TransformCondition(
+ S->getSwitchLoc(), S->getConditionVariable(), S->getCond(),
+ Sema::ConditionKind::Switch);
+ if (Cond.isInvalid())
+ return StmtError();
// Rebuild the switch statement.
StmtResult Switch
- = getDerived().RebuildSwitchStmtStart(S->getSwitchLoc(), Cond.get(),
- ConditionVar);
+ = getDerived().RebuildSwitchStmtStart(S->getSwitchLoc(),
+                                          Init.get(), Cond);
if (Switch.isInvalid())
return StmtError();
@@ -6219,36 +6350,10 @@ template<typename Derived>
StmtResult
TreeTransform<Derived>::TransformWhileStmt(WhileStmt *S) {
// Transform the condition
- ExprResult Cond;
- VarDecl *ConditionVar = nullptr;
- if (S->getConditionVariable()) {
- ConditionVar
- = cast_or_null<VarDecl>(
- getDerived().TransformDefinition(
- S->getConditionVariable()->getLocation(),
- S->getConditionVariable()));
- if (!ConditionVar)
- return StmtError();
- } else {
- Cond = getDerived().TransformExpr(S->getCond());
-
- if (Cond.isInvalid())
- return StmtError();
-
- if (S->getCond()) {
- // Convert the condition to a boolean value.
- ExprResult CondE = getSema().ActOnBooleanCondition(nullptr,
- S->getWhileLoc(),
- Cond.get());
- if (CondE.isInvalid())
- return StmtError();
- Cond = CondE;
- }
- }
-
- Sema::FullExprArg FullCond(
- getSema().MakeFullExpr(Cond.get(), S->getWhileLoc()));
- if (!S->getConditionVariable() && S->getCond() && !FullCond.get())
+ Sema::ConditionResult Cond = getDerived().TransformCondition(
+ S->getWhileLoc(), S->getConditionVariable(), S->getCond(),
+ Sema::ConditionKind::Boolean);
+ if (Cond.isInvalid())
return StmtError();
// Transform the body
@@ -6257,13 +6362,11 @@ TreeTransform<Derived>::TransformWhileStmt(WhileStmt *S) {
return StmtError();
if (!getDerived().AlwaysRebuild() &&
- FullCond.get() == S->getCond() &&
- ConditionVar == S->getConditionVariable() &&
+ Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Body.get() == S->getBody())
return Owned(S);
- return getDerived().RebuildWhileStmt(S->getWhileLoc(), FullCond,
- ConditionVar, Body.get());
+ return getDerived().RebuildWhileStmt(S->getWhileLoc(), Cond, Body.get());
}
template<typename Derived>
@@ -6303,37 +6406,10 @@ TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
getSema().ActOnOpenMPLoopInitialization(S->getForLoc(), Init.get());
// Transform the condition
- ExprResult Cond;
- VarDecl *ConditionVar = nullptr;
- if (S->getConditionVariable()) {
- ConditionVar
- = cast_or_null<VarDecl>(
- getDerived().TransformDefinition(
- S->getConditionVariable()->getLocation(),
- S->getConditionVariable()));
- if (!ConditionVar)
- return StmtError();
- } else {
- Cond = getDerived().TransformExpr(S->getCond());
-
- if (Cond.isInvalid())
- return StmtError();
-
- if (S->getCond()) {
- // Convert the condition to a boolean value.
- ExprResult CondE = getSema().ActOnBooleanCondition(nullptr,
- S->getForLoc(),
- Cond.get());
- if (CondE.isInvalid())
- return StmtError();
-
- Cond = CondE.get();
- }
- }
-
- Sema::FullExprArg FullCond(
- getSema().MakeFullExpr(Cond.get(), S->getForLoc()));
- if (!S->getConditionVariable() && S->getCond() && !FullCond.get())
+ Sema::ConditionResult Cond = getDerived().TransformCondition(
+ S->getForLoc(), S->getConditionVariable(), S->getCond(),
+ Sema::ConditionKind::Boolean);
+ if (Cond.isInvalid())
return StmtError();
// Transform the increment
@@ -6352,14 +6428,14 @@ TreeTransform<Derived>::TransformForStmt(ForStmt *S) {
if (!getDerived().AlwaysRebuild() &&
Init.get() == S->getInit() &&
- FullCond.get() == S->getCond() &&
+ Cond.get() == std::make_pair(S->getConditionVariable(), S->getCond()) &&
Inc.get() == S->getInc() &&
Body.get() == S->getBody())
return S;
return getDerived().RebuildForStmt(S->getForLoc(), S->getLParenLoc(),
- Init.get(), FullCond, ConditionVar,
- FullInc, S->getRParenLoc(), Body.get());
+ Init.get(), Cond, FullInc,
+ S->getRParenLoc(), Body.get());
}
template<typename Derived>
@@ -6842,15 +6918,18 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
if (Range.isInvalid())
return StmtError();
- StmtResult BeginEnd = getDerived().TransformStmt(S->getBeginEndStmt());
- if (BeginEnd.isInvalid())
+ StmtResult Begin = getDerived().TransformStmt(S->getBeginStmt());
+ if (Begin.isInvalid())
+ return StmtError();
+ StmtResult End = getDerived().TransformStmt(S->getEndStmt());
+ if (End.isInvalid())
return StmtError();
ExprResult Cond = getDerived().TransformExpr(S->getCond());
if (Cond.isInvalid())
return StmtError();
if (Cond.get())
- Cond = SemaRef.CheckBooleanCondition(Cond.get(), S->getColonLoc());
+ Cond = SemaRef.CheckBooleanCondition(S->getColonLoc(), Cond.get());
if (Cond.isInvalid())
return StmtError();
if (Cond.get())
@@ -6869,14 +6948,16 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
StmtResult NewStmt = S;
if (getDerived().AlwaysRebuild() ||
Range.get() != S->getRangeStmt() ||
- BeginEnd.get() != S->getBeginEndStmt() ||
+ Begin.get() != S->getBeginStmt() ||
+ End.get() != S->getEndStmt() ||
Cond.get() != S->getCond() ||
Inc.get() != S->getInc() ||
LoopVar.get() != S->getLoopVarStmt()) {
NewStmt = getDerived().RebuildCXXForRangeStmt(S->getForLoc(),
S->getCoawaitLoc(),
S->getColonLoc(), Range.get(),
- BeginEnd.get(), Cond.get(),
+ Begin.get(), End.get(),
+ Cond.get(),
Inc.get(), LoopVar.get(),
S->getRParenLoc());
if (NewStmt.isInvalid())
@@ -6893,7 +6974,8 @@ TreeTransform<Derived>::TransformCXXForRangeStmt(CXXForRangeStmt *S) {
NewStmt = getDerived().RebuildCXXForRangeStmt(S->getForLoc(),
S->getCoawaitLoc(),
S->getColonLoc(), Range.get(),
- BeginEnd.get(), Cond.get(),
+ Begin.get(), End.get(),
+ Cond.get(),
Inc.get(), LoopVar.get(),
S->getRParenLoc());
if (NewStmt.isInvalid())
@@ -7378,6 +7460,61 @@ StmtResult TreeTransform<Derived>::TransformOMPTargetDataDirective(
}
template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTargetEnterDataDirective(
+ OMPTargetEnterDataDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_target_enter_data, DirName,
+ nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTargetExitDataDirective(
+ OMPTargetExitDataDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_target_exit_data, DirName,
+ nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTargetParallelDirective(
+ OMPTargetParallelDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel, DirName,
+ nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTargetParallelForDirective(
+ OMPTargetParallelForDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel_for, DirName,
+ nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTargetUpdateDirective(
+ OMPTargetUpdateDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_target_update, DirName,
+ nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
StmtResult
TreeTransform<Derived>::TransformOMPTeamsDirective(OMPTeamsDirective *D) {
DeclarationNameInfo DirName;
@@ -7443,6 +7580,52 @@ StmtResult TreeTransform<Derived>::TransformOMPDistributeDirective(
return Res;
}
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPDistributeParallelForDirective(
+ OMPDistributeParallelForDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(
+ OMPD_distribute_parallel_for, DirName, nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPDistributeParallelForSimdDirective(
+ OMPDistributeParallelForSimdDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(
+ OMPD_distribute_parallel_for_simd, DirName, nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPDistributeSimdDirective(
+ OMPDistributeSimdDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_distribute_simd, DirName,
+ nullptr, D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPTargetParallelForSimdDirective(
+ OMPTargetParallelForSimdDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_target_parallel_for_simd,
+ DirName, nullptr,
+ D->getLocStart());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
//===----------------------------------------------------------------------===//
// OpenMP clause transformation
//===----------------------------------------------------------------------===//
@@ -7701,9 +7884,31 @@ TreeTransform<Derived>::TransformOMPReductionClause(OMPReductionClause *C) {
if (!NameInfo.getName())
return nullptr;
}
+ // Build a list of all UDR decls with the same name, ordered by the Scopes
+ // they appear in; a Scope boundary is marked by a duplicate of the
+ // previous decl.
+ llvm::SmallVector<Expr *, 16> UnresolvedReductions;
+ for (auto *E : C->reduction_ops()) {
+ // Transform all the decls.
+ if (E) {
+ auto *ULE = cast<UnresolvedLookupExpr>(E);
+ UnresolvedSet<8> Decls;
+ for (auto *D : ULE->decls()) {
+ NamedDecl *InstD =
+ cast<NamedDecl>(getDerived().TransformDecl(E->getExprLoc(), D));
+ Decls.addDecl(InstD, InstD->getAccess());
+ }
+ UnresolvedReductions.push_back(
+ UnresolvedLookupExpr::Create(
+ SemaRef.Context, /*NamingClass=*/nullptr,
+ ReductionIdScopeSpec.getWithLocInContext(SemaRef.Context),
+ NameInfo, /*ADL=*/true, ULE->isOverloaded(),
+ Decls.begin(), Decls.end()));
+ } else
+ UnresolvedReductions.push_back(nullptr);
+ }
return getDerived().RebuildOMPReductionClause(
Vars, C->getLocStart(), C->getLParenLoc(), C->getColonLoc(),
- C->getLocEnd(), ReductionIdScopeSpec, NameInfo);
+ C->getLocEnd(), ReductionIdScopeSpec, NameInfo, UnresolvedReductions);
}
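// Illustrative only: the path above that rebuilds UnresolvedLookupExprs is
// exercised by a user-defined reduction referenced inside a template, e.g.
//
//   #pragma omp declare reduction(merge : MyList : omp_out.append(omp_in))
//   template <typename T> void f(MyList &L) {
//   #pragma omp parallel for reduction(merge : L)
//     for (int i = 0; i < 10; ++i) L.append(make(i));
//   }
//
// (MyList and make are placeholders.) Inside the template, 'merge' is an
// UnresolvedLookupExpr whose candidate declarations are transformed and
// passed to the rebuilt clause.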
template <typename Derived>
@@ -7825,9 +8030,9 @@ OMPClause *TreeTransform<Derived>::TransformOMPMapClause(OMPMapClause *C) {
Vars.push_back(EVar.get());
}
return getDerived().RebuildOMPMapClause(
- C->getMapTypeModifier(), C->getMapType(), C->getMapLoc(),
- C->getColonLoc(), Vars, C->getLocStart(), C->getLParenLoc(),
- C->getLocEnd());
+ C->getMapTypeModifier(), C->getMapType(), C->isImplicitMapType(),
+ C->getMapLoc(), C->getColonLoc(), Vars, C->getLocStart(),
+ C->getLParenLoc(), C->getLocEnd());
}
template <typename Derived>
@@ -7889,6 +8094,81 @@ OMPClause *TreeTransform<Derived>::TransformOMPHintClause(OMPHintClause *C) {
C->getLParenLoc(), C->getLocEnd());
}
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPDistScheduleClause(
+ OMPDistScheduleClause *C) {
+ ExprResult E = getDerived().TransformExpr(C->getChunkSize());
+ if (E.isInvalid())
+ return nullptr;
+ return getDerived().RebuildOMPDistScheduleClause(
+ C->getDistScheduleKind(), E.get(), C->getLocStart(), C->getLParenLoc(),
+ C->getDistScheduleKindLoc(), C->getCommaLoc(), C->getLocEnd());
+}
+
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPDefaultmapClause(OMPDefaultmapClause *C) {
+ return C;
+}
+
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPToClause(OMPToClause *C) {
+ llvm::SmallVector<Expr *, 16> Vars;
+ Vars.reserve(C->varlist_size());
+ for (auto *VE : C->varlists()) {
+ ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
+ if (EVar.isInvalid())
+ return nullptr;
+ Vars.push_back(EVar.get());
+ }
+ return getDerived().RebuildOMPToClause(Vars, C->getLocStart(),
+ C->getLParenLoc(), C->getLocEnd());
+}
+
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPFromClause(OMPFromClause *C) {
+ llvm::SmallVector<Expr *, 16> Vars;
+ Vars.reserve(C->varlist_size());
+ for (auto *VE : C->varlists()) {
+ ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
+ if (EVar.isInvalid())
+ return nullptr;
+ Vars.push_back(EVar.get());
+ }
+ return getDerived().RebuildOMPFromClause(Vars, C->getLocStart(),
+ C->getLParenLoc(), C->getLocEnd());
+}
+
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPUseDevicePtrClause(
+ OMPUseDevicePtrClause *C) {
+ llvm::SmallVector<Expr *, 16> Vars;
+ Vars.reserve(C->varlist_size());
+ for (auto *VE : C->varlists()) {
+ ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
+ if (EVar.isInvalid())
+ return nullptr;
+ Vars.push_back(EVar.get());
+ }
+ return getDerived().RebuildOMPUseDevicePtrClause(
+ Vars, C->getLocStart(), C->getLParenLoc(), C->getLocEnd());
+}
+
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
+ llvm::SmallVector<Expr *, 16> Vars;
+ Vars.reserve(C->varlist_size());
+ for (auto *VE : C->varlists()) {
+ ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
+ if (EVar.isInvalid())
+ return nullptr;
+ Vars.push_back(EVar.get());
+ }
+ return getDerived().RebuildOMPIsDevicePtrClause(
+ Vars, C->getLocStart(), C->getLParenLoc(), C->getLocEnd());
+}
+
//===----------------------------------------------------------------------===//
// Expression transformation
//===----------------------------------------------------------------------===//
@@ -8581,46 +8861,44 @@ TreeTransform<Derived>::TransformDesignatedInitExpr(DesignatedInitExpr *E) {
// transform the designators.
SmallVector<Expr*, 4> ArrayExprs;
bool ExprChanged = false;
- for (DesignatedInitExpr::designators_iterator D = E->designators_begin(),
- DEnd = E->designators_end();
- D != DEnd; ++D) {
- if (D->isFieldDesignator()) {
- Desig.AddDesignator(Designator::getField(D->getFieldName(),
- D->getDotLoc(),
- D->getFieldLoc()));
+ for (const DesignatedInitExpr::Designator &D : E->designators()) {
+ if (D.isFieldDesignator()) {
+ Desig.AddDesignator(Designator::getField(D.getFieldName(),
+ D.getDotLoc(),
+ D.getFieldLoc()));
continue;
}
- if (D->isArrayDesignator()) {
- ExprResult Index = getDerived().TransformExpr(E->getArrayIndex(*D));
+ if (D.isArrayDesignator()) {
+ ExprResult Index = getDerived().TransformExpr(E->getArrayIndex(D));
if (Index.isInvalid())
return ExprError();
- Desig.AddDesignator(Designator::getArray(Index.get(),
- D->getLBracketLoc()));
+ Desig.AddDesignator(
+ Designator::getArray(Index.get(), D.getLBracketLoc()));
- ExprChanged = ExprChanged || Init.get() != E->getArrayIndex(*D);
+ ExprChanged = ExprChanged || Init.get() != E->getArrayIndex(D);
ArrayExprs.push_back(Index.get());
continue;
}
- assert(D->isArrayRangeDesignator() && "New kind of designator?");
+ assert(D.isArrayRangeDesignator() && "New kind of designator?");
ExprResult Start
- = getDerived().TransformExpr(E->getArrayRangeStart(*D));
+ = getDerived().TransformExpr(E->getArrayRangeStart(D));
if (Start.isInvalid())
return ExprError();
- ExprResult End = getDerived().TransformExpr(E->getArrayRangeEnd(*D));
+ ExprResult End = getDerived().TransformExpr(E->getArrayRangeEnd(D));
if (End.isInvalid())
return ExprError();
Desig.AddDesignator(Designator::getArrayRange(Start.get(),
End.get(),
- D->getLBracketLoc(),
- D->getEllipsisLoc()));
+ D.getLBracketLoc(),
+ D.getEllipsisLoc()));
- ExprChanged = ExprChanged || Start.get() != E->getArrayRangeStart(*D) ||
- End.get() != E->getArrayRangeEnd(*D);
+ ExprChanged = ExprChanged || Start.get() != E->getArrayRangeStart(D) ||
+ End.get() != E->getArrayRangeEnd(D);
ArrayExprs.push_back(Start.get());
ArrayExprs.push_back(End.get());
@@ -9768,8 +10046,8 @@ TreeTransform<Derived>::TransformCXXConstructExpr(CXXConstructExpr *E) {
}
return getDerived().RebuildCXXConstructExpr(T, /*FIXME:*/E->getLocStart(),
- Constructor, E->isElidable(),
- Args,
+ Constructor,
+ E->isElidable(), Args,
E->hadMultipleCandidates(),
E->isListInitialization(),
E->isStdInitListInitialization(),
@@ -9778,6 +10056,32 @@ TreeTransform<Derived>::TransformCXXConstructExpr(CXXConstructExpr *E) {
E->getParenOrBraceRange());
}
+template<typename Derived>
+ExprResult TreeTransform<Derived>::TransformCXXInheritedCtorInitExpr(
+ CXXInheritedCtorInitExpr *E) {
+ QualType T = getDerived().TransformType(E->getType());
+ if (T.isNull())
+ return ExprError();
+
+ CXXConstructorDecl *Constructor = cast_or_null<CXXConstructorDecl>(
+ getDerived().TransformDecl(E->getLocStart(), E->getConstructor()));
+ if (!Constructor)
+ return ExprError();
+
+ if (!getDerived().AlwaysRebuild() &&
+ T == E->getType() &&
+ Constructor == E->getConstructor()) {
+ // Mark the constructor as referenced.
+ // FIXME: Instantiation-specific
+ SemaRef.MarkFunctionReferenced(E->getLocStart(), Constructor);
+ return E;
+ }
+
+ return getDerived().RebuildCXXInheritedCtorInitExpr(
+ T, E->getLocation(), Constructor,
+ E->constructsVBase(), E->inheritedFromVBase());
+}
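// Illustrative only: a CXXInheritedCtorInitExpr represents the implicit
// forwarding of constructor arguments through an inheriting constructor:
//
//   struct Base { Base(int); };
//   struct Derived : Base { using Base::Base; };
//   Derived d(42); // initializes the Base subobject via the inherited ctor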
+
/// \brief Transform a C++ temporary-binding expression.
///
/// Since CXXBindTemporaryExpr nodes are implicitly generated, we just
@@ -9953,7 +10257,9 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
// Capturing 'this' is trivial.
if (C->capturesThis()) {
- getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit());
+ getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
+ /*BuildAndDiagnose*/ true, nullptr,
+ C->getCaptureKind() == LCK_StarThis);
continue;
}
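// Illustrative only: the extra arguments distinguish a C++17 capture of a
// copy of *this from a plain 'this' capture:
//
//   auto f = [*this] { return m; }; // LCK_StarThis: captures *this by copy
//   auto g = [this] { return m; };  // LCK_This: captures the this pointer
//
// (m is a placeholder member.)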
// Captured expression will be recaptured during captured variables
@@ -10804,6 +11110,12 @@ TransformObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
Result.get());
}
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformObjCAvailabilityCheckExpr(
+ ObjCAvailabilityCheckExpr *E) {
+ return E;
+}
+
template<typename Derived>
ExprResult
TreeTransform<Derived>::TransformObjCMessageExpr(ObjCMessageExpr *E) {
@@ -11039,22 +11351,26 @@ TreeTransform<Derived>::TransformBlockExpr(BlockExpr *E) {
SmallVector<ParmVarDecl*, 4> params;
SmallVector<QualType, 4> paramTypes;
+ const FunctionProtoType *exprFunctionType = E->getFunctionType();
+
// Parameter substitution.
- if (getDerived().TransformFunctionTypeParams(E->getCaretLocation(),
- oldBlock->param_begin(),
- oldBlock->param_size(),
- nullptr, paramTypes, &params)) {
+ Sema::ExtParameterInfoBuilder extParamInfos;
+ if (getDerived().TransformFunctionTypeParams(
+ E->getCaretLocation(), oldBlock->parameters(), nullptr,
+ exprFunctionType->getExtParameterInfosOrNull(), paramTypes, &params,
+ extParamInfos)) {
getSema().ActOnBlockError(E->getCaretLocation(), /*Scope=*/nullptr);
return ExprError();
}
- const FunctionProtoType *exprFunctionType = E->getFunctionType();
QualType exprResultType =
getDerived().TransformType(exprFunctionType->getReturnType());
+ auto epi = exprFunctionType->getExtProtoInfo();
+ epi.ExtParameterInfos = extParamInfos.getPointerOrNull(paramTypes.size());
+
QualType functionType =
- getDerived().RebuildFunctionProtoType(exprResultType, paramTypes,
- exprFunctionType->getExtProtoInfo());
+ getDerived().RebuildFunctionProtoType(exprResultType, paramTypes, epi);
blockScope->FunctionType = functionType;
// Set the parameters on the block decl.
diff --git a/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.cpp b/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.cpp
index be995400df6d..340b7fae78aa 100644
--- a/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.cpp
@@ -115,11 +115,39 @@ TypeLoc TypeLocBuilder::pushImpl(QualType T, size_t LocalSize, unsigned LocalAli
NumBytesAtAlign4 += LocalSize;
}
} else if (LocalAlignment == 8) {
- if (!NumBytesAtAlign8 && NumBytesAtAlign4 % 8 != 0) {
- // No existing padding and misaligned members; add in 4 bytes padding
- memmove(&Buffer[Index - 4], &Buffer[Index], NumBytesAtAlign4);
- Index -= 4;
+ if (NumBytesAtAlign8 == 0) {
+ // We have not seen any 8-byte-aligned element yet; insert padding only
+ // if the new Index would not be 8-byte aligned.
+ if ((Index - LocalSize) % 8 != 0) {
+ memmove(&Buffer[Index - 4], &Buffer[Index], NumBytesAtAlign4);
+ Index -= 4;
+ }
+ } else {
+ unsigned Padding = NumBytesAtAlign4 % 8;
+ if (Padding == 0) {
+ if (LocalSize % 8 == 0) {
+ // Everything is set: there's no padding and we don't need to add
+ // any.
+ } else {
+ assert(LocalSize % 8 == 4);
+ // No existing padding; add 4 bytes of padding.
+ memmove(&Buffer[Index - 4], &Buffer[Index], NumBytesAtAlign4);
+ Index -= 4;
+ }
+ } else {
+ assert(Padding == 4);
+ if (LocalSize % 8 == 0) {
+ // Everything is set: there are already 4 bytes of padding and we
+ // don't need to add any more.
+ } else {
+ assert(LocalSize % 8 == 4);
+ // There are 4 bytes of padding, but we don't need any; remove them.
+ memmove(&Buffer[Index + 4], &Buffer[Index], NumBytesAtAlign4);
+ Index += 4;
+ }
+ }
}
+
// Forget about any padding.
NumBytesAtAlign4 = 0;
NumBytesAtAlign8 += LocalSize;
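// Worked example (illustrative): with no 8-byte data pushed yet and 12
// bytes of 4-byte-aligned data in the buffer, pushing an 8-byte-aligned
// element of size 8 leaves the new Index misaligned iff
// (Index - 8) % 8 != 0; in that case the 12 existing bytes slide down by 4
// to restore alignment. Once 8-byte data exists, only the residues
// NumBytesAtAlign4 % 8 and LocalSize % 8 (each 0 or 4) decide whether the
// 4-byte pad is added, kept, or removed, as in the branches above.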
diff --git a/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.h b/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.h
index 82844b391467..382821859768 100644
--- a/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.h
+++ b/contrib/llvm/tools/clang/lib/Sema/TypeLocBuilder.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This files defines TypeLocBuilder, a class for building TypeLocs
+// This file defines TypeLocBuilder, a class for building TypeLocs
// bottom-up.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.cpp
index 2b78d745864a..22ead2b57c72 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.cpp
@@ -91,6 +91,9 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) {
case BuiltinType::LongDouble:
ID = PREDEF_TYPE_LONGDOUBLE_ID;
break;
+ case BuiltinType::Float128:
+ ID = PREDEF_TYPE_FLOAT128_ID;
+ break;
case BuiltinType::NullPtr:
ID = PREDEF_TYPE_NULLPTR_ID;
break;
@@ -127,42 +130,11 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) {
case BuiltinType::ObjCSel:
ID = PREDEF_TYPE_OBJC_SEL;
break;
- case BuiltinType::OCLImage1d:
- ID = PREDEF_TYPE_IMAGE1D_ID;
- break;
- case BuiltinType::OCLImage1dArray:
- ID = PREDEF_TYPE_IMAGE1D_ARR_ID;
- break;
- case BuiltinType::OCLImage1dBuffer:
- ID = PREDEF_TYPE_IMAGE1D_BUFF_ID;
- break;
- case BuiltinType::OCLImage2d:
- ID = PREDEF_TYPE_IMAGE2D_ID;
- break;
- case BuiltinType::OCLImage2dArray:
- ID = PREDEF_TYPE_IMAGE2D_ARR_ID;
- break;
- case BuiltinType::OCLImage2dDepth:
- ID = PREDEF_TYPE_IMAGE2D_DEP_ID;
- break;
- case BuiltinType::OCLImage2dArrayDepth:
- ID = PREDEF_TYPE_IMAGE2D_ARR_DEP_ID;
- break;
- case BuiltinType::OCLImage2dMSAA:
- ID = PREDEF_TYPE_IMAGE2D_MSAA_ID;
- break;
- case BuiltinType::OCLImage2dArrayMSAA:
- ID = PREDEF_TYPE_IMAGE2D_ARR_MSAA_ID;
- break;
- case BuiltinType::OCLImage2dMSAADepth:
- ID = PREDEF_TYPE_IMAGE2D_MSAA_DEP_ID;
- break;
- case BuiltinType::OCLImage2dArrayMSAADepth:
- ID = PREDEF_TYPE_IMAGE2D_ARR_MSAA_DEPTH_ID;
- break;
- case BuiltinType::OCLImage3d:
- ID = PREDEF_TYPE_IMAGE3D_ID;
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case BuiltinType::Id: \
+ ID = PREDEF_TYPE_##Id##_ID; \
break;
+#include "clang/Basic/OpenCLImageTypes.def"
case BuiltinType::OCLSampler:
ID = PREDEF_TYPE_SAMPLER_ID;
break;
@@ -286,6 +258,7 @@ bool serialization::isRedeclarableDeclKind(unsigned Kind) {
case Decl::CXXDestructor:
case Decl::CXXConversion:
case Decl::UsingShadow:
+ case Decl::ConstructorUsingShadow:
case Decl::Var:
case Decl::FunctionTemplate:
case Decl::ClassTemplate:
@@ -319,6 +292,8 @@ bool serialization::isRedeclarableDeclKind(unsigned Kind) {
case Decl::ObjCCompatibleAlias:
case Decl::LinkageSpec:
case Decl::ObjCPropertyImpl:
+ case Decl::PragmaComment:
+ case Decl::PragmaDetectMismatch:
case Decl::FileScopeAsm:
case Decl::AccessSpec:
case Decl::Friend:
@@ -329,6 +304,8 @@ bool serialization::isRedeclarableDeclKind(unsigned Kind) {
case Decl::ClassScopeFunctionSpecialization:
case Decl::Import:
case Decl::OMPThreadPrivate:
+ case Decl::OMPCapturedExpr:
+ case Decl::OMPDeclareReduction:
case Decl::BuiltinTemplate:
return false;
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h
index 64f583c98728..641165e4178f 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTCommon.h
@@ -37,6 +37,7 @@ enum DeclUpdateKind {
UPD_MANGLING_NUMBER,
UPD_STATIC_LOCAL_NUMBER,
UPD_DECL_MARKED_OPENMP_THREADPRIVATE,
+ UPD_DECL_MARKED_OPENMP_DECLARETARGET,
UPD_DECL_EXPORTED,
UPD_ADDED_ATTR_TO_RECORD
};
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp
index 833ff57e4d0b..9d1554a826aa 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReader.cpp
@@ -48,6 +48,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -216,8 +217,13 @@ static bool checkLanguageOptions(const LangOptions &LangOpts,
if (!AllowCompatibleDifferences) \
ENUM_LANGOPT(Name, Bits, Default, Description)
+#define COMPATIBLE_VALUE_LANGOPT(Name, Bits, Default, Description) \
+ if (!AllowCompatibleDifferences) \
+ VALUE_LANGOPT(Name, Bits, Default, Description)
+
#define BENIGN_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
+#define BENIGN_VALUE_LANGOPT(Name, Type, Bits, Default, Description)
#include "clang/Basic/LangOptions.def"
if (ExistingLangOpts.ModuleFeatures != LangOpts.ModuleFeatures) {
@@ -768,6 +774,15 @@ IdentID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) {
return Reader.getGlobalIdentifierID(F, RawID >> 1);
}
+static void markIdentifierFromAST(ASTReader &Reader, IdentifierInfo &II) {
+ if (!II.isFromAST()) {
+ II.setIsFromAST();
+ bool IsModule = Reader.getPreprocessor().getCurrentModule() != nullptr;
+ if (isInterestingIdentifier(Reader, II, IsModule))
+ II.setChangedSinceDeserialization();
+ }
+}
+
IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
const unsigned char* d,
unsigned DataLen) {
@@ -784,12 +799,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k,
II = &Reader.getIdentifierTable().getOwn(k);
KnownII = II;
}
- if (!II->isFromAST()) {
- II->setIsFromAST();
- bool IsModule = Reader.PP.getCurrentModule() != nullptr;
- if (isInterestingIdentifier(Reader, *II, IsModule))
- II->setChangedSinceDeserialization();
- }
+ markIdentifierFromAST(Reader, *II);
Reader.markIdentifierUpToDate(II);
IdentID ID = Reader.getGlobalIdentifierID(F, RawID);
@@ -1199,6 +1209,32 @@ bool ASTReader::ReadSLocEntry(int ID) {
return true;
}
+ // Local helper to read the (possibly-compressed) buffer data following the
+ // entry record.
+ auto ReadBuffer = [this](
+ BitstreamCursor &SLocEntryCursor,
+ StringRef Name) -> std::unique_ptr<llvm::MemoryBuffer> {
+ RecordData Record;
+ StringRef Blob;
+ unsigned Code = SLocEntryCursor.ReadCode();
+ unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
+
+ if (RecCode == SM_SLOC_BUFFER_BLOB_COMPRESSED) {
+ SmallString<0> Uncompressed;
+ if (llvm::zlib::uncompress(Blob, Uncompressed, Record[0]) !=
+ llvm::zlib::StatusOK) {
+ Error("could not decompress embedded file contents");
+ return nullptr;
+ }
+ return llvm::MemoryBuffer::getMemBufferCopy(Uncompressed, Name);
+ } else if (RecCode == SM_SLOC_BUFFER_BLOB) {
+ return llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name, true);
+ } else {
+ Error("AST record has invalid code");
+ return nullptr;
+ }
+ };
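// Illustrative only: for SM_SLOC_BUFFER_BLOB_COMPRESSED records, Record[0]
// carries the uncompressed size and the blob holds zlib-compressed bytes;
// for plain SM_SLOC_BUFFER_BLOB records the blob is the raw contents plus
// a trailing NUL, which drop_back(1) strips.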
+
ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
F->SLocEntryCursor.JumpToBit(F->SLocEntryOffsets[ID - F->SLocEntryBaseID]);
BitstreamCursor &SLocEntryCursor = F->SLocEntryCursor;
@@ -1254,24 +1290,16 @@ bool ASTReader::ReadSLocEntry(int ID) {
FileDeclIDs[FID] = FileDeclsInfo(F, llvm::makeArrayRef(FirstDecl,
NumFileDecls));
}
-
+
const SrcMgr::ContentCache *ContentCache
= SourceMgr.getOrCreateContentCache(File,
/*isSystemFile=*/FileCharacter != SrcMgr::C_User);
if (OverriddenBuffer && !ContentCache->BufferOverridden &&
ContentCache->ContentsEntry == ContentCache->OrigEntry &&
!ContentCache->getRawBuffer()) {
- unsigned Code = SLocEntryCursor.ReadCode();
- Record.clear();
- unsigned RecCode = SLocEntryCursor.readRecord(Code, Record, &Blob);
-
- if (RecCode != SM_SLOC_BUFFER_BLOB) {
- Error("AST record has invalid code");
+ auto Buffer = ReadBuffer(SLocEntryCursor, File->getName());
+ if (!Buffer)
return true;
- }
-
- std::unique_ptr<llvm::MemoryBuffer> Buffer
- = llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), File->getName());
SourceMgr.overrideFileContents(File, std::move(Buffer));
}
@@ -1288,18 +1316,10 @@ bool ASTReader::ReadSLocEntry(int ID) {
(F->Kind == MK_ImplicitModule || F->Kind == MK_ExplicitModule)) {
IncludeLoc = getImportLocation(F);
}
- unsigned Code = SLocEntryCursor.ReadCode();
- Record.clear();
- unsigned RecCode
- = SLocEntryCursor.readRecord(Code, Record, &Blob);
- if (RecCode != SM_SLOC_BUFFER_BLOB) {
- Error("AST record has invalid code");
+ auto Buffer = ReadBuffer(SLocEntryCursor, Name);
+ if (!Buffer)
return true;
- }
-
- std::unique_ptr<llvm::MemoryBuffer> Buffer =
- llvm::MemoryBuffer::getMemBuffer(Blob.drop_back(1), Name);
SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
BaseOffset + Offset, IncludeLoc);
break;
@@ -1660,9 +1680,12 @@ void ASTReader::ReadDefinedMacros() {
break;
case PP_MACRO_OBJECT_LIKE:
- case PP_MACRO_FUNCTION_LIKE:
- getLocalIdentifier(*I, Record[0]);
+ case PP_MACRO_FUNCTION_LIKE: {
+ IdentifierInfo *II = getLocalIdentifier(*I, Record[0]);
+ if (II->isOutOfDate())
+ updateOutOfDateIdentifier(*II);
break;
+ }
case PP_TOKEN:
// Ignore tokens.
@@ -1987,17 +2010,8 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
// For an overridden file, there is nothing to validate.
if (!Overridden && //
(StoredSize != File->getSize() ||
-#if defined(LLVM_ON_WIN32)
- false
-#else
- // In our regression testing, the Windows file system seems to
- // have inconsistent modification times that sometimes
- // erroneously trigger this error-handling path.
- //
- // FIXME: This probably also breaks HeaderFileInfo lookups on Windows.
(StoredTime && StoredTime != File->getModificationTime() &&
!DisableValidation)
-#endif
)) {
if (Complain) {
// Build a list of the PCH imports that got us here (in reverse).
@@ -2254,9 +2268,10 @@ ASTReader::ReadControlBlock(ModuleFile &F,
(AllowConfigurationMismatch && Result == ConfigurationMismatch))
Result = Success;
- // If we've diagnosed a problem, we're done.
- if (Result != Success &&
- isDiagnosedResult(Result, ClientLoadCapabilities))
+ // If we can't load the module, exit early since we likely
+ // will rebuild the module anyway. The stream may be in the
+ // middle of a block.
+ if (Result != Success)
return Result;
} else if (Stream.SkipBlock()) {
Error("malformed block record in AST file");
@@ -2294,6 +2309,11 @@ ASTReader::ReadControlBlock(ModuleFile &F,
Diag(diag::err_pch_with_compiler_errors);
return HadErrors;
}
+ if (hasErrors) {
+ Diags.ErrorOccurred = true;
+ Diags.UncompilableErrorOccurred = true;
+ Diags.UnrecoverableErrorOccurred = true;
+ }
F.RelocatablePCH = Record[4];
// Relative paths in a relocatable PCH are relative to our sysroot.
@@ -2325,9 +2345,9 @@ ASTReader::ReadControlBlock(ModuleFile &F,
ModuleKind ImportedKind = (ModuleKind)Record[Idx++];
// The import location will be the local one for now; we will adjust
// all import locations of module imports after the global source
- // location info are setup.
+ // location info is set up, in ReadAST.
SourceLocation ImportLoc =
- SourceLocation::getFromRawEncoding(Record[Idx++]);
+ ReadUntranslatedSourceLocation(Record[Idx++]);
off_t StoredSize = (off_t)Record[Idx++];
time_t StoredModTime = (time_t)Record[Idx++];
ASTFileSignature StoredSignature = Record[Idx++];
@@ -3017,17 +3037,6 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
}
- case DECL_REPLACEMENTS: {
- if (Record.size() % 3 != 0) {
- Error("invalid DECL_REPLACEMENTS block in AST file");
- return Failure;
- }
- for (unsigned I = 0, N = Record.size(); I != N; I += 3)
- ReplacedDecls[getGlobalDeclID(F, Record[I])]
- = ReplacedDeclInfo(&F, Record[I+1], Record[I+2]);
- break;
- }
-
case OBJC_CATEGORIES_MAP: {
if (F.LocalNumObjCCategoriesInMap != 0) {
Error("duplicate OBJC_CATEGORIES_MAP record in AST file");
@@ -3043,28 +3052,6 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
F.ObjCCategories.swap(Record);
break;
- case CXX_BASE_SPECIFIER_OFFSETS: {
- if (F.LocalNumCXXBaseSpecifiers != 0) {
- Error("duplicate CXX_BASE_SPECIFIER_OFFSETS record in AST file");
- return Failure;
- }
-
- F.LocalNumCXXBaseSpecifiers = Record[0];
- F.CXXBaseSpecifiersOffsets = (const uint32_t *)Blob.data();
- break;
- }
-
- case CXX_CTOR_INITIALIZERS_OFFSETS: {
- if (F.LocalNumCXXCtorInitializers != 0) {
- Error("duplicate CXX_CTOR_INITIALIZERS_OFFSETS record in AST file");
- return Failure;
- }
-
- F.LocalNumCXXCtorInitializers = Record[0];
- F.CXXCtorInitializersOffsets = (const uint32_t *)Blob.data();
- break;
- }
-
case DIAG_PRAGMA_MAPPINGS:
if (F.PragmaDiagMappings.empty())
F.PragmaDiagMappings.swap(Record);
@@ -3201,6 +3188,23 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]);
break;
+ case MSSTRUCT_PRAGMA_OPTIONS:
+ if (Record.size() != 1) {
+ Error("invalid pragma ms_struct record");
+ return Failure;
+ }
+ PragmaMSStructState = Record[0];
+ break;
+
+ case POINTERS_TO_MEMBERS_PRAGMA_OPTIONS:
+ if (Record.size() != 2) {
+ Error("invalid pragma ms_struct record");
+ return Failure;
+ }
+ PragmaMSPointersToMembersState = Record[0];
+ PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]);
+ break;
+
case UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES:
for (unsigned I = 0, N = Record.size(); I != N; ++I)
UnusedLocalTypedefNameCandidates.push_back(
@@ -3467,7 +3471,7 @@ static bool SkipCursorToBlock(BitstreamCursor &Cursor, unsigned BlockID) {
}
}
-ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
+ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
ModuleKind Type,
SourceLocation ImportLoc,
unsigned ClientLoadCapabilities) {
@@ -3560,12 +3564,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
// Mark this identifier as being from an AST file so that we can track
// whether we need to serialize it.
- if (!II.isFromAST()) {
- II.setIsFromAST();
- bool IsModule = PP.getCurrentModule() != nullptr;
- if (isInterestingIdentifier(*this, II, IsModule))
- II.setChangedSinceDeserialization();
- }
+ markIdentifierFromAST(*this, II);
// Associate the ID with the identifier so that the writer can reuse it.
auto ID = Trait.ReadIdentifierID(Data + KeyDataLen.first);
@@ -3584,11 +3583,12 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
// Set the import location.
F.DirectImportLoc = ImportLoc;
+ // FIXME: We assume that locations from PCH / preamble do not need
+ // any translation.
if (!M->ImportedBy)
F.ImportLoc = M->ImportLoc;
else
- F.ImportLoc = ReadSourceLocation(*M->ImportedBy,
- M->ImportLoc.getRawEncoding());
+ F.ImportLoc = TranslateSourceLocation(*M->ImportedBy, M->ImportLoc);
}
if (!Context.getLangOpts().CPlusPlus ||
@@ -3605,6 +3605,9 @@ ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
Id != IdEnd; ++Id)
Id->second->setOutOfDate(true);
}
+ // Mark selectors as out of date.
+ for (auto Sel : SelectorGeneration)
+ SelectorOutOfDate[Sel.first] = true;
// Resolve any unresolved module exports.
for (unsigned I = 0, N = UnresolvedModuleRefs.size(); I != N; ++I) {
@@ -4052,7 +4055,9 @@ void ASTReader::InitializeContext() {
if (Module *Imported = getSubmodule(Import.ID)) {
makeModuleVisible(Imported, Module::AllVisible,
/*ImportLoc=*/Import.ImportLoc);
- PP.makeModuleVisible(Imported, Import.ImportLoc);
+ if (Import.ImportLoc.isValid())
+ PP.makeModuleVisible(Imported, Import.ImportLoc);
+ // FIXME: should we tell Sema to make the module visible too?
}
}
ImportedModules.clear();
@@ -4521,14 +4526,25 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
SubmodulesLoaded[GlobalIndex] = CurrentModule;
- // Clear out data that will be replaced by what is the module file.
+ // Clear out data that will be replaced by what is in the module file.
CurrentModule->LinkLibraries.clear();
CurrentModule->ConfigMacros.clear();
CurrentModule->UnresolvedConflicts.clear();
CurrentModule->Conflicts.clear();
+
+ // The module is available unless it's missing a requirement; relevant
+ // requirements will be (re-)added by SUBMODULE_REQUIRES records.
+ // Missing headers that were present when the module was built do not
+ // make it unavailable -- if we got this far, this must be an explicitly
+ // imported module file.
+ CurrentModule->Requirements.clear();
+ CurrentModule->MissingHeaders.clear();
+ CurrentModule->IsMissingRequirement =
+ ParentModule && ParentModule->IsMissingRequirement;
+ CurrentModule->IsAvailable = !CurrentModule->IsMissingRequirement;
break;
}
-
+
case SUBMODULE_UMBRELLA_HEADER: {
std::string Filename = Blob;
ResolveImportedPath(F, Filename);
@@ -4878,8 +4894,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) {
return nullptr;
// Read the record.
- SourceRange Range(ReadSourceLocation(M, PPOffs.Begin),
- ReadSourceLocation(M, PPOffs.End));
+ SourceRange Range(TranslateSourceLocation(M, PPOffs.getBegin()),
+ TranslateSourceLocation(M, PPOffs.getEnd()));
PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
StringRef Blob;
RecordData Record;
@@ -4963,7 +4979,6 @@ PreprocessedEntityID ASTReader::findNextPreprocessedEntity(
namespace {
-template <unsigned PPEntityOffset::*PPLoc>
struct PPEntityComp {
const ASTReader &Reader;
ModuleFile &M;
@@ -4987,7 +5002,7 @@ struct PPEntityComp {
}
SourceLocation getLoc(const PPEntityOffset &PPE) const {
- return Reader.ReadSourceLocation(M, PPE.*PPLoc);
+ return Reader.TranslateSourceLocation(M, PPE.getBegin());
}
};
@@ -5018,7 +5033,7 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
if (EndsAfter) {
PPI = std::upper_bound(pp_begin, pp_end, Loc,
- PPEntityComp<&PPEntityOffset::Begin>(*this, M));
+ PPEntityComp(*this, M));
} else {
// Do a binary search manually instead of using std::lower_bound because
// The end locations of entities may be unordered (when a macro expansion
@@ -5028,8 +5043,8 @@ PreprocessedEntityID ASTReader::findPreprocessedEntity(SourceLocation Loc,
Half = Count / 2;
PPI = First;
std::advance(PPI, Half);
- if (SourceMgr.isBeforeInTranslationUnit(ReadSourceLocation(M, PPI->End),
- Loc)) {
+ if (SourceMgr.isBeforeInTranslationUnit(
+ TranslateSourceLocation(M, PPI->getEnd()), Loc)) {
First = PPI;
++First;
Count = Count - Half - 1;
@@ -5070,7 +5085,7 @@ Optional<bool> ASTReader::isPreprocessedEntityInFileID(unsigned Index,
unsigned LocalIndex = PPInfo.second;
const PPEntityOffset &PPOffs = M.PreprocessedEntityOffsets[LocalIndex];
- SourceLocation Loc = ReadSourceLocation(M, PPOffs.Begin);
+ SourceLocation Loc = TranslateSourceLocation(M, PPOffs.getBegin());
if (Loc.isInvalid())
return false;
@@ -5374,6 +5389,17 @@ QualType ASTReader::readTypeRecord(unsigned Index) {
for (unsigned I = 0; I != NumParams; ++I)
ParamTypes.push_back(readType(*Loc.F, Record, Idx));
+ SmallVector<FunctionProtoType::ExtParameterInfo, 4> ExtParameterInfos;
+ if (Idx != Record.size()) {
+ for (unsigned I = 0; I != NumParams; ++I)
+ ExtParameterInfos.push_back(
+ FunctionProtoType::ExtParameterInfo
+ ::getFromOpaqueValue(Record[Idx++]));
+ EPI.ExtParameterInfos = ExtParameterInfos.data();
+ }
+
+ assert(Idx == Record.size());
+
return Context.getFunctionType(ResultType, ParamTypes, EPI);
}
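// Illustrative only: the ExtParameterInfo values are trailing opaque
// integers written only when at least one parameter carries extra info, so
// the 'Idx != Record.size()' test above doubles as the presence check for
// records that omit them.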
@@ -5594,7 +5620,7 @@ QualType ASTReader::readTypeRecord(unsigned Index) {
while (NumArgs--)
Args.push_back(ReadTemplateArgument(*Loc.F, Record, Idx));
return Context.getDependentTemplateSpecializationType(Keyword, NNS, Name,
- Args.size(), Args.data());
+ Args);
}
case TYPE_DEPENDENT_SIZED_ARRAY: {
@@ -5623,11 +5649,9 @@ QualType ASTReader::readTypeRecord(unsigned Index) {
QualType Underlying = readType(*Loc.F, Record, Idx);
QualType T;
if (Underlying.isNull())
- T = Context.getCanonicalTemplateSpecializationType(Name, Args.data(),
- Args.size());
+ T = Context.getCanonicalTemplateSpecializationType(Name, Args);
else
- T = Context.getTemplateSpecializationType(Name, Args.data(),
- Args.size(), Underlying);
+ T = Context.getTemplateSpecializationType(Name, Args, Underlying);
const_cast<Type*>(T.getTypePtr())->setDependent(IsDependent);
return T;
}
@@ -6013,6 +6037,9 @@ QualType ASTReader::GetType(TypeID ID) {
case PREDEF_TYPE_LONGDOUBLE_ID:
T = Context.LongDoubleTy;
break;
+ case PREDEF_TYPE_FLOAT128_ID:
+ T = Context.Float128Ty;
+ break;
case PREDEF_TYPE_OVERLOAD_ID:
T = Context.OverloadTy;
break;
@@ -6046,42 +6073,11 @@ QualType ASTReader::GetType(TypeID ID) {
case PREDEF_TYPE_OBJC_SEL:
T = Context.ObjCBuiltinSelTy;
break;
- case PREDEF_TYPE_IMAGE1D_ID:
- T = Context.OCLImage1dTy;
- break;
- case PREDEF_TYPE_IMAGE1D_ARR_ID:
- T = Context.OCLImage1dArrayTy;
- break;
- case PREDEF_TYPE_IMAGE1D_BUFF_ID:
- T = Context.OCLImage1dBufferTy;
- break;
- case PREDEF_TYPE_IMAGE2D_ID:
- T = Context.OCLImage2dTy;
- break;
- case PREDEF_TYPE_IMAGE2D_ARR_ID:
- T = Context.OCLImage2dArrayTy;
- break;
- case PREDEF_TYPE_IMAGE2D_DEP_ID:
- T = Context.OCLImage2dDepthTy;
- break;
- case PREDEF_TYPE_IMAGE2D_ARR_DEP_ID:
- T = Context.OCLImage2dArrayDepthTy;
- break;
- case PREDEF_TYPE_IMAGE2D_MSAA_ID:
- T = Context.OCLImage2dMSAATy;
- break;
- case PREDEF_TYPE_IMAGE2D_ARR_MSAA_ID:
- T = Context.OCLImage2dArrayMSAATy;
- break;
- case PREDEF_TYPE_IMAGE2D_MSAA_DEP_ID:
- T = Context.OCLImage2dMSAADepthTy;
- break;
- case PREDEF_TYPE_IMAGE2D_ARR_MSAA_DEPTH_ID:
- T = Context.OCLImage2dArrayMSAADepthTy;
- break;
- case PREDEF_TYPE_IMAGE3D_ID:
- T = Context.OCLImage3dTy;
+#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
+ case PREDEF_TYPE_##Id##_ID: \
+ T = Context.SingletonId; \
break;
+#include "clang/Basic/OpenCLImageTypes.def"
case PREDEF_TYPE_SAMPLER_ID:
T = Context.OCLSamplerTy;
break;
@@ -6285,18 +6281,6 @@ void ASTReader::CompleteRedeclChain(const Decl *D) {
}
}
-uint64_t ASTReader::ReadCXXCtorInitializersRef(ModuleFile &M,
- const RecordData &Record,
- unsigned &Idx) {
- if (Idx >= Record.size() || Record[Idx] > M.LocalNumCXXCtorInitializers) {
- Error("malformed AST file: missing C++ ctor initializers");
- return 0;
- }
-
- unsigned LocalID = Record[Idx++];
- return getGlobalBitOffset(M, M.CXXCtorInitializersOffsets[LocalID - 1]);
-}
-
CXXCtorInitializer **
ASTReader::GetExternalCXXCtorInitializers(uint64_t Offset) {
RecordLocation Loc = getLocalBitOffset(Offset);
@@ -6317,18 +6301,6 @@ ASTReader::GetExternalCXXCtorInitializers(uint64_t Offset) {
return ReadCXXCtorInitializers(*Loc.F, Record, Idx);
}
-uint64_t ASTReader::readCXXBaseSpecifiers(ModuleFile &M,
- const RecordData &Record,
- unsigned &Idx) {
- if (Idx >= Record.size() || Record[Idx] > M.LocalNumCXXBaseSpecifiers) {
- Error("malformed AST file: missing C++ base specifier");
- return 0;
- }
-
- unsigned LocalID = Record[Idx++];
- return getGlobalBitOffset(M, M.CXXBaseSpecifiersOffsets[LocalID - 1]);
-}
-
CXXBaseSpecifier *ASTReader::GetExternalCXXBaseSpecifiers(uint64_t Offset) {
RecordLocation Loc = getLocalBitOffset(Offset);
BitstreamCursor &Cursor = Loc.F->DeclsCursor;
@@ -6396,9 +6368,9 @@ SourceLocation ASTReader::getSourceLocationForDeclID(GlobalDeclID ID) {
if (Decl *D = DeclsLoaded[Index])
return D->getLocation();
- unsigned RawLocation = 0;
- RecordLocation Rec = DeclCursorForID(ID, RawLocation);
- return ReadSourceLocation(*Rec.F, RawLocation);
+ SourceLocation Loc;
+ DeclCursorForID(ID, Loc);
+ return Loc;
}
static Decl *getPredefinedDecl(ASTContext &Context, PredefinedDeclIDs ID) {
@@ -6444,6 +6416,15 @@ static Decl *getPredefinedDecl(ASTContext &Context, PredefinedDeclIDs ID) {
case PREDEF_DECL_MAKE_INTEGER_SEQ_ID:
return Context.getMakeIntegerSeqDecl();
+
+ case PREDEF_DECL_CF_CONSTANT_STRING_ID:
+ return Context.getCFConstantStringDecl();
+
+ case PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID:
+ return Context.getCFConstantStringTagDecl();
+
+ case PREDEF_DECL_TYPE_PACK_ELEMENT_ID:
+ return Context.getTypePackElementDecl();
}
llvm_unreachable("PredefinedDeclIDs unknown enum value");
}
@@ -6883,7 +6864,7 @@ dumpModuleIDMap(StringRef Name,
}
}
-void ASTReader::dump() {
+LLVM_DUMP_METHOD void ASTReader::dump() {
llvm::errs() << "*** PCH/ModuleFile Remappings:\n";
dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap);
dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap);
@@ -6968,10 +6949,18 @@ void ASTReader::UpdateSema() {
SemaDeclRefs.clear();
}
- // Update the state of 'pragma clang optimize'. Use the same API as if we had
- // encountered the pragma in the source.
+ // Update the state of pragmas. Use the same API as if we had encountered the
+ // pragma in the source.
if(OptimizeOffPragmaLocation.isValid())
SemaObj->ActOnPragmaOptimize(/* IsOn = */ false, OptimizeOffPragmaLocation);
+ if (PragmaMSStructState != -1)
+ SemaObj->ActOnPragmaMSStruct((PragmaMSStructKind)PragmaMSStructState);
+ if (PointersToMembersPragmaLocation.isValid()) {
+ SemaObj->ActOnPragmaMSPointersToMembers(
+ (LangOptions::PragmaMSPointersToMembersKind)
+ PragmaMSPointersToMembersState,
+ PointersToMembersPragmaLocation);
+ }
}
IdentifierInfo *ASTReader::get(StringRef Name) {
@@ -7028,19 +7017,20 @@ namespace clang {
/// the current AST file.
ASTIdentifierLookupTable::key_iterator End;
+ /// \brief Whether to skip any modules in the ASTReader.
+ bool SkipModules;
+
public:
- explicit ASTIdentifierIterator(const ASTReader &Reader);
+ explicit ASTIdentifierIterator(const ASTReader &Reader,
+ bool SkipModules = false);
StringRef Next() override;
};
}
-ASTIdentifierIterator::ASTIdentifierIterator(const ASTReader &Reader)
- : Reader(Reader), Index(Reader.ModuleMgr.size() - 1) {
- ASTIdentifierLookupTable *IdTable
- = (ASTIdentifierLookupTable *)Reader.ModuleMgr[Index].IdentifierLookupTable;
- Current = IdTable->key_begin();
- End = IdTable->key_end();
+ASTIdentifierIterator::ASTIdentifierIterator(const ASTReader &Reader,
+ bool SkipModules)
+ : Reader(Reader), Index(Reader.ModuleMgr.size()), SkipModules(SkipModules) {
}
StringRef ASTIdentifierIterator::Next() {
@@ -7050,9 +7040,12 @@ StringRef ASTIdentifierIterator::Next() {
return StringRef();
--Index;
- ASTIdentifierLookupTable *IdTable
- = (ASTIdentifierLookupTable *)Reader.ModuleMgr[Index].
- IdentifierLookupTable;
+ ModuleFile &F = Reader.ModuleMgr[Index];
+ if (SkipModules && F.isModule())
+ continue;
+
+ ASTIdentifierLookupTable *IdTable =
+ (ASTIdentifierLookupTable *)F.IdentifierLookupTable;
Current = IdTable->key_begin();
End = IdTable->key_end();
}
@@ -7064,9 +7057,42 @@ StringRef ASTIdentifierIterator::Next() {
return Result;
}
+namespace {
+/// A utility for appending two IdentifierIterators.
+class ChainedIdentifierIterator : public IdentifierIterator {
+ std::unique_ptr<IdentifierIterator> Current;
+ std::unique_ptr<IdentifierIterator> Queued;
+
+public:
+ ChainedIdentifierIterator(std::unique_ptr<IdentifierIterator> First,
+ std::unique_ptr<IdentifierIterator> Second)
+ : Current(std::move(First)), Queued(std::move(Second)) {}
+
+ StringRef Next() override {
+ if (!Current)
+ return StringRef();
+
+ StringRef result = Current->Next();
+ if (!result.empty())
+ return result;
+
+ // Try the queued iterator, which may itself be empty.
+ Current.reset();
+ std::swap(Current, Queued);
+ return Next();
+ }
+};
+} // end anonymous namespace.
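// Illustrative only: a chained iterator drains its first sub-iterator and
// then falls through to the second:
//
//   std::unique_ptr<IdentifierIterator> A = ..., B = ...;
//   ChainedIdentifierIterator It(std::move(A), std::move(B));
//   for (StringRef S = It.Next(); !S.empty(); S = It.Next())
//     use(S); // yields all of A's identifiers, then all of B's
//
// (use is a placeholder.) getIdentifiers() below relies on this to splice
// the reader's non-module identifiers in front of the global index.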
+
IdentifierIterator *ASTReader::getIdentifiers() {
- if (!loadGlobalIndex())
- return GlobalIndex->createIdentifierIterator();
+ if (!loadGlobalIndex()) {
+ std::unique_ptr<IdentifierIterator> ReaderIter(
+ new ASTIdentifierIterator(*this, /*SkipModules=*/true));
+ std::unique_ptr<IdentifierIterator> ModulesIter(
+ GlobalIndex->createIdentifierIterator());
+ return new ChainedIdentifierIterator(std::move(ReaderIter),
+ std::move(ModulesIter));
+ }
return new ASTIdentifierIterator(*this);
}
@@ -7156,6 +7182,7 @@ void ASTReader::ReadMethodPool(Selector Sel) {
unsigned &Generation = SelectorGeneration[Sel];
unsigned PriorGeneration = Generation;
Generation = getGeneration();
+ SelectorOutOfDate[Sel] = false;
// Search for methods defined with this selector.
++NumMethodPoolLookups;
@@ -7187,6 +7214,11 @@ void ASTReader::ReadMethodPool(Selector Sel) {
addMethodsToPool(S, Visitor.getFactoryMethods(), Pos->second.second);
}
+void ASTReader::updateOutOfDateSelector(Selector Sel) {
+ if (SelectorOutOfDate[Sel])
+ ReadMethodPool(Sel);
+}
+
void ASTReader::ReadKnownNamespaces(
SmallVectorImpl<NamespaceDecl *> &Namespaces) {
Namespaces.clear();
@@ -7199,7 +7231,7 @@ void ASTReader::ReadKnownNamespaces(
}
void ASTReader::ReadUndefinedButUsed(
- llvm::DenseMap<NamedDecl*, SourceLocation> &Undefined) {
+ llvm::MapVector<NamedDecl *, SourceLocation> &Undefined) {
for (unsigned Idx = 0, N = UndefinedButUsed.size(); Idx != N;) {
NamedDecl *D = cast<NamedDecl>(GetDecl(UndefinedButUsed[Idx++]));
SourceLocation Loc =
@@ -7449,10 +7481,11 @@ IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) {
const unsigned char *StrLenPtr = (const unsigned char*) Str - 2;
unsigned StrLen = (((unsigned) StrLenPtr[0])
| (((unsigned) StrLenPtr[1]) << 8)) - 1;
- IdentifiersLoaded[ID]
- = &PP.getIdentifierTable().get(StringRef(Str, StrLen));
+ auto &II = PP.getIdentifierTable().get(StringRef(Str, StrLen));
+ IdentifiersLoaded[ID] = &II;
+ markIdentifierFromAST(*this, II);
if (DeserializationListener)
- DeserializationListener->IdentifierRead(ID + 1, IdentifiersLoaded[ID]);
+ DeserializationListener->IdentifierRead(ID + 1, &II);
}
return IdentifiersLoaded[ID];
@@ -7581,8 +7614,10 @@ ASTReader::getSourceDescriptor(unsigned ID) {
// Chained PCHs are not supported.
if (ModuleMgr.size() == 1) {
ModuleFile &MF = ModuleMgr.getPrimaryModule();
- return ASTReader::ASTSourceDescriptor(
- MF.OriginalSourceFileName, MF.OriginalDir, MF.FileName, MF.Signature);
+ StringRef ModuleName = llvm::sys::path::filename(MF.OriginalSourceFileName);
+ StringRef FileName = llvm::sys::path::filename(MF.FileName);
+ return ASTReader::ASTSourceDescriptor(ModuleName, MF.OriginalDir, FileName,
+ MF.Signature);
}
return None;
}
@@ -8602,6 +8637,7 @@ void ASTReader::FinishedDeserializing() {
auto Updates = std::move(PendingExceptionSpecUpdates);
PendingExceptionSpecUpdates.clear();
for (auto Update : Updates) {
+ ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
auto *FPT = Update.second->getType()->castAs<FunctionProtoType>();
auto ESI = FPT->getExtProtoInfo().ExceptionSpec;
if (auto *Listener = Context.getASTMutationListener())
@@ -8663,12 +8699,16 @@ ASTReader::ASTReader(
FileMgr(PP.getFileManager()), PCHContainerRdr(PCHContainerRdr),
Diags(PP.getDiagnostics()), SemaObj(nullptr), PP(PP), Context(Context),
Consumer(nullptr), ModuleMgr(PP.getFileManager(), PCHContainerRdr),
+ DummyIdResolver(PP),
ReadTimer(std::move(ReadTimer)),
+ PragmaMSStructState(-1),
+ PragmaMSPointersToMembersState(-1),
isysroot(isysroot), DisableValidation(DisableValidation),
AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
AllowConfigurationMismatch(AllowConfigurationMismatch),
ValidateSystemInputs(ValidateSystemInputs),
UseGlobalIndex(UseGlobalIndex), TriedLoadingGlobalIndex(false),
+ ProcessingUpdateRecords(false),
CurrSwitchCaseStmts(&SwitchCaseStmts), NumSLocEntriesRead(0),
TotalNumSLocEntries(0), NumStatementsRead(0), TotalNumStatements(0),
NumMacrosRead(0), TotalNumMacros(0), NumIdentifierLookups(0),
@@ -8699,3 +8739,7 @@ ASTReader::~ASTReader() {
if (OwnsDeserializationListener)
delete DeserializationListener;
}
+
+IdentifierResolver &ASTReader::getIdResolver() {
+ return SemaObj ? SemaObj->IdResolver : DummyIdResolver;
+}
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
index 5bf95f878d49..35da8f3ebcfe 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -23,7 +23,6 @@
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/Sema/IdentifierResolver.h"
-#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/Support/SaveAndRestore.h"
@@ -38,8 +37,9 @@ namespace clang {
class ASTDeclReader : public DeclVisitor<ASTDeclReader, void> {
ASTReader &Reader;
ModuleFile &F;
+ uint64_t Offset;
const DeclID ThisDeclID;
- const unsigned RawLocation;
+ const SourceLocation ThisDeclLoc;
typedef ASTReader::RecordData RecordData;
const RecordData &Record;
unsigned &Idx;
@@ -47,27 +47,47 @@ namespace clang {
unsigned AnonymousDeclNumber;
GlobalDeclID NamedDeclForTagDecl;
IdentifierInfo *TypedefNameForLinkage;
-
+
bool HasPendingBody;
+ ///\brief A flag recording whether the entity this decl names is used. We
+ /// use it to delay marking the canonical decl as used until the entire
+ /// declaration has been deserialized and merged.
+ bool IsDeclMarkedUsed;
+
uint64_t GetCurrentCursorOffset();
-
+
+ uint64_t ReadLocalOffset(const RecordData &R, unsigned &I) {
+ uint64_t LocalOffset = R[I++];
+ assert(LocalOffset < Offset && "offset points after current record");
+ return LocalOffset ? Offset - LocalOffset : 0;
+ }
+
+ uint64_t ReadGlobalOffset(ModuleFile &F, const RecordData &R, unsigned &I) {
+ uint64_t Local = ReadLocalOffset(R, I);
+ return Local ? Reader.getGlobalBitOffset(F, Local) : 0;
+ }
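// Illustrative only: offsets are stored relative to the start of the
// current record, so with Offset == 1000 a stored LocalOffset of 200
// denotes bit 800 in this module file (ReadGlobalOffset then maps that
// into the global bit-offset space), and a stored 0 means "no data".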
+
SourceLocation ReadSourceLocation(const RecordData &R, unsigned &I) {
return Reader.ReadSourceLocation(F, R, I);
}
-
+
SourceRange ReadSourceRange(const RecordData &R, unsigned &I) {
return Reader.ReadSourceRange(F, R, I);
}
-
+
TypeSourceInfo *GetTypeSourceInfo(const RecordData &R, unsigned &I) {
return Reader.GetTypeSourceInfo(F, R, I);
}
-
+
serialization::DeclID ReadDeclID(const RecordData &R, unsigned &I) {
return Reader.ReadDeclID(F, R, I);
}
+ std::string ReadString(const RecordData &R, unsigned &I) {
+ return Reader.ReadString(R, I);
+ }
+
void ReadDeclIDList(SmallVectorImpl<DeclID> &IDs) {
for (unsigned I = 0, Size = Record[Idx++]; I != Size; ++I)
IDs.push_back(ReadDeclID(Record, Idx));
@@ -195,12 +215,14 @@ namespace clang {
FindExistingResult findExisting(NamedDecl *D);
public:
- ASTDeclReader(ASTReader &Reader, ModuleFile &F, DeclID thisDeclID,
- unsigned RawLocation, const RecordData &Record, unsigned &Idx)
- : Reader(Reader), F(F), ThisDeclID(thisDeclID),
- RawLocation(RawLocation), Record(Record), Idx(Idx),
+ ASTDeclReader(ASTReader &Reader, ASTReader::RecordLocation Loc,
+ DeclID thisDeclID, SourceLocation ThisDeclLoc,
+ const RecordData &Record, unsigned &Idx)
+ : Reader(Reader), F(*Loc.F), Offset(Loc.Offset), ThisDeclID(thisDeclID),
+ ThisDeclLoc(ThisDeclLoc), Record(Record), Idx(Idx),
TypeIDForTypeDecl(0), NamedDeclForTagDecl(0),
- TypedefNameForLinkage(nullptr), HasPendingBody(false) {}
+ TypedefNameForLinkage(nullptr), HasPendingBody(false),
+ IsDeclMarkedUsed(false) {}
template <typename DeclT>
static Decl *getMostRecentDeclImpl(Redeclarable<DeclT> *D);
@@ -238,6 +260,8 @@ namespace clang {
}
void VisitDecl(Decl *D);
+ void VisitPragmaCommentDecl(PragmaCommentDecl *D);
+ void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D);
void VisitTranslationUnitDecl(TranslationUnitDecl *TU);
void VisitNamedDecl(NamedDecl *ND);
void VisitLabelDecl(LabelDecl *LD);
@@ -300,6 +324,7 @@ namespace clang {
void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
void VisitUsingDecl(UsingDecl *D);
void VisitUsingShadowDecl(UsingShadowDecl *D);
+ void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D);
void VisitLinkageSpecDecl(LinkageSpecDecl *D);
void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD);
void VisitImportDecl(ImportDecl *D);
@@ -350,6 +375,8 @@ namespace clang {
void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
+ void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D);
+ void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
/// We've merged the definition \p MergedDef into the existing definition
/// \p Def. Ensure that \p Def is made visible whenever \p MergedDef is made
@@ -423,6 +450,11 @@ uint64_t ASTDeclReader::GetCurrentCursorOffset() {
void ASTDeclReader::Visit(Decl *D) {
DeclVisitor<ASTDeclReader, void>::Visit(D);
+ // At this point we have deserialized and merged the decl and it is safe to
+ // update its canonical decl to signal that the entire entity is used.
+ D->getCanonicalDecl()->Used |= IsDeclMarkedUsed;
+ IsDeclMarkedUsed = false;
+
if (DeclaratorDecl *DD = dyn_cast<DeclaratorDecl>(D)) {
if (DD->DeclInfo) {
DeclaratorDecl::ExtInfo *Info =
@@ -456,8 +488,7 @@ void ASTDeclReader::Visit(Decl *D) {
if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
CD->NumCtorInitializers = Record[Idx++];
if (CD->NumCtorInitializers)
- CD->CtorInitializers =
- Reader.ReadCXXCtorInitializersRef(F, Record, Idx);
+ CD->CtorInitializers = ReadGlobalOffset(F, Record, Idx);
}
Reader.PendingBodies[FD] = GetCurrentCursorOffset();
HasPendingBody = true;
@@ -493,7 +524,7 @@ void ASTDeclReader::VisitDecl(Decl *D) {
D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC,
Reader.getContext());
}
- D->setLocation(Reader.ReadSourceLocation(F, RawLocation));
+ D->setLocation(ThisDeclLoc);
D->setInvalidDecl(Record[Idx++]);
if (Record[Idx++]) { // hasAttrs
AttrVec Attrs;
@@ -504,6 +535,7 @@ void ASTDeclReader::VisitDecl(Decl *D) {
}
D->setImplicit(Record[Idx++]);
D->Used = Record[Idx++];
+ IsDeclMarkedUsed |= D->Used;
D->setReferenced(Record[Idx++]);
D->setTopLevelDeclInObjCContainer(Record[Idx++]);
D->setAccess((AccessSpecifier)Record[Idx++]);
@@ -528,7 +560,7 @@ void ASTDeclReader::VisitDecl(Decl *D) {
if (Owner->NameVisibility != Module::AllVisible) {
// The owning module is not visible. Mark this declaration as hidden.
D->Hidden = true;
-
+
// Note that this declaration was hidden because its owning module is
// not yet visible.
Reader.HiddenNamesMap[Owner].push_back(D);
@@ -537,6 +569,29 @@ void ASTDeclReader::VisitDecl(Decl *D) {
}
}
+void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
+ VisitDecl(D);
+ D->setLocation(ReadSourceLocation(Record, Idx));
+ D->CommentKind = (PragmaMSCommentKind)Record[Idx++];
+ std::string Arg = ReadString(Record, Idx);
+ memcpy(D->getTrailingObjects<char>(), Arg.data(), Arg.size());
+ D->getTrailingObjects<char>()[Arg.size()] = '\0';
+}
+
+void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) {
+ VisitDecl(D);
+ D->setLocation(ReadSourceLocation(Record, Idx));
+ std::string Name = ReadString(Record, Idx);
+ memcpy(D->getTrailingObjects<char>(), Name.data(), Name.size());
+ D->getTrailingObjects<char>()[Name.size()] = '\0';
+
+ D->ValueStart = Name.size() + 1;
+ std::string Value = ReadString(Record, Idx);
+ memcpy(D->getTrailingObjects<char>() + D->ValueStart, Value.data(),
+ Value.size());
+ D->getTrailingObjects<char>()[D->ValueStart + Value.size()] = '\0';
+}
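// Illustrative only: the decl's trailing character buffer holds both
// strings back to back; for name "lib" and value "v2" it contains
//
//   'l' 'i' 'b' '\0' 'v' '2' '\0'
//
// with the value beginning at ValueStart == 4.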
+
void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) {
llvm_unreachable("Translation units are not serialized");
}
@@ -592,7 +647,7 @@ ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) {
TD->setEmbeddedInDeclarator(Record[Idx++]);
TD->setFreeStanding(Record[Idx++]);
TD->setCompleteDefinitionRequired(Record[Idx++]);
- TD->setRBraceLoc(ReadSourceLocation(Record, Idx));
+ TD->setBraceRange(ReadSourceRange(Record, Idx));
switch (Record[Idx++]) {
case 0:
@@ -771,7 +826,7 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
ASTContext &C = Reader.getContext();
TemplateArgumentList *TemplArgList
- = TemplateArgumentList::CreateCopy(C, TemplArgs.data(), TemplArgs.size());
+ = TemplateArgumentList::CreateCopy(C, TemplArgs);
TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc);
for (unsigned i=0, e = TemplArgLocs.size(); i != e; ++i)
TemplArgsInfo.addArgument(TemplArgLocs[i]);
@@ -1098,7 +1153,7 @@ void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
D->setHasDestructors(Record[Idx++]);
D->NumIvarInitializers = Record[Idx++];
if (D->NumIvarInitializers)
- D->IvarInitializers = Reader.ReadCXXCtorInitializersRef(F, Record, Idx);
+ D->IvarInitializers = ReadGlobalOffset(F, Record, Idx);
}
void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
@@ -1163,6 +1218,8 @@ ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) {
VD->NonParmVarDeclBits.NRVOVariable = Record[Idx++];
VD->NonParmVarDeclBits.CXXForRangeDecl = Record[Idx++];
VD->NonParmVarDeclBits.ARCPseudoStrong = Record[Idx++];
+ VD->NonParmVarDeclBits.IsInline = Record[Idx++];
+ VD->NonParmVarDeclBits.IsInlineSpecified = Record[Idx++];
VD->NonParmVarDeclBits.IsConstexpr = Record[Idx++];
VD->NonParmVarDeclBits.IsInitCapture = Record[Idx++];
VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record[Idx++];
@@ -1365,6 +1422,16 @@ void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) {
mergeRedeclarable(D, Redecl);
}
+void ASTDeclReader::VisitConstructorUsingShadowDecl(
+ ConstructorUsingShadowDecl *D) {
+ VisitUsingShadowDecl(D);
+ D->NominatedBaseClassShadowDecl =
+ ReadDeclAs<ConstructorUsingShadowDecl>(Record, Idx);
+ D->ConstructedBaseClassShadowDecl =
+ ReadDeclAs<ConstructorUsingShadowDecl>(Record, Idx);
+ D->IsVirtual = Record[Idx++];
+}
+
void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
VisitNamedDecl(D);
D->UsingLoc = ReadSourceLocation(Record, Idx);
@@ -1411,6 +1478,9 @@ void ASTDeclReader::ReadCXXDefinitionData(
Data.HasOnlyCMembers = Record[Idx++];
Data.HasInClassInitializer = Record[Idx++];
Data.HasUninitializedReferenceMember = Record[Idx++];
+ Data.HasUninitializedFields = Record[Idx++];
+ Data.HasInheritedConstructor = Record[Idx++];
+ Data.HasInheritedAssignment = Record[Idx++];
Data.NeedOverloadResolutionForMoveConstructor = Record[Idx++];
Data.NeedOverloadResolutionForMoveAssignment = Record[Idx++];
Data.NeedOverloadResolutionForDestructor = Record[Idx++];
@@ -1421,6 +1491,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
Data.DeclaredNonTrivialSpecialMembers = Record[Idx++];
Data.HasIrrelevantDestructor = Record[Idx++];
Data.HasConstexprNonCopyMoveConstructor = Record[Idx++];
+ Data.HasDefaultedDefaultConstructor = Record[Idx++];
Data.DefaultedDefaultConstructorIsConstexpr = Record[Idx++];
Data.HasConstexprDefaultConstructor = Record[Idx++];
Data.HasNonLiteralTypeFieldsOrBases = Record[Idx++];
@@ -1434,10 +1505,10 @@ void ASTDeclReader::ReadCXXDefinitionData(
Data.NumBases = Record[Idx++];
if (Data.NumBases)
- Data.Bases = Reader.readCXXBaseSpecifiers(F, Record, Idx);
+ Data.Bases = ReadGlobalOffset(F, Record, Idx);
Data.NumVBases = Record[Idx++];
if (Data.NumVBases)
- Data.VBases = Reader.readCXXBaseSpecifiers(F, Record, Idx);
+ Data.VBases = ReadGlobalOffset(F, Record, Idx);
Reader.ReadUnresolvedSet(F, Data.Conversions, Record, Idx);
Reader.ReadUnresolvedSet(F, Data.VisibleConversions, Record, Idx);
@@ -1464,6 +1535,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
bool IsImplicit = Record[Idx++];
LambdaCaptureKind Kind = static_cast<LambdaCaptureKind>(Record[Idx++]);
switch (Kind) {
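+ // C++1z '*this' captures store no capture variable, just like 'this'
+ // captures.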
+ case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
*ToCapture++ = Capture(Loc, IsImplicit, Kind, nullptr, SourceLocation());
@@ -1481,9 +1553,9 @@ void ASTDeclReader::ReadCXXDefinitionData(
void ASTDeclReader::MergeDefinitionData(
CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) {
- assert(D->DefinitionData.getNotUpdated() &&
+ assert(D->DefinitionData &&
"merging class definition into non-definition");
- auto &DD = *D->DefinitionData.getNotUpdated();
+ auto &DD = *D->DefinitionData;
if (DD.Definition != MergeDD.Definition) {
// Track that we merged the definitions.
@@ -1535,6 +1607,9 @@ void ASTDeclReader::MergeDefinitionData(
MATCH_FIELD(HasOnlyCMembers)
MATCH_FIELD(HasInClassInitializer)
MATCH_FIELD(HasUninitializedReferenceMember)
+ MATCH_FIELD(HasUninitializedFields)
+ MATCH_FIELD(HasInheritedConstructor)
+ MATCH_FIELD(HasInheritedAssignment)
MATCH_FIELD(NeedOverloadResolutionForMoveConstructor)
MATCH_FIELD(NeedOverloadResolutionForMoveAssignment)
MATCH_FIELD(NeedOverloadResolutionForDestructor)
@@ -1545,6 +1620,7 @@ void ASTDeclReader::MergeDefinitionData(
OR_FIELD(DeclaredNonTrivialSpecialMembers)
MATCH_FIELD(HasIrrelevantDestructor)
OR_FIELD(HasConstexprNonCopyMoveConstructor)
+ OR_FIELD(HasDefaultedDefaultConstructor)
MATCH_FIELD(DefaultedDefaultConstructorIsConstexpr)
OR_FIELD(HasConstexprDefaultConstructor)
MATCH_FIELD(HasNonLiteralTypeFieldsOrBases)
@@ -1602,7 +1678,7 @@ void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update) {
// because we're reading an update record, or because we've already done some
// merging. Either way, just merge into it.
CXXRecordDecl *Canon = D->getCanonicalDecl();
- if (Canon->DefinitionData.getNotUpdated()) {
+ if (Canon->DefinitionData) {
MergeDefinitionData(Canon, std::move(*DD));
D->DefinitionData = Canon->DefinitionData;
return;
@@ -1703,11 +1779,17 @@ void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) {
}
void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
+ // We need the inherited constructor information to merge the declaration,
+ // so we have to read it before we call VisitCXXMethodDecl.
+ if (D->isInheritingConstructor()) {
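+ // The InheritedConstructor is stored in a trailing object, allocated only
+ // for declarations deserialized via DECL_CXX_INHERITED_CONSTRUCTOR.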
+ auto *Shadow = ReadDeclAs<ConstructorUsingShadowDecl>(Record, Idx);
+ auto *Ctor = ReadDeclAs<CXXConstructorDecl>(Record, Idx);
+ *D->getTrailingObjects<InheritedConstructor>() =
+ InheritedConstructor(Shadow, Ctor);
+ }
+
VisitCXXMethodDecl(D);
- if (auto *CD = ReadDeclAs<CXXConstructorDecl>(Record, Idx))
- if (D->isCanonicalDecl())
- D->setInheritedConstructor(CD->getCanonicalDecl());
D->IsExplicitSpecified = Record[Idx++];
}
@@ -1898,8 +1980,7 @@ ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
SmallVector<TemplateArgument, 8> TemplArgs;
Reader.ReadTemplateArgumentList(TemplArgs, F, Record, Idx);
TemplateArgumentList *ArgList
- = TemplateArgumentList::CreateCopy(C, TemplArgs.data(),
- TemplArgs.size());
+ = TemplateArgumentList::CreateCopy(C, TemplArgs);
ClassTemplateSpecializationDecl::SpecializedPartialSpecialization *PS
= new (C) ClassTemplateSpecializationDecl::
SpecializedPartialSpecialization();
@@ -1913,8 +1994,7 @@ ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
SmallVector<TemplateArgument, 8> TemplArgs;
Reader.ReadTemplateArgumentList(TemplArgs, F, Record, Idx,
/*Canonicalize*/ true);
- D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs.data(),
- TemplArgs.size());
+ D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = ReadSourceLocation(Record, Idx);
D->SpecializationKind = (TemplateSpecializationKind)Record[Idx++];
@@ -1938,8 +2018,8 @@ ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
// This declaration might be a definition. Merge with any existing
// definition.
- if (auto *DDD = D->DefinitionData.getNotUpdated()) {
- if (CanonSpec->DefinitionData.getNotUpdated())
+ if (auto *DDD = D->DefinitionData) {
+ if (CanonSpec->DefinitionData)
MergeDefinitionData(CanonSpec, std::move(*DDD));
else
CanonSpec->DefinitionData = D->DefinitionData;
@@ -2017,7 +2097,7 @@ ASTDeclReader::VisitVarTemplateSpecializationDeclImpl(
SmallVector<TemplateArgument, 8> TemplArgs;
Reader.ReadTemplateArgumentList(TemplArgs, F, Record, Idx);
TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(
- C, TemplArgs.data(), TemplArgs.size());
+ C, TemplArgs);
VarTemplateSpecializationDecl::SpecializedPartialSpecialization *PS =
new (C)
VarTemplateSpecializationDecl::SpecializedPartialSpecialization();
@@ -2041,8 +2121,7 @@ ASTDeclReader::VisitVarTemplateSpecializationDeclImpl(
SmallVector<TemplateArgument, 8> TemplArgs;
Reader.ReadTemplateArgumentList(TemplArgs, F, Record, Idx,
/*Canonicalize*/ true);
- D->TemplateArgs =
- TemplateArgumentList::CreateCopy(C, TemplArgs.data(), TemplArgs.size());
+ D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = ReadSourceLocation(Record, Idx);
D->SpecializationKind = (TemplateSpecializationKind)Record[Idx++];
@@ -2141,7 +2220,7 @@ void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) {
VisitDecl(D);
D->AssertExprAndFailed.setPointer(Reader.ReadExpr(F));
D->AssertExprAndFailed.setInt(Record[Idx++]);
- D->Message = cast<StringLiteral>(Reader.ReadExpr(F));
+ D->Message = cast_or_null<StringLiteral>(Reader.ReadExpr(F));
D->RParenLoc = ReadSourceLocation(Record, Idx);
}
@@ -2151,8 +2230,8 @@ void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) {
std::pair<uint64_t, uint64_t>
ASTDeclReader::VisitDeclContext(DeclContext *DC) {
- uint64_t LexicalOffset = Record[Idx++];
- uint64_t VisibleOffset = Record[Idx++];
+ uint64_t LexicalOffset = ReadLocalOffset(Record, Idx);
+ uint64_t VisibleOffset = ReadLocalOffset(Record, Idx);
return std::make_pair(LexicalOffset, VisibleOffset);
}
@@ -2187,7 +2266,7 @@ ASTDeclReader::VisitRedeclarable(Redeclarable<T> *D) {
for (unsigned I = 0; I != N - 1; ++I)
MergeWith = ReadDecl(Record, Idx/*, MergeWith*/);
- RedeclOffset = Record[Idx++];
+ RedeclOffset = ReadLocalOffset(Record, Idx);
} else {
// This declaration was not the first local declaration. Read the first
// local declaration now, to trigger the import of other redeclarations.
@@ -2263,8 +2342,8 @@ void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D,
// FIXME: This is duplicated in several places. Refactor.
auto *ExistingClass =
cast<CXXRecordDecl>(ExistingPattern)->getCanonicalDecl();
- if (auto *DDD = DClass->DefinitionData.getNotUpdated()) {
- if (ExistingClass->DefinitionData.getNotUpdated()) {
+ if (auto *DDD = DClass->DefinitionData) {
+ if (ExistingClass->DefinitionData) {
MergeDefinitionData(ExistingClass, std::move(*DDD));
} else {
ExistingClass->DefinitionData = DClass->DefinitionData;
@@ -2307,6 +2386,8 @@ void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase, T *Existing,
// appropriate canonical declaration.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(ExistingCanon);
D->First = ExistingCanon;
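+ // Fold this declaration's 'used' flag into the canonical declaration, so
+ // that all redeclarations agree on whether the entity has been used.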
+ ExistingCanon->Used |= D->Used;
+ D->Used = false;
// When we merge a namespace, update its pointer to the first namespace.
// We cannot have loaded any redeclarations of this declaration yet, so
@@ -2360,6 +2441,18 @@ void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
D->setVars(Vars);
}
+void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) {
+ VisitValueDecl(D);
+ D->setLocation(Reader.ReadSourceLocation(F, Record, Idx));
+ D->setCombiner(Reader.ReadExpr(F));
+ D->setInitializer(Reader.ReadExpr(F));
+ D->PrevDeclInScope = Reader.ReadDeclID(F, Record, Idx);
+}
+
+void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
+ VisitVarDecl(D);
+}
+
//===----------------------------------------------------------------------===//
// Attribute Reading
//===----------------------------------------------------------------------===//
@@ -2409,8 +2502,11 @@ static bool isConsumerInterestedIn(Decl *D, bool HasBody) {
isa<ObjCProtocolDecl>(D) ||
isa<ObjCImplDecl>(D) ||
isa<ImportDecl>(D) ||
- isa<OMPThreadPrivateDecl>(D))
+ isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D))
return true;
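+ // OpenMP declarations inside a function or method are not interesting to
+ // the consumer on their own.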
+ if (isa<OMPThreadPrivateDecl>(D) || isa<OMPDeclareReductionDecl>(D))
+ return !D->getDeclContext()->isFunctionOrMethod();
if (VarDecl *Var = dyn_cast<VarDecl>(D))
return Var->isFileVarDecl() &&
Var->isThisDeclarationADefinition() == VarDecl::Definition;
@@ -2422,20 +2518,13 @@ static bool isConsumerInterestedIn(Decl *D, bool HasBody) {
/// \brief Get the correct cursor and offset for loading a declaration.
ASTReader::RecordLocation
-ASTReader::DeclCursorForID(DeclID ID, unsigned &RawLocation) {
- // See if there's an override.
- DeclReplacementMap::iterator It = ReplacedDecls.find(ID);
- if (It != ReplacedDecls.end()) {
- RawLocation = It->second.RawLoc;
- return RecordLocation(It->second.Mod, It->second.Offset);
- }
-
+ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
GlobalDeclMapType::iterator I = GlobalDeclMap.find(ID);
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
ModuleFile *M = I->second;
- const DeclOffset &
- DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
- RawLocation = DOffs.Loc;
+ const DeclOffset &DOffs =
+ M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
+ Loc = TranslateSourceLocation(*M, DOffs.getLocation());
return RecordLocation(M, DOffs.BitOffset);
}
@@ -2588,6 +2677,13 @@ static bool isSameEntity(NamedDecl *X, NamedDecl *Y) {
// functions, etc.
if (FunctionDecl *FuncX = dyn_cast<FunctionDecl>(X)) {
FunctionDecl *FuncY = cast<FunctionDecl>(Y);
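+ // Inheriting constructors that inherit from different constructors are
+ // distinct entities, even when their signatures match.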
+ if (CXXConstructorDecl *CtorX = dyn_cast<CXXConstructorDecl>(X)) {
+ CXXConstructorDecl *CtorY = cast<CXXConstructorDecl>(Y);
+ if (CtorX->getInheritedConstructor() &&
+ !isSameEntity(CtorX->getInheritedConstructor().getConstructor(),
+ CtorY->getInheritedConstructor().getConstructor()))
+ return false;
+ }
return (FuncX->getLinkageInternal() == FuncY->getLinkageInternal()) &&
FuncX->getASTContext().hasSameType(FuncX->getType(), FuncY->getType());
}
@@ -2595,8 +2691,24 @@ static bool isSameEntity(NamedDecl *X, NamedDecl *Y) {
// Variables with the same type and linkage match.
if (VarDecl *VarX = dyn_cast<VarDecl>(X)) {
VarDecl *VarY = cast<VarDecl>(Y);
- return (VarX->getLinkageInternal() == VarY->getLinkageInternal()) &&
- VarX->getASTContext().hasSameType(VarX->getType(), VarY->getType());
+ if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) {
+ ASTContext &C = VarX->getASTContext();
+ if (C.hasSameType(VarX->getType(), VarY->getType()))
+ return true;
+
+ // We can get decls with different types on the redecl chain. E.g.:
+ // template <typename T> struct S { static T Var[]; }; // #1
+ // template <typename T> T S<T>::Var[sizeof(T)]; // #2
+ // This only happens when completing an incomplete array type, so when
+ // comparing #1 and #2 we should compare their element types.
+ const ArrayType *VarXTy = C.getAsArrayType(VarX->getType());
+ const ArrayType *VarYTy = C.getAsArrayType(VarY->getType());
+ if (!VarXTy || !VarYTy)
+ return false;
+ if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType())
+ return C.hasSameType(VarXTy->getElementType(), VarYTy->getElementType());
+ }
+ return false;
}
// Namespaces with the same name and inlinedness match.
@@ -2676,9 +2788,9 @@ DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(DC)) {
// Try to dig out the definition.
- auto *DD = RD->DefinitionData.getNotUpdated();
+ auto *DD = RD->DefinitionData;
if (!DD)
- DD = RD->getCanonicalDecl()->DefinitionData.getNotUpdated();
+ DD = RD->getCanonicalDecl()->DefinitionData;
// If there's no definition yet, then DC's definition is added by an update
// record, but we've not yet loaded that update record. In this case, we
@@ -2728,9 +2840,9 @@ ASTDeclReader::FindExistingResult::~FindExistingResult() {
if (needsAnonymousDeclarationNumber(New)) {
setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(),
AnonymousDeclNumber, New);
- } else if (DC->isTranslationUnit() && Reader.SemaObj &&
+ } else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
- if (Reader.SemaObj->IdResolver.tryAddTopLevelDecl(New, Name))
+ if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name))
Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()]
.push_back(New);
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
@@ -2833,9 +2945,9 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
if (isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
- } else if (DC->isTranslationUnit() && Reader.SemaObj &&
+ } else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
- IdentifierResolver &IdResolver = Reader.SemaObj->IdResolver;
+ IdentifierResolver &IdResolver = Reader.getIdResolver();
// Temporarily consider the identifier to be up-to-date. We don't want to
// cause additional lookups here.
@@ -3000,6 +3112,8 @@ static void inheritDefaultTemplateArguments(ASTContext &Context,
for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
NamedDecl *FromParam = FromTP->getParam(N - I - 1);
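+ // Template parameter packs cannot have default arguments; skip them.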
+ if (FromParam->isParameterPack())
+ continue;
NamedDecl *ToParam = ToTP->getParam(N - I - 1);
if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam)) {
@@ -3038,11 +3152,6 @@ void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
Previous->IdentifierNamespace &
(Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type);
- // If the previous declaration is marked as used, then this declaration should
- // be too.
- if (Previous->Used)
- D->Used = true;
-
// If the declaration declares a template, it may inherit default arguments
// from the previous declaration.
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
@@ -3093,8 +3202,8 @@ void ASTReader::markIncompleteDeclChain(Decl *D) {
/// \brief Read the declaration at the given offset from the AST file.
Decl *ASTReader::ReadDeclRecord(DeclID ID) {
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
- unsigned RawLocation = 0;
- RecordLocation Loc = DeclCursorForID(ID, RawLocation);
+ SourceLocation DeclLoc;
+ RecordLocation Loc = DeclCursorForID(ID, DeclLoc);
llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this declaration.
@@ -3109,7 +3218,7 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
RecordData Record;
unsigned Code = DeclsCursor.ReadCode();
unsigned Idx = 0;
- ASTDeclReader Reader(*this, *Loc.F, ID, RawLocation, Record,Idx);
+ ASTDeclReader Reader(*this, Loc, ID, DeclLoc, Record, Idx);
Decl *D = nullptr;
switch ((DeclCode)DeclsCursor.readRecord(Code, Record)) {
@@ -3152,6 +3261,9 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
case DECL_USING_SHADOW:
D = UsingShadowDecl::CreateDeserialized(Context, ID);
break;
+ case DECL_CONSTRUCTOR_USING_SHADOW:
+ D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID);
+ break;
case DECL_USING_DIRECTIVE:
D = UsingDirectiveDecl::CreateDeserialized(Context, ID);
break;
@@ -3168,7 +3280,10 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
D = CXXMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONSTRUCTOR:
- D = CXXConstructorDecl::CreateDeserialized(Context, ID);
+ D = CXXConstructorDecl::CreateDeserialized(Context, ID, false);
+ break;
+ case DECL_CXX_INHERITED_CONSTRUCTOR:
+ D = CXXConstructorDecl::CreateDeserialized(Context, ID, true);
break;
case DECL_CXX_DESTRUCTOR:
D = CXXDestructorDecl::CreateDeserialized(Context, ID);
@@ -3305,6 +3420,19 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
case DECL_OMP_THREADPRIVATE:
D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, Record[Idx++]);
break;
+ case DECL_OMP_DECLARE_REDUCTION:
+ D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID);
+ break;
+ case DECL_OMP_CAPTUREDEXPR:
+ D = OMPCapturedExprDecl::CreateDeserialized(Context, ID);
+ break;
+ case DECL_PRAGMA_COMMENT:
+ D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record[Idx++]);
+ break;
+ case DECL_PRAGMA_DETECT_MISMATCH:
+ D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID,
+ Record[Idx++]);
+ break;
case DECL_EMPTY:
D = EmptyDecl::CreateDeserialized(Context, ID);
break;
@@ -3353,23 +3481,10 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
}
void ASTReader::loadDeclUpdateRecords(serialization::DeclID ID, Decl *D) {
- // Load the pending visible updates for this decl context, if it has any.
- auto I = PendingVisibleUpdates.find(ID);
- if (I != PendingVisibleUpdates.end()) {
- auto VisibleUpdates = std::move(I->second);
- PendingVisibleUpdates.erase(I);
-
- auto *DC = cast<DeclContext>(D)->getPrimaryContext();
- for (const PendingVisibleUpdate &Update : VisibleUpdates)
- Lookups[DC].Table.add(
- Update.Mod, Update.Data,
- reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
- DC->setHasExternalVisibleStorage(true);
- }
-
// The declaration may have been modified by files later in the chain.
// If this is the case, read the record containing the updates from each file
// and pass it to ASTDeclReader to make the modifications.
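+ // Note (via RAII) that we are processing declaration update records.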
+ ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID);
if (UpdI != DeclUpdateOffsets.end()) {
auto UpdateOffsets = std::move(UpdI->second);
@@ -3389,7 +3504,8 @@ void ASTReader::loadDeclUpdateRecords(serialization::DeclID ID, Decl *D) {
assert(RecCode == DECL_UPDATES && "Expected DECL_UPDATES record!");
unsigned Idx = 0;
- ASTDeclReader Reader(*this, *F, ID, 0, Record, Idx);
+ ASTDeclReader Reader(*this, RecordLocation(F, Offset), ID,
+ SourceLocation(), Record, Idx);
Reader.UpdateDecl(D, *F, Record);
// We might have made this declaration interesting. If so, remember that
@@ -3401,6 +3517,20 @@ void ASTReader::loadDeclUpdateRecords(serialization::DeclID ID, Decl *D) {
}
}
}
+
+ // Load the pending visible updates for this decl context, if it has any.
+ auto I = PendingVisibleUpdates.find(ID);
+ if (I != PendingVisibleUpdates.end()) {
+ auto VisibleUpdates = std::move(I->second);
+ PendingVisibleUpdates.erase(I);
+
+ auto *DC = cast<DeclContext>(D)->getPrimaryContext();
+ for (const PendingVisibleUpdate &Update : VisibleUpdates)
+ Lookups[DC].Table.add(
+ Update.Mod, Update.Data,
+ reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
+ DC->setHasExternalVisibleStorage(true);
+ }
}
void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
@@ -3661,8 +3791,7 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
CD->NumCtorInitializers = Record[Idx++];
if (CD->NumCtorInitializers)
- CD->CtorInitializers =
- Reader.ReadCXXCtorInitializersRef(F, Record, Idx);
+ CD->CtorInitializers = ReadGlobalOffset(F, Record, Idx);
}
// Store the offset of the body so we can lazily load it later.
Reader.PendingBodies[FD] = GetCurrentCursorOffset();
@@ -3673,14 +3802,14 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
- auto *OldDD = RD->DefinitionData.getNotUpdated();
+ auto *OldDD = RD->getCanonicalDecl()->DefinitionData;
bool HadRealDefinition =
OldDD && (OldDD->Definition != RD ||
!Reader.PendingFakeDefinitionData.count(OldDD));
ReadCXXRecordDefinition(RD, /*Update*/true);
// Visible update is handled separately.
- uint64_t LexicalOffset = Record[Idx++];
+ uint64_t LexicalOffset = ReadLocalOffset(Record, Idx);
if (!HadRealDefinition && LexicalOffset) {
Reader.ReadLexicalDeclContextStorage(ModuleFile, ModuleFile.DeclsCursor,
LexicalOffset, RD);
@@ -3705,7 +3834,7 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
SmallVector<TemplateArgument, 8> TemplArgs;
Reader.ReadTemplateArgumentList(TemplArgs, F, Record, Idx);
auto *TemplArgList = TemplateArgumentList::CreateCopy(
- Reader.getContext(), TemplArgs.data(), TemplArgs.size());
+ Reader.getContext(), TemplArgs);
// FIXME: If we already have a partial specialization set,
// check that it matches.
@@ -3718,7 +3847,7 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
RD->setTagKind((TagTypeKind)Record[Idx++]);
RD->setLocation(Reader.ReadSourceLocation(ModuleFile, Record, Idx));
RD->setLocStart(Reader.ReadSourceLocation(ModuleFile, Record, Idx));
- RD->setRBraceLoc(Reader.ReadSourceLocation(ModuleFile, Record, Idx));
+ RD->setBraceRange(Reader.ReadSourceRange(ModuleFile, Record, Idx));
if (Record[Idx++]) {
AttrVec Attrs;
@@ -3774,11 +3903,8 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
}
case UPD_DECL_MARKED_USED: {
- // FIXME: This doesn't send the right notifications if there are
- // ASTMutationListeners other than an ASTWriter.
-
// Maintain AST consistency: any later redeclarations are used too.
- forAllLaterRedecls(D, [](Decl *D) { D->Used = true; });
+ D->markUsed(Reader.Context);
break;
}
@@ -3802,11 +3928,8 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
Exported = TD->getDefinition();
Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr;
if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
- // FIXME: This doesn't send the right notifications if there are
- // ASTMutationListeners other than an ASTWriter.
- Reader.getContext().mergeDefinitionIntoModule(
- cast<NamedDecl>(Exported), Owner,
- /*NotifyListeners*/ false);
+ Reader.getContext().mergeDefinitionIntoModule(cast<NamedDecl>(Exported),
+ Owner);
Reader.PendingMergedDefinitionsToDeduplicate.insert(
cast<NamedDecl>(Exported));
} else if (Owner && Owner->NameVisibility != Module::AllVisible) {
@@ -3820,6 +3943,7 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
break;
}
+ case UPD_DECL_MARKED_OPENMP_DECLARETARGET:
case UPD_ADDED_ATTR_TO_RECORD:
AttrVec Attrs;
Reader.ReadAttributes(F, Attrs, Record, Idx);
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp
index ad81ac844209..395da42d4f24 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -184,6 +184,8 @@ void ASTStmtReader::VisitAttributedStmt(AttributedStmt *S) {
void ASTStmtReader::VisitIfStmt(IfStmt *S) {
VisitStmt(S);
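+ // C++1z: read the 'constexpr' flag and the optional init-statement.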
+ S->setConstexpr(Record[Idx++]);
+ S->setInit(Reader.ReadSubStmt());
S->setConditionVariable(Reader.getContext(),
ReadDeclAs<VarDecl>(Record, Idx));
S->setCond(Reader.ReadSubExpr());
@@ -195,6 +197,7 @@ void ASTStmtReader::VisitIfStmt(IfStmt *S) {
void ASTStmtReader::VisitSwitchStmt(SwitchStmt *S) {
VisitStmt(S);
+ S->setInit(Reader.ReadSubStmt());
S->setConditionVariable(Reader.getContext(),
ReadDeclAs<VarDecl>(Record, Idx));
S->setCond(Reader.ReadSubExpr());
@@ -1179,6 +1182,14 @@ void ASTStmtReader::VisitObjCBoolLiteralExpr(ObjCBoolLiteralExpr *E) {
E->setLocation(ReadSourceLocation(Record, Idx));
}
+void ASTStmtReader::VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) {
+ VisitExpr(E);
+ SourceRange R = Reader.ReadSourceRange(F, Record, Idx);
+ E->AtLoc = R.getBegin();
+ E->RParen = R.getEnd();
+ E->VersionToCheck = Reader.ReadVersionTuple(Record, Idx);
+}
+
//===----------------------------------------------------------------------===//
// C++ Expressions and Statements
//===----------------------------------------------------------------------===//
@@ -1207,7 +1218,8 @@ void ASTStmtReader::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
S->ColonLoc = ReadSourceLocation(Record, Idx);
S->RParenLoc = ReadSourceLocation(Record, Idx);
S->setRangeStmt(Reader.ReadSubStmt());
- S->setBeginEndStmt(Reader.ReadSubStmt());
+ S->setBeginStmt(Reader.ReadSubStmt());
+ S->setEndStmt(Reader.ReadSubStmt());
S->setCond(Reader.ReadSubExpr());
S->setInc(Reader.ReadSubExpr());
S->setLoopVarStmt(Reader.ReadSubStmt());
@@ -1248,6 +1260,14 @@ void ASTStmtReader::VisitCXXConstructExpr(CXXConstructExpr *E) {
E->ParenOrBraceRange = ReadSourceRange(Record, Idx);
}
+void ASTStmtReader::VisitCXXInheritedCtorInitExpr(CXXInheritedCtorInitExpr *E) {
+ VisitExpr(E);
+ E->Constructor = ReadDeclAs<CXXConstructorDecl>(Record, Idx);
+ E->Loc = ReadSourceLocation(Record, Idx);
+ E->ConstructsVirtualBase = Record[Idx++];
+ E->InheritedFromVirtualBase = Record[Idx++];
+}
+
void ASTStmtReader::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *E) {
VisitCXXConstructExpr(E);
E->Type = GetTypeSourceInfo(Record, Idx);
@@ -1447,6 +1467,7 @@ void ASTStmtReader::VisitExprWithCleanups(ExprWithCleanups *E) {
E->getTrailingObjects<BlockDecl *>()[i] =
ReadDeclAs<BlockDecl>(Record, Idx);
+ E->ExprWithCleanupsBits.CleanupsHaveSideEffects = Record[Idx++];
E->SubExpr = Reader.ReadSubExpr();
}
@@ -1679,6 +1700,8 @@ void ASTStmtReader::VisitMSPropertySubscriptExpr(MSPropertySubscriptExpr *E) {
void ASTStmtReader::VisitCXXUuidofExpr(CXXUuidofExpr *E) {
VisitExpr(E);
E->setSourceRange(ReadSourceRange(Record, Idx));
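+ // Copy the UUID string into ASTContext-owned memory so that it outlives
+ // this local buffer.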
+ std::string UuidStr = ReadString(Record, Idx);
+ E->setUuidStr(StringRef(UuidStr).copy(Reader.getContext()));
if (E->isTypeOperand()) { // __uuidof(ComType)
E->setTypeOperandSourceInfo(
GetTypeSourceInfo(Record, Idx));
@@ -1748,10 +1771,11 @@ public:
OMPClauseReader(ASTStmtReader *R, ASTContext &C,
const ASTReader::RecordData &Record, unsigned &Idx)
: Reader(R), Context(C), Record(Record), Idx(Idx) { }
-#define OPENMP_CLAUSE(Name, Class) \
- void Visit##Class(Class *S);
+#define OPENMP_CLAUSE(Name, Class) void Visit##Class(Class *C);
#include "clang/Basic/OpenMPKinds.def"
OMPClause *readClause();
+ void VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C);
+ void VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C);
};
}
@@ -1857,9 +1881,15 @@ OMPClause *OMPClauseReader::readClause() {
case OMPC_device:
C = new (Context) OMPDeviceClause();
break;
- case OMPC_map:
- C = OMPMapClause::CreateEmpty(Context, Record[Idx++]);
+ case OMPC_map: {
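+ // Map clauses carry, besides the variable list, the mappable expression
+ // component lists; read all four sizes before creating the empty clause.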
+ unsigned NumVars = Record[Idx++];
+ unsigned NumDeclarations = Record[Idx++];
+ unsigned NumLists = Record[Idx++];
+ unsigned NumComponents = Record[Idx++];
+ C = OMPMapClause::CreateEmpty(Context, NumVars, NumDeclarations, NumLists,
+ NumComponents);
break;
+ }
case OMPC_num_teams:
C = new (Context) OMPNumTeamsClause();
break;
@@ -1878,6 +1908,36 @@ OMPClause *OMPClauseReader::readClause() {
case OMPC_hint:
C = new (Context) OMPHintClause();
break;
+ case OMPC_dist_schedule:
+ C = new (Context) OMPDistScheduleClause();
+ break;
+ case OMPC_defaultmap:
+ C = new (Context) OMPDefaultmapClause();
+ break;
+ case OMPC_to: {
+ unsigned NumVars = Record[Idx++];
+ unsigned NumDeclarations = Record[Idx++];
+ unsigned NumLists = Record[Idx++];
+ unsigned NumComponents = Record[Idx++];
+ C = OMPToClause::CreateEmpty(Context, NumVars, NumDeclarations, NumLists,
+ NumComponents);
+ break;
+ }
+ case OMPC_from: {
+ unsigned NumVars = Record[Idx++];
+ unsigned NumDeclarations = Record[Idx++];
+ unsigned NumLists = Record[Idx++];
+ unsigned NumComponents = Record[Idx++];
+ C = OMPFromClause::CreateEmpty(Context, NumVars, NumDeclarations, NumLists,
+ NumComponents);
+ break;
+ }
+ case OMPC_use_device_ptr:
+ C = OMPUseDevicePtrClause::CreateEmpty(Context, Record[Idx++]);
+ break;
+ case OMPC_is_device_ptr:
+ C = OMPIsDevicePtrClause::CreateEmpty(Context, Record[Idx++]);
+ break;
}
Visit(C);
C->setLocStart(Reader->ReadSourceLocation(Record, Idx));
@@ -1886,6 +1946,15 @@ OMPClause *OMPClauseReader::readClause() {
return C;
}
+void OMPClauseReader::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) {
+ C->setPreInitStmt(Reader->Reader.ReadSubStmt());
+}
+
+void OMPClauseReader::VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C) {
+ VisitOMPClauseWithPreInit(C);
+ C->setPostUpdateExpr(Reader->Reader.ReadSubExpr());
+}
+
void OMPClauseReader::VisitOMPIfClause(OMPIfClause *C) {
C->setNameModifier(static_cast<OpenMPDirectiveKind>(Record[Idx++]));
C->setNameModifierLoc(Reader->ReadSourceLocation(Record, Idx));
@@ -1934,6 +2003,7 @@ void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) {
}
void OMPClauseReader::VisitOMPScheduleClause(OMPScheduleClause *C) {
+ VisitOMPClauseWithPreInit(C);
C->setScheduleKind(
static_cast<OpenMPScheduleClauseKind>(Record[Idx++]));
C->setFirstScheduleModifier(
@@ -1941,7 +2011,6 @@ void OMPClauseReader::VisitOMPScheduleClause(OMPScheduleClause *C) {
C->setSecondScheduleModifier(
static_cast<OpenMPScheduleClauseModifier>(Record[Idx++]));
C->setChunkSize(Reader->Reader.ReadSubExpr());
- C->setHelperChunkSize(Reader->Reader.ReadSubExpr());
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
C->setFirstScheduleModifierLoc(Reader->ReadSourceLocation(Record, Idx));
C->setSecondScheduleModifierLoc(Reader->ReadSourceLocation(Record, Idx));
@@ -1991,6 +2060,7 @@ void OMPClauseReader::VisitOMPPrivateClause(OMPPrivateClause *C) {
}
void OMPClauseReader::VisitOMPFirstprivateClause(OMPFirstprivateClause *C) {
+ VisitOMPClauseWithPreInit(C);
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
@@ -2009,6 +2079,7 @@ void OMPClauseReader::VisitOMPFirstprivateClause(OMPFirstprivateClause *C) {
}
void OMPClauseReader::VisitOMPLastprivateClause(OMPLastprivateClause *C) {
+ VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
unsigned NumVars = C->varlist_size();
SmallVector<Expr *, 16> Vars;
@@ -2045,6 +2116,7 @@ void OMPClauseReader::VisitOMPSharedClause(OMPSharedClause *C) {
}
void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
+ VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
C->setColonLoc(Reader->ReadSourceLocation(Record, Idx));
NestedNameSpecifierLoc NNSL =
@@ -2079,6 +2151,7 @@ void OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
}
void OMPClauseReader::VisitOMPLinearClause(OMPLinearClause *C) {
+ VisitOMPClauseWithPostUpdate(C);
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
C->setColonLoc(Reader->ReadSourceLocation(Record, Idx));
C->setModifier(static_cast<OpenMPLinearClauseKind>(Record[Idx++]));
@@ -2186,6 +2259,7 @@ void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) {
for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Reader->Reader.ReadSubExpr());
C->setVarRefs(Vars);
+ C->setCounterValue(Reader->Reader.ReadSubExpr());
}
void OMPClauseReader::VisitOMPDeviceClause(OMPDeviceClause *C) {
@@ -2202,12 +2276,45 @@ void OMPClauseReader::VisitOMPMapClause(OMPMapClause *C) {
C->setMapLoc(Reader->ReadSourceLocation(Record, Idx));
C->setColonLoc(Reader->ReadSourceLocation(Record, Idx));
auto NumVars = C->varlist_size();
+ auto UniqueDecls = C->getUniqueDeclarationsNum();
+ auto TotalLists = C->getTotalComponentListNum();
+ auto TotalComponents = C->getTotalComponentsNum();
+
SmallVector<Expr *, 16> Vars;
Vars.reserve(NumVars);
- for (unsigned i = 0; i != NumVars; ++i) {
+ for (unsigned i = 0; i != NumVars; ++i)
Vars.push_back(Reader->Reader.ReadSubExpr());
- }
C->setVarRefs(Vars);
+
+ SmallVector<ValueDecl *, 16> Decls;
+ Decls.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ Decls.push_back(
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx));
+ C->setUniqueDecls(Decls);
+
+ SmallVector<unsigned, 16> ListsPerDecl;
+ ListsPerDecl.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ ListsPerDecl.push_back(Record[Idx++]);
+ C->setDeclNumLists(ListsPerDecl);
+
+ SmallVector<unsigned, 32> ListSizes;
+ ListSizes.reserve(TotalLists);
+ for (unsigned i = 0; i < TotalLists; ++i)
+ ListSizes.push_back(Record[Idx++]);
+ C->setComponentListSizes(ListSizes);
+
+ SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
+ Components.reserve(TotalComponents);
+ for (unsigned i = 0; i < TotalComponents; ++i) {
+ Expr *AssociatedExpr = Reader->Reader.ReadSubExpr();
+ ValueDecl *AssociatedDecl =
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx);
+ Components.push_back(OMPClauseMappableExprCommon::MappableComponent(
+ AssociatedExpr, AssociatedDecl));
+ }
+ C->setComponents(Components, ListSizes);
}
void OMPClauseReader::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) {
@@ -2240,6 +2347,136 @@ void OMPClauseReader::VisitOMPHintClause(OMPHintClause *C) {
C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
}
+void OMPClauseReader::VisitOMPDistScheduleClause(OMPDistScheduleClause *C) {
+ VisitOMPClauseWithPreInit(C);
+ C->setDistScheduleKind(
+ static_cast<OpenMPDistScheduleClauseKind>(Record[Idx++]));
+ C->setChunkSize(Reader->Reader.ReadSubExpr());
+ C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+ C->setDistScheduleKindLoc(Reader->ReadSourceLocation(Record, Idx));
+ C->setCommaLoc(Reader->ReadSourceLocation(Record, Idx));
+}
+
+void OMPClauseReader::VisitOMPDefaultmapClause(OMPDefaultmapClause *C) {
+ C->setDefaultmapKind(
+ static_cast<OpenMPDefaultmapClauseKind>(Record[Idx++]));
+ C->setDefaultmapModifier(
+ static_cast<OpenMPDefaultmapClauseModifier>(Record[Idx++]));
+ C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+ C->setDefaultmapModifierLoc(Reader->ReadSourceLocation(Record, Idx));
+ C->setDefaultmapKindLoc(Reader->ReadSourceLocation(Record, Idx));
+}
+
+void OMPClauseReader::VisitOMPToClause(OMPToClause *C) {
+ C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+ auto NumVars = C->varlist_size();
+ auto UniqueDecls = C->getUniqueDeclarationsNum();
+ auto TotalLists = C->getTotalComponentListNum();
+ auto TotalComponents = C->getTotalComponentsNum();
+
+ SmallVector<Expr *, 16> Vars;
+ Vars.reserve(NumVars);
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Reader->Reader.ReadSubExpr());
+ C->setVarRefs(Vars);
+
+ SmallVector<ValueDecl *, 16> Decls;
+ Decls.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ Decls.push_back(
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx));
+ C->setUniqueDecls(Decls);
+
+ SmallVector<unsigned, 16> ListsPerDecl;
+ ListsPerDecl.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ ListsPerDecl.push_back(Record[Idx++]);
+ C->setDeclNumLists(ListsPerDecl);
+
+ SmallVector<unsigned, 32> ListSizes;
+ ListSizes.reserve(TotalLists);
+ for (unsigned i = 0; i < TotalLists; ++i)
+ ListSizes.push_back(Record[Idx++]);
+ C->setComponentListSizes(ListSizes);
+
+ SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
+ Components.reserve(TotalComponents);
+ for (unsigned i = 0; i < TotalComponents; ++i) {
+ Expr *AssociatedExpr = Reader->Reader.ReadSubExpr();
+ ValueDecl *AssociatedDecl =
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx);
+ Components.push_back(OMPClauseMappableExprCommon::MappableComponent(
+ AssociatedExpr, AssociatedDecl));
+ }
+ C->setComponents(Components, ListSizes);
+}
+
+void OMPClauseReader::VisitOMPFromClause(OMPFromClause *C) {
+ C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+ auto NumVars = C->varlist_size();
+ auto UniqueDecls = C->getUniqueDeclarationsNum();
+ auto TotalLists = C->getTotalComponentListNum();
+ auto TotalComponents = C->getTotalComponentsNum();
+
+ SmallVector<Expr *, 16> Vars;
+ Vars.reserve(NumVars);
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Reader->Reader.ReadSubExpr());
+ C->setVarRefs(Vars);
+
+ SmallVector<ValueDecl *, 16> Decls;
+ Decls.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ Decls.push_back(
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx));
+ C->setUniqueDecls(Decls);
+
+ SmallVector<unsigned, 16> ListsPerDecl;
+ ListsPerDecl.reserve(UniqueDecls);
+ for (unsigned i = 0; i < UniqueDecls; ++i)
+ ListsPerDecl.push_back(Record[Idx++]);
+ C->setDeclNumLists(ListsPerDecl);
+
+ SmallVector<unsigned, 32> ListSizes;
+ ListSizes.reserve(TotalLists);
+ for (unsigned i = 0; i < TotalLists; ++i)
+ ListSizes.push_back(Record[Idx++]);
+ C->setComponentListSizes(ListSizes);
+
+ SmallVector<OMPClauseMappableExprCommon::MappableComponent, 32> Components;
+ Components.reserve(TotalComponents);
+ for (unsigned i = 0; i < TotalComponents; ++i) {
+ Expr *AssociatedExpr = Reader->Reader.ReadSubExpr();
+ ValueDecl *AssociatedDecl =
+ Reader->Reader.ReadDeclAs<ValueDecl>(Reader->F, Record, Idx);
+ Components.push_back(OMPClauseMappableExprCommon::MappableComponent(
+ AssociatedExpr, AssociatedDecl));
+ }
+ C->setComponents(Components, ListSizes);
+}
+
+void OMPClauseReader::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
+ C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+ unsigned NumVars = C->varlist_size();
+ SmallVector<Expr *, 16> Vars;
+ Vars.reserve(NumVars);
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Reader->Reader.ReadSubExpr());
+ C->setVarRefs(Vars);
+ Vars.clear();
+}
+
+void OMPClauseReader::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
+ C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+ unsigned NumVars = C->varlist_size();
+ SmallVector<Expr *, 16> Vars;
+ Vars.reserve(NumVars);
+ for (unsigned i = 0; i != NumVars; ++i)
+ Vars.push_back(Reader->Reader.ReadSubExpr());
+ C->setVarRefs(Vars);
+ Vars.clear();
+}
+
//===----------------------------------------------------------------------===//
// OpenMP Directives.
//===----------------------------------------------------------------------===//
@@ -2267,7 +2504,10 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) {
D->setCond(Reader.ReadSubExpr());
D->setInit(Reader.ReadSubExpr());
D->setInc(Reader.ReadSubExpr());
- if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
+ D->setPreInits(Reader.ReadSubStmt());
+ if (isOpenMPWorksharingDirective(D->getDirectiveKind()) ||
+ isOpenMPTaskLoopDirective(D->getDirectiveKind()) ||
+ isOpenMPDistributeDirective(D->getDirectiveKind())) {
D->setIsLastIterVariable(Reader.ReadSubExpr());
D->setLowerBoundVariable(Reader.ReadSubExpr());
D->setUpperBoundVariable(Reader.ReadSubExpr());
@@ -2275,6 +2515,11 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) {
D->setEnsureUpperBound(Reader.ReadSubExpr());
D->setNextLowerBound(Reader.ReadSubExpr());
D->setNextUpperBound(Reader.ReadSubExpr());
+ D->setNumIterations(Reader.ReadSubExpr());
+ }
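+ // Composite directives that share loop bounds with an enclosing distribute
+ // directive also record the previous lower and upper bound variables.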
+ if (isOpenMPLoopBoundSharingDirective(D->getDirectiveKind())) {
+ D->setPrevLowerBoundVariable(Reader.ReadSubExpr());
+ D->setPrevUpperBoundVariable(Reader.ReadSubExpr());
}
SmallVector<Expr *, 4> Sub;
unsigned CollapsedNum = D->getCollapsedNumber();
@@ -2442,6 +2687,33 @@ void ASTStmtReader::VisitOMPTargetDataDirective(OMPTargetDataDirective *D) {
VisitOMPExecutableDirective(D);
}
+void ASTStmtReader::VisitOMPTargetEnterDataDirective(
+ OMPTargetEnterDataDirective *D) {
+ VisitStmt(D);
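+ // The NumClauses field was read in ReadStmtFromStream.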
+ ++Idx;
+ VisitOMPExecutableDirective(D);
+}
+
+void ASTStmtReader::VisitOMPTargetExitDataDirective(
+ OMPTargetExitDataDirective *D) {
+ VisitStmt(D);
+ ++Idx;
+ VisitOMPExecutableDirective(D);
+}
+
+void ASTStmtReader::VisitOMPTargetParallelDirective(
+ OMPTargetParallelDirective *D) {
+ VisitStmt(D);
+ ++Idx;
+ VisitOMPExecutableDirective(D);
+}
+
+void ASTStmtReader::VisitOMPTargetParallelForDirective(
+ OMPTargetParallelForDirective *D) {
+ VisitOMPLoopDirective(D);
+ D->setHasCancel(Record[Idx++]);
+}
+
void ASTStmtReader::VisitOMPTeamsDirective(OMPTeamsDirective *D) {
VisitStmt(D);
// The NumClauses field was read in ReadStmtFromStream.
@@ -2476,6 +2748,31 @@ void ASTStmtReader::VisitOMPDistributeDirective(OMPDistributeDirective *D) {
VisitOMPLoopDirective(D);
}
+void ASTStmtReader::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
+ VisitStmt(D);
+ ++Idx;
+ VisitOMPExecutableDirective(D);
+}
+void ASTStmtReader::VisitOMPDistributeParallelForDirective(
+ OMPDistributeParallelForDirective *D) {
+ VisitOMPLoopDirective(D);
+}
+
+void ASTStmtReader::VisitOMPDistributeParallelForSimdDirective(
+ OMPDistributeParallelForSimdDirective *D) {
+ VisitOMPLoopDirective(D);
+}
+
+void ASTStmtReader::VisitOMPDistributeSimdDirective(
+ OMPDistributeSimdDirective *D) {
+ VisitOMPLoopDirective(D);
+}
+
+void ASTStmtReader::VisitOMPTargetParallelForSimdDirective(
+ OMPTargetParallelForSimdDirective *D) {
+ VisitOMPLoopDirective(D);
+}
+
//===----------------------------------------------------------------------===//
// ASTReader Implementation
//===----------------------------------------------------------------------===//
@@ -2932,6 +3229,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
case EXPR_OBJC_BOOL_LITERAL:
S = new (Context) ObjCBoolLiteralExpr(Empty);
break;
+ case EXPR_OBJC_AVAILABILITY_CHECK:
+ S = new (Context) ObjCAvailabilityCheckExpr(Empty);
+ break;
case STMT_SEH_LEAVE:
S = new (Context) SEHLeaveStmt(Empty);
break;
@@ -3085,6 +3385,34 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
Context, Record[ASTStmtReader::NumStmtFields], Empty);
break;
+ case STMT_OMP_TARGET_ENTER_DATA_DIRECTIVE:
+ S = OMPTargetEnterDataDirective::CreateEmpty(
+ Context, Record[ASTStmtReader::NumStmtFields], Empty);
+ break;
+
+ case STMT_OMP_TARGET_EXIT_DATA_DIRECTIVE:
+ S = OMPTargetExitDataDirective::CreateEmpty(
+ Context, Record[ASTStmtReader::NumStmtFields], Empty);
+ break;
+
+ case STMT_OMP_TARGET_PARALLEL_DIRECTIVE:
+ S = OMPTargetParallelDirective::CreateEmpty(
+ Context, Record[ASTStmtReader::NumStmtFields], Empty);
+ break;
+
+ case STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE: {
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+ unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPTargetParallelForDirective::CreateEmpty(Context, NumClauses,
+ CollapsedNum, Empty);
+ break;
+ }
+
+ case STMT_OMP_TARGET_UPDATE_DIRECTIVE:
+ S = OMPTargetUpdateDirective::CreateEmpty(
+ Context, Record[ASTStmtReader::NumStmtFields], Empty);
+ break;
+
case STMT_OMP_TEAMS_DIRECTIVE:
S = OMPTeamsDirective::CreateEmpty(
Context, Record[ASTStmtReader::NumStmtFields], Empty);
@@ -3123,6 +3451,39 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
break;
}
+ case STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE: {
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+ unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPDistributeParallelForDirective::CreateEmpty(Context, NumClauses,
+ CollapsedNum, Empty);
+ break;
+ }
+
+ case STMT_OMP_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE: {
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+ unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPDistributeParallelForSimdDirective::CreateEmpty(Context, NumClauses,
+ CollapsedNum,
+ Empty);
+ break;
+ }
+
+ case STMT_OMP_DISTRIBUTE_SIMD_DIRECTIVE: {
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+ unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPDistributeSimdDirective::CreateEmpty(Context, NumClauses,
+ CollapsedNum, Empty);
+ break;
+ }
+
+ case STMT_OMP_TARGET_PARALLEL_FOR_SIMD_DIRECTIVE: {
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+ unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPTargetParallelForSimdDirective::CreateEmpty(Context, NumClauses,
+ CollapsedNum, Empty);
+ break;
+ }
+
case EXPR_CXX_OPERATOR_CALL:
S = new (Context) CXXOperatorCallExpr(Context, Empty);
break;
@@ -3135,6 +3496,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = new (Context) CXXConstructExpr(Empty);
break;
+ case EXPR_CXX_INHERITED_CTOR_INIT:
+ S = new (Context) CXXInheritedCtorInitExpr(Empty);
+ break;
+
case EXPR_CXX_TEMPORARY_OBJECT:
S = new (Context) CXXTemporaryObjectExpr(Empty);
break;
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
index ec04cd6c1fa9..7589b0c5dd52 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriter.cpp
@@ -50,6 +50,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -81,19 +82,42 @@ static StringRef bytes(const SmallVectorImpl<T> &v) {
// Type serialization
//===----------------------------------------------------------------------===//
-namespace {
+namespace clang {
class ASTTypeWriter {
ASTWriter &Writer;
- ASTWriter::RecordDataImpl &Record;
+ ASTRecordWriter Record;
- public:
/// \brief Type code that corresponds to the record generated.
TypeCode Code;
/// \brief Abbreviation to use for the record, if any.
unsigned AbbrevToUse;
+ public:
ASTTypeWriter(ASTWriter &Writer, ASTWriter::RecordDataImpl &Record)
- : Writer(Writer), Record(Record), Code(TYPE_EXT_QUAL) { }
+ : Writer(Writer), Record(Writer, Record), Code((TypeCode)0), AbbrevToUse(0) { }
+
+ uint64_t Emit() {
+ return Record.Emit(Code, AbbrevToUse);
+ }
+
+ void Visit(QualType T) {
+ if (T.hasLocalNonFastQualifiers()) {
+ Qualifiers Qs = T.getLocalQualifiers();
+ Record.AddTypeRef(T.getLocalUnqualifiedType());
+ Record.push_back(Qs.getAsOpaqueValue());
+ Code = TYPE_EXT_QUAL;
+ AbbrevToUse = Writer.TypeExtQualAbbrev;
+ } else {
+ switch (T->getTypeClass()) {
+ // For all of the concrete, non-dependent types, call the
+ // appropriate visitor function.
+#define TYPE(Class, Base) \
+ case Type::Class: Visit##Class##Type(cast<Class##Type>(T)); break;
+#define ABSTRACT_TYPE(Class, Base)
+#include "clang/AST/TypeNodes.def"
+ }
+ }
+ }
void VisitArrayType(const ArrayType *T);
void VisitFunctionType(const FunctionType *T);
@@ -103,64 +127,64 @@ namespace {
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.def"
};
-} // end anonymous namespace
+} // end namespace clang
void ASTTypeWriter::VisitBuiltinType(const BuiltinType *T) {
llvm_unreachable("Built-in types are never serialized");
}
void ASTTypeWriter::VisitComplexType(const ComplexType *T) {
- Writer.AddTypeRef(T->getElementType(), Record);
+ Record.AddTypeRef(T->getElementType());
Code = TYPE_COMPLEX;
}
void ASTTypeWriter::VisitPointerType(const PointerType *T) {
- Writer.AddTypeRef(T->getPointeeType(), Record);
+ Record.AddTypeRef(T->getPointeeType());
Code = TYPE_POINTER;
}
void ASTTypeWriter::VisitDecayedType(const DecayedType *T) {
- Writer.AddTypeRef(T->getOriginalType(), Record);
+ Record.AddTypeRef(T->getOriginalType());
Code = TYPE_DECAYED;
}
void ASTTypeWriter::VisitAdjustedType(const AdjustedType *T) {
- Writer.AddTypeRef(T->getOriginalType(), Record);
- Writer.AddTypeRef(T->getAdjustedType(), Record);
+ Record.AddTypeRef(T->getOriginalType());
+ Record.AddTypeRef(T->getAdjustedType());
Code = TYPE_ADJUSTED;
}
void ASTTypeWriter::VisitBlockPointerType(const BlockPointerType *T) {
- Writer.AddTypeRef(T->getPointeeType(), Record);
+ Record.AddTypeRef(T->getPointeeType());
Code = TYPE_BLOCK_POINTER;
}
void ASTTypeWriter::VisitLValueReferenceType(const LValueReferenceType *T) {
- Writer.AddTypeRef(T->getPointeeTypeAsWritten(), Record);
+ Record.AddTypeRef(T->getPointeeTypeAsWritten());
Record.push_back(T->isSpelledAsLValue());
Code = TYPE_LVALUE_REFERENCE;
}
void ASTTypeWriter::VisitRValueReferenceType(const RValueReferenceType *T) {
- Writer.AddTypeRef(T->getPointeeTypeAsWritten(), Record);
+ Record.AddTypeRef(T->getPointeeTypeAsWritten());
Code = TYPE_RVALUE_REFERENCE;
}
void ASTTypeWriter::VisitMemberPointerType(const MemberPointerType *T) {
- Writer.AddTypeRef(T->getPointeeType(), Record);
- Writer.AddTypeRef(QualType(T->getClass(), 0), Record);
+ Record.AddTypeRef(T->getPointeeType());
+ Record.AddTypeRef(QualType(T->getClass(), 0));
Code = TYPE_MEMBER_POINTER;
}
void ASTTypeWriter::VisitArrayType(const ArrayType *T) {
- Writer.AddTypeRef(T->getElementType(), Record);
+ Record.AddTypeRef(T->getElementType());
Record.push_back(T->getSizeModifier()); // FIXME: stable values
Record.push_back(T->getIndexTypeCVRQualifiers()); // FIXME: stable values
}
void ASTTypeWriter::VisitConstantArrayType(const ConstantArrayType *T) {
VisitArrayType(T);
- Writer.AddAPInt(T->getSize(), Record);
+ Record.AddAPInt(T->getSize());
Code = TYPE_CONSTANT_ARRAY;
}
@@ -171,14 +195,14 @@ void ASTTypeWriter::VisitIncompleteArrayType(const IncompleteArrayType *T) {
void ASTTypeWriter::VisitVariableArrayType(const VariableArrayType *T) {
VisitArrayType(T);
- Writer.AddSourceLocation(T->getLBracketLoc(), Record);
- Writer.AddSourceLocation(T->getRBracketLoc(), Record);
- Writer.AddStmt(T->getSizeExpr());
+ Record.AddSourceLocation(T->getLBracketLoc());
+ Record.AddSourceLocation(T->getRBracketLoc());
+ Record.AddStmt(T->getSizeExpr());
Code = TYPE_VARIABLE_ARRAY;
}
void ASTTypeWriter::VisitVectorType(const VectorType *T) {
- Writer.AddTypeRef(T->getElementType(), Record);
+ Record.AddTypeRef(T->getElementType());
Record.push_back(T->getNumElements());
Record.push_back(T->getVectorKind());
Code = TYPE_VECTOR;
@@ -190,7 +214,7 @@ void ASTTypeWriter::VisitExtVectorType(const ExtVectorType *T) {
}
void ASTTypeWriter::VisitFunctionType(const FunctionType *T) {
- Writer.AddTypeRef(T->getReturnType(), Record);
+ Record.AddTypeRef(T->getReturnType());
FunctionType::ExtInfo C = T->getExtInfo();
Record.push_back(C.getNoReturn());
Record.push_back(C.getHasRegParm());
@@ -208,20 +232,20 @@ void ASTTypeWriter::VisitFunctionNoProtoType(const FunctionNoProtoType *T) {
Code = TYPE_FUNCTION_NO_PROTO;
}
-static void addExceptionSpec(ASTWriter &Writer, const FunctionProtoType *T,
- ASTWriter::RecordDataImpl &Record) {
+static void addExceptionSpec(const FunctionProtoType *T,
+ ASTRecordWriter &Record) {
Record.push_back(T->getExceptionSpecType());
if (T->getExceptionSpecType() == EST_Dynamic) {
Record.push_back(T->getNumExceptions());
for (unsigned I = 0, N = T->getNumExceptions(); I != N; ++I)
- Writer.AddTypeRef(T->getExceptionType(I), Record);
+ Record.AddTypeRef(T->getExceptionType(I));
} else if (T->getExceptionSpecType() == EST_ComputedNoexcept) {
- Writer.AddStmt(T->getNoexceptExpr());
+ Record.AddStmt(T->getNoexceptExpr());
} else if (T->getExceptionSpecType() == EST_Uninstantiated) {
- Writer.AddDeclRef(T->getExceptionSpecDecl(), Record);
- Writer.AddDeclRef(T->getExceptionSpecTemplate(), Record);
+ Record.AddDeclRef(T->getExceptionSpecDecl());
+ Record.AddDeclRef(T->getExceptionSpecTemplate());
} else if (T->getExceptionSpecType() == EST_Unevaluated) {
- Writer.AddDeclRef(T->getExceptionSpecDecl(), Record);
+ Record.AddDeclRef(T->getExceptionSpecDecl());
}
}
@@ -232,56 +256,62 @@ void ASTTypeWriter::VisitFunctionProtoType(const FunctionProtoType *T) {
Record.push_back(T->hasTrailingReturn());
Record.push_back(T->getTypeQuals());
Record.push_back(static_cast<unsigned>(T->getRefQualifier()));
- addExceptionSpec(Writer, T, Record);
+ addExceptionSpec(T, Record);
Record.push_back(T->getNumParams());
for (unsigned I = 0, N = T->getNumParams(); I != N; ++I)
- Writer.AddTypeRef(T->getParamType(I), Record);
+ Record.AddTypeRef(T->getParamType(I));
+
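+ // Extended parameter info is only serialized when present.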
+ if (T->hasExtParameterInfos()) {
+ for (unsigned I = 0, N = T->getNumParams(); I != N; ++I)
+ Record.push_back(T->getExtParameterInfo(I).getOpaqueValue());
+ }
if (T->isVariadic() || T->hasTrailingReturn() || T->getTypeQuals() ||
- T->getRefQualifier() || T->getExceptionSpecType() != EST_None)
+ T->getRefQualifier() || T->getExceptionSpecType() != EST_None ||
+ T->hasExtParameterInfos())
AbbrevToUse = 0;
Code = TYPE_FUNCTION_PROTO;
}
void ASTTypeWriter::VisitUnresolvedUsingType(const UnresolvedUsingType *T) {
- Writer.AddDeclRef(T->getDecl(), Record);
+ Record.AddDeclRef(T->getDecl());
Code = TYPE_UNRESOLVED_USING;
}
void ASTTypeWriter::VisitTypedefType(const TypedefType *T) {
- Writer.AddDeclRef(T->getDecl(), Record);
+ Record.AddDeclRef(T->getDecl());
assert(!T->isCanonicalUnqualified() && "Invalid typedef ?");
- Writer.AddTypeRef(T->getCanonicalTypeInternal(), Record);
+ Record.AddTypeRef(T->getCanonicalTypeInternal());
Code = TYPE_TYPEDEF;
}
void ASTTypeWriter::VisitTypeOfExprType(const TypeOfExprType *T) {
- Writer.AddStmt(T->getUnderlyingExpr());
+ Record.AddStmt(T->getUnderlyingExpr());
Code = TYPE_TYPEOF_EXPR;
}
void ASTTypeWriter::VisitTypeOfType(const TypeOfType *T) {
- Writer.AddTypeRef(T->getUnderlyingType(), Record);
+ Record.AddTypeRef(T->getUnderlyingType());
Code = TYPE_TYPEOF;
}
void ASTTypeWriter::VisitDecltypeType(const DecltypeType *T) {
- Writer.AddTypeRef(T->getUnderlyingType(), Record);
- Writer.AddStmt(T->getUnderlyingExpr());
+ Record.AddTypeRef(T->getUnderlyingType());
+ Record.AddStmt(T->getUnderlyingExpr());
Code = TYPE_DECLTYPE;
}
void ASTTypeWriter::VisitUnaryTransformType(const UnaryTransformType *T) {
- Writer.AddTypeRef(T->getBaseType(), Record);
- Writer.AddTypeRef(T->getUnderlyingType(), Record);
+ Record.AddTypeRef(T->getBaseType());
+ Record.AddTypeRef(T->getUnderlyingType());
Record.push_back(T->getUTTKind());
Code = TYPE_UNARY_TRANSFORM;
}
void ASTTypeWriter::VisitAutoType(const AutoType *T) {
- Writer.AddTypeRef(T->getDeducedType(), Record);
+ Record.AddTypeRef(T->getDeducedType());
Record.push_back((unsigned)T->getKeyword());
if (T->getDeducedType().isNull())
Record.push_back(T->isDependentType());
@@ -290,7 +320,7 @@ void ASTTypeWriter::VisitAutoType(const AutoType *T) {
void ASTTypeWriter::VisitTagType(const TagType *T) {
Record.push_back(T->isDependentType());
- Writer.AddDeclRef(T->getDecl()->getCanonicalDecl(), Record);
+ Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
assert(!T->isBeingDefined() &&
"Cannot serialize in the middle of a type definition");
}
@@ -306,8 +336,8 @@ void ASTTypeWriter::VisitEnumType(const EnumType *T) {
}
void ASTTypeWriter::VisitAttributedType(const AttributedType *T) {
- Writer.AddTypeRef(T->getModifiedType(), Record);
- Writer.AddTypeRef(T->getEquivalentType(), Record);
+ Record.AddTypeRef(T->getModifiedType());
+ Record.AddTypeRef(T->getEquivalentType());
Record.push_back(T->getAttrKind());
Code = TYPE_ATTRIBUTED;
}
@@ -315,16 +345,16 @@ void ASTTypeWriter::VisitAttributedType(const AttributedType *T) {
void
ASTTypeWriter::VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T) {
- Writer.AddTypeRef(QualType(T->getReplacedParameter(), 0), Record);
- Writer.AddTypeRef(T->getReplacementType(), Record);
+ Record.AddTypeRef(QualType(T->getReplacedParameter(), 0));
+ Record.AddTypeRef(T->getReplacementType());
Code = TYPE_SUBST_TEMPLATE_TYPE_PARM;
}
void
ASTTypeWriter::VisitSubstTemplateTypeParmPackType(
const SubstTemplateTypeParmPackType *T) {
- Writer.AddTypeRef(QualType(T->getReplacedParameter(), 0), Record);
- Writer.AddTemplateArgument(T->getArgumentPack(), Record);
+ Record.AddTypeRef(QualType(T->getReplacedParameter(), 0));
+ Record.AddTemplateArgument(T->getArgumentPack());
Code = TYPE_SUBST_TEMPLATE_TYPE_PARM_PACK;
}
@@ -332,22 +362,22 @@ void
ASTTypeWriter::VisitTemplateSpecializationType(
const TemplateSpecializationType *T) {
Record.push_back(T->isDependentType());
- Writer.AddTemplateName(T->getTemplateName(), Record);
+ Record.AddTemplateName(T->getTemplateName());
Record.push_back(T->getNumArgs());
for (const auto &ArgI : *T)
- Writer.AddTemplateArgument(ArgI, Record);
- Writer.AddTypeRef(T->isTypeAlias() ? T->getAliasedType() :
- T->isCanonicalUnqualified() ? QualType()
- : T->getCanonicalTypeInternal(),
- Record);
+ Record.AddTemplateArgument(ArgI);
+ Record.AddTypeRef(T->isTypeAlias() ? T->getAliasedType()
+ : T->isCanonicalUnqualified()
+ ? QualType()
+ : T->getCanonicalTypeInternal());
Code = TYPE_TEMPLATE_SPECIALIZATION;
}
void
ASTTypeWriter::VisitDependentSizedArrayType(const DependentSizedArrayType *T) {
VisitArrayType(T);
- Writer.AddStmt(T->getSizeExpr());
- Writer.AddSourceRange(T->getBracketsRange(), Record);
+ Record.AddStmt(T->getSizeExpr());
+ Record.AddSourceRange(T->getBracketsRange());
Code = TYPE_DEPENDENT_SIZED_ARRAY;
}
@@ -363,18 +393,17 @@ ASTTypeWriter::VisitTemplateTypeParmType(const TemplateTypeParmType *T) {
Record.push_back(T->getDepth());
Record.push_back(T->getIndex());
Record.push_back(T->isParameterPack());
- Writer.AddDeclRef(T->getDecl(), Record);
+ Record.AddDeclRef(T->getDecl());
Code = TYPE_TEMPLATE_TYPE_PARM;
}
void
ASTTypeWriter::VisitDependentNameType(const DependentNameType *T) {
Record.push_back(T->getKeyword());
- Writer.AddNestedNameSpecifier(T->getQualifier(), Record);
- Writer.AddIdentifierRef(T->getIdentifier(), Record);
- Writer.AddTypeRef(T->isCanonicalUnqualified() ? QualType()
- : T->getCanonicalTypeInternal(),
- Record);
+ Record.AddNestedNameSpecifier(T->getQualifier());
+ Record.AddIdentifierRef(T->getIdentifier());
+ Record.AddTypeRef(
+ T->isCanonicalUnqualified() ? QualType() : T->getCanonicalTypeInternal());
Code = TYPE_DEPENDENT_NAME;
}
@@ -382,16 +411,16 @@ void
ASTTypeWriter::VisitDependentTemplateSpecializationType(
const DependentTemplateSpecializationType *T) {
Record.push_back(T->getKeyword());
- Writer.AddNestedNameSpecifier(T->getQualifier(), Record);
- Writer.AddIdentifierRef(T->getIdentifier(), Record);
+ Record.AddNestedNameSpecifier(T->getQualifier());
+ Record.AddIdentifierRef(T->getIdentifier());
Record.push_back(T->getNumArgs());
for (const auto &I : *T)
- Writer.AddTemplateArgument(I, Record);
+ Record.AddTemplateArgument(I);
Code = TYPE_DEPENDENT_TEMPLATE_SPECIALIZATION;
}
void ASTTypeWriter::VisitPackExpansionType(const PackExpansionType *T) {
- Writer.AddTypeRef(T->getPattern(), Record);
+ Record.AddTypeRef(T->getPattern());
if (Optional<unsigned> NumExpansions = T->getNumExpansions())
Record.push_back(*NumExpansions + 1);
else
@@ -400,67 +429,66 @@ void ASTTypeWriter::VisitPackExpansionType(const PackExpansionType *T) {
}
void ASTTypeWriter::VisitParenType(const ParenType *T) {
- Writer.AddTypeRef(T->getInnerType(), Record);
+ Record.AddTypeRef(T->getInnerType());
Code = TYPE_PAREN;
}
void ASTTypeWriter::VisitElaboratedType(const ElaboratedType *T) {
Record.push_back(T->getKeyword());
- Writer.AddNestedNameSpecifier(T->getQualifier(), Record);
- Writer.AddTypeRef(T->getNamedType(), Record);
+ Record.AddNestedNameSpecifier(T->getQualifier());
+ Record.AddTypeRef(T->getNamedType());
Code = TYPE_ELABORATED;
}
void ASTTypeWriter::VisitInjectedClassNameType(const InjectedClassNameType *T) {
- Writer.AddDeclRef(T->getDecl()->getCanonicalDecl(), Record);
- Writer.AddTypeRef(T->getInjectedSpecializationType(), Record);
+ Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
+ Record.AddTypeRef(T->getInjectedSpecializationType());
Code = TYPE_INJECTED_CLASS_NAME;
}
void ASTTypeWriter::VisitObjCInterfaceType(const ObjCInterfaceType *T) {
- Writer.AddDeclRef(T->getDecl()->getCanonicalDecl(), Record);
+ Record.AddDeclRef(T->getDecl()->getCanonicalDecl());
Code = TYPE_OBJC_INTERFACE;
}
void ASTTypeWriter::VisitObjCObjectType(const ObjCObjectType *T) {
- Writer.AddTypeRef(T->getBaseType(), Record);
+ Record.AddTypeRef(T->getBaseType());
Record.push_back(T->getTypeArgsAsWritten().size());
for (auto TypeArg : T->getTypeArgsAsWritten())
- Writer.AddTypeRef(TypeArg, Record);
+ Record.AddTypeRef(TypeArg);
Record.push_back(T->getNumProtocols());
for (const auto *I : T->quals())
- Writer.AddDeclRef(I, Record);
+ Record.AddDeclRef(I);
Record.push_back(T->isKindOfTypeAsWritten());
Code = TYPE_OBJC_OBJECT;
}
void
ASTTypeWriter::VisitObjCObjectPointerType(const ObjCObjectPointerType *T) {
- Writer.AddTypeRef(T->getPointeeType(), Record);
+ Record.AddTypeRef(T->getPointeeType());
Code = TYPE_OBJC_OBJECT_POINTER;
}
void
ASTTypeWriter::VisitAtomicType(const AtomicType *T) {
- Writer.AddTypeRef(T->getValueType(), Record);
+ Record.AddTypeRef(T->getValueType());
Code = TYPE_ATOMIC;
}
void
ASTTypeWriter::VisitPipeType(const PipeType *T) {
- Writer.AddTypeRef(T->getElementType(), Record);
+ Record.AddTypeRef(T->getElementType());
Code = TYPE_PIPE;
}
namespace {
class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> {
- ASTWriter &Writer;
- ASTWriter::RecordDataImpl &Record;
+ ASTRecordWriter &Record;
public:
- TypeLocWriter(ASTWriter &Writer, ASTWriter::RecordDataImpl &Record)
- : Writer(Writer), Record(Record) { }
+ TypeLocWriter(ASTRecordWriter &Record)
+ : Record(Record) { }
#define ABSTRACT_TYPELOC(CLASS, PARENT)
#define TYPELOC(CLASS, PARENT) \
@@ -477,7 +505,7 @@ void TypeLocWriter::VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
- Writer.AddSourceLocation(TL.getBuiltinLoc(), Record);
+ Record.AddSourceLocation(TL.getBuiltinLoc());
if (TL.needsExtraLocalData()) {
Record.push_back(TL.getWrittenTypeSpec());
Record.push_back(TL.getWrittenSignSpec());
@@ -486,10 +514,10 @@ void TypeLocWriter::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
}
}
void TypeLocWriter::VisitComplexTypeLoc(ComplexTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitPointerTypeLoc(PointerTypeLoc TL) {
- Writer.AddSourceLocation(TL.getStarLoc(), Record);
+ Record.AddSourceLocation(TL.getStarLoc());
}
void TypeLocWriter::VisitDecayedTypeLoc(DecayedTypeLoc TL) {
// nothing to do
@@ -498,24 +526,24 @@ void TypeLocWriter::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing to do
}
void TypeLocWriter::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
- Writer.AddSourceLocation(TL.getCaretLoc(), Record);
+ Record.AddSourceLocation(TL.getCaretLoc());
}
void TypeLocWriter::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
- Writer.AddSourceLocation(TL.getAmpLoc(), Record);
+ Record.AddSourceLocation(TL.getAmpLoc());
}
void TypeLocWriter::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
- Writer.AddSourceLocation(TL.getAmpAmpLoc(), Record);
+ Record.AddSourceLocation(TL.getAmpAmpLoc());
}
void TypeLocWriter::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
- Writer.AddSourceLocation(TL.getStarLoc(), Record);
- Writer.AddTypeSourceInfo(TL.getClassTInfo(), Record);
+ Record.AddSourceLocation(TL.getStarLoc());
+ Record.AddTypeSourceInfo(TL.getClassTInfo());
}
void TypeLocWriter::VisitArrayTypeLoc(ArrayTypeLoc TL) {
- Writer.AddSourceLocation(TL.getLBracketLoc(), Record);
- Writer.AddSourceLocation(TL.getRBracketLoc(), Record);
+ Record.AddSourceLocation(TL.getLBracketLoc());
+ Record.AddSourceLocation(TL.getRBracketLoc());
Record.push_back(TL.getSizeExpr() ? 1 : 0);
if (TL.getSizeExpr())
- Writer.AddStmt(TL.getSizeExpr());
+ Record.AddStmt(TL.getSizeExpr());
}
void TypeLocWriter::VisitConstantArrayTypeLoc(ConstantArrayTypeLoc TL) {
VisitArrayTypeLoc(TL);
@@ -532,21 +560,21 @@ void TypeLocWriter::VisitDependentSizedArrayTypeLoc(
}
void TypeLocWriter::VisitDependentSizedExtVectorTypeLoc(
DependentSizedExtVectorTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitVectorTypeLoc(VectorTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) {
- Writer.AddSourceLocation(TL.getLocalRangeBegin(), Record);
- Writer.AddSourceLocation(TL.getLParenLoc(), Record);
- Writer.AddSourceLocation(TL.getRParenLoc(), Record);
- Writer.AddSourceLocation(TL.getLocalRangeEnd(), Record);
+ Record.AddSourceLocation(TL.getLocalRangeBegin());
+ Record.AddSourceLocation(TL.getLParenLoc());
+ Record.AddSourceLocation(TL.getRParenLoc());
+ Record.AddSourceLocation(TL.getLocalRangeEnd());
for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i)
- Writer.AddDeclRef(TL.getParam(i), Record);
+ Record.AddDeclRef(TL.getParam(i));
}
void TypeLocWriter::VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
@@ -555,131 +583,131 @@ void TypeLocWriter::VisitFunctionNoProtoTypeLoc(FunctionNoProtoTypeLoc TL) {
VisitFunctionTypeLoc(TL);
}
void TypeLocWriter::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTypedefTypeLoc(TypedefTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
- Writer.AddSourceLocation(TL.getTypeofLoc(), Record);
- Writer.AddSourceLocation(TL.getLParenLoc(), Record);
- Writer.AddSourceLocation(TL.getRParenLoc(), Record);
+ Record.AddSourceLocation(TL.getTypeofLoc());
+ Record.AddSourceLocation(TL.getLParenLoc());
+ Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
- Writer.AddSourceLocation(TL.getTypeofLoc(), Record);
- Writer.AddSourceLocation(TL.getLParenLoc(), Record);
- Writer.AddSourceLocation(TL.getRParenLoc(), Record);
- Writer.AddTypeSourceInfo(TL.getUnderlyingTInfo(), Record);
+ Record.AddSourceLocation(TL.getTypeofLoc());
+ Record.AddSourceLocation(TL.getLParenLoc());
+ Record.AddSourceLocation(TL.getRParenLoc());
+ Record.AddTypeSourceInfo(TL.getUnderlyingTInfo());
}
void TypeLocWriter::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
- Writer.AddSourceLocation(TL.getKWLoc(), Record);
- Writer.AddSourceLocation(TL.getLParenLoc(), Record);
- Writer.AddSourceLocation(TL.getRParenLoc(), Record);
- Writer.AddTypeSourceInfo(TL.getUnderlyingTInfo(), Record);
+ Record.AddSourceLocation(TL.getKWLoc());
+ Record.AddSourceLocation(TL.getLParenLoc());
+ Record.AddSourceLocation(TL.getRParenLoc());
+ Record.AddTypeSourceInfo(TL.getUnderlyingTInfo());
}
void TypeLocWriter::VisitAutoTypeLoc(AutoTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitRecordTypeLoc(RecordTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitEnumTypeLoc(EnumTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitAttributedTypeLoc(AttributedTypeLoc TL) {
- Writer.AddSourceLocation(TL.getAttrNameLoc(), Record);
+ Record.AddSourceLocation(TL.getAttrNameLoc());
if (TL.hasAttrOperand()) {
SourceRange range = TL.getAttrOperandParensRange();
- Writer.AddSourceLocation(range.getBegin(), Record);
- Writer.AddSourceLocation(range.getEnd(), Record);
+ Record.AddSourceLocation(range.getBegin());
+ Record.AddSourceLocation(range.getEnd());
}
if (TL.hasAttrExprOperand()) {
Expr *operand = TL.getAttrExprOperand();
Record.push_back(operand ? 1 : 0);
- if (operand) Writer.AddStmt(operand);
+ if (operand) Record.AddStmt(operand);
} else if (TL.hasAttrEnumOperand()) {
- Writer.AddSourceLocation(TL.getAttrEnumOperandLoc(), Record);
+ Record.AddSourceLocation(TL.getAttrEnumOperandLoc());
}
}
void TypeLocWriter::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitSubstTemplateTypeParmTypeLoc(
SubstTemplateTypeParmTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitSubstTemplateTypeParmPackTypeLoc(
SubstTemplateTypeParmPackTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitTemplateSpecializationTypeLoc(
TemplateSpecializationTypeLoc TL) {
- Writer.AddSourceLocation(TL.getTemplateKeywordLoc(), Record);
- Writer.AddSourceLocation(TL.getTemplateNameLoc(), Record);
- Writer.AddSourceLocation(TL.getLAngleLoc(), Record);
- Writer.AddSourceLocation(TL.getRAngleLoc(), Record);
+ Record.AddSourceLocation(TL.getTemplateKeywordLoc());
+ Record.AddSourceLocation(TL.getTemplateNameLoc());
+ Record.AddSourceLocation(TL.getLAngleLoc());
+ Record.AddSourceLocation(TL.getRAngleLoc());
for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i)
- Writer.AddTemplateArgumentLocInfo(TL.getArgLoc(i).getArgument().getKind(),
- TL.getArgLoc(i).getLocInfo(), Record);
+ Record.AddTemplateArgumentLocInfo(TL.getArgLoc(i).getArgument().getKind(),
+ TL.getArgLoc(i).getLocInfo());
}
void TypeLocWriter::VisitParenTypeLoc(ParenTypeLoc TL) {
- Writer.AddSourceLocation(TL.getLParenLoc(), Record);
- Writer.AddSourceLocation(TL.getRParenLoc(), Record);
+ Record.AddSourceLocation(TL.getLParenLoc());
+ Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
- Writer.AddSourceLocation(TL.getElaboratedKeywordLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(TL.getQualifierLoc(), Record);
+ Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
+ Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
}
void TypeLocWriter::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
- Writer.AddSourceLocation(TL.getElaboratedKeywordLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(TL.getQualifierLoc(), Record);
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
+ Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
- Writer.AddSourceLocation(TL.getElaboratedKeywordLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(TL.getQualifierLoc(), Record);
- Writer.AddSourceLocation(TL.getTemplateKeywordLoc(), Record);
- Writer.AddSourceLocation(TL.getTemplateNameLoc(), Record);
- Writer.AddSourceLocation(TL.getLAngleLoc(), Record);
- Writer.AddSourceLocation(TL.getRAngleLoc(), Record);
+ Record.AddSourceLocation(TL.getElaboratedKeywordLoc());
+ Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc());
+ Record.AddSourceLocation(TL.getTemplateKeywordLoc());
+ Record.AddSourceLocation(TL.getTemplateNameLoc());
+ Record.AddSourceLocation(TL.getLAngleLoc());
+ Record.AddSourceLocation(TL.getRAngleLoc());
for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I)
- Writer.AddTemplateArgumentLocInfo(TL.getArgLoc(I).getArgument().getKind(),
- TL.getArgLoc(I).getLocInfo(), Record);
+ Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I).getArgument().getKind(),
+ TL.getArgLoc(I).getLocInfo());
}
void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) {
- Writer.AddSourceLocation(TL.getEllipsisLoc(), Record);
+ Record.AddSourceLocation(TL.getEllipsisLoc());
}
void TypeLocWriter::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
- Writer.AddSourceLocation(TL.getNameLoc(), Record);
+ Record.AddSourceLocation(TL.getNameLoc());
}
void TypeLocWriter::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
Record.push_back(TL.hasBaseTypeAsWritten());
- Writer.AddSourceLocation(TL.getTypeArgsLAngleLoc(), Record);
- Writer.AddSourceLocation(TL.getTypeArgsRAngleLoc(), Record);
+ Record.AddSourceLocation(TL.getTypeArgsLAngleLoc());
+ Record.AddSourceLocation(TL.getTypeArgsRAngleLoc());
for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i)
- Writer.AddTypeSourceInfo(TL.getTypeArgTInfo(i), Record);
- Writer.AddSourceLocation(TL.getProtocolLAngleLoc(), Record);
- Writer.AddSourceLocation(TL.getProtocolRAngleLoc(), Record);
+ Record.AddTypeSourceInfo(TL.getTypeArgTInfo(i));
+ Record.AddSourceLocation(TL.getProtocolLAngleLoc());
+ Record.AddSourceLocation(TL.getProtocolRAngleLoc());
for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i)
- Writer.AddSourceLocation(TL.getProtocolLoc(i), Record);
+ Record.AddSourceLocation(TL.getProtocolLoc(i));
}
void TypeLocWriter::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
- Writer.AddSourceLocation(TL.getStarLoc(), Record);
+ Record.AddSourceLocation(TL.getStarLoc());
}
void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) {
- Writer.AddSourceLocation(TL.getKWLoc(), Record);
- Writer.AddSourceLocation(TL.getLParenLoc(), Record);
- Writer.AddSourceLocation(TL.getRParenLoc(), Record);
+ Record.AddSourceLocation(TL.getKWLoc());
+ Record.AddSourceLocation(TL.getLParenLoc());
+ Record.AddSourceLocation(TL.getRParenLoc());
}
void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) {
- Writer.AddSourceLocation(TL.getKWLoc(), Record);
+ Record.AddSourceLocation(TL.getKWLoc());
}
void ASTWriter::WriteTypeAbbrevs() {
@@ -930,11 +958,9 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(SEMA_DECL_REFS);
RECORD(WEAK_UNDECLARED_IDENTIFIERS);
RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
- RECORD(DECL_REPLACEMENTS);
RECORD(UPDATE_VISIBLE);
RECORD(DECL_UPDATE_OFFSETS);
RECORD(DECL_UPDATES);
- RECORD(CXX_BASE_SPECIFIER_OFFSETS);
RECORD(DIAG_PRAGMA_MAPPINGS);
RECORD(CUDA_SPECIAL_DECL_REFS);
RECORD(HEADER_SEARCH_TABLE);
@@ -953,8 +979,9 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(UNDEFINED_BUT_USED);
RECORD(LATE_PARSED_TEMPLATE);
RECORD(OPTIMIZE_PRAGMA_OPTIONS);
+ RECORD(MSSTRUCT_PRAGMA_OPTIONS);
+ RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS);
RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES);
- RECORD(CXX_CTOR_INITIALIZERS_OFFSETS);
RECORD(DELETE_EXPRS_TO_ANALYZE);
// SourceManager Block.
@@ -962,6 +989,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(SM_SLOC_FILE_ENTRY);
RECORD(SM_SLOC_BUFFER_ENTRY);
RECORD(SM_SLOC_BUFFER_BLOB);
+ RECORD(SM_SLOC_BUFFER_BLOB_COMPRESSED);
RECORD(SM_SLOC_EXPANSION_ENTRY);
// Preprocessor Block.
@@ -1076,6 +1104,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(DECL_CXX_RECORD);
RECORD(DECL_CXX_METHOD);
RECORD(DECL_CXX_CONSTRUCTOR);
+ RECORD(DECL_CXX_INHERITED_CONSTRUCTOR);
RECORD(DECL_CXX_DESTRUCTOR);
RECORD(DECL_CXX_CONVERSION);
RECORD(DECL_ACCESS_SPEC);
@@ -1091,10 +1120,22 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(DECL_TEMPLATE_TYPE_PARM);
RECORD(DECL_NON_TYPE_TEMPLATE_PARM);
RECORD(DECL_TEMPLATE_TEMPLATE_PARM);
+ RECORD(DECL_TYPE_ALIAS_TEMPLATE);
RECORD(DECL_STATIC_ASSERT);
RECORD(DECL_CXX_BASE_SPECIFIERS);
+ RECORD(DECL_CXX_CTOR_INITIALIZERS);
RECORD(DECL_INDIRECTFIELD);
RECORD(DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK);
+ RECORD(DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK);
+ RECORD(DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION);
+ RECORD(DECL_IMPORT);
+ RECORD(DECL_OMP_THREADPRIVATE);
+ RECORD(DECL_EMPTY);
+ RECORD(DECL_OBJC_TYPE_PARAM);
+ RECORD(DECL_OMP_CAPTUREDEXPR);
+ RECORD(DECL_PRAGMA_COMMENT);
+ RECORD(DECL_PRAGMA_DETECT_MISMATCH);
+ RECORD(DECL_OMP_DECLARE_REDUCTION);
// Statements and Exprs can occur in the Decls and Types block.
AddStmtsExprs(Stream, Record);
@@ -1631,11 +1672,15 @@ static unsigned CreateSLocBufferAbbrev(llvm::BitstreamWriter &Stream) {
/// \brief Create an abbreviation for the SLocEntry that refers to a
/// buffer's blob.
-static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream) {
+static unsigned CreateSLocBufferBlobAbbrev(llvm::BitstreamWriter &Stream,
+ bool Compressed) {
using namespace llvm;
auto *Abbrev = new BitCodeAbbrev();
- Abbrev->Add(BitCodeAbbrevOp(SM_SLOC_BUFFER_BLOB));
+ Abbrev->Add(BitCodeAbbrevOp(Compressed ? SM_SLOC_BUFFER_BLOB_COMPRESSED
+ : SM_SLOC_BUFFER_BLOB));
+ if (Compressed)
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Uncompressed size
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Blob
return Stream.EmitAbbrev(Abbrev);
}
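// For reference, the two abbreviation layouts this produces are:
//   SM_SLOC_BUFFER_BLOB            : [code, blob]
//   SM_SLOC_BUFFER_BLOB_COMPRESSED : [code, uncompressed size (VBR-8), blob]
// The extra operand lets a consumer size its buffer before inflating.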
@@ -1857,12 +1902,14 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
RecordData Record;
// Enter the source manager block.
- Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 3);
+ Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
// Abbreviations for the various kinds of source-location entries.
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
unsigned SLocBufferAbbrv = CreateSLocBufferAbbrev(Stream);
- unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream);
+ unsigned SLocBufferBlobAbbrv = CreateSLocBufferBlobAbbrev(Stream, false);
+ unsigned SLocBufferBlobCompressedAbbrv =
+ CreateSLocBufferBlobAbbrev(Stream, true);
unsigned SLocExpansionAbbrv = CreateSLocExpansionAbbrev(Stream);
// Write out the source location entry table. We skip the first
@@ -1897,11 +1944,12 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Record.push_back(SLoc->getOffset() - 2);
if (SLoc->isFile()) {
const SrcMgr::FileInfo &File = SLoc->getFile();
- Record.push_back(File.getIncludeLoc().getRawEncoding());
+ AddSourceLocation(File.getIncludeLoc(), Record);
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
Record.push_back(File.hasLineDirectives());
const SrcMgr::ContentCache *Content = File.getContentCache();
+ bool EmitBlob = false;
if (Content->OrigEntry) {
assert(Content->OrigEntry == Content->ContentsEntry &&
"Writing to AST an overridden file is not supported");
@@ -1923,14 +1971,8 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Stream.EmitRecordWithAbbrev(SLocFileAbbrv, Record);
- if (Content->BufferOverridden || Content->IsTransient) {
- RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
- const llvm::MemoryBuffer *Buffer
- = Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
- Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
- StringRef(Buffer->getBufferStart(),
- Buffer->getBufferSize() + 1));
- }
+ if (Content->BufferOverridden || Content->IsTransient)
+ EmitBlob = true;
} else {
// The source location entry is a buffer. The blob associated
// with this entry contains the contents of the buffer.
@@ -1943,22 +1985,43 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
const char *Name = Buffer->getBufferIdentifier();
Stream.EmitRecordWithBlob(SLocBufferAbbrv, Record,
StringRef(Name, strlen(Name) + 1));
- RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
- Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record,
- StringRef(Buffer->getBufferStart(),
- Buffer->getBufferSize() + 1));
+ EmitBlob = true;
if (strcmp(Name, "<built-in>") == 0) {
PreloadSLocs.push_back(SLocEntryOffsets.size());
}
}
+
+ if (EmitBlob) {
+ // Include the implicit terminating null character in the on-disk buffer
+ // if we're writing it uncompressed.
+ const llvm::MemoryBuffer *Buffer =
+ Content->getBuffer(PP.getDiagnostics(), PP.getSourceManager());
+ StringRef Blob(Buffer->getBufferStart(), Buffer->getBufferSize() + 1);
+
+ // Compress the buffer if possible. We expect that almost all PCM
+ // consumers will not want its contents.
+ SmallString<0> CompressedBuffer;
+ if (llvm::zlib::compress(Blob.drop_back(1), CompressedBuffer) ==
+ llvm::zlib::StatusOK) {
+ RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB_COMPRESSED,
+ Blob.size() - 1};
+ Stream.EmitRecordWithBlob(SLocBufferBlobCompressedAbbrv, Record,
+ CompressedBuffer);
+ } else {
+ RecordData::value_type Record[] = {SM_SLOC_BUFFER_BLOB};
+ Stream.EmitRecordWithBlob(SLocBufferBlobAbbrv, Record, Blob);
+ }
+ }
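// A minimal sketch of the matching read-side round trip, assuming the same
// llvm::zlib API (a name like 'UncompressedSize' here stands for the VBR
// operand written with the record and is illustrative only):
//
//   SmallString<0> Uncompressed;
//   if (llvm::zlib::uncompress(CompressedBlob, Uncompressed,
//                              UncompressedSize) == llvm::zlib::StatusOK)
//     Uncompressed.push_back('\0'); // re-add the terminator dropped above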
} else {
// The source location entry is a macro expansion.
const SrcMgr::ExpansionInfo &Expansion = SLoc->getExpansion();
- Record.push_back(Expansion.getSpellingLoc().getRawEncoding());
- Record.push_back(Expansion.getExpansionLocStart().getRawEncoding());
- Record.push_back(Expansion.isMacroArgExpansion() ? 0
- : Expansion.getExpansionLocEnd().getRawEncoding());
+ AddSourceLocation(Expansion.getSpellingLoc(), Record);
+ AddSourceLocation(Expansion.getExpansionLocStart(), Record);
+ AddSourceLocation(Expansion.isMacroArgExpansion()
+ ? SourceLocation()
+ : Expansion.getExpansionLocEnd(),
+ Record);
// Compute the token length for this macro expansion.
unsigned NextOffset = SourceMgr.getNextLocalOffset();
@@ -2125,30 +2188,29 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
// Write out any exported module macros.
bool EmittedModuleMacros = false;
- if (IsModule) {
- auto Leafs = PP.getLeafModuleMacros(Name);
- SmallVector<ModuleMacro*, 8> Worklist(Leafs.begin(), Leafs.end());
- llvm::DenseMap<ModuleMacro*, unsigned> Visits;
- while (!Worklist.empty()) {
- auto *Macro = Worklist.pop_back_val();
-
- // Emit a record indicating this submodule exports this macro.
- ModuleMacroRecord.push_back(
- getSubmoduleID(Macro->getOwningModule()));
- ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name));
- for (auto *M : Macro->overrides())
- ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule()));
-
- Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord);
- ModuleMacroRecord.clear();
-
- // Enqueue overridden macros once we've visited all their ancestors.
- for (auto *M : Macro->overrides())
- if (++Visits[M] == M->getNumOverridingMacros())
- Worklist.push_back(M);
-
- EmittedModuleMacros = true;
- }
+ // We write out exported module macros for PCH as well.
+ auto Leafs = PP.getLeafModuleMacros(Name);
+ SmallVector<ModuleMacro*, 8> Worklist(Leafs.begin(), Leafs.end());
+ llvm::DenseMap<ModuleMacro*, unsigned> Visits;
+ while (!Worklist.empty()) {
+ auto *Macro = Worklist.pop_back_val();
+
+ // Emit a record indicating this submodule exports this macro.
+ ModuleMacroRecord.push_back(
+ getSubmoduleID(Macro->getOwningModule()));
+ ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name));
+ for (auto *M : Macro->overrides())
+ ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule()));
+
+ Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord);
+ ModuleMacroRecord.clear();
+
+ // Enqueue overridden macros once we've visited all their ancestors.
+ for (auto *M : Macro->overrides())
+ if (++Visits[M] == M->getNumOverridingMacros())
+ Worklist.push_back(M);
+
+ EmittedModuleMacros = true;
}
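// As a worked example of the emission order: if module C's macro for Name
// overrides definitions from modules A and B, the worklist starts at the
// leaf C, emits its PP_MODULE_MACRO record, and only then enqueues A and B,
// each becoming ready once all of its overriders have been visited. A
// reader therefore always sees a macro record before the records of the
// macros it overrides.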
if (Record.empty() && !EmittedModuleMacros)
@@ -2640,7 +2702,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
if (point.Loc.isInvalid())
continue;
- Record.push_back(point.Loc.getRawEncoding());
+ AddSourceLocation(point.Loc, Record);
unsigned &DiagStateID = DiagStateIDMap[point.State];
Record.push_back(DiagStateID);
@@ -2661,95 +2723,36 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag,
Stream.EmitRecord(DIAG_PRAGMA_MAPPINGS, Record);
}
-void ASTWriter::WriteCXXCtorInitializersOffsets() {
- if (CXXCtorInitializersOffsets.empty())
- return;
-
- // Create a blob abbreviation for the C++ ctor initializer offsets.
- using namespace llvm;
-
- auto *Abbrev = new BitCodeAbbrev();
- Abbrev->Add(BitCodeAbbrevOp(CXX_CTOR_INITIALIZERS_OFFSETS));
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
- unsigned CtorInitializersOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
-
- // Write the base specifier offsets table.
- RecordData::value_type Record[] = {CXX_CTOR_INITIALIZERS_OFFSETS,
- CXXCtorInitializersOffsets.size()};
- Stream.EmitRecordWithBlob(CtorInitializersOffsetAbbrev, Record,
- bytes(CXXCtorInitializersOffsets));
-}
-
-void ASTWriter::WriteCXXBaseSpecifiersOffsets() {
- if (CXXBaseSpecifiersOffsets.empty())
- return;
-
- // Create a blob abbreviation for the C++ base specifiers offsets.
- using namespace llvm;
-
- auto *Abbrev = new BitCodeAbbrev();
- Abbrev->Add(BitCodeAbbrevOp(CXX_BASE_SPECIFIER_OFFSETS));
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // size
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
- unsigned BaseSpecifierOffsetAbbrev = Stream.EmitAbbrev(Abbrev);
-
- // Write the base specifier offsets table.
- RecordData::value_type Record[] = {CXX_BASE_SPECIFIER_OFFSETS,
- CXXBaseSpecifiersOffsets.size()};
- Stream.EmitRecordWithBlob(BaseSpecifierOffsetAbbrev, Record,
- bytes(CXXBaseSpecifiersOffsets));
-}
-
//===----------------------------------------------------------------------===//
// Type Serialization
//===----------------------------------------------------------------------===//
/// \brief Write the representation of a type to the AST stream.
void ASTWriter::WriteType(QualType T) {
- TypeIdx &Idx = TypeIdxs[T];
- if (Idx.getIndex() == 0) // we haven't seen this type before.
- Idx = TypeIdx(NextTypeID++);
+ TypeIdx &IdxRef = TypeIdxs[T];
+ if (IdxRef.getIndex() == 0) // we haven't seen this type before.
+ IdxRef = TypeIdx(NextTypeID++);
+ TypeIdx Idx = IdxRef;
assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST");
+ RecordData Record;
+
+ // Emit the type's representation.
+ ASTTypeWriter W(*this, Record);
+ W.Visit(T);
+ uint64_t Offset = W.Emit();
+
// Record the offset for this type.
unsigned Index = Idx.getIndex() - FirstTypeID;
if (TypeOffsets.size() == Index)
- TypeOffsets.push_back(Stream.GetCurrentBitNo());
+ TypeOffsets.push_back(Offset);
else if (TypeOffsets.size() < Index) {
TypeOffsets.resize(Index + 1);
- TypeOffsets[Index] = Stream.GetCurrentBitNo();
- }
-
- RecordData Record;
-
- // Emit the type's representation.
- ASTTypeWriter W(*this, Record);
- W.AbbrevToUse = 0;
-
- if (T.hasLocalNonFastQualifiers()) {
- Qualifiers Qs = T.getLocalQualifiers();
- AddTypeRef(T.getLocalUnqualifiedType(), Record);
- Record.push_back(Qs.getAsOpaqueValue());
- W.Code = TYPE_EXT_QUAL;
- W.AbbrevToUse = TypeExtQualAbbrev;
+ TypeOffsets[Index] = Offset;
} else {
- switch (T->getTypeClass()) {
- // For all of the concrete, non-dependent types, call the
- // appropriate visitor function.
-#define TYPE(Class, Base) \
- case Type::Class: W.Visit##Class##Type(cast<Class##Type>(T)); break;
-#define ABSTRACT_TYPE(Class, Base)
-#include "clang/AST/TypeNodes.def"
- }
+ llvm_unreachable("Types emitted in wrong order");
}
-
- // Emit the serialized record.
- Stream.EmitRecord(W.Code, Record, W.AbbrevToUse);
-
- // Flush any expressions that were written as part of this type.
- FlushStmts();
}
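// A small worked trace of the bookkeeping above, assuming FirstTypeID == N
// and that 'int *' is referenced (and therefore assigned an ID) before
// 'int':
//   int* -> TypeIdx N,   WriteType emits at Index 0, TypeOffsets = {O1}
//   int  -> TypeIdx N+1, WriteType emits at Index 1, TypeOffsets = {O1, O2}
// Writing a type whose offset slot already exists would be a duplicate and
// lands on the llvm_unreachable path.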
//===----------------------------------------------------------------------===//
@@ -3073,6 +3076,7 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
return;
RecordData Record;
+ ASTRecordWriter Writer(*this, Record);
// Note: this writes out all references even for a dependent AST. But it is
// very tricky to fix, and given that @selector shouldn't really appear in
@@ -3080,10 +3084,10 @@ void ASTWriter::WriteReferencedSelectorsPool(Sema &SemaRef) {
for (auto &SelectorAndLocation : SemaRef.ReferencedSelectors) {
Selector Sel = SelectorAndLocation.first;
SourceLocation Loc = SelectorAndLocation.second;
- AddSelectorRef(Sel, Record);
- AddSourceLocation(Loc, Record);
+ Writer.AddSelectorRef(Sel);
+ Writer.AddSourceLocation(Loc);
}
- Stream.EmitRecord(REFERENCED_SELECTOR_POOL, Record);
+ Writer.Emit(REFERENCED_SELECTOR_POOL);
}
//===----------------------------------------------------------------------===//
@@ -3103,11 +3107,20 @@ static NamedDecl *getDeclForLocalLookup(const LangOptions &LangOpts,
if (Decl *Redecl = D->getPreviousDecl()) {
// For Redeclarable decls, a prior declaration might be local.
for (; Redecl; Redecl = Redecl->getPreviousDecl()) {
- if (!Redecl->isFromASTFile())
+ // If we find a local decl, we're done.
+ if (!Redecl->isFromASTFile()) {
+ // Exception: in very rare cases (for injected-class-names), not all
+ // redeclarations are in the same semantic context. Skip ones in a
+ // different context. They don't go in this lookup table at all.
+ if (!Redecl->getDeclContext()->getRedeclContext()->Equals(
+ D->getDeclContext()->getRedeclContext()))
+ continue;
return cast<NamedDecl>(Redecl);
+ }
+
// If we find a decl from a (chained-)PCH, stop since we won't find a
// local one.
- if (D->getOwningModuleID() == 0)
+ if (Redecl->getOwningModuleID() == 0)
break;
}
} else if (Decl *First = D->getCanonicalDecl()) {
@@ -3162,6 +3175,8 @@ public:
NeedDecls(!IsModule || !Writer.getLangOpts().CPlusPlus),
InterestingIdentifierOffsets(InterestingIdentifierOffsets) {}
+ bool needDecls() const { return NeedDecls; }
+
static hash_value_type ComputeHash(const IdentifierInfo* II) {
return llvm::HashString(II->getName());
}
@@ -3307,7 +3322,12 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
auto *II = const_cast<IdentifierInfo *>(IdentIDPair.first);
IdentID ID = IdentIDPair.second;
assert(II && "NULL identifier in identifier table");
- if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization())
+ // Write out identifiers if either the ID is local or the identifier has
+ // changed since it was loaded.
+ if (ID >= FirstIdentID || !Chain || !II->isFromAST() ||
+ II->hasChangedSinceDeserialization() ||
+ (Trait.needDecls() &&
+ II->hasFETokenInfoChangedSinceDeserialization()))
Generator.insert(II, ID, Trait);
}
@@ -3896,6 +3916,22 @@ void ASTWriter::WriteOptimizePragmaOptions(Sema &SemaRef) {
Stream.EmitRecord(OPTIMIZE_PRAGMA_OPTIONS, Record);
}
+/// \brief Write the state of 'pragma ms_struct' at the end of the module.
+void ASTWriter::WriteMSStructPragmaOptions(Sema &SemaRef) {
+ RecordData Record;
+ Record.push_back(SemaRef.MSStructPragmaOn ? PMSST_ON : PMSST_OFF);
+ Stream.EmitRecord(MSSTRUCT_PRAGMA_OPTIONS, Record);
+}
+
+/// \brief Write the state of 'pragma pointers_to_members' at the end of the
+/// module.
+void ASTWriter::WriteMSPointersToMembersPragmaOptions(Sema &SemaRef) {
+ RecordData Record;
+ Record.push_back(SemaRef.MSPointerToMemberRepresentationMethod);
+ AddSourceLocation(SemaRef.ImplicitMSInheritanceAttrLoc, Record);
+ Stream.EmitRecord(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS, Record);
+}
+
void ASTWriter::WriteModuleFileExtension(Sema &SemaRef,
ModuleFileExtensionWriter &Writer) {
// Enter the extension block.
@@ -3935,13 +3971,13 @@ void ASTWriter::WriteModuleFileExtension(Sema &SemaRef,
// General Serialization Routines
//===----------------------------------------------------------------------===//
-/// \brief Write a record containing the given attributes.
-void ASTWriter::WriteAttributes(ArrayRef<const Attr*> Attrs,
- RecordDataImpl &Record) {
+/// \brief Emit the list of attributes to the specified record.
+void ASTRecordWriter::AddAttributes(ArrayRef<const Attr *> Attrs) {
+ auto &Record = *this;
Record.push_back(Attrs.size());
for (const auto *A : Attrs) {
Record.push_back(A->getKind()); // FIXME: stable encoding, target attrs
- AddSourceRange(A->getRange(), Record);
+ Record.AddSourceRange(A->getRange());
#include "clang/Serialization/AttrPCHWrite.inc"
@@ -4046,9 +4082,8 @@ ASTWriter::ASTWriter(
NextMacroID(FirstMacroID), FirstSubmoduleID(NUM_PREDEF_SUBMODULE_IDS),
NextSubmoduleID(FirstSubmoduleID),
FirstSelectorID(NUM_PREDEF_SELECTOR_IDS), NextSelectorID(FirstSelectorID),
- CollectedStmts(&StmtsToEmit), NumStatements(0), NumMacros(0),
+ NumStatements(0), NumMacros(0),
NumLexicalDeclContexts(0), NumVisibleDeclContexts(0),
- NextCXXBaseSpecifiersID(1), NextCXXCtorInitializersID(1),
TypeExtQualAbbrev(0), TypeFunctionProtoAbbrev(0), DeclParmVarAbbrev(0),
DeclContextLexicalAbbrev(0), DeclContextVisibleLookupAbbrev(0),
UpdateVisibleAbbrev(0), DeclRecordAbbrev(0), DeclTypedefAbbrev(0),
@@ -4152,6 +4187,12 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
RegisterPredefDecl(Context.ExternCContext, PREDEF_DECL_EXTERN_C_CONTEXT_ID);
RegisterPredefDecl(Context.MakeIntegerSeqDecl,
PREDEF_DECL_MAKE_INTEGER_SEQ_ID);
+ RegisterPredefDecl(Context.CFConstantStringTypeDecl,
+ PREDEF_DECL_CF_CONSTANT_STRING_ID);
+ RegisterPredefDecl(Context.CFConstantStringTagDecl,
+ PREDEF_DECL_CF_CONSTANT_STRING_TAG_ID);
+ RegisterPredefDecl(Context.TypePackElementDecl,
+ PREDEF_DECL_TYPE_PACK_ELEMENT_ID);
// Build a record containing all of the tentative definitions in this file, in
// TentativeDefinitions order. Generally, this record will be empty for
@@ -4348,6 +4389,19 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
}
}
+ // For the method pool in the module, if it contains an entry for a selector,
+ // the entry should be complete, containing everything introduced by that
+ // module and all modules it imports. It's possible that the entry is out of
+ // date, so we need to pull in the new content here.
+
+ // It's possible that updateOutOfDateSelector can update SelectorIDs. To be
+ // safe, we copy all selectors out.
+ llvm::SmallVector<Selector, 256> AllSelectors;
+ for (auto &SelectorAndID : SelectorIDs)
+ AllSelectors.push_back(SelectorAndID.first);
+ for (auto &Selector : AllSelectors)
+ SemaRef.updateOutOfDateSelector(Selector);
+
// Form the record of special types.
RecordData SpecialTypes;
AddTypeRef(Context.getRawCFConstantStringType(), SpecialTypes);
@@ -4445,8 +4499,6 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
WriteTypeDeclOffsets();
if (!DeclUpdatesOffsetsRecord.empty())
Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord);
- WriteCXXBaseSpecifiersOffsets();
- WriteCXXCtorInitializersOffsets();
WriteFileDeclIDsMap();
WriteSourceManagerBlock(Context.getSourceManager(), PP);
WriteComments();
@@ -4567,10 +4619,12 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
}
}
- WriteDeclReplacementsBlock();
WriteObjCCategories();
- if(!WritingModule)
+ if (!WritingModule) {
WriteOptimizePragmaOptions(SemaRef);
+ WriteMSStructPragmaOptions(SemaRef);
+ WriteMSPointersToMembersPragmaOptions(SemaRef);
+ }
// Some simple statistics
RecordData::value_type Record[] = {
@@ -4596,11 +4650,18 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
const Decl *D = DeclUpdate.first;
bool HasUpdatedBody = false;
- RecordData Record;
+ RecordData RecordData;
+ ASTRecordWriter Record(*this, RecordData);
for (auto &Update : DeclUpdate.second) {
DeclUpdateKind Kind = (DeclUpdateKind)Update.getKind();
- Record.push_back(Kind);
+ // An updated body is emitted last, so that the reader doesn't need
+ // to skip over the lazy body to reach statements for other records.
+ if (Kind == UPD_CXX_ADDED_FUNCTION_DEFINITION)
+ HasUpdatedBody = true;
+ else
+ Record.push_back(Kind);
+
switch (Kind) {
case UPD_CXX_ADDED_IMPLICIT_MEMBER:
case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
@@ -4610,26 +4671,22 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
break;
case UPD_CXX_ADDED_FUNCTION_DEFINITION:
- // An updated body is emitted last, so that the reader doesn't need
- // to skip over the lazy body to reach statements for other records.
- Record.pop_back();
- HasUpdatedBody = true;
break;
case UPD_CXX_INSTANTIATED_STATIC_DATA_MEMBER:
- AddSourceLocation(Update.getLoc(), Record);
+ Record.AddSourceLocation(Update.getLoc());
break;
case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT:
- AddStmt(const_cast<Expr*>(
- cast<ParmVarDecl>(Update.getDecl())->getDefaultArg()));
+ Record.AddStmt(const_cast<Expr *>(
+ cast<ParmVarDecl>(Update.getDecl())->getDefaultArg()));
break;
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
UpdatedDeclContexts.insert(RD->getPrimaryContext());
- AddCXXDefinitionData(RD, Record);
- Record.push_back(WriteDeclContextLexicalBlock(
+ Record.AddCXXDefinitionData(RD);
+ Record.AddOffset(WriteDeclContextLexicalBlock(
*Context, const_cast<CXXRecordDecl *>(RD)));
// This state is sometimes updated by template instantiation, when we
@@ -4637,11 +4694,11 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
// to it referring to the template definition.
if (auto *MSInfo = RD->getMemberSpecializationInfo()) {
Record.push_back(MSInfo->getTemplateSpecializationKind());
- AddSourceLocation(MSInfo->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(MSInfo->getPointOfInstantiation());
} else {
auto *Spec = cast<ClassTemplateSpecializationDecl>(RD);
Record.push_back(Spec->getTemplateSpecializationKind());
- AddSourceLocation(Spec->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(Spec->getPointOfInstantiation());
// The instantiation might have been resolved to a partial
// specialization. If so, record which one.
@@ -4649,35 +4706,33 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
if (auto PartialSpec =
From.dyn_cast<ClassTemplatePartialSpecializationDecl*>()) {
Record.push_back(true);
- AddDeclRef(PartialSpec, Record);
- AddTemplateArgumentList(&Spec->getTemplateInstantiationArgs(),
- Record);
+ Record.AddDeclRef(PartialSpec);
+ Record.AddTemplateArgumentList(
+ &Spec->getTemplateInstantiationArgs());
} else {
Record.push_back(false);
}
}
Record.push_back(RD->getTagKind());
- AddSourceLocation(RD->getLocation(), Record);
- AddSourceLocation(RD->getLocStart(), Record);
- AddSourceLocation(RD->getRBraceLoc(), Record);
+ Record.AddSourceLocation(RD->getLocation());
+ Record.AddSourceLocation(RD->getLocStart());
+ Record.AddSourceRange(RD->getBraceRange());
// Instantiation may change attributes; write them all out afresh.
Record.push_back(D->hasAttrs());
- if (Record.back())
- WriteAttributes(llvm::makeArrayRef(D->getAttrs().begin(),
- D->getAttrs().size()), Record);
+ if (D->hasAttrs())
+ Record.AddAttributes(D->getAttrs());
// FIXME: Ensure we don't get here for explicit instantiations.
break;
}
case UPD_CXX_RESOLVED_DTOR_DELETE:
- AddDeclRef(Update.getDecl(), Record);
+ Record.AddDeclRef(Update.getDecl());
break;
case UPD_CXX_RESOLVED_EXCEPTION_SPEC:
addExceptionSpec(
- *this,
cast<FunctionDecl>(D)->getType()->castAs<FunctionProtoType>(),
Record);
break;
@@ -4695,8 +4750,13 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
break;
case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
- AddSourceRange(D->getAttr<OMPThreadPrivateDeclAttr>()->getRange(),
- Record);
+ Record.AddSourceRange(
+ D->getAttr<OMPThreadPrivateDeclAttr>()->getRange());
+ break;
+
+ case UPD_DECL_MARKED_OPENMP_DECLARETARGET:
+ Record.AddSourceRange(
+ D->getAttr<OMPDeclareTargetDeclAttr>()->getRange());
break;
case UPD_DECL_EXPORTED:
@@ -4704,7 +4764,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
break;
case UPD_ADDED_ATTR_TO_RECORD:
- WriteAttributes(llvm::makeArrayRef(Update.getAttr()), Record);
+ Record.AddAttributes(llvm::makeArrayRef(Update.getAttr()));
break;
}
}
@@ -4713,34 +4773,18 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
const auto *Def = cast<FunctionDecl>(D);
Record.push_back(UPD_CXX_ADDED_FUNCTION_DEFINITION);
Record.push_back(Def->isInlined());
- AddSourceLocation(Def->getInnerLocStart(), Record);
- AddFunctionDefinition(Def, Record);
+ Record.AddSourceLocation(Def->getInnerLocStart());
+ Record.AddFunctionDefinition(Def);
}
OffsetsRecord.push_back(GetDeclRef(D));
- OffsetsRecord.push_back(Stream.GetCurrentBitNo());
-
- Stream.EmitRecord(DECL_UPDATES, Record);
-
- FlushPendingAfterDecl();
+ OffsetsRecord.push_back(Record.Emit(DECL_UPDATES));
}
}
-void ASTWriter::WriteDeclReplacementsBlock() {
- if (ReplacedDecls.empty())
- return;
-
- RecordData Record;
- for (const auto &I : ReplacedDecls) {
- Record.push_back(I.ID);
- Record.push_back(I.Offset);
- Record.push_back(I.Loc);
- }
- Stream.EmitRecord(DECL_REPLACEMENTS, Record);
-}
-
void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record) {
- Record.push_back(Loc.getRawEncoding());
+ uint32_t Raw = Loc.getRawEncoding();
+ Record.push_back((Raw << 1) | (Raw >> 31));
}
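// The expression above is a 32-bit rotate-left-by-one. SourceLocation keeps
// its "macro expansion" flag in the top bit, so the rotate moves that flag
// down to bit 0 and small offsets stay cheap under VBR encoding:
//   Raw = 0x00000005 (file offset 5)   -> 0x0000000A
//   Raw = 0x80000005 (macro, offset 5) -> 0x0000000B
// Without the rotate, every macro location would cost a full 32 bits.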
void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) {
@@ -4748,19 +4792,19 @@ void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) {
AddSourceLocation(Range.getEnd(), Record);
}
-void ASTWriter::AddAPInt(const llvm::APInt &Value, RecordDataImpl &Record) {
- Record.push_back(Value.getBitWidth());
+void ASTRecordWriter::AddAPInt(const llvm::APInt &Value) {
+ Record->push_back(Value.getBitWidth());
const uint64_t *Words = Value.getRawData();
- Record.append(Words, Words + Value.getNumWords());
+ Record->append(Words, Words + Value.getNumWords());
}
-void ASTWriter::AddAPSInt(const llvm::APSInt &Value, RecordDataImpl &Record) {
- Record.push_back(Value.isUnsigned());
- AddAPInt(Value, Record);
+void ASTRecordWriter::AddAPSInt(const llvm::APSInt &Value) {
+ Record->push_back(Value.isUnsigned());
+ AddAPInt(Value);
}
-void ASTWriter::AddAPFloat(const llvm::APFloat &Value, RecordDataImpl &Record) {
- AddAPInt(Value.bitcastToAPInt(), Record);
+void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) {
+ AddAPInt(Value.bitcastToAPInt());
}
void ASTWriter::AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record) {
@@ -4805,8 +4849,8 @@ uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) {
return IdentMacroDirectivesOffsetMap.lookup(Name);
}
-void ASTWriter::AddSelectorRef(const Selector SelRef, RecordDataImpl &Record) {
- Record.push_back(getSelectorRef(SelRef));
+void ASTRecordWriter::AddSelectorRef(const Selector SelRef) {
+ Record->push_back(Writer->getSelectorRef(SelRef));
}
SelectorID ASTWriter::getSelectorRef(Selector Sel) {
@@ -4828,46 +4872,27 @@ SelectorID ASTWriter::getSelectorRef(Selector Sel) {
return SID;
}
-void ASTWriter::AddCXXTemporary(const CXXTemporary *Temp, RecordDataImpl &Record) {
- AddDeclRef(Temp->getDestructor(), Record);
+void ASTRecordWriter::AddCXXTemporary(const CXXTemporary *Temp) {
+ AddDeclRef(Temp->getDestructor());
}
-void ASTWriter::AddCXXCtorInitializersRef(ArrayRef<CXXCtorInitializer *> Inits,
- RecordDataImpl &Record) {
- assert(!Inits.empty() && "Empty ctor initializer sets are not recorded");
- CXXCtorInitializersToWrite.push_back(
- QueuedCXXCtorInitializers(NextCXXCtorInitializersID, Inits));
- Record.push_back(NextCXXCtorInitializersID++);
-}
-
-void ASTWriter::AddCXXBaseSpecifiersRef(CXXBaseSpecifier const *Bases,
- CXXBaseSpecifier const *BasesEnd,
- RecordDataImpl &Record) {
- assert(Bases != BasesEnd && "Empty base-specifier sets are not recorded");
- CXXBaseSpecifiersToWrite.push_back(
- QueuedCXXBaseSpecifiers(NextCXXBaseSpecifiersID,
- Bases, BasesEnd));
- Record.push_back(NextCXXBaseSpecifiersID++);
-}
-
-void ASTWriter::AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,
- const TemplateArgumentLocInfo &Arg,
- RecordDataImpl &Record) {
+void ASTRecordWriter::AddTemplateArgumentLocInfo(
+ TemplateArgument::ArgKind Kind, const TemplateArgumentLocInfo &Arg) {
switch (Kind) {
case TemplateArgument::Expression:
AddStmt(Arg.getAsExpr());
break;
case TemplateArgument::Type:
- AddTypeSourceInfo(Arg.getAsTypeSourceInfo(), Record);
+ AddTypeSourceInfo(Arg.getAsTypeSourceInfo());
break;
case TemplateArgument::Template:
- AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc(), Record);
- AddSourceLocation(Arg.getTemplateNameLoc(), Record);
+ AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc());
+ AddSourceLocation(Arg.getTemplateNameLoc());
break;
case TemplateArgument::TemplateExpansion:
- AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc(), Record);
- AddSourceLocation(Arg.getTemplateNameLoc(), Record);
- AddSourceLocation(Arg.getTemplateEllipsisLoc(), Record);
+ AddNestedNameSpecifierLoc(Arg.getTemplateQualifierLoc());
+ AddSourceLocation(Arg.getTemplateNameLoc());
+ AddSourceLocation(Arg.getTemplateEllipsisLoc());
break;
case TemplateArgument::Null:
case TemplateArgument::Integral:
@@ -4879,35 +4904,32 @@ void ASTWriter::AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,
}
}
-void ASTWriter::AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg,
- RecordDataImpl &Record) {
- AddTemplateArgument(Arg.getArgument(), Record);
+void ASTRecordWriter::AddTemplateArgumentLoc(const TemplateArgumentLoc &Arg) {
+ AddTemplateArgument(Arg.getArgument());
if (Arg.getArgument().getKind() == TemplateArgument::Expression) {
bool InfoHasSameExpr
= Arg.getArgument().getAsExpr() == Arg.getLocInfo().getAsExpr();
- Record.push_back(InfoHasSameExpr);
+ Record->push_back(InfoHasSameExpr);
if (InfoHasSameExpr)
return; // Avoid storing the same expr twice.
}
- AddTemplateArgumentLocInfo(Arg.getArgument().getKind(), Arg.getLocInfo(),
- Record);
+ AddTemplateArgumentLocInfo(Arg.getArgument().getKind(), Arg.getLocInfo());
}
-void ASTWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo,
- RecordDataImpl &Record) {
+void ASTRecordWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo) {
if (!TInfo) {
- AddTypeRef(QualType(), Record);
+ AddTypeRef(QualType());
return;
}
- AddTypeLoc(TInfo->getTypeLoc(), Record);
+ AddTypeLoc(TInfo->getTypeLoc());
}
-void ASTWriter::AddTypeLoc(TypeLoc TL, RecordDataImpl &Record) {
- AddTypeRef(TL.getType(), Record);
+void ASTRecordWriter::AddTypeLoc(TypeLoc TL) {
+ AddTypeRef(TL.getType());
- TypeLocWriter TLW(*this, Record);
+ TypeLocWriter TLW(*this);
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
TLW.Visit(TL);
}
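// As an illustration, for a declaration such as 'int *p' the loop above
// peels the TypeLoc outermost-first: VisitPointerTypeLoc records the
// location of the '*', then getNextTypeLoc() yields the pointee's
// BuiltinTypeLoc and VisitBuiltinTypeLoc records the location of 'int'.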
@@ -5042,32 +5064,32 @@ void ASTWriter::associateDeclWithFile(const Decl *D, DeclID ID) {
Decls.insert(I, LocDecl);
}
-void ASTWriter::AddDeclarationName(DeclarationName Name, RecordDataImpl &Record) {
+void ASTRecordWriter::AddDeclarationName(DeclarationName Name) {
// FIXME: Emit a stable enum for NameKind. 0 = Identifier etc.
- Record.push_back(Name.getNameKind());
+ Record->push_back(Name.getNameKind());
switch (Name.getNameKind()) {
case DeclarationName::Identifier:
- AddIdentifierRef(Name.getAsIdentifierInfo(), Record);
+ AddIdentifierRef(Name.getAsIdentifierInfo());
break;
case DeclarationName::ObjCZeroArgSelector:
case DeclarationName::ObjCOneArgSelector:
case DeclarationName::ObjCMultiArgSelector:
- AddSelectorRef(Name.getObjCSelector(), Record);
+ AddSelectorRef(Name.getObjCSelector());
break;
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
- AddTypeRef(Name.getCXXNameType(), Record);
+ AddTypeRef(Name.getCXXNameType());
break;
case DeclarationName::CXXOperatorName:
- Record.push_back(Name.getCXXOverloadedOperator());
+ Record->push_back(Name.getCXXOverloadedOperator());
break;
case DeclarationName::CXXLiteralOperatorName:
- AddIdentifierRef(Name.getCXXLiteralIdentifier(), Record);
+ AddIdentifierRef(Name.getCXXLiteralIdentifier());
break;
case DeclarationName::CXXUsingDirective:
@@ -5097,28 +5119,25 @@ unsigned ASTWriter::getAnonymousDeclarationNumber(const NamedDecl *D) {
return It->second;
}
-void ASTWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc,
- DeclarationName Name, RecordDataImpl &Record) {
+void ASTRecordWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc,
+ DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
case DeclarationName::CXXConversionFunctionName:
- AddTypeSourceInfo(DNLoc.NamedType.TInfo, Record);
+ AddTypeSourceInfo(DNLoc.NamedType.TInfo);
break;
case DeclarationName::CXXOperatorName:
+ AddSourceLocation(SourceLocation::getFromRawEncoding(
+ DNLoc.CXXOperatorName.BeginOpNameLoc));
AddSourceLocation(
- SourceLocation::getFromRawEncoding(DNLoc.CXXOperatorName.BeginOpNameLoc),
- Record);
- AddSourceLocation(
- SourceLocation::getFromRawEncoding(DNLoc.CXXOperatorName.EndOpNameLoc),
- Record);
+ SourceLocation::getFromRawEncoding(DNLoc.CXXOperatorName.EndOpNameLoc));
break;
case DeclarationName::CXXLiteralOperatorName:
- AddSourceLocation(
- SourceLocation::getFromRawEncoding(DNLoc.CXXLiteralOperatorName.OpNameLoc),
- Record);
+ AddSourceLocation(SourceLocation::getFromRawEncoding(
+ DNLoc.CXXLiteralOperatorName.OpNameLoc));
break;
case DeclarationName::Identifier:
@@ -5130,23 +5149,21 @@ void ASTWriter::AddDeclarationNameLoc(const DeclarationNameLoc &DNLoc,
}
}
-void ASTWriter::AddDeclarationNameInfo(const DeclarationNameInfo &NameInfo,
- RecordDataImpl &Record) {
- AddDeclarationName(NameInfo.getName(), Record);
- AddSourceLocation(NameInfo.getLoc(), Record);
- AddDeclarationNameLoc(NameInfo.getInfo(), NameInfo.getName(), Record);
+void ASTRecordWriter::AddDeclarationNameInfo(
+ const DeclarationNameInfo &NameInfo) {
+ AddDeclarationName(NameInfo.getName());
+ AddSourceLocation(NameInfo.getLoc());
+ AddDeclarationNameLoc(NameInfo.getInfo(), NameInfo.getName());
}
-void ASTWriter::AddQualifierInfo(const QualifierInfo &Info,
- RecordDataImpl &Record) {
- AddNestedNameSpecifierLoc(Info.QualifierLoc, Record);
- Record.push_back(Info.NumTemplParamLists);
+void ASTRecordWriter::AddQualifierInfo(const QualifierInfo &Info) {
+ AddNestedNameSpecifierLoc(Info.QualifierLoc);
+ Record->push_back(Info.NumTemplParamLists);
for (unsigned i=0, e=Info.NumTemplParamLists; i != e; ++i)
- AddTemplateParameterList(Info.TemplParamLists[i], Record);
+ AddTemplateParameterList(Info.TemplParamLists[i]);
}
-void ASTWriter::AddNestedNameSpecifier(NestedNameSpecifier *NNS,
- RecordDataImpl &Record) {
+void ASTRecordWriter::AddNestedNameSpecifier(NestedNameSpecifier *NNS) {
// Nested name specifiers usually aren't too long; an inline capacity of 8
// should accommodate the vast majority.
SmallVector<NestedNameSpecifier *, 8> NestedNames;
@@ -5157,28 +5174,28 @@ void ASTWriter::AddNestedNameSpecifier(NestedNameSpecifier *NNS,
NNS = NNS->getPrefix();
}
- Record.push_back(NestedNames.size());
+ Record->push_back(NestedNames.size());
while(!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier::SpecifierKind Kind = NNS->getKind();
- Record.push_back(Kind);
+ Record->push_back(Kind);
switch (Kind) {
case NestedNameSpecifier::Identifier:
- AddIdentifierRef(NNS->getAsIdentifier(), Record);
+ AddIdentifierRef(NNS->getAsIdentifier());
break;
case NestedNameSpecifier::Namespace:
- AddDeclRef(NNS->getAsNamespace(), Record);
+ AddDeclRef(NNS->getAsNamespace());
break;
case NestedNameSpecifier::NamespaceAlias:
- AddDeclRef(NNS->getAsNamespaceAlias(), Record);
+ AddDeclRef(NNS->getAsNamespaceAlias());
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
- AddTypeRef(QualType(NNS->getAsType(), 0), Record);
- Record.push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
+ AddTypeRef(QualType(NNS->getAsType(), 0));
+ Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
break;
case NestedNameSpecifier::Global:
@@ -5186,14 +5203,13 @@ void ASTWriter::AddNestedNameSpecifier(NestedNameSpecifier *NNS,
break;
case NestedNameSpecifier::Super:
- AddDeclRef(NNS->getAsRecordDecl(), Record);
+ AddDeclRef(NNS->getAsRecordDecl());
break;
}
}
}
-void ASTWriter::AddNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
- RecordDataImpl &Record) {
+void ASTRecordWriter::AddNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS) {
// Nested name specifiers usually aren't too long; an inline capacity of 8
// should accommodate the vast majority.
SmallVector<NestedNameSpecifierLoc , 8> NestedNames;
@@ -5205,373 +5221,333 @@ void ASTWriter::AddNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS,
NNS = NNS.getPrefix();
}
- Record.push_back(NestedNames.size());
+ Record->push_back(NestedNames.size());
while(!NestedNames.empty()) {
NNS = NestedNames.pop_back_val();
NestedNameSpecifier::SpecifierKind Kind
= NNS.getNestedNameSpecifier()->getKind();
- Record.push_back(Kind);
+ Record->push_back(Kind);
switch (Kind) {
case NestedNameSpecifier::Identifier:
- AddIdentifierRef(NNS.getNestedNameSpecifier()->getAsIdentifier(), Record);
- AddSourceRange(NNS.getLocalSourceRange(), Record);
+ AddIdentifierRef(NNS.getNestedNameSpecifier()->getAsIdentifier());
+ AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::Namespace:
- AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespace(), Record);
- AddSourceRange(NNS.getLocalSourceRange(), Record);
+ AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespace());
+ AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::NamespaceAlias:
- AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespaceAlias(), Record);
- AddSourceRange(NNS.getLocalSourceRange(), Record);
+ AddDeclRef(NNS.getNestedNameSpecifier()->getAsNamespaceAlias());
+ AddSourceRange(NNS.getLocalSourceRange());
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
- Record.push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
- AddTypeLoc(NNS.getTypeLoc(), Record);
- AddSourceLocation(NNS.getLocalSourceRange().getEnd(), Record);
+ Record->push_back(Kind == NestedNameSpecifier::TypeSpecWithTemplate);
+ AddTypeLoc(NNS.getTypeLoc());
+ AddSourceLocation(NNS.getLocalSourceRange().getEnd());
break;
case NestedNameSpecifier::Global:
- AddSourceLocation(NNS.getLocalSourceRange().getEnd(), Record);
+ AddSourceLocation(NNS.getLocalSourceRange().getEnd());
break;
case NestedNameSpecifier::Super:
- AddDeclRef(NNS.getNestedNameSpecifier()->getAsRecordDecl(), Record);
- AddSourceRange(NNS.getLocalSourceRange(), Record);
+ AddDeclRef(NNS.getNestedNameSpecifier()->getAsRecordDecl());
+ AddSourceRange(NNS.getLocalSourceRange());
break;
}
}
}
-void ASTWriter::AddTemplateName(TemplateName Name, RecordDataImpl &Record) {
+void ASTRecordWriter::AddTemplateName(TemplateName Name) {
TemplateName::NameKind Kind = Name.getKind();
- Record.push_back(Kind);
+ Record->push_back(Kind);
switch (Kind) {
case TemplateName::Template:
- AddDeclRef(Name.getAsTemplateDecl(), Record);
+ AddDeclRef(Name.getAsTemplateDecl());
break;
case TemplateName::OverloadedTemplate: {
OverloadedTemplateStorage *OvT = Name.getAsOverloadedTemplate();
- Record.push_back(OvT->size());
+ Record->push_back(OvT->size());
for (const auto &I : *OvT)
- AddDeclRef(I, Record);
+ AddDeclRef(I);
break;
}
case TemplateName::QualifiedTemplate: {
QualifiedTemplateName *QualT = Name.getAsQualifiedTemplateName();
- AddNestedNameSpecifier(QualT->getQualifier(), Record);
- Record.push_back(QualT->hasTemplateKeyword());
- AddDeclRef(QualT->getTemplateDecl(), Record);
+ AddNestedNameSpecifier(QualT->getQualifier());
+ Record->push_back(QualT->hasTemplateKeyword());
+ AddDeclRef(QualT->getTemplateDecl());
break;
}
case TemplateName::DependentTemplate: {
DependentTemplateName *DepT = Name.getAsDependentTemplateName();
- AddNestedNameSpecifier(DepT->getQualifier(), Record);
- Record.push_back(DepT->isIdentifier());
+ AddNestedNameSpecifier(DepT->getQualifier());
+ Record->push_back(DepT->isIdentifier());
if (DepT->isIdentifier())
- AddIdentifierRef(DepT->getIdentifier(), Record);
+ AddIdentifierRef(DepT->getIdentifier());
else
- Record.push_back(DepT->getOperator());
+ Record->push_back(DepT->getOperator());
break;
}
case TemplateName::SubstTemplateTemplateParm: {
SubstTemplateTemplateParmStorage *subst
= Name.getAsSubstTemplateTemplateParm();
- AddDeclRef(subst->getParameter(), Record);
- AddTemplateName(subst->getReplacement(), Record);
+ AddDeclRef(subst->getParameter());
+ AddTemplateName(subst->getReplacement());
break;
}
case TemplateName::SubstTemplateTemplateParmPack: {
SubstTemplateTemplateParmPackStorage *SubstPack
= Name.getAsSubstTemplateTemplateParmPack();
- AddDeclRef(SubstPack->getParameterPack(), Record);
- AddTemplateArgument(SubstPack->getArgumentPack(), Record);
+ AddDeclRef(SubstPack->getParameterPack());
+ AddTemplateArgument(SubstPack->getArgumentPack());
break;
}
}
}
-void ASTWriter::AddTemplateArgument(const TemplateArgument &Arg,
- RecordDataImpl &Record) {
- Record.push_back(Arg.getKind());
+void ASTRecordWriter::AddTemplateArgument(const TemplateArgument &Arg) {
+ Record->push_back(Arg.getKind());
switch (Arg.getKind()) {
case TemplateArgument::Null:
break;
case TemplateArgument::Type:
- AddTypeRef(Arg.getAsType(), Record);
+ AddTypeRef(Arg.getAsType());
break;
case TemplateArgument::Declaration:
- AddDeclRef(Arg.getAsDecl(), Record);
- AddTypeRef(Arg.getParamTypeForDecl(), Record);
+ AddDeclRef(Arg.getAsDecl());
+ AddTypeRef(Arg.getParamTypeForDecl());
break;
case TemplateArgument::NullPtr:
- AddTypeRef(Arg.getNullPtrType(), Record);
+ AddTypeRef(Arg.getNullPtrType());
break;
case TemplateArgument::Integral:
- AddAPSInt(Arg.getAsIntegral(), Record);
- AddTypeRef(Arg.getIntegralType(), Record);
+ AddAPSInt(Arg.getAsIntegral());
+ AddTypeRef(Arg.getIntegralType());
break;
case TemplateArgument::Template:
- AddTemplateName(Arg.getAsTemplateOrTemplatePattern(), Record);
+ AddTemplateName(Arg.getAsTemplateOrTemplatePattern());
break;
case TemplateArgument::TemplateExpansion:
- AddTemplateName(Arg.getAsTemplateOrTemplatePattern(), Record);
+ AddTemplateName(Arg.getAsTemplateOrTemplatePattern());
if (Optional<unsigned> NumExpansions = Arg.getNumTemplateExpansions())
- Record.push_back(*NumExpansions + 1);
+ Record->push_back(*NumExpansions + 1);
else
- Record.push_back(0);
+ Record->push_back(0);
break;
case TemplateArgument::Expression:
AddStmt(Arg.getAsExpr());
break;
case TemplateArgument::Pack:
- Record.push_back(Arg.pack_size());
+ Record->push_back(Arg.pack_size());
for (const auto &P : Arg.pack_elements())
- AddTemplateArgument(P, Record);
+ AddTemplateArgument(P);
break;
}
}
-void
-ASTWriter::AddTemplateParameterList(const TemplateParameterList *TemplateParams,
- RecordDataImpl &Record) {
+void ASTRecordWriter::AddTemplateParameterList(
+ const TemplateParameterList *TemplateParams) {
assert(TemplateParams && "No TemplateParams!");
- AddSourceLocation(TemplateParams->getTemplateLoc(), Record);
- AddSourceLocation(TemplateParams->getLAngleLoc(), Record);
- AddSourceLocation(TemplateParams->getRAngleLoc(), Record);
- Record.push_back(TemplateParams->size());
+ AddSourceLocation(TemplateParams->getTemplateLoc());
+ AddSourceLocation(TemplateParams->getLAngleLoc());
+ AddSourceLocation(TemplateParams->getRAngleLoc());
+ Record->push_back(TemplateParams->size());
for (const auto &P : *TemplateParams)
- AddDeclRef(P, Record);
+ AddDeclRef(P);
}
/// \brief Emit a template argument list.
-void
-ASTWriter::AddTemplateArgumentList(const TemplateArgumentList *TemplateArgs,
- RecordDataImpl &Record) {
+void ASTRecordWriter::AddTemplateArgumentList(
+ const TemplateArgumentList *TemplateArgs) {
assert(TemplateArgs && "No TemplateArgs!");
- Record.push_back(TemplateArgs->size());
+ Record->push_back(TemplateArgs->size());
for (int i=0, e = TemplateArgs->size(); i != e; ++i)
- AddTemplateArgument(TemplateArgs->get(i), Record);
+ AddTemplateArgument(TemplateArgs->get(i));
}
-void
-ASTWriter::AddASTTemplateArgumentListInfo
-(const ASTTemplateArgumentListInfo *ASTTemplArgList, RecordDataImpl &Record) {
+void ASTRecordWriter::AddASTTemplateArgumentListInfo(
+ const ASTTemplateArgumentListInfo *ASTTemplArgList) {
assert(ASTTemplArgList && "No ASTTemplArgList!");
- AddSourceLocation(ASTTemplArgList->LAngleLoc, Record);
- AddSourceLocation(ASTTemplArgList->RAngleLoc, Record);
- Record.push_back(ASTTemplArgList->NumTemplateArgs);
+ AddSourceLocation(ASTTemplArgList->LAngleLoc);
+ AddSourceLocation(ASTTemplArgList->RAngleLoc);
+ Record->push_back(ASTTemplArgList->NumTemplateArgs);
const TemplateArgumentLoc *TemplArgs = ASTTemplArgList->getTemplateArgs();
for (int i=0, e = ASTTemplArgList->NumTemplateArgs; i != e; ++i)
- AddTemplateArgumentLoc(TemplArgs[i], Record);
+ AddTemplateArgumentLoc(TemplArgs[i]);
}
-void
-ASTWriter::AddUnresolvedSet(const ASTUnresolvedSet &Set, RecordDataImpl &Record) {
- Record.push_back(Set.size());
+void ASTRecordWriter::AddUnresolvedSet(const ASTUnresolvedSet &Set) {
+ Record->push_back(Set.size());
for (ASTUnresolvedSet::const_iterator
I = Set.begin(), E = Set.end(); I != E; ++I) {
- AddDeclRef(I.getDecl(), Record);
- Record.push_back(I.getAccess());
+ AddDeclRef(I.getDecl());
+ Record->push_back(I.getAccess());
}
}
-void ASTWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base,
- RecordDataImpl &Record) {
- Record.push_back(Base.isVirtual());
- Record.push_back(Base.isBaseOfClass());
- Record.push_back(Base.getAccessSpecifierAsWritten());
- Record.push_back(Base.getInheritConstructors());
- AddTypeSourceInfo(Base.getTypeSourceInfo(), Record);
- AddSourceRange(Base.getSourceRange(), Record);
+// FIXME: Move this out of the main ASTRecordWriter interface.
+void ASTRecordWriter::AddCXXBaseSpecifier(const CXXBaseSpecifier &Base) {
+ Record->push_back(Base.isVirtual());
+ Record->push_back(Base.isBaseOfClass());
+ Record->push_back(Base.getAccessSpecifierAsWritten());
+ Record->push_back(Base.getInheritConstructors());
+ AddTypeSourceInfo(Base.getTypeSourceInfo());
+ AddSourceRange(Base.getSourceRange());
AddSourceLocation(Base.isPackExpansion()? Base.getEllipsisLoc()
- : SourceLocation(),
- Record);
+ : SourceLocation());
}
-void ASTWriter::FlushCXXBaseSpecifiers() {
- RecordData Record;
- unsigned N = CXXBaseSpecifiersToWrite.size();
- for (unsigned I = 0; I != N; ++I) {
- Record.clear();
-
- // Record the offset of this base-specifier set.
- unsigned Index = CXXBaseSpecifiersToWrite[I].ID - 1;
- if (Index == CXXBaseSpecifiersOffsets.size())
- CXXBaseSpecifiersOffsets.push_back(Stream.GetCurrentBitNo());
- else {
- if (Index > CXXBaseSpecifiersOffsets.size())
- CXXBaseSpecifiersOffsets.resize(Index + 1);
- CXXBaseSpecifiersOffsets[Index] = Stream.GetCurrentBitNo();
- }
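+// Emit a record holding the given C++ base specifiers and return the bit
+// offset at which the record was written.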
+static uint64_t EmitCXXBaseSpecifiers(ASTWriter &W,
+ ArrayRef<CXXBaseSpecifier> Bases) {
+ ASTWriter::RecordData Record;
+ ASTRecordWriter Writer(W, Record);
+ Writer.push_back(Bases.size());
- const CXXBaseSpecifier *B = CXXBaseSpecifiersToWrite[I].Bases,
- *BEnd = CXXBaseSpecifiersToWrite[I].BasesEnd;
- Record.push_back(BEnd - B);
- for (; B != BEnd; ++B)
- AddCXXBaseSpecifier(*B, Record);
- Stream.EmitRecord(serialization::DECL_CXX_BASE_SPECIFIERS, Record);
-
- // Flush any expressions that were written as part of the base specifiers.
- FlushStmts();
- }
+ for (auto &Base : Bases)
+ Writer.AddCXXBaseSpecifier(Base);
+
+ return Writer.Emit(serialization::DECL_CXX_BASE_SPECIFIERS);
+}
- assert(N == CXXBaseSpecifiersToWrite.size() &&
- "added more base specifiers while writing base specifiers");
- CXXBaseSpecifiersToWrite.clear();
+// FIXME: Move this out of the main ASTRecordWriter interface.
+void ASTRecordWriter::AddCXXBaseSpecifiers(ArrayRef<CXXBaseSpecifier> Bases) {
+ AddOffset(EmitCXXBaseSpecifiers(*Writer, Bases));
}
-void ASTWriter::AddCXXCtorInitializers(
- const CXXCtorInitializer * const *CtorInitializers,
- unsigned NumCtorInitializers,
- RecordDataImpl &Record) {
- Record.push_back(NumCtorInitializers);
- for (unsigned i=0; i != NumCtorInitializers; ++i) {
- const CXXCtorInitializer *Init = CtorInitializers[i];
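+// Emit a record holding the given constructor initializers and return the bit
+// offset at which the record was written.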
+static uint64_t
+EmitCXXCtorInitializers(ASTWriter &W,
+ ArrayRef<CXXCtorInitializer *> CtorInits) {
+ ASTWriter::RecordData Record;
+ ASTRecordWriter Writer(W, Record);
+ Writer.push_back(CtorInits.size());
+ for (auto *Init : CtorInits) {
if (Init->isBaseInitializer()) {
- Record.push_back(CTOR_INITIALIZER_BASE);
- AddTypeSourceInfo(Init->getTypeSourceInfo(), Record);
- Record.push_back(Init->isBaseVirtual());
+ Writer.push_back(CTOR_INITIALIZER_BASE);
+ Writer.AddTypeSourceInfo(Init->getTypeSourceInfo());
+ Writer.push_back(Init->isBaseVirtual());
} else if (Init->isDelegatingInitializer()) {
- Record.push_back(CTOR_INITIALIZER_DELEGATING);
- AddTypeSourceInfo(Init->getTypeSourceInfo(), Record);
+ Writer.push_back(CTOR_INITIALIZER_DELEGATING);
+ Writer.AddTypeSourceInfo(Init->getTypeSourceInfo());
} else if (Init->isMemberInitializer()) {
- Record.push_back(CTOR_INITIALIZER_MEMBER);
- AddDeclRef(Init->getMember(), Record);
+ Writer.push_back(CTOR_INITIALIZER_MEMBER);
+ Writer.AddDeclRef(Init->getMember());
} else {
- Record.push_back(CTOR_INITIALIZER_INDIRECT_MEMBER);
- AddDeclRef(Init->getIndirectMember(), Record);
+ Writer.push_back(CTOR_INITIALIZER_INDIRECT_MEMBER);
+ Writer.AddDeclRef(Init->getIndirectMember());
}
- AddSourceLocation(Init->getMemberLocation(), Record);
- AddStmt(Init->getInit());
- AddSourceLocation(Init->getLParenLoc(), Record);
- AddSourceLocation(Init->getRParenLoc(), Record);
- Record.push_back(Init->isWritten());
+ Writer.AddSourceLocation(Init->getMemberLocation());
+ Writer.AddStmt(Init->getInit());
+ Writer.AddSourceLocation(Init->getLParenLoc());
+ Writer.AddSourceLocation(Init->getRParenLoc());
+ Writer.push_back(Init->isWritten());
if (Init->isWritten()) {
- Record.push_back(Init->getSourceOrder());
+ Writer.push_back(Init->getSourceOrder());
} else {
- Record.push_back(Init->getNumArrayIndices());
- for (unsigned i=0, e=Init->getNumArrayIndices(); i != e; ++i)
- AddDeclRef(Init->getArrayIndex(i), Record);
+ Writer.push_back(Init->getNumArrayIndices());
+ for (auto *VD : Init->getArrayIndices())
+ Writer.AddDeclRef(VD);
}
}
-}
-
-void ASTWriter::FlushCXXCtorInitializers() {
- RecordData Record;
- unsigned N = CXXCtorInitializersToWrite.size();
- (void)N; // Silence unused warning in non-assert builds.
- for (auto &Init : CXXCtorInitializersToWrite) {
- Record.clear();
-
- // Record the offset of this mem-initializer list.
- unsigned Index = Init.ID - 1;
- if (Index == CXXCtorInitializersOffsets.size())
- CXXCtorInitializersOffsets.push_back(Stream.GetCurrentBitNo());
- else {
- if (Index > CXXCtorInitializersOffsets.size())
- CXXCtorInitializersOffsets.resize(Index + 1);
- CXXCtorInitializersOffsets[Index] = Stream.GetCurrentBitNo();
- }
-
- AddCXXCtorInitializers(Init.Inits.data(), Init.Inits.size(), Record);
- Stream.EmitRecord(serialization::DECL_CXX_CTOR_INITIALIZERS, Record);
-
- // Flush any expressions that were written as part of the initializers.
- FlushStmts();
- }
+ return Writer.Emit(serialization::DECL_CXX_CTOR_INITIALIZERS);
+}
- assert(N == CXXCtorInitializersToWrite.size() &&
- "added more ctor initializers while writing ctor initializers");
- CXXCtorInitializersToWrite.clear();
+// FIXME: Move this out of the main ASTRecordWriter interface.
+void ASTRecordWriter::AddCXXCtorInitializers(
+ ArrayRef<CXXCtorInitializer *> CtorInits) {
+ AddOffset(EmitCXXCtorInitializers(*Writer, CtorInits));
}
-void ASTWriter::AddCXXDefinitionData(const CXXRecordDecl *D, RecordDataImpl &Record) {
+void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
auto &Data = D->data();
- Record.push_back(Data.IsLambda);
- Record.push_back(Data.UserDeclaredConstructor);
- Record.push_back(Data.UserDeclaredSpecialMembers);
- Record.push_back(Data.Aggregate);
- Record.push_back(Data.PlainOldData);
- Record.push_back(Data.Empty);
- Record.push_back(Data.Polymorphic);
- Record.push_back(Data.Abstract);
- Record.push_back(Data.IsStandardLayout);
- Record.push_back(Data.HasNoNonEmptyBases);
- Record.push_back(Data.HasPrivateFields);
- Record.push_back(Data.HasProtectedFields);
- Record.push_back(Data.HasPublicFields);
- Record.push_back(Data.HasMutableFields);
- Record.push_back(Data.HasVariantMembers);
- Record.push_back(Data.HasOnlyCMembers);
- Record.push_back(Data.HasInClassInitializer);
- Record.push_back(Data.HasUninitializedReferenceMember);
- Record.push_back(Data.NeedOverloadResolutionForMoveConstructor);
- Record.push_back(Data.NeedOverloadResolutionForMoveAssignment);
- Record.push_back(Data.NeedOverloadResolutionForDestructor);
- Record.push_back(Data.DefaultedMoveConstructorIsDeleted);
- Record.push_back(Data.DefaultedMoveAssignmentIsDeleted);
- Record.push_back(Data.DefaultedDestructorIsDeleted);
- Record.push_back(Data.HasTrivialSpecialMembers);
- Record.push_back(Data.DeclaredNonTrivialSpecialMembers);
- Record.push_back(Data.HasIrrelevantDestructor);
- Record.push_back(Data.HasConstexprNonCopyMoveConstructor);
- Record.push_back(Data.DefaultedDefaultConstructorIsConstexpr);
- Record.push_back(Data.HasConstexprDefaultConstructor);
- Record.push_back(Data.HasNonLiteralTypeFieldsOrBases);
- Record.push_back(Data.ComputedVisibleConversions);
- Record.push_back(Data.UserProvidedDefaultConstructor);
- Record.push_back(Data.DeclaredSpecialMembers);
- Record.push_back(Data.ImplicitCopyConstructorHasConstParam);
- Record.push_back(Data.ImplicitCopyAssignmentHasConstParam);
- Record.push_back(Data.HasDeclaredCopyConstructorWithConstParam);
- Record.push_back(Data.HasDeclaredCopyAssignmentWithConstParam);
+ Record->push_back(Data.IsLambda);
+ Record->push_back(Data.UserDeclaredConstructor);
+ Record->push_back(Data.UserDeclaredSpecialMembers);
+ Record->push_back(Data.Aggregate);
+ Record->push_back(Data.PlainOldData);
+ Record->push_back(Data.Empty);
+ Record->push_back(Data.Polymorphic);
+ Record->push_back(Data.Abstract);
+ Record->push_back(Data.IsStandardLayout);
+ Record->push_back(Data.HasNoNonEmptyBases);
+ Record->push_back(Data.HasPrivateFields);
+ Record->push_back(Data.HasProtectedFields);
+ Record->push_back(Data.HasPublicFields);
+ Record->push_back(Data.HasMutableFields);
+ Record->push_back(Data.HasVariantMembers);
+ Record->push_back(Data.HasOnlyCMembers);
+ Record->push_back(Data.HasInClassInitializer);
+ Record->push_back(Data.HasUninitializedReferenceMember);
+ Record->push_back(Data.HasUninitializedFields);
+ Record->push_back(Data.HasInheritedConstructor);
+ Record->push_back(Data.HasInheritedAssignment);
+ Record->push_back(Data.NeedOverloadResolutionForMoveConstructor);
+ Record->push_back(Data.NeedOverloadResolutionForMoveAssignment);
+ Record->push_back(Data.NeedOverloadResolutionForDestructor);
+ Record->push_back(Data.DefaultedMoveConstructorIsDeleted);
+ Record->push_back(Data.DefaultedMoveAssignmentIsDeleted);
+ Record->push_back(Data.DefaultedDestructorIsDeleted);
+ Record->push_back(Data.HasTrivialSpecialMembers);
+ Record->push_back(Data.DeclaredNonTrivialSpecialMembers);
+ Record->push_back(Data.HasIrrelevantDestructor);
+ Record->push_back(Data.HasConstexprNonCopyMoveConstructor);
+ Record->push_back(Data.HasDefaultedDefaultConstructor);
+ Record->push_back(Data.DefaultedDefaultConstructorIsConstexpr);
+ Record->push_back(Data.HasConstexprDefaultConstructor);
+ Record->push_back(Data.HasNonLiteralTypeFieldsOrBases);
+ Record->push_back(Data.ComputedVisibleConversions);
+ Record->push_back(Data.UserProvidedDefaultConstructor);
+ Record->push_back(Data.DeclaredSpecialMembers);
+ Record->push_back(Data.ImplicitCopyConstructorHasConstParam);
+ Record->push_back(Data.ImplicitCopyAssignmentHasConstParam);
+ Record->push_back(Data.HasDeclaredCopyConstructorWithConstParam);
+ Record->push_back(Data.HasDeclaredCopyAssignmentWithConstParam);
// IsLambda bit is already saved.
- Record.push_back(Data.NumBases);
+ Record->push_back(Data.NumBases);
if (Data.NumBases > 0)
- AddCXXBaseSpecifiersRef(Data.getBases(), Data.getBases() + Data.NumBases,
- Record);
-
+ AddCXXBaseSpecifiers(Data.bases());
+
// FIXME: Make VBases lazily computed when needed to avoid storing them.
- Record.push_back(Data.NumVBases);
+ Record->push_back(Data.NumVBases);
if (Data.NumVBases > 0)
- AddCXXBaseSpecifiersRef(Data.getVBases(), Data.getVBases() + Data.NumVBases,
- Record);
+ AddCXXBaseSpecifiers(Data.vbases());
- AddUnresolvedSet(Data.Conversions.get(*Context), Record);
- AddUnresolvedSet(Data.VisibleConversions.get(*Context), Record);
+ AddUnresolvedSet(Data.Conversions.get(*Writer->Context));
+ AddUnresolvedSet(Data.VisibleConversions.get(*Writer->Context));
// Data.Definition is the owning decl, no need to write it.
- AddDeclRef(D->getFirstFriend(), Record);
+ AddDeclRef(D->getFirstFriend());
// Add lambda-specific data.
if (Data.IsLambda) {
auto &Lambda = D->getLambdaData();
- Record.push_back(Lambda.Dependent);
- Record.push_back(Lambda.IsGenericLambda);
- Record.push_back(Lambda.CaptureDefault);
- Record.push_back(Lambda.NumCaptures);
- Record.push_back(Lambda.NumExplicitCaptures);
- Record.push_back(Lambda.ManglingNumber);
- AddDeclRef(Lambda.ContextDecl, Record);
- AddTypeSourceInfo(Lambda.MethodTyInfo, Record);
+ Record->push_back(Lambda.Dependent);
+ Record->push_back(Lambda.IsGenericLambda);
+ Record->push_back(Lambda.CaptureDefault);
+ Record->push_back(Lambda.NumCaptures);
+ Record->push_back(Lambda.NumExplicitCaptures);
+ Record->push_back(Lambda.ManglingNumber);
+ AddDeclRef(Lambda.ContextDecl);
+ AddTypeSourceInfo(Lambda.MethodTyInfo);
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
const LambdaCapture &Capture = Lambda.Captures[I];
- AddSourceLocation(Capture.getLocation(), Record);
- Record.push_back(Capture.isImplicit());
- Record.push_back(Capture.getCaptureKind());
+ AddSourceLocation(Capture.getLocation());
+ Record->push_back(Capture.isImplicit());
+ Record->push_back(Capture.getCaptureKind());
switch (Capture.getCaptureKind()) {
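+    // A C++17 '*this' capture carries no extra payload, just like 'this'
+    // and VLA-type captures.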
+ case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
break;
@@ -5579,10 +5555,9 @@ void ASTWriter::AddCXXDefinitionData(const CXXRecordDecl *D, RecordDataImpl &Rec
case LCK_ByRef:
VarDecl *Var =
Capture.capturesVariable() ? Capture.getCapturedVar() : nullptr;
- AddDeclRef(Var, Record);
+ AddDeclRef(Var);
AddSourceLocation(Capture.isPackExpansion() ? Capture.getEllipsisLoc()
- : SourceLocation(),
- Record);
+ : SourceLocation());
break;
}
}
@@ -5662,6 +5637,7 @@ void ASTWriter::ModuleRead(serialization::SubmoduleID ID, Module *Mod) {
}
void ASTWriter::CompletedTagDefinition(const TagDecl *D) {
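+  // Ignore callbacks that fire while the reader is replaying update records;
+  // they must not generate fresh update records of their own.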
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(D->isCompleteDefinition());
assert(!WritingAST && "Already writing the AST!");
if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
@@ -5682,18 +5658,26 @@ static bool isImportedDeclContext(ASTReader *Chain, const Decl *D) {
if (D->isFromASTFile())
return true;
- // If we've not loaded any modules, this can't be imported.
- if (!Chain || !Chain->getModuleManager().size())
- return false;
-
// The predefined __va_list_tag struct is imported if we imported any decls.
// FIXME: This is a gross hack.
return D == D->getASTContext().getVaListTagDecl();
}
void ASTWriter::AddedVisibleDecl(const DeclContext *DC, const Decl *D) {
- // TU and namespaces are handled elsewhere.
- if (isa<TranslationUnitDecl>(DC) || isa<NamespaceDecl>(DC))
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
+ assert(DC->isLookupContext() &&
+ "Should not add lookup results to non-lookup contexts!");
+
+ // TU is handled elsewhere.
+ if (isa<TranslationUnitDecl>(DC))
+ return;
+
+  // Namespaces are handled elsewhere, except for template instantiations of
+  // FunctionTemplateDecls in namespaces. We are interested in cases where the
+  // local instantiations are added to an imported context, which only happens
+  // when adding ADL lookup candidates, for example for templated friends.
+ if (isa<NamespaceDecl>(DC) && D->getFriendObjectKind() == Decl::FOK_None &&
+ !isa<FunctionTemplateDecl>(D))
return;
// We're only interested in cases where a local declaration is added to an
@@ -5715,6 +5699,7 @@ void ASTWriter::AddedVisibleDecl(const DeclContext *DC, const Decl *D) {
}
void ASTWriter::AddedCXXImplicitMember(const CXXRecordDecl *RD, const Decl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(D->isImplicit());
// We're only interested in cases where a local declaration is added to an
@@ -5732,6 +5717,7 @@ void ASTWriter::AddedCXXImplicitMember(const CXXRecordDecl *RD, const Decl *D) {
}
void ASTWriter::ResolvedExceptionSpec(const FunctionDecl *FD) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!DoneWritingDeclsAndTypes && "Already done writing updates!");
if (!Chain) return;
Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) {
@@ -5746,6 +5732,7 @@ void ASTWriter::ResolvedExceptionSpec(const FunctionDecl *FD) {
}
void ASTWriter::DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!Chain) return;
Chain->forEachImportedKeyDecl(FD, [&](const Decl *D) {
@@ -5756,6 +5743,7 @@ void ASTWriter::DeducedReturnType(const FunctionDecl *FD, QualType ReturnType) {
void ASTWriter::ResolvedOperatorDelete(const CXXDestructorDecl *DD,
const FunctionDecl *Delete) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
assert(Delete && "Not given an operator delete");
if (!Chain) return;
@@ -5765,6 +5753,7 @@ void ASTWriter::ResolvedOperatorDelete(const CXXDestructorDecl *DD,
}
void ASTWriter::CompletedImplicitDefinition(const FunctionDecl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return; // Declaration not imported from PCH.
@@ -5774,6 +5763,7 @@ void ASTWriter::CompletedImplicitDefinition(const FunctionDecl *D) {
}
void ASTWriter::FunctionDefinitionInstantiated(const FunctionDecl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
@@ -5782,6 +5772,7 @@ void ASTWriter::FunctionDefinitionInstantiated(const FunctionDecl *D) {
}
void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
@@ -5794,6 +5785,7 @@ void ASTWriter::StaticDataMemberInstantiated(const VarDecl *D) {
}
void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
@@ -5804,6 +5796,7 @@ void ASTWriter::DefaultArgumentInstantiated(const ParmVarDecl *D) {
void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
const ObjCInterfaceDecl *IFD) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!IFD->isFromASTFile())
return; // Declaration not imported from PCH.
@@ -5814,14 +5807,21 @@ void ASTWriter::AddedObjCCategoryToInterface(const ObjCCategoryDecl *CatD,
}
void ASTWriter::DeclarationMarkedUsed(const Decl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
- if (!D->isFromASTFile())
- return;
+
+ // If there is *any* declaration of the entity that's not from an AST file,
+ // we can skip writing the update record. We make sure that isUsed() triggers
+ // completion of the redeclaration chain of the entity.
+ for (auto Prev = D->getMostRecentDecl(); Prev; Prev = Prev->getPreviousDecl())
+ if (IsLocalDecl(Prev))
+ return;
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_USED));
}
void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!D->isFromASTFile())
return;
@@ -5829,7 +5829,19 @@ void ASTWriter::DeclarationMarkedOpenMPThreadPrivate(const Decl *D) {
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_THREADPRIVATE));
}
+void ASTWriter::DeclarationMarkedOpenMPDeclareTarget(const Decl *D,
+ const Attr *Attr) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
+ assert(!WritingAST && "Already writing the AST!");
+ if (!D->isFromASTFile())
+ return;
+
+ DeclUpdates[D].push_back(
+ DeclUpdate(UPD_DECL_MARKED_OPENMP_DECLARETARGET, Attr));
+}
+
void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
assert(D->isHidden() && "expected a hidden declaration");
DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M));
@@ -5837,6 +5849,7 @@ void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
void ASTWriter::AddedAttributeToRecord(const Attr *Attr,
const RecordDecl *Record) {
+ if (Chain && Chain->isProcessingUpdateRecords()) return;
assert(!WritingAST && "Already writing the AST!");
if (!Record->isFromASTFile())
return;
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp
index 54bba282ab8d..23d18540e822 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -32,23 +32,31 @@ using namespace serialization;
namespace clang {
class ASTDeclWriter : public DeclVisitor<ASTDeclWriter, void> {
-
ASTWriter &Writer;
ASTContext &Context;
- typedef ASTWriter::RecordData RecordData;
- RecordData &Record;
+ ASTRecordWriter Record;
- public:
serialization::DeclCode Code;
unsigned AbbrevToUse;
- ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, RecordData &Record)
- : Writer(Writer), Context(Context), Record(Record) {
+ public:
+ ASTDeclWriter(ASTWriter &Writer, ASTContext &Context,
+ ASTWriter::RecordDataImpl &Record)
+ : Writer(Writer), Context(Context), Record(Writer, Record),
+ Code((serialization::DeclCode)0), AbbrevToUse(0) {}
+
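+  // Emit the accumulated record for D and return its offset; Visit is
+  // expected to have set Code to the declaration's record kind.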
+ uint64_t Emit(Decl *D) {
+ if (!Code)
+ llvm::report_fatal_error(StringRef("unexpected declaration kind '") +
+ D->getDeclKindName() + "'");
+ return Record.Emit(Code, AbbrevToUse);
}
void Visit(Decl *D);
void VisitDecl(Decl *D);
+ void VisitPragmaCommentDecl(PragmaCommentDecl *D);
+ void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D);
void VisitTranslationUnitDecl(TranslationUnitDecl *D);
void VisitNamedDecl(NamedDecl *D);
void VisitLabelDecl(LabelDecl *LD);
@@ -99,6 +107,7 @@ namespace clang {
void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
void VisitUsingDecl(UsingDecl *D);
void VisitUsingShadowDecl(UsingShadowDecl *D);
+ void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D);
void VisitLinkageSpecDecl(LinkageSpecDecl *D);
void VisitFileScopeAsmDecl(FileScopeAsmDecl *D);
void VisitImportDecl(ImportDecl *D);
@@ -110,8 +119,7 @@ namespace clang {
void VisitCapturedDecl(CapturedDecl *D);
void VisitEmptyDecl(EmptyDecl *D);
- void VisitDeclContext(DeclContext *DC, uint64_t LexicalOffset,
- uint64_t VisibleOffset);
+ void VisitDeclContext(DeclContext *DC);
template <typename T> void VisitRedeclarable(Redeclarable<T> *D);
@@ -131,6 +139,8 @@ namespace clang {
void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
+ void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D);
+ void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
/// Add an Objective-C type parameter list to the given record.
void AddObjCTypeParamList(ObjCTypeParamList *typeParams) {
@@ -142,21 +152,10 @@ namespace clang {
Record.push_back(typeParams->size());
for (auto typeParam : *typeParams) {
- Writer.AddDeclRef(typeParam, Record);
+ Record.AddDeclRef(typeParam);
}
- Writer.AddSourceLocation(typeParams->getLAngleLoc(), Record);
- Writer.AddSourceLocation(typeParams->getRAngleLoc(), Record);
- }
-
- void AddFunctionDefinition(const FunctionDecl *FD) {
- assert(FD->doesThisDeclarationHaveABody());
- if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
- Record.push_back(CD->NumCtorInitializers);
- if (CD->NumCtorInitializers)
- Writer.AddCXXCtorInitializersRef(
- llvm::makeArrayRef(CD->init_begin(), CD->init_end()), Record);
- }
- Writer.AddStmt(FD->getBody());
+ Record.AddSourceLocation(typeParams->getLAngleLoc());
+ Record.AddSourceLocation(typeParams->getRAngleLoc());
}
/// Add to the record the first declaration from each module file that
@@ -172,7 +171,7 @@ namespace clang {
Firsts[nullptr] = R;
}
for (const auto &F : Firsts)
- Writer.AddDeclRef(F.second, Record);
+ Record.AddDeclRef(F.second);
}
/// Get the specialization decl from an entry in the specialization list.
@@ -191,8 +190,8 @@ namespace clang {
return None;
}
- template<typename Decl>
- void AddTemplateSpecializations(Decl *D) {
+ template<typename DeclTy>
+ void AddTemplateSpecializations(DeclTy *D) {
auto *Common = D->getCommonPtr();
// If we have any lazy specializations, and the external AST source is
@@ -204,8 +203,6 @@ namespace clang {
assert(!Common->LazySpecializations);
}
- auto &Specializations = Common->Specializations;
- auto &&PartialSpecializations = getPartialSpecializations(Common);
ArrayRef<DeclID> LazySpecializations;
if (auto *LS = Common->LazySpecializations)
LazySpecializations = llvm::makeArrayRef(LS + 1, LS[0]);
@@ -214,13 +211,15 @@ namespace clang {
unsigned I = Record.size();
Record.push_back(0);
- for (auto &Entry : Specializations) {
- auto *D = getSpecializationDecl(Entry);
- assert(D->isCanonicalDecl() && "non-canonical decl in set");
- AddFirstDeclFromEachModule(D, /*IncludeLocal*/true);
- }
- for (auto &Entry : PartialSpecializations) {
- auto *D = getSpecializationDecl(Entry);
+ // AddFirstDeclFromEachModule might trigger deserialization, invalidating
+ // *Specializations iterators.
+ llvm::SmallVector<const Decl*, 16> Specs;
+ for (auto &Entry : Common->Specializations)
+ Specs.push_back(getSpecializationDecl(Entry));
+ for (auto &Entry : getPartialSpecializations(Common))
+ Specs.push_back(getSpecializationDecl(Entry));
+
+ for (auto *D : Specs) {
assert(D->isCanonicalDecl() && "non-canonical decl in set");
AddFirstDeclFromEachModule(D, /*IncludeLocal*/true);
}
@@ -261,7 +260,7 @@ void ASTDeclWriter::Visit(Decl *D) {
// abbreviation infrastructure requires that arrays are encoded last, so
// we handle it here in the case of those classes derived from DeclaratorDecl
if (DeclaratorDecl *DD = dyn_cast<DeclaratorDecl>(D)) {
- Writer.AddTypeSourceInfo(DD->getTypeSourceInfo(), Record);
+ Record.AddTypeSourceInfo(DD->getTypeSourceInfo());
}
// Handle FunctionDecl's body here and write it after all other Stmts/Exprs
@@ -270,21 +269,26 @@ void ASTDeclWriter::Visit(Decl *D) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
Record.push_back(FD->doesThisDeclarationHaveABody());
if (FD->doesThisDeclarationHaveABody())
- AddFunctionDefinition(FD);
+ Record.AddFunctionDefinition(FD);
}
+
+  // If this declaration is also a DeclContext, write blocks for the
+  // declarations that are lexically stored inside its context and those
+  // declarations that are visible from its context.
+ if (DeclContext *DC = dyn_cast<DeclContext>(D))
+ VisitDeclContext(DC);
}
void ASTDeclWriter::VisitDecl(Decl *D) {
- Writer.AddDeclRef(cast_or_null<Decl>(D->getDeclContext()), Record);
+ Record.AddDeclRef(cast_or_null<Decl>(D->getDeclContext()));
if (D->getDeclContext() != D->getLexicalDeclContext())
- Writer.AddDeclRef(cast_or_null<Decl>(D->getLexicalDeclContext()), Record);
+ Record.AddDeclRef(cast_or_null<Decl>(D->getLexicalDeclContext()));
else
Record.push_back(0);
Record.push_back(D->isInvalidDecl());
Record.push_back(D->hasAttrs());
if (D->hasAttrs())
- Writer.WriteAttributes(llvm::makeArrayRef(D->getAttrs().begin(),
- D->getAttrs().size()), Record);
+ Record.AddAttributes(D->getAttrs());
Record.push_back(D->isImplicit());
Record.push_back(D->isUsed(false));
Record.push_back(D->isReferenced());
@@ -314,13 +318,35 @@ void ASTDeclWriter::VisitDecl(Decl *D) {
}
}
+void ASTDeclWriter::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
+ StringRef Arg = D->getArg();
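+  // Emit the size before anything else so the reader can allocate the
+  // trailing argument storage when re-creating the decl.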
+ Record.push_back(Arg.size());
+ VisitDecl(D);
+ Record.AddSourceLocation(D->getLocStart());
+ Record.push_back(D->getCommentKind());
+ Record.AddString(Arg);
+ Code = serialization::DECL_PRAGMA_COMMENT;
+}
+
+void ASTDeclWriter::VisitPragmaDetectMismatchDecl(
+ PragmaDetectMismatchDecl *D) {
+ StringRef Name = D->getName();
+ StringRef Value = D->getValue();
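+  // Trailing storage holds Name, a '\0' separator, and Value.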
+ Record.push_back(Name.size() + 1 + Value.size());
+ VisitDecl(D);
+ Record.AddSourceLocation(D->getLocStart());
+ Record.AddString(Name);
+ Record.AddString(Value);
+ Code = serialization::DECL_PRAGMA_DETECT_MISMATCH;
+}
+
void ASTDeclWriter::VisitTranslationUnitDecl(TranslationUnitDecl *D) {
llvm_unreachable("Translation units aren't directly serialized");
}
void ASTDeclWriter::VisitNamedDecl(NamedDecl *D) {
VisitDecl(D);
- Writer.AddDeclarationName(D->getDeclName(), Record);
+ Record.AddDeclarationName(D->getDeclName());
Record.push_back(needsAnonymousDeclarationNumber(D)
? Writer.getAnonymousDeclarationNumber(D)
: 0);
@@ -328,17 +354,17 @@ void ASTDeclWriter::VisitNamedDecl(NamedDecl *D) {
void ASTDeclWriter::VisitTypeDecl(TypeDecl *D) {
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getLocStart(), Record);
- Writer.AddTypeRef(QualType(D->getTypeForDecl(), 0), Record);
+ Record.AddSourceLocation(D->getLocStart());
+ Record.AddTypeRef(QualType(D->getTypeForDecl(), 0));
}
void ASTDeclWriter::VisitTypedefNameDecl(TypedefNameDecl *D) {
VisitRedeclarable(D);
VisitTypeDecl(D);
- Writer.AddTypeSourceInfo(D->getTypeSourceInfo(), Record);
+ Record.AddTypeSourceInfo(D->getTypeSourceInfo());
Record.push_back(D->isModed());
if (D->isModed())
- Writer.AddTypeRef(D->getUnderlyingType(), Record);
+ Record.AddTypeRef(D->getUnderlyingType());
}
void ASTDeclWriter::VisitTypedefDecl(TypedefDecl *D) {
@@ -359,7 +385,7 @@ void ASTDeclWriter::VisitTypedefDecl(TypedefDecl *D) {
void ASTDeclWriter::VisitTypeAliasDecl(TypeAliasDecl *D) {
VisitTypedefNameDecl(D);
- Writer.AddDeclRef(D->getDescribedAliasTemplate(), Record);
+ Record.AddDeclRef(D->getDescribedAliasTemplate());
Code = serialization::DECL_TYPEALIAS;
}
@@ -373,15 +399,15 @@ void ASTDeclWriter::VisitTagDecl(TagDecl *D) {
Record.push_back(D->isEmbeddedInDeclarator());
Record.push_back(D->isFreeStanding());
Record.push_back(D->isCompleteDefinitionRequired());
- Writer.AddSourceLocation(D->getRBraceLoc(), Record);
+ Record.AddSourceRange(D->getBraceRange());
if (D->hasExtInfo()) {
Record.push_back(1);
- Writer.AddQualifierInfo(*D->getExtInfo(), Record);
+ Record.AddQualifierInfo(*D->getExtInfo());
} else if (auto *TD = D->getTypedefNameForAnonDecl()) {
Record.push_back(2);
- Writer.AddDeclRef(TD, Record);
- Writer.AddIdentifierRef(TD->getDeclName().getAsIdentifierInfo(), Record);
+ Record.AddDeclRef(TD);
+ Record.AddIdentifierRef(TD->getDeclName().getAsIdentifierInfo());
} else {
Record.push_back(0);
}
@@ -389,21 +415,21 @@ void ASTDeclWriter::VisitTagDecl(TagDecl *D) {
void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) {
VisitTagDecl(D);
- Writer.AddTypeSourceInfo(D->getIntegerTypeSourceInfo(), Record);
+ Record.AddTypeSourceInfo(D->getIntegerTypeSourceInfo());
if (!D->getIntegerTypeSourceInfo())
- Writer.AddTypeRef(D->getIntegerType(), Record);
- Writer.AddTypeRef(D->getPromotionType(), Record);
+ Record.AddTypeRef(D->getIntegerType());
+ Record.AddTypeRef(D->getPromotionType());
Record.push_back(D->getNumPositiveBits());
Record.push_back(D->getNumNegativeBits());
Record.push_back(D->isScoped());
Record.push_back(D->isScopedUsingClassTag());
Record.push_back(D->isFixed());
if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) {
- Writer.AddDeclRef(MemberInfo->getInstantiatedFrom(), Record);
+ Record.AddDeclRef(MemberInfo->getInstantiatedFrom());
Record.push_back(MemberInfo->getTemplateSpecializationKind());
- Writer.AddSourceLocation(MemberInfo->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(MemberInfo->getPointOfInstantiation());
} else {
- Writer.AddDeclRef(nullptr, Record);
+ Record.AddDeclRef(nullptr);
}
if (D->getDeclContext() == D->getLexicalDeclContext() &&
@@ -457,31 +483,31 @@ void ASTDeclWriter::VisitRecordDecl(RecordDecl *D) {
void ASTDeclWriter::VisitValueDecl(ValueDecl *D) {
VisitNamedDecl(D);
- Writer.AddTypeRef(D->getType(), Record);
+ Record.AddTypeRef(D->getType());
}
void ASTDeclWriter::VisitEnumConstantDecl(EnumConstantDecl *D) {
VisitValueDecl(D);
Record.push_back(D->getInitExpr()? 1 : 0);
if (D->getInitExpr())
- Writer.AddStmt(D->getInitExpr());
- Writer.AddAPSInt(D->getInitVal(), Record);
+ Record.AddStmt(D->getInitExpr());
+ Record.AddAPSInt(D->getInitVal());
Code = serialization::DECL_ENUM_CONSTANT;
}
void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) {
VisitValueDecl(D);
- Writer.AddSourceLocation(D->getInnerLocStart(), Record);
+ Record.AddSourceLocation(D->getInnerLocStart());
Record.push_back(D->hasExtInfo());
if (D->hasExtInfo())
- Writer.AddQualifierInfo(*D->getExtInfo(), Record);
+ Record.AddQualifierInfo(*D->getExtInfo());
}
void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
VisitRedeclarable(D);
VisitDeclaratorDecl(D);
- Writer.AddDeclarationNameLoc(D->DNLoc, D->getDeclName(), Record);
+ Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName());
Record.push_back(D->getIdentifierNamespace());
// FunctionDecl's body is handled last at ASTWriterDecl::Visit,
@@ -503,20 +529,20 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
Record.push_back(D->HasSkippedBody);
Record.push_back(D->IsLateTemplateParsed);
Record.push_back(D->getLinkageInternal());
- Writer.AddSourceLocation(D->getLocEnd(), Record);
+ Record.AddSourceLocation(D->getLocEnd());
Record.push_back(D->getTemplatedKind());
switch (D->getTemplatedKind()) {
case FunctionDecl::TK_NonTemplate:
break;
case FunctionDecl::TK_FunctionTemplate:
- Writer.AddDeclRef(D->getDescribedFunctionTemplate(), Record);
+ Record.AddDeclRef(D->getDescribedFunctionTemplate());
break;
case FunctionDecl::TK_MemberSpecialization: {
MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo();
- Writer.AddDeclRef(MemberInfo->getInstantiatedFrom(), Record);
+ Record.AddDeclRef(MemberInfo->getInstantiatedFrom());
Record.push_back(MemberInfo->getTemplateSpecializationKind());
- Writer.AddSourceLocation(MemberInfo->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(MemberInfo->getPointOfInstantiation());
break;
}
case FunctionDecl::TK_FunctionTemplateSpecialization: {
@@ -525,11 +551,11 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
RegisterTemplateSpecialization(FTSInfo->getTemplate(), D);
- Writer.AddDeclRef(FTSInfo->getTemplate(), Record);
+ Record.AddDeclRef(FTSInfo->getTemplate());
Record.push_back(FTSInfo->getTemplateSpecializationKind());
// Template arguments.
- Writer.AddTemplateArgumentList(FTSInfo->TemplateArguments, Record);
+ Record.AddTemplateArgumentList(FTSInfo->TemplateArguments);
// Template args as written.
Record.push_back(FTSInfo->TemplateArgumentsAsWritten != nullptr);
@@ -537,20 +563,18 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
Record.push_back(FTSInfo->TemplateArgumentsAsWritten->NumTemplateArgs);
for (int i=0, e = FTSInfo->TemplateArgumentsAsWritten->NumTemplateArgs;
i!=e; ++i)
- Writer.AddTemplateArgumentLoc((*FTSInfo->TemplateArgumentsAsWritten)[i],
- Record);
- Writer.AddSourceLocation(FTSInfo->TemplateArgumentsAsWritten->LAngleLoc,
- Record);
- Writer.AddSourceLocation(FTSInfo->TemplateArgumentsAsWritten->RAngleLoc,
- Record);
+ Record.AddTemplateArgumentLoc(
+ (*FTSInfo->TemplateArgumentsAsWritten)[i]);
+ Record.AddSourceLocation(FTSInfo->TemplateArgumentsAsWritten->LAngleLoc);
+ Record.AddSourceLocation(FTSInfo->TemplateArgumentsAsWritten->RAngleLoc);
}
- Writer.AddSourceLocation(FTSInfo->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(FTSInfo->getPointOfInstantiation());
if (D->isCanonicalDecl()) {
// Write the template that contains the specializations set. We will
// add a FunctionTemplateSpecializationInfo to it when reading.
- Writer.AddDeclRef(FTSInfo->getTemplate()->getCanonicalDecl(), Record);
+ Record.AddDeclRef(FTSInfo->getTemplate()->getCanonicalDecl());
}
break;
}
@@ -561,21 +585,21 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
// Templates.
Record.push_back(DFTSInfo->getNumTemplates());
for (int i=0, e = DFTSInfo->getNumTemplates(); i != e; ++i)
- Writer.AddDeclRef(DFTSInfo->getTemplate(i), Record);
+ Record.AddDeclRef(DFTSInfo->getTemplate(i));
// Templates args.
Record.push_back(DFTSInfo->getNumTemplateArgs());
for (int i=0, e = DFTSInfo->getNumTemplateArgs(); i != e; ++i)
- Writer.AddTemplateArgumentLoc(DFTSInfo->getTemplateArg(i), Record);
- Writer.AddSourceLocation(DFTSInfo->getLAngleLoc(), Record);
- Writer.AddSourceLocation(DFTSInfo->getRAngleLoc(), Record);
+ Record.AddTemplateArgumentLoc(DFTSInfo->getTemplateArg(i));
+ Record.AddSourceLocation(DFTSInfo->getLAngleLoc());
+ Record.AddSourceLocation(DFTSInfo->getRAngleLoc());
break;
}
}
Record.push_back(D->param_size());
- for (auto P : D->params())
- Writer.AddDeclRef(P, Record);
+ for (auto P : D->parameters())
+ Record.AddDeclRef(P);
Code = serialization::DECL_FUNCTION;
}
@@ -587,9 +611,9 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
D->getSelfDecl() != nullptr || D->getCmdDecl() != nullptr;
Record.push_back(HasBodyStuff);
if (HasBodyStuff) {
- Writer.AddStmt(D->getBody());
- Writer.AddDeclRef(D->getSelfDecl(), Record);
- Writer.AddDeclRef(D->getCmdDecl(), Record);
+ Record.AddStmt(D->getBody());
+ Record.AddDeclRef(D->getSelfDecl());
+ Record.AddDeclRef(D->getCmdDecl());
}
Record.push_back(D->isInstanceMethod());
Record.push_back(D->isVariadic());
@@ -602,7 +626,7 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
Record.push_back(D->HasRedeclaration);
if (D->HasRedeclaration) {
assert(Context.getObjCMethodRedeclaration(D));
- Writer.AddDeclRef(Context.getObjCMethodRedeclaration(D), Record);
+ Record.AddDeclRef(Context.getObjCMethodRedeclaration(D));
}
// FIXME: stable encoding for @required/@optional
@@ -610,19 +634,19 @@ void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) {
// FIXME: stable encoding for in/out/inout/bycopy/byref/oneway/nullability
Record.push_back(D->getObjCDeclQualifier());
Record.push_back(D->hasRelatedResultType());
- Writer.AddTypeRef(D->getReturnType(), Record);
- Writer.AddTypeSourceInfo(D->getReturnTypeSourceInfo(), Record);
- Writer.AddSourceLocation(D->getLocEnd(), Record);
+ Record.AddTypeRef(D->getReturnType());
+ Record.AddTypeSourceInfo(D->getReturnTypeSourceInfo());
+ Record.AddSourceLocation(D->getLocEnd());
Record.push_back(D->param_size());
- for (const auto *P : D->params())
- Writer.AddDeclRef(P, Record);
+ for (const auto *P : D->parameters())
+ Record.AddDeclRef(P);
Record.push_back(D->SelLocsKind);
unsigned NumStoredSelLocs = D->getNumStoredSelLocs();
SourceLocation *SelLocs = D->getStoredSelLocs();
Record.push_back(NumStoredSelLocs);
for (unsigned i = 0; i != NumStoredSelLocs; ++i)
- Writer.AddSourceLocation(SelLocs[i], Record);
+ Record.AddSourceLocation(SelLocs[i]);
Code = serialization::DECL_OBJC_METHOD;
}
@@ -631,23 +655,23 @@ void ASTDeclWriter::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) {
VisitTypedefNameDecl(D);
Record.push_back(D->Variance);
Record.push_back(D->Index);
- Writer.AddSourceLocation(D->VarianceLoc, Record);
- Writer.AddSourceLocation(D->ColonLoc, Record);
+ Record.AddSourceLocation(D->VarianceLoc);
+ Record.AddSourceLocation(D->ColonLoc);
Code = serialization::DECL_OBJC_TYPE_PARAM;
}
void ASTDeclWriter::VisitObjCContainerDecl(ObjCContainerDecl *D) {
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getAtStartLoc(), Record);
- Writer.AddSourceRange(D->getAtEndRange(), Record);
+ Record.AddSourceLocation(D->getAtStartLoc());
+ Record.AddSourceRange(D->getAtEndRange());
// Abstract class (no need to define a stable serialization::DECL code).
}
void ASTDeclWriter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) {
VisitRedeclarable(D);
VisitObjCContainerDecl(D);
- Writer.AddTypeRef(QualType(D->getTypeForDecl(), 0), Record);
+ Record.AddTypeRef(QualType(D->getTypeForDecl(), 0));
AddObjCTypeParamList(D->TypeParamList);
Record.push_back(D->isThisDeclarationADefinition());
@@ -655,16 +679,16 @@ void ASTDeclWriter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) {
// Write the DefinitionData
ObjCInterfaceDecl::DefinitionData &Data = D->data();
- Writer.AddTypeSourceInfo(D->getSuperClassTInfo(), Record);
- Writer.AddSourceLocation(D->getEndOfDefinitionLoc(), Record);
+ Record.AddTypeSourceInfo(D->getSuperClassTInfo());
+ Record.AddSourceLocation(D->getEndOfDefinitionLoc());
Record.push_back(Data.HasDesignatedInitializers);
// Write out the protocols that are directly referenced by the @interface.
Record.push_back(Data.ReferencedProtocols.size());
for (const auto *P : D->protocols())
- Writer.AddDeclRef(P, Record);
+ Record.AddDeclRef(P);
for (const auto &PL : D->protocol_locs())
- Writer.AddSourceLocation(PL, Record);
+ Record.AddSourceLocation(PL);
// Write out the protocols that are transitively referenced.
Record.push_back(Data.AllReferencedProtocols.size());
@@ -672,7 +696,7 @@ void ASTDeclWriter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) {
P = Data.AllReferencedProtocols.begin(),
PEnd = Data.AllReferencedProtocols.end();
P != PEnd; ++P)
- Writer.AddDeclRef(*P, Record);
+ Record.AddDeclRef(*P);
if (ObjCCategoryDecl *Cat = D->getCategoryListRaw()) {
@@ -717,9 +741,9 @@ void ASTDeclWriter::VisitObjCProtocolDecl(ObjCProtocolDecl *D) {
if (D->isThisDeclarationADefinition()) {
Record.push_back(D->protocol_size());
for (const auto *I : D->protocols())
- Writer.AddDeclRef(I, Record);
+ Record.AddDeclRef(I);
for (const auto &PL : D->protocol_locs())
- Writer.AddSourceLocation(PL, Record);
+ Record.AddSourceLocation(PL);
}
Code = serialization::DECL_OBJC_PROTOCOL;
@@ -732,80 +756,80 @@ void ASTDeclWriter::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) {
void ASTDeclWriter::VisitObjCCategoryDecl(ObjCCategoryDecl *D) {
VisitObjCContainerDecl(D);
- Writer.AddSourceLocation(D->getCategoryNameLoc(), Record);
- Writer.AddSourceLocation(D->getIvarLBraceLoc(), Record);
- Writer.AddSourceLocation(D->getIvarRBraceLoc(), Record);
- Writer.AddDeclRef(D->getClassInterface(), Record);
+ Record.AddSourceLocation(D->getCategoryNameLoc());
+ Record.AddSourceLocation(D->getIvarLBraceLoc());
+ Record.AddSourceLocation(D->getIvarRBraceLoc());
+ Record.AddDeclRef(D->getClassInterface());
AddObjCTypeParamList(D->TypeParamList);
Record.push_back(D->protocol_size());
for (const auto *I : D->protocols())
- Writer.AddDeclRef(I, Record);
+ Record.AddDeclRef(I);
for (const auto &PL : D->protocol_locs())
- Writer.AddSourceLocation(PL, Record);
+ Record.AddSourceLocation(PL);
Code = serialization::DECL_OBJC_CATEGORY;
}
void ASTDeclWriter::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D) {
VisitNamedDecl(D);
- Writer.AddDeclRef(D->getClassInterface(), Record);
+ Record.AddDeclRef(D->getClassInterface());
Code = serialization::DECL_OBJC_COMPATIBLE_ALIAS;
}
void ASTDeclWriter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getAtLoc(), Record);
- Writer.AddSourceLocation(D->getLParenLoc(), Record);
- Writer.AddTypeRef(D->getType(), Record);
- Writer.AddTypeSourceInfo(D->getTypeSourceInfo(), Record);
+ Record.AddSourceLocation(D->getAtLoc());
+ Record.AddSourceLocation(D->getLParenLoc());
+ Record.AddTypeRef(D->getType());
+ Record.AddTypeSourceInfo(D->getTypeSourceInfo());
// FIXME: stable encoding
Record.push_back((unsigned)D->getPropertyAttributes());
Record.push_back((unsigned)D->getPropertyAttributesAsWritten());
// FIXME: stable encoding
Record.push_back((unsigned)D->getPropertyImplementation());
- Writer.AddDeclarationName(D->getGetterName(), Record);
- Writer.AddDeclarationName(D->getSetterName(), Record);
- Writer.AddDeclRef(D->getGetterMethodDecl(), Record);
- Writer.AddDeclRef(D->getSetterMethodDecl(), Record);
- Writer.AddDeclRef(D->getPropertyIvarDecl(), Record);
+ Record.AddDeclarationName(D->getGetterName());
+ Record.AddDeclarationName(D->getSetterName());
+ Record.AddDeclRef(D->getGetterMethodDecl());
+ Record.AddDeclRef(D->getSetterMethodDecl());
+ Record.AddDeclRef(D->getPropertyIvarDecl());
Code = serialization::DECL_OBJC_PROPERTY;
}
void ASTDeclWriter::VisitObjCImplDecl(ObjCImplDecl *D) {
VisitObjCContainerDecl(D);
- Writer.AddDeclRef(D->getClassInterface(), Record);
+ Record.AddDeclRef(D->getClassInterface());
// Abstract class (no need to define a stable serialization::DECL code).
}
void ASTDeclWriter::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) {
VisitObjCImplDecl(D);
- Writer.AddIdentifierRef(D->getIdentifier(), Record);
- Writer.AddSourceLocation(D->getCategoryNameLoc(), Record);
+ Record.AddIdentifierRef(D->getIdentifier());
+ Record.AddSourceLocation(D->getCategoryNameLoc());
Code = serialization::DECL_OBJC_CATEGORY_IMPL;
}
void ASTDeclWriter::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
VisitObjCImplDecl(D);
- Writer.AddDeclRef(D->getSuperClass(), Record);
- Writer.AddSourceLocation(D->getSuperClassLoc(), Record);
- Writer.AddSourceLocation(D->getIvarLBraceLoc(), Record);
- Writer.AddSourceLocation(D->getIvarRBraceLoc(), Record);
+ Record.AddDeclRef(D->getSuperClass());
+ Record.AddSourceLocation(D->getSuperClassLoc());
+ Record.AddSourceLocation(D->getIvarLBraceLoc());
+ Record.AddSourceLocation(D->getIvarRBraceLoc());
Record.push_back(D->hasNonZeroConstructors());
Record.push_back(D->hasDestructors());
Record.push_back(D->NumIvarInitializers);
if (D->NumIvarInitializers)
- Writer.AddCXXCtorInitializersRef(
- llvm::makeArrayRef(D->init_begin(), D->init_end()), Record);
+ Record.AddCXXCtorInitializers(
+ llvm::makeArrayRef(D->init_begin(), D->init_end()));
Code = serialization::DECL_OBJC_IMPLEMENTATION;
}
void ASTDeclWriter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
VisitDecl(D);
- Writer.AddSourceLocation(D->getLocStart(), Record);
- Writer.AddDeclRef(D->getPropertyDecl(), Record);
- Writer.AddDeclRef(D->getPropertyIvarDecl(), Record);
- Writer.AddSourceLocation(D->getPropertyIvarDeclLoc(), Record);
- Writer.AddStmt(D->getGetterCXXConstructor());
- Writer.AddStmt(D->getSetterCXXAssignment());
+ Record.AddSourceLocation(D->getLocStart());
+ Record.AddDeclRef(D->getPropertyDecl());
+ Record.AddDeclRef(D->getPropertyIvarDecl());
+ Record.AddSourceLocation(D->getPropertyIvarDeclLoc());
+ Record.AddStmt(D->getGetterCXXConstructor());
+ Record.AddStmt(D->getSetterCXXAssignment());
Code = serialization::DECL_OBJC_PROPERTY_IMPL;
}
@@ -817,15 +841,14 @@ void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) {
Record.push_back(0);
} else if (D->InitStorage.getInt() == FieldDecl::ISK_CapturedVLAType) {
Record.push_back(D->InitStorage.getInt() + 1);
- Writer.AddTypeRef(
- QualType(static_cast<Type *>(D->InitStorage.getPointer()), 0),
- Record);
+ Record.AddTypeRef(
+ QualType(static_cast<Type *>(D->InitStorage.getPointer()), 0));
} else {
Record.push_back(D->InitStorage.getInt() + 1);
- Writer.AddStmt(static_cast<Expr *>(D->InitStorage.getPointer()));
+ Record.AddStmt(static_cast<Expr *>(D->InitStorage.getPointer()));
}
if (!D->getDeclName())
- Writer.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D), Record);
+ Record.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D));
if (D->getDeclContext() == D->getLexicalDeclContext() &&
!D->hasAttrs() &&
@@ -848,8 +871,8 @@ void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) {
void ASTDeclWriter::VisitMSPropertyDecl(MSPropertyDecl *D) {
VisitDeclaratorDecl(D);
- Writer.AddIdentifierRef(D->getGetterId(), Record);
- Writer.AddIdentifierRef(D->getSetterId(), Record);
+ Record.AddIdentifierRef(D->getGetterId());
+ Record.AddIdentifierRef(D->getSetterId());
Code = serialization::DECL_MS_PROPERTY;
}
@@ -858,7 +881,7 @@ void ASTDeclWriter::VisitIndirectFieldDecl(IndirectFieldDecl *D) {
Record.push_back(D->getChainingSize());
for (const auto *P : D->chain())
- Writer.AddDeclRef(P, Record);
+ Record.AddDeclRef(P);
Code = serialization::DECL_INDIRECTFIELD;
}
@@ -873,6 +896,8 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
Record.push_back(D->isNRVOVariable());
Record.push_back(D->isCXXForRangeDecl());
Record.push_back(D->isARCPseudoStrong());
+ Record.push_back(D->isInline());
+ Record.push_back(D->isInlineSpecified());
Record.push_back(D->isConstexpr());
Record.push_back(D->isInitCapture());
Record.push_back(D->isPreviousDeclInSameBlockScope());
@@ -881,7 +906,7 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
if (D->getInit()) {
Record.push_back(!D->isInitKnownICE() ? 1 : (D->isInitICE() ? 3 : 2));
- Writer.AddStmt(D->getInit());
+ Record.AddStmt(D->getInit());
} else {
Record.push_back(0);
}
@@ -891,13 +916,13 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
};
if (VarTemplateDecl *TemplD = D->getDescribedVarTemplate()) {
Record.push_back(VarTemplate);
- Writer.AddDeclRef(TemplD, Record);
+ Record.AddDeclRef(TemplD);
} else if (MemberSpecializationInfo *SpecInfo
= D->getMemberSpecializationInfo()) {
Record.push_back(StaticDataMemberSpecialization);
- Writer.AddDeclRef(SpecInfo->getInstantiatedFrom(), Record);
+ Record.AddDeclRef(SpecInfo->getInstantiatedFrom());
Record.push_back(SpecInfo->getTemplateSpecializationKind());
- Writer.AddSourceLocation(SpecInfo->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(SpecInfo->getPointOfInstantiation());
} else {
Record.push_back(VarNotTemplate);
}
@@ -919,6 +944,7 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
D->getInit() == nullptr &&
!isa<ParmVarDecl>(D) &&
!isa<VarTemplateSpecializationDecl>(D) &&
+ !D->isInline() &&
!D->isConstexpr() &&
!D->isInitCapture() &&
!D->isPreviousDeclInSameBlockScope() &&
@@ -943,7 +969,7 @@ void ASTDeclWriter::VisitParmVarDecl(ParmVarDecl *D) {
Record.push_back(D->hasInheritedDefaultArg());
Record.push_back(D->hasUninstantiatedDefaultArg());
if (D->hasUninstantiatedDefaultArg())
- Writer.AddStmt(D->getUninstantiatedDefaultArg());
+ Record.AddStmt(D->getUninstantiatedDefaultArg());
Code = serialization::DECL_PARM_VAR;
assert(!D->isARCPseudoStrong()); // can be true of ImplicitParamDecl
@@ -982,8 +1008,8 @@ void ASTDeclWriter::VisitParmVarDecl(ParmVarDecl *D) {
void ASTDeclWriter::VisitFileScopeAsmDecl(FileScopeAsmDecl *D) {
VisitDecl(D);
- Writer.AddStmt(D->getAsmString());
- Writer.AddSourceLocation(D->getRParenLoc(), Record);
+ Record.AddStmt(D->getAsmString());
+ Record.AddSourceLocation(D->getRParenLoc());
Code = serialization::DECL_FILE_SCOPE_ASM;
}
@@ -994,19 +1020,18 @@ void ASTDeclWriter::VisitEmptyDecl(EmptyDecl *D) {
void ASTDeclWriter::VisitBlockDecl(BlockDecl *D) {
VisitDecl(D);
- Writer.AddStmt(D->getBody());
- Writer.AddTypeSourceInfo(D->getSignatureAsWritten(), Record);
+ Record.AddStmt(D->getBody());
+ Record.AddTypeSourceInfo(D->getSignatureAsWritten());
Record.push_back(D->param_size());
- for (FunctionDecl::param_iterator P = D->param_begin(), PEnd = D->param_end();
- P != PEnd; ++P)
- Writer.AddDeclRef(*P, Record);
+ for (ParmVarDecl *P : D->parameters())
+ Record.AddDeclRef(P);
Record.push_back(D->isVariadic());
Record.push_back(D->blockMissingReturnType());
Record.push_back(D->isConversionFromLambda());
Record.push_back(D->capturesCXXThis());
Record.push_back(D->getNumCaptures());
for (const auto &capture : D->captures()) {
- Writer.AddDeclRef(capture.getVariable(), Record);
+ Record.AddDeclRef(capture.getVariable());
unsigned flags = 0;
if (capture.isByRef()) flags |= 1;
@@ -1014,7 +1039,7 @@ void ASTDeclWriter::VisitBlockDecl(BlockDecl *D) {
if (capture.hasCopyExpr()) flags |= 4;
Record.push_back(flags);
- if (capture.hasCopyExpr()) Writer.AddStmt(capture.getCopyExpr());
+ if (capture.hasCopyExpr()) Record.AddStmt(capture.getCopyExpr());
}
Code = serialization::DECL_BLOCK;
@@ -1027,21 +1052,21 @@ void ASTDeclWriter::VisitCapturedDecl(CapturedDecl *CD) {
Record.push_back(CD->isNothrow() ? 1 : 0);
// Body is stored by VisitCapturedStmt.
for (unsigned I = 0; I < CD->getNumParams(); ++I)
- Writer.AddDeclRef(CD->getParam(I), Record);
+ Record.AddDeclRef(CD->getParam(I));
Code = serialization::DECL_CAPTURED;
}
void ASTDeclWriter::VisitLinkageSpecDecl(LinkageSpecDecl *D) {
VisitDecl(D);
Record.push_back(D->getLanguage());
- Writer.AddSourceLocation(D->getExternLoc(), Record);
- Writer.AddSourceLocation(D->getRBraceLoc(), Record);
+ Record.AddSourceLocation(D->getExternLoc());
+ Record.AddSourceLocation(D->getRBraceLoc());
Code = serialization::DECL_LINKAGE_SPEC;
}
void ASTDeclWriter::VisitLabelDecl(LabelDecl *D) {
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getLocStart(), Record);
+ Record.AddSourceLocation(D->getLocStart());
Code = serialization::DECL_LABEL;
}
@@ -1050,11 +1075,11 @@ void ASTDeclWriter::VisitNamespaceDecl(NamespaceDecl *D) {
VisitRedeclarable(D);
VisitNamedDecl(D);
Record.push_back(D->isInline());
- Writer.AddSourceLocation(D->getLocStart(), Record);
- Writer.AddSourceLocation(D->getRBraceLoc(), Record);
+ Record.AddSourceLocation(D->getLocStart());
+ Record.AddSourceLocation(D->getRBraceLoc());
if (D->isOriginalNamespace())
- Writer.AddDeclRef(D->getAnonymousNamespace(), Record);
+ Record.AddDeclRef(D->getAnonymousNamespace());
Code = serialization::DECL_NAMESPACE;
if (Writer.hasChain() && D->isAnonymousNamespace() &&
@@ -1075,56 +1100,65 @@ void ASTDeclWriter::VisitNamespaceDecl(NamespaceDecl *D) {
void ASTDeclWriter::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
VisitRedeclarable(D);
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getNamespaceLoc(), Record);
- Writer.AddSourceLocation(D->getTargetNameLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(D->getQualifierLoc(), Record);
- Writer.AddDeclRef(D->getNamespace(), Record);
+ Record.AddSourceLocation(D->getNamespaceLoc());
+ Record.AddSourceLocation(D->getTargetNameLoc());
+ Record.AddNestedNameSpecifierLoc(D->getQualifierLoc());
+ Record.AddDeclRef(D->getNamespace());
Code = serialization::DECL_NAMESPACE_ALIAS;
}
void ASTDeclWriter::VisitUsingDecl(UsingDecl *D) {
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getUsingLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(D->getQualifierLoc(), Record);
- Writer.AddDeclarationNameLoc(D->DNLoc, D->getDeclName(), Record);
- Writer.AddDeclRef(D->FirstUsingShadow.getPointer(), Record);
+ Record.AddSourceLocation(D->getUsingLoc());
+ Record.AddNestedNameSpecifierLoc(D->getQualifierLoc());
+ Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName());
+ Record.AddDeclRef(D->FirstUsingShadow.getPointer());
Record.push_back(D->hasTypename());
- Writer.AddDeclRef(Context.getInstantiatedFromUsingDecl(D), Record);
+ Record.AddDeclRef(Context.getInstantiatedFromUsingDecl(D));
Code = serialization::DECL_USING;
}
void ASTDeclWriter::VisitUsingShadowDecl(UsingShadowDecl *D) {
VisitRedeclarable(D);
VisitNamedDecl(D);
- Writer.AddDeclRef(D->getTargetDecl(), Record);
- Writer.AddDeclRef(D->UsingOrNextShadow, Record);
- Writer.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D), Record);
+ Record.AddDeclRef(D->getTargetDecl());
+ Record.AddDeclRef(D->UsingOrNextShadow);
+ Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D));
Code = serialization::DECL_USING_SHADOW;
}
+void ASTDeclWriter::VisitConstructorUsingShadowDecl(
+ ConstructorUsingShadowDecl *D) {
+ VisitUsingShadowDecl(D);
+ Record.AddDeclRef(D->NominatedBaseClassShadowDecl);
+ Record.AddDeclRef(D->ConstructedBaseClassShadowDecl);
+ Record.push_back(D->IsVirtual);
+ Code = serialization::DECL_CONSTRUCTOR_USING_SHADOW;
+}
+
void ASTDeclWriter::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
VisitNamedDecl(D);
- Writer.AddSourceLocation(D->getUsingLoc(), Record);
- Writer.AddSourceLocation(D->getNamespaceKeyLocation(), Record);
- Writer.AddNestedNameSpecifierLoc(D->getQualifierLoc(), Record);
- Writer.AddDeclRef(D->getNominatedNamespace(), Record);
- Writer.AddDeclRef(dyn_cast<Decl>(D->getCommonAncestor()), Record);
+ Record.AddSourceLocation(D->getUsingLoc());
+ Record.AddSourceLocation(D->getNamespaceKeyLocation());
+ Record.AddNestedNameSpecifierLoc(D->getQualifierLoc());
+ Record.AddDeclRef(D->getNominatedNamespace());
+ Record.AddDeclRef(dyn_cast<Decl>(D->getCommonAncestor()));
Code = serialization::DECL_USING_DIRECTIVE;
}
void ASTDeclWriter::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) {
VisitValueDecl(D);
- Writer.AddSourceLocation(D->getUsingLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(D->getQualifierLoc(), Record);
- Writer.AddDeclarationNameLoc(D->DNLoc, D->getDeclName(), Record);
+ Record.AddSourceLocation(D->getUsingLoc());
+ Record.AddNestedNameSpecifierLoc(D->getQualifierLoc());
+ Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName());
Code = serialization::DECL_UNRESOLVED_USING_VALUE;
}
void ASTDeclWriter::VisitUnresolvedUsingTypenameDecl(
UnresolvedUsingTypenameDecl *D) {
VisitTypeDecl(D);
- Writer.AddSourceLocation(D->getTypenameLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(D->getQualifierLoc(), Record);
+ Record.AddSourceLocation(D->getTypenameLoc());
+ Record.AddNestedNameSpecifierLoc(D->getQualifierLoc());
Code = serialization::DECL_UNRESOLVED_USING_TYPENAME;
}
@@ -1136,25 +1170,25 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
};
if (ClassTemplateDecl *TemplD = D->getDescribedClassTemplate()) {
Record.push_back(CXXRecTemplate);
- Writer.AddDeclRef(TemplD, Record);
+ Record.AddDeclRef(TemplD);
} else if (MemberSpecializationInfo *MSInfo
= D->getMemberSpecializationInfo()) {
Record.push_back(CXXRecMemberSpecialization);
- Writer.AddDeclRef(MSInfo->getInstantiatedFrom(), Record);
+ Record.AddDeclRef(MSInfo->getInstantiatedFrom());
Record.push_back(MSInfo->getTemplateSpecializationKind());
- Writer.AddSourceLocation(MSInfo->getPointOfInstantiation(), Record);
+ Record.AddSourceLocation(MSInfo->getPointOfInstantiation());
} else {
Record.push_back(CXXRecNotTemplate);
}
Record.push_back(D->isThisDeclarationADefinition());
if (D->isThisDeclarationADefinition())
- Writer.AddCXXDefinitionData(D, Record);
+ Record.AddCXXDefinitionData(D);
// Store (what we currently believe to be) the key function to avoid
// deserializing every method so we can compute it.
if (D->IsCompleteDefinition)
- Writer.AddDeclRef(Context.getCurrentKeyFunction(D), Record);
+ Record.AddDeclRef(Context.getCurrentKeyFunction(D));
Code = serialization::DECL_CXX_RECORD;
}
@@ -1166,7 +1200,7 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) {
for (CXXMethodDecl::method_iterator
I = D->begin_overridden_methods(), E = D->end_overridden_methods();
I != E; ++I)
- Writer.AddDeclRef(*I, Record);
+ Record.AddDeclRef(*I);
} else {
// We only need to record overridden methods once for the canonical decl.
Record.push_back(0);
@@ -1187,18 +1221,27 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) {
}
void ASTDeclWriter::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
+ if (auto Inherited = D->getInheritedConstructor()) {
+ Record.AddDeclRef(Inherited.getShadowDecl());
+ Record.AddDeclRef(Inherited.getConstructor());
+ Code = serialization::DECL_CXX_INHERITED_CONSTRUCTOR;
+ } else {
+ Code = serialization::DECL_CXX_CONSTRUCTOR;
+ }
+
VisitCXXMethodDecl(D);
- Writer.AddDeclRef(D->getInheritedConstructor(), Record);
Record.push_back(D->IsExplicitSpecified);
- Code = serialization::DECL_CXX_CONSTRUCTOR;
}
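
Serializing an inherited constructor now changes the record code itself and emits two extra declaration references before the shared CXXMethodDecl payload, presumably so a reader can allocate the right node before decoding the common fields. A standalone model of that tag-plus-prefix layout (hypothetical names, not clang's API):

    #include <cstdint>
    #include <vector>

    enum RecordCode : uint64_t { CtorRecord = 1, InheritedCtorRecord = 2 };

    // Tag-specific prefix fields are written before the shared payload, so
    // the record code alone tells a reader how to begin decoding.
    RecordCode writeCtorPrefix(std::vector<uint64_t> &Rec, bool Inherited,
                               uint64_t ShadowRef, uint64_t BaseCtorRef) {
      if (!Inherited)
        return CtorRecord;           // no prefix fields in the common case
      Rec.push_back(ShadowRef);      // extra refs precede the shared
      Rec.push_back(BaseCtorRef);    // CXXMethodDecl-style payload
      return InheritedCtorRecord;
    }
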
void ASTDeclWriter::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
VisitCXXMethodDecl(D);
- Writer.AddDeclRef(D->getOperatorDelete(), Record);
+ Record.AddDeclRef(D->getOperatorDelete());
Code = serialization::DECL_CXX_DESTRUCTOR;
}
@@ -1215,11 +1258,11 @@ void ASTDeclWriter::VisitImportDecl(ImportDecl *D) {
ArrayRef<SourceLocation> IdentifierLocs = D->getIdentifierLocs();
Record.push_back(!IdentifierLocs.empty());
if (IdentifierLocs.empty()) {
- Writer.AddSourceLocation(D->getLocEnd(), Record);
+ Record.AddSourceLocation(D->getLocEnd());
Record.push_back(1);
} else {
for (unsigned I = 0, N = IdentifierLocs.size(); I != N; ++I)
- Writer.AddSourceLocation(IdentifierLocs[I], Record);
+ Record.AddSourceLocation(IdentifierLocs[I]);
Record.push_back(IdentifierLocs.size());
}
// Note: the number of source locations must always be the last element in
@@ -1229,7 +1272,7 @@ void ASTDeclWriter::VisitImportDecl(ImportDecl *D) {
void ASTDeclWriter::VisitAccessSpecDecl(AccessSpecDecl *D) {
VisitDecl(D);
- Writer.AddSourceLocation(D->getColonLoc(), Record);
+ Record.AddSourceLocation(D->getColonLoc());
Code = serialization::DECL_ACCESS_SPEC;
}
@@ -1241,15 +1284,14 @@ void ASTDeclWriter::VisitFriendDecl(FriendDecl *D) {
bool hasFriendDecl = D->Friend.is<NamedDecl*>();
Record.push_back(hasFriendDecl);
if (hasFriendDecl)
- Writer.AddDeclRef(D->getFriendDecl(), Record);
+ Record.AddDeclRef(D->getFriendDecl());
else
- Writer.AddTypeSourceInfo(D->getFriendType(), Record);
+ Record.AddTypeSourceInfo(D->getFriendType());
for (unsigned i = 0; i < D->NumTPLists; ++i)
- Writer.AddTemplateParameterList(D->getFriendTypeTemplateParameterList(i),
- Record);
- Writer.AddDeclRef(D->getNextFriend(), Record);
+ Record.AddTemplateParameterList(D->getFriendTypeTemplateParameterList(i));
+ Record.AddDeclRef(D->getNextFriend());
Record.push_back(D->UnsupportedFriend);
- Writer.AddSourceLocation(D->FriendLoc, Record);
+ Record.AddSourceLocation(D->FriendLoc);
Code = serialization::DECL_FRIEND;
}
@@ -1257,21 +1299,21 @@ void ASTDeclWriter::VisitFriendTemplateDecl(FriendTemplateDecl *D) {
VisitDecl(D);
Record.push_back(D->getNumTemplateParameters());
for (unsigned i = 0, e = D->getNumTemplateParameters(); i != e; ++i)
- Writer.AddTemplateParameterList(D->getTemplateParameterList(i), Record);
+ Record.AddTemplateParameterList(D->getTemplateParameterList(i));
Record.push_back(D->getFriendDecl() != nullptr);
if (D->getFriendDecl())
- Writer.AddDeclRef(D->getFriendDecl(), Record);
+ Record.AddDeclRef(D->getFriendDecl());
else
- Writer.AddTypeSourceInfo(D->getFriendType(), Record);
- Writer.AddSourceLocation(D->getFriendLoc(), Record);
+ Record.AddTypeSourceInfo(D->getFriendType());
+ Record.AddSourceLocation(D->getFriendLoc());
Code = serialization::DECL_FRIEND_TEMPLATE;
}
void ASTDeclWriter::VisitTemplateDecl(TemplateDecl *D) {
VisitNamedDecl(D);
- Writer.AddDeclRef(D->getTemplatedDecl(), Record);
- Writer.AddTemplateParameterList(D->getTemplateParameters(), Record);
+ Record.AddDeclRef(D->getTemplatedDecl());
+ Record.AddTemplateParameterList(D->getTemplateParameters());
}
void ASTDeclWriter::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) {
@@ -1281,7 +1323,7 @@ void ASTDeclWriter::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) {
// getCommonPtr() can be used while this is still initializing.
if (D->isFirstDecl()) {
// This declaration owns the 'common' pointer, so serialize that data now.
- Writer.AddDeclRef(D->getInstantiatedFromMemberTemplate(), Record);
+ Record.AddDeclRef(D->getInstantiatedFromMemberTemplate());
if (D->getInstantiatedFromMemberTemplate())
Record.push_back(D->isMemberSpecialization());
}
@@ -1308,28 +1350,27 @@ void ASTDeclWriter::VisitClassTemplateSpecializationDecl(
ClassTemplatePartialSpecializationDecl *> InstFrom
= D->getSpecializedTemplateOrPartial();
if (Decl *InstFromD = InstFrom.dyn_cast<ClassTemplateDecl *>()) {
- Writer.AddDeclRef(InstFromD, Record);
+ Record.AddDeclRef(InstFromD);
} else {
- Writer.AddDeclRef(InstFrom.get<ClassTemplatePartialSpecializationDecl *>(),
- Record);
- Writer.AddTemplateArgumentList(&D->getTemplateInstantiationArgs(), Record);
+ Record.AddDeclRef(InstFrom.get<ClassTemplatePartialSpecializationDecl *>());
+ Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs());
}
- Writer.AddTemplateArgumentList(&D->getTemplateArgs(), Record);
- Writer.AddSourceLocation(D->getPointOfInstantiation(), Record);
+ Record.AddTemplateArgumentList(&D->getTemplateArgs());
+ Record.AddSourceLocation(D->getPointOfInstantiation());
Record.push_back(D->getSpecializationKind());
Record.push_back(D->isCanonicalDecl());
if (D->isCanonicalDecl()) {
// When reading, we'll add it to the folding set of the following template.
- Writer.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl(), Record);
+ Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl());
}
// Explicit info.
- Writer.AddTypeSourceInfo(D->getTypeAsWritten(), Record);
+ Record.AddTypeSourceInfo(D->getTypeAsWritten());
if (D->getTypeAsWritten()) {
- Writer.AddSourceLocation(D->getExternLoc(), Record);
- Writer.AddSourceLocation(D->getTemplateKeywordLoc(), Record);
+ Record.AddSourceLocation(D->getExternLoc());
+ Record.AddSourceLocation(D->getTemplateKeywordLoc());
}
Code = serialization::DECL_CLASS_TEMPLATE_SPECIALIZATION;
@@ -1339,12 +1380,12 @@ void ASTDeclWriter::VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D) {
VisitClassTemplateSpecializationDecl(D);
- Writer.AddTemplateParameterList(D->getTemplateParameters(), Record);
- Writer.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten(), Record);
+ Record.AddTemplateParameterList(D->getTemplateParameters());
+ Record.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten());
// These are read/set from/to the first declaration.
if (D->getPreviousDecl() == nullptr) {
- Writer.AddDeclRef(D->getInstantiatedFromMember(), Record);
+ Record.AddDeclRef(D->getInstantiatedFromMember());
Record.push_back(D->isMemberSpecialization());
}
@@ -1368,28 +1409,27 @@ void ASTDeclWriter::VisitVarTemplateSpecializationDecl(
llvm::PointerUnion<VarTemplateDecl *, VarTemplatePartialSpecializationDecl *>
InstFrom = D->getSpecializedTemplateOrPartial();
if (Decl *InstFromD = InstFrom.dyn_cast<VarTemplateDecl *>()) {
- Writer.AddDeclRef(InstFromD, Record);
+ Record.AddDeclRef(InstFromD);
} else {
- Writer.AddDeclRef(InstFrom.get<VarTemplatePartialSpecializationDecl *>(),
- Record);
- Writer.AddTemplateArgumentList(&D->getTemplateInstantiationArgs(), Record);
+ Record.AddDeclRef(InstFrom.get<VarTemplatePartialSpecializationDecl *>());
+ Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs());
}
// Explicit info.
- Writer.AddTypeSourceInfo(D->getTypeAsWritten(), Record);
+ Record.AddTypeSourceInfo(D->getTypeAsWritten());
if (D->getTypeAsWritten()) {
- Writer.AddSourceLocation(D->getExternLoc(), Record);
- Writer.AddSourceLocation(D->getTemplateKeywordLoc(), Record);
+ Record.AddSourceLocation(D->getExternLoc());
+ Record.AddSourceLocation(D->getTemplateKeywordLoc());
}
- Writer.AddTemplateArgumentList(&D->getTemplateArgs(), Record);
- Writer.AddSourceLocation(D->getPointOfInstantiation(), Record);
+ Record.AddTemplateArgumentList(&D->getTemplateArgs());
+ Record.AddSourceLocation(D->getPointOfInstantiation());
Record.push_back(D->getSpecializationKind());
Record.push_back(D->isCanonicalDecl());
if (D->isCanonicalDecl()) {
// When reading, we'll add it to the folding set of the following template.
- Writer.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl(), Record);
+ Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl());
}
Code = serialization::DECL_VAR_TEMPLATE_SPECIALIZATION;
@@ -1399,12 +1439,12 @@ void ASTDeclWriter::VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D) {
VisitVarTemplateSpecializationDecl(D);
- Writer.AddTemplateParameterList(D->getTemplateParameters(), Record);
- Writer.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten(), Record);
+ Record.AddTemplateParameterList(D->getTemplateParameters());
+ Record.AddASTTemplateArgumentListInfo(D->getTemplateArgsAsWritten());
// These are read/set from/to the first declaration.
if (D->getPreviousDecl() == nullptr) {
- Writer.AddDeclRef(D->getInstantiatedFromMember(), Record);
+ Record.AddDeclRef(D->getInstantiatedFromMember());
Record.push_back(D->isMemberSpecialization());
}
@@ -1414,7 +1454,7 @@ void ASTDeclWriter::VisitVarTemplatePartialSpecializationDecl(
void ASTDeclWriter::VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D) {
VisitDecl(D);
- Writer.AddDeclRef(D->getSpecialization(), Record);
+ Record.AddDeclRef(D->getSpecialization());
Code = serialization::DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION;
}
@@ -1436,7 +1476,7 @@ void ASTDeclWriter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
!D->defaultArgumentWasInherited();
Record.push_back(OwnsDefaultArg);
if (OwnsDefaultArg)
- Writer.AddTypeSourceInfo(D->getDefaultArgumentInfo(), Record);
+ Record.AddTypeSourceInfo(D->getDefaultArgumentInfo());
Code = serialization::DECL_TEMPLATE_TYPE_PARM;
}
@@ -1455,8 +1495,8 @@ void ASTDeclWriter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
if (D->isExpandedParameterPack()) {
for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
- Writer.AddTypeRef(D->getExpansionType(I), Record);
- Writer.AddTypeSourceInfo(D->getExpansionTypeSourceInfo(I), Record);
+ Record.AddTypeRef(D->getExpansionType(I));
+ Record.AddTypeSourceInfo(D->getExpansionTypeSourceInfo(I));
}
Code = serialization::DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK;
@@ -1467,7 +1507,7 @@ void ASTDeclWriter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
!D->defaultArgumentWasInherited();
Record.push_back(OwnsDefaultArg);
if (OwnsDefaultArg)
- Writer.AddStmt(D->getDefaultArgument());
+ Record.AddStmt(D->getDefaultArgument());
Code = serialization::DECL_NON_TYPE_TEMPLATE_PARM;
}
}
@@ -1487,8 +1527,7 @@ void ASTDeclWriter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
if (D->isExpandedParameterPack()) {
for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
I != N; ++I)
- Writer.AddTemplateParameterList(D->getExpansionTemplateParameters(I),
- Record);
+ Record.AddTemplateParameterList(D->getExpansionTemplateParameters(I));
Code = serialization::DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK;
} else {
// Rest of TemplateTemplateParmDecl.
@@ -1497,7 +1536,7 @@ void ASTDeclWriter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
!D->defaultArgumentWasInherited();
Record.push_back(OwnsDefaultArg);
if (OwnsDefaultArg)
- Writer.AddTemplateArgumentLoc(D->getDefaultArgument(), Record);
+ Record.AddTemplateArgumentLoc(D->getDefaultArgument());
Code = serialization::DECL_TEMPLATE_TEMPLATE_PARM;
}
}
@@ -1509,41 +1548,20 @@ void ASTDeclWriter::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
void ASTDeclWriter::VisitStaticAssertDecl(StaticAssertDecl *D) {
VisitDecl(D);
- Writer.AddStmt(D->getAssertExpr());
+ Record.AddStmt(D->getAssertExpr());
Record.push_back(D->isFailed());
- Writer.AddStmt(D->getMessage());
- Writer.AddSourceLocation(D->getRParenLoc(), Record);
+ Record.AddStmt(D->getMessage());
+ Record.AddSourceLocation(D->getRParenLoc());
Code = serialization::DECL_STATIC_ASSERT;
}
/// \brief Emit the DeclContext part of a declaration context decl.
-///
-/// \param LexicalOffset the offset at which the DECL_CONTEXT_LEXICAL
-/// block for this declaration context is stored. May be 0 to indicate
-/// that there are no declarations stored within this context.
-///
-/// \param VisibleOffset the offset at which the DECL_CONTEXT_VISIBLE
-/// block for this declaration context is stored. May be 0 to indicate
-/// that there are no declarations visible from this context. Note
-/// that this value will not be emitted for non-primary declaration
-/// contexts.
-void ASTDeclWriter::VisitDeclContext(DeclContext *DC, uint64_t LexicalOffset,
- uint64_t VisibleOffset) {
- Record.push_back(LexicalOffset);
- Record.push_back(VisibleOffset);
+void ASTDeclWriter::VisitDeclContext(DeclContext *DC) {
+ Record.AddOffset(Writer.WriteDeclContextLexicalBlock(Context, DC));
+ Record.AddOffset(Writer.WriteDeclContextVisibleBlock(Context, DC));
}
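
VisitDeclContext no longer receives precomputed offsets: it writes the lexical and visible blocks itself and records the returned bit offsets inline, with 0 still meaning "no block". A minimal sketch of the emit-then-record-offset pattern, modeling bit positions as element indices:

    #include <cstdint>
    #include <vector>

    using Stream = std::vector<uint64_t>;

    // Emit a nested block first, then remember where it landed; 0 is kept
    // free to mean "this context stores nothing".
    uint64_t emitBlock(Stream &S, const Stream &Block) {
      if (Block.empty())
        return 0;
      uint64_t Offset = S.size() + 1;   // 1-based so 0 stays "absent"
      S.insert(S.end(), Block.begin(), Block.end());
      return Offset;
    }

    void visitDeclContext(Stream &S, Stream &Record,
                          const Stream &Lexical, const Stream &Visible) {
      Record.push_back(emitBlock(S, Lexical));   // AddOffset analogue
      Record.push_back(emitBlock(S, Visible));
    }
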
const Decl *ASTWriter::getFirstLocalDecl(const Decl *D) {
- /// \brief Is this a local declaration (that is, one that will be written to
- /// our AST file)? This is the case for declarations that are neither imported
- /// from another AST file nor predefined.
- auto IsLocalDecl = [&](const Decl *D) -> bool {
- if (D->isFromASTFile())
- return false;
- auto I = DeclIDs.find(D);
- return (I == DeclIDs.end() || I->second >= NUM_PREDEF_DECL_IDS);
- };
-
assert(IsLocalDecl(D) && "expected a local declaration");
const Decl *Canon = D->getCanonicalDecl();
@@ -1569,7 +1587,7 @@ void ASTDeclWriter::VisitRedeclarable(Redeclarable<T> *D) {
assert(isRedeclarableDeclKind(DAsT->getKind()) &&
"Not considered redeclarable?");
- Writer.AddDeclRef(First, Record);
+ Record.AddDeclRef(First);
// Write out a list of local redeclarations of this declaration if it's the
// first local declaration in the chain.
@@ -1587,23 +1605,22 @@ void ASTDeclWriter::VisitRedeclarable(Redeclarable<T> *D) {
// Collect the set of local redeclarations of this declaration, from
// newest to oldest.
- RecordData LocalRedecls;
+ ASTWriter::RecordData LocalRedecls;
+ ASTRecordWriter LocalRedeclWriter(Record, LocalRedecls);
for (const Decl *Prev = FirstLocal->getMostRecentDecl();
Prev != FirstLocal; Prev = Prev->getPreviousDecl())
if (!Prev->isFromASTFile())
- Writer.AddDeclRef(Prev, LocalRedecls);
+ LocalRedeclWriter.AddDeclRef(Prev);
// If we have any redecls, write them now as a separate record preceding
// the declaration itself.
if (LocalRedecls.empty())
Record.push_back(0);
- else {
- Record.push_back(Writer.Stream.GetCurrentBitNo());
- Writer.Stream.EmitRecord(LOCAL_REDECLARATIONS, LocalRedecls);
- }
+ else
+ Record.AddOffset(LocalRedeclWriter.Emit(LOCAL_REDECLARATIONS));
} else {
Record.push_back(0);
- Writer.AddDeclRef(FirstLocal, Record);
+ Record.AddDeclRef(FirstLocal);
}
// Make sure that we serialize both the previous and the most-recent
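
The redeclaration chain is walked from the most recent declaration back to the first local one, keeping only declarations that belong to this AST file; a non-empty list becomes a separate LOCAL_REDECLARATIONS record whose offset precedes the declaration, and an empty one degenerates to a 0 placeholder. The walk in isolation, over a hypothetical linked node type:

    #include <cstdint>
    #include <vector>

    struct DeclNode {
      const DeclNode *Previous = nullptr; // next-older declaration
      bool FromASTFile = false;           // imported, hence not local
      uint64_t ID = 0;
    };

    // Newest-to-oldest walk, stopping at (and excluding) FirstLocal itself;
    // an empty result means a 0 placeholder is written instead of an offset.
    std::vector<uint64_t> collectLocalRedecls(const DeclNode *MostRecent,
                                              const DeclNode *FirstLocal) {
      std::vector<uint64_t> IDs;
      for (const DeclNode *P = MostRecent; P != FirstLocal; P = P->Previous)
        if (!P->FromASTFile)
          IDs.push_back(P->ID);
      return IDs;
    }
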
@@ -1624,10 +1641,24 @@ void ASTDeclWriter::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
Record.push_back(D->varlist_size());
VisitDecl(D);
for (auto *I : D->varlists())
- Writer.AddStmt(I);
+ Record.AddStmt(I);
Code = serialization::DECL_OMP_THREADPRIVATE;
}
+void ASTDeclWriter::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) {
+ VisitValueDecl(D);
+ Record.AddSourceLocation(D->getLocStart());
+ Record.AddStmt(D->getCombiner());
+ Record.AddStmt(D->getInitializer());
+ Record.AddDeclRef(D->getPrevDeclInScope());
+ Code = serialization::DECL_OMP_DECLARE_REDUCTION;
+}
+
+void ASTDeclWriter::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
+ VisitVarDecl(D);
+ Code = serialization::DECL_OMP_CAPTUREDEXPR;
+}
+
//===----------------------------------------------------------------------===//
// ASTWriter Implementation
//===----------------------------------------------------------------------===//
@@ -1738,6 +1769,7 @@ void ASTWriter::WriteDeclAbbrevs() {
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFreeStanding
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsCompleteDefinitionRequired
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation
+ Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation
Abv->Add(BitCodeAbbrevOp(0)); // ExtInfoKind
// EnumDecl
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddTypeRef
@@ -1786,6 +1818,7 @@ void ASTWriter::WriteDeclAbbrevs() {
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFreeStanding
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsCompleteDefinitionRequired
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation
+ Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation
Abv->Add(BitCodeAbbrevOp(0)); // ExtInfoKind
// RecordDecl
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // FlexibleArrayMember
@@ -1907,6 +1940,8 @@ void ASTWriter::WriteDeclAbbrevs() {
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isNRVOVariable
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isCXXForRangeDecl
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isARCPseudoStrong
+ Abv->Add(BitCodeAbbrevOp(0)); // isInline
+ Abv->Add(BitCodeAbbrevOp(0)); // isInlineSpecified
Abv->Add(BitCodeAbbrevOp(0)); // isConstexpr
Abv->Add(BitCodeAbbrevOp(0)); // isInitCapture
Abv->Add(BitCodeAbbrevOp(0)); // isPrevDeclInSameScope
@@ -2082,8 +2117,10 @@ static bool isRequiredDecl(const Decl *D, ASTContext &Context,
// An ObjCMethodDecl is never considered as "required" because its
// implementation container always is.
- // File scoped assembly or obj-c implementation must be seen.
- if (isa<FileScopeAsmDecl>(D) || isa<ObjCImplDecl>(D))
+ // File scoped assembly or obj-c or OMP declare target implementation must be
+ // seen.
+ if (isa<FileScopeAsmDecl>(D) || isa<ObjCImplDecl>(D) ||
+ D->hasAttr<OMPDeclareTargetDeclAttr>())
return true;
// ImportDecl is used by codegen to determine the set of imported modules to
@@ -2096,12 +2133,6 @@ static bool isRequiredDecl(const Decl *D, ASTContext &Context,
}
void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) {
- // Switch case IDs are per Decl.
- ClearSwitchCaseIDs();
-
- RecordData Record;
- ASTDeclWriter W(*this, Context, Record);
-
// Determine the ID for this declaration.
serialization::DeclID ID;
assert(!D->isFromASTFile() && "should not be emitting imported decl");
@@ -2111,66 +2142,34 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) {
ID = IDR;
- bool isReplacingADecl = ID < FirstDeclID;
-
- // If this declaration is also a DeclContext, write blocks for the
- // declarations that lexically stored inside its context and those
- // declarations that are visible from its context. These blocks
- // are written before the declaration itself so that we can put
- // their offsets into the record for the declaration.
- uint64_t LexicalOffset = 0;
- uint64_t VisibleOffset = 0;
- DeclContext *DC = dyn_cast<DeclContext>(D);
- if (DC) {
- if (isReplacingADecl) {
- // It is replacing a decl from a chained PCH; make sure that the
- // DeclContext is fully loaded.
- if (DC->hasExternalLexicalStorage())
- DC->LoadLexicalDeclsFromExternalStorage();
- if (DC->hasExternalVisibleStorage())
- Chain->completeVisibleDeclsMap(DC);
- }
- LexicalOffset = WriteDeclContextLexicalBlock(Context, DC);
- VisibleOffset = WriteDeclContextVisibleBlock(Context, DC);
- }
+ assert(ID >= FirstDeclID && "invalid decl ID");
+ RecordData Record;
+ ASTDeclWriter W(*this, Context, Record);
+
// Build a record for this declaration
- Record.clear();
- W.Code = (serialization::DeclCode)0;
- W.AbbrevToUse = 0;
W.Visit(D);
- if (DC) W.VisitDeclContext(DC, LexicalOffset, VisibleOffset);
- if (isReplacingADecl) {
- // We're replacing a decl in a previous file.
- ReplacedDecls.push_back(ReplacedDeclInfo(ID, Stream.GetCurrentBitNo(),
- D->getLocation()));
+ // Emit this declaration to the bitstream.
+ uint64_t Offset = W.Emit(D);
+
+ // Record the offset for this declaration
+ SourceLocation Loc = D->getLocation();
+ unsigned Index = ID - FirstDeclID;
+ if (DeclOffsets.size() == Index)
+ DeclOffsets.push_back(DeclOffset(Loc, Offset));
+ else if (DeclOffsets.size() < Index) {
+ // FIXME: Can/should this happen?
+ DeclOffsets.resize(Index+1);
+ DeclOffsets[Index].setLocation(Loc);
+ DeclOffsets[Index].BitOffset = Offset;
} else {
- unsigned Index = ID - FirstDeclID;
-
- // Record the offset for this declaration
- SourceLocation Loc = D->getLocation();
- if (DeclOffsets.size() == Index)
- DeclOffsets.push_back(DeclOffset(Loc, Stream.GetCurrentBitNo()));
- else if (DeclOffsets.size() < Index) {
- DeclOffsets.resize(Index+1);
- DeclOffsets[Index].setLocation(Loc);
- DeclOffsets[Index].BitOffset = Stream.GetCurrentBitNo();
- }
-
- SourceManager &SM = Context.getSourceManager();
- if (Loc.isValid() && SM.isLocalSourceLocation(Loc))
- associateDeclWithFile(D, ID);
+ llvm_unreachable("declarations should be emitted in ID order");
}
- if (!W.Code)
- llvm::report_fatal_error(StringRef("unexpected declaration kind '") +
- D->getDeclKindName() + "'");
- Stream.EmitRecord(W.Code, Record, W.AbbrevToUse);
-
- // Flush any expressions, base specifiers, and ctor initializers that
- // were written as part of this declaration.
- FlushPendingAfterDecl();
+ SourceManager &SM = Context.getSourceManager();
+ if (Loc.isValid() && SM.isLocalSourceLocation(Loc))
+ associateDeclWithFile(D, ID);
// Note declarations that should be deserialized eagerly so that we can add
// them to a record in the AST file later.
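
With the old decl-replacement path removed, WriteDecl can assume densely assigned IDs: the offsets table is indexed by ID - FirstDeclID, and out-of-order emission is now llvm_unreachable rather than a silent resize. The invariant in miniature:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct DeclOffset { uint64_t Loc = 0, BitOffset = 0; };

    // One entry per local declaration, indexed densely by ID - FirstDeclID;
    // anything out of order would corrupt every later lookup.
    void recordOffset(std::vector<DeclOffset> &Table, uint64_t ID,
                      uint64_t FirstDeclID, DeclOffset O) {
      assert(ID - FirstDeclID == Table.size() &&
             "declarations should be emitted in ID order");
      Table.push_back(O);
    }
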
@@ -2178,10 +2177,16 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) {
EagerlyDeserializedDecls.push_back(ID);
}
-void ASTWriter::AddFunctionDefinition(const FunctionDecl *FD,
- RecordData &Record) {
- ClearSwitchCaseIDs();
+void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) {
+ // Switch case IDs are per function body.
+ Writer->ClearSwitchCaseIDs();
- ASTDeclWriter W(*this, FD->getASTContext(), Record);
- W.AddFunctionDefinition(FD);
+ assert(FD->doesThisDeclarationHaveABody());
+ if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
+ Record->push_back(CD->getNumCtorInitializers());
+ if (CD->getNumCtorInitializers())
+ AddCXXCtorInitializers(
+ llvm::makeArrayRef(CD->init_begin(), CD->init_end()));
+ }
+ AddStmt(FD->getBody());
}
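
A function definition record now leads with constructor-initializer data when the declaration is a constructor, so a reader knows how many initializers to pull before the body statement arrives. A simplified model of that layout (initializers and the body reduced to plain integers):

    #include <cstdint>
    #include <vector>

    // Record layout: [NumCtorInits, Init0 ... InitN-1,] Body. Initializers
    // and the body are structured records in reality, integers here.
    void writeDefinition(std::vector<uint64_t> &Rec, bool IsCtor,
                         const std::vector<uint64_t> &Inits, uint64_t Body) {
      if (IsCtor) {
        Rec.push_back(Inits.size());                   // count comes first
        Rec.insert(Rec.end(), Inits.begin(), Inits.end());
      }
      Rec.push_back(Body);
    }
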
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp
index 000a2185f5f0..84e718e9ef23 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -29,16 +29,24 @@ using namespace clang;
namespace clang {
class ASTStmtWriter : public StmtVisitor<ASTStmtWriter, void> {
- friend class OMPClauseWriter;
ASTWriter &Writer;
- ASTWriter::RecordData &Record;
+ ASTRecordWriter Record;
- public:
serialization::StmtCode Code;
unsigned AbbrevToUse;
+ public:
ASTStmtWriter(ASTWriter &Writer, ASTWriter::RecordData &Record)
- : Writer(Writer), Record(Record) { }
+ : Writer(Writer), Record(Writer, Record),
+ Code(serialization::STMT_NULL_PTR), AbbrevToUse(0) {}
+
+ ASTStmtWriter(const ASTStmtWriter&) = delete;
+
+ uint64_t Emit() {
+ assert(Code != serialization::STMT_NULL_PTR &&
+ "unhandled sub-statement writing AST file");
+ return Record.EmitStmt(Code, AbbrevToUse);
+ }
void AddTemplateKWAndArgsInfo(const ASTTemplateKWAndArgsInfo &ArgInfo,
const TemplateArgumentLoc *Args);
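
Each Visit method must assign Code before Emit() runs; the constructor seeds the STMT_NULL_PTR sentinel and the assert replaces the report_fatal_error check that WriteDecl used to perform after the fact. The contract, reduced to a standalone type:

    #include <cassert>
    #include <cstdint>

    enum StmtCode : uint64_t { NullSentinel = 0, SomeStmt = 1 };

    struct StmtEmitter {
      StmtCode Code = NullSentinel; // seeded like STMT_NULL_PTR above
      uint64_t emit() {
        assert(Code != NullSentinel && "unhandled sub-statement");
        return static_cast<uint64_t>(Code); // stand-in for EmitStmt(Code, ...)
      }
    };
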
@@ -52,11 +60,11 @@ namespace clang {
void ASTStmtWriter::AddTemplateKWAndArgsInfo(
const ASTTemplateKWAndArgsInfo &ArgInfo, const TemplateArgumentLoc *Args) {
- Writer.AddSourceLocation(ArgInfo.TemplateKWLoc, Record);
- Writer.AddSourceLocation(ArgInfo.LAngleLoc, Record);
- Writer.AddSourceLocation(ArgInfo.RAngleLoc, Record);
+ Record.AddSourceLocation(ArgInfo.TemplateKWLoc);
+ Record.AddSourceLocation(ArgInfo.LAngleLoc);
+ Record.AddSourceLocation(ArgInfo.RAngleLoc);
for (unsigned i = 0; i != ArgInfo.NumTemplateArgs; ++i)
- Writer.AddTemplateArgumentLoc(Args[i], Record);
+ Record.AddTemplateArgumentLoc(Args[i]);
}
void ASTStmtWriter::VisitStmt(Stmt *S) {
@@ -64,7 +72,7 @@ void ASTStmtWriter::VisitStmt(Stmt *S) {
void ASTStmtWriter::VisitNullStmt(NullStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getSemiLoc(), Record);
+ Record.AddSourceLocation(S->getSemiLoc());
Record.push_back(S->HasLeadingEmptyMacro);
Code = serialization::STMT_NULL;
}
@@ -73,68 +81,71 @@ void ASTStmtWriter::VisitCompoundStmt(CompoundStmt *S) {
VisitStmt(S);
Record.push_back(S->size());
for (auto *CS : S->body())
- Writer.AddStmt(CS);
- Writer.AddSourceLocation(S->getLBracLoc(), Record);
- Writer.AddSourceLocation(S->getRBracLoc(), Record);
+ Record.AddStmt(CS);
+ Record.AddSourceLocation(S->getLBracLoc());
+ Record.AddSourceLocation(S->getRBracLoc());
Code = serialization::STMT_COMPOUND;
}
void ASTStmtWriter::VisitSwitchCase(SwitchCase *S) {
VisitStmt(S);
Record.push_back(Writer.getSwitchCaseID(S));
- Writer.AddSourceLocation(S->getKeywordLoc(), Record);
- Writer.AddSourceLocation(S->getColonLoc(), Record);
+ Record.AddSourceLocation(S->getKeywordLoc());
+ Record.AddSourceLocation(S->getColonLoc());
}
void ASTStmtWriter::VisitCaseStmt(CaseStmt *S) {
VisitSwitchCase(S);
- Writer.AddStmt(S->getLHS());
- Writer.AddStmt(S->getRHS());
- Writer.AddStmt(S->getSubStmt());
- Writer.AddSourceLocation(S->getEllipsisLoc(), Record);
+ Record.AddStmt(S->getLHS());
+ Record.AddStmt(S->getRHS());
+ Record.AddStmt(S->getSubStmt());
+ Record.AddSourceLocation(S->getEllipsisLoc());
Code = serialization::STMT_CASE;
}
void ASTStmtWriter::VisitDefaultStmt(DefaultStmt *S) {
VisitSwitchCase(S);
- Writer.AddStmt(S->getSubStmt());
+ Record.AddStmt(S->getSubStmt());
Code = serialization::STMT_DEFAULT;
}
void ASTStmtWriter::VisitLabelStmt(LabelStmt *S) {
VisitStmt(S);
- Writer.AddDeclRef(S->getDecl(), Record);
- Writer.AddStmt(S->getSubStmt());
- Writer.AddSourceLocation(S->getIdentLoc(), Record);
+ Record.AddDeclRef(S->getDecl());
+ Record.AddStmt(S->getSubStmt());
+ Record.AddSourceLocation(S->getIdentLoc());
Code = serialization::STMT_LABEL;
}
void ASTStmtWriter::VisitAttributedStmt(AttributedStmt *S) {
VisitStmt(S);
Record.push_back(S->getAttrs().size());
- Writer.WriteAttributes(S->getAttrs(), Record);
- Writer.AddStmt(S->getSubStmt());
- Writer.AddSourceLocation(S->getAttrLoc(), Record);
+ Record.AddAttributes(S->getAttrs());
+ Record.AddStmt(S->getSubStmt());
+ Record.AddSourceLocation(S->getAttrLoc());
Code = serialization::STMT_ATTRIBUTED;
}
void ASTStmtWriter::VisitIfStmt(IfStmt *S) {
VisitStmt(S);
- Writer.AddDeclRef(S->getConditionVariable(), Record);
- Writer.AddStmt(S->getCond());
- Writer.AddStmt(S->getThen());
- Writer.AddStmt(S->getElse());
- Writer.AddSourceLocation(S->getIfLoc(), Record);
- Writer.AddSourceLocation(S->getElseLoc(), Record);
+ Record.push_back(S->isConstexpr());
+ Record.AddStmt(S->getInit());
+ Record.AddDeclRef(S->getConditionVariable());
+ Record.AddStmt(S->getCond());
+ Record.AddStmt(S->getThen());
+ Record.AddStmt(S->getElse());
+ Record.AddSourceLocation(S->getIfLoc());
+ Record.AddSourceLocation(S->getElseLoc());
Code = serialization::STMT_IF;
}
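
IfStmt serialization gains two fields here, the C++17 constexpr flag and the init-statement, and field order is the on-disk contract: the matching reader (not shown in this diff) must pull them in exactly this sequence. A matched writer/reader pair in miniature, with statements modeled as integers:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct MiniIf { bool Constexpr; uint64_t Init, Cond, Then, Else; };

    void write(std::vector<uint64_t> &R, const MiniIf &S) {
      R.push_back(S.Constexpr);
      R.push_back(S.Init);   // new field: must precede Cond on both sides
      R.push_back(S.Cond);
      R.push_back(S.Then);
      R.push_back(S.Else);
    }

    MiniIf read(const std::vector<uint64_t> &R, size_t &I) {
      MiniIf S;
      S.Constexpr = R[I++] != 0;
      S.Init = R[I++];       // same order, or the stream is misread
      S.Cond = R[I++];
      S.Then = R[I++];
      S.Else = R[I++];
      return S;
    }
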
void ASTStmtWriter::VisitSwitchStmt(SwitchStmt *S) {
VisitStmt(S);
- Writer.AddDeclRef(S->getConditionVariable(), Record);
- Writer.AddStmt(S->getCond());
- Writer.AddStmt(S->getBody());
- Writer.AddSourceLocation(S->getSwitchLoc(), Record);
+ Record.AddStmt(S->getInit());
+ Record.AddDeclRef(S->getConditionVariable());
+ Record.AddStmt(S->getCond());
+ Record.AddStmt(S->getBody());
+ Record.AddSourceLocation(S->getSwitchLoc());
Record.push_back(S->isAllEnumCasesCovered());
for (SwitchCase *SC = S->getSwitchCaseList(); SC;
SC = SC->getNextSwitchCase())
@@ -144,79 +155,79 @@ void ASTStmtWriter::VisitSwitchStmt(SwitchStmt *S) {
void ASTStmtWriter::VisitWhileStmt(WhileStmt *S) {
VisitStmt(S);
- Writer.AddDeclRef(S->getConditionVariable(), Record);
- Writer.AddStmt(S->getCond());
- Writer.AddStmt(S->getBody());
- Writer.AddSourceLocation(S->getWhileLoc(), Record);
+ Record.AddDeclRef(S->getConditionVariable());
+ Record.AddStmt(S->getCond());
+ Record.AddStmt(S->getBody());
+ Record.AddSourceLocation(S->getWhileLoc());
Code = serialization::STMT_WHILE;
}
void ASTStmtWriter::VisitDoStmt(DoStmt *S) {
VisitStmt(S);
- Writer.AddStmt(S->getCond());
- Writer.AddStmt(S->getBody());
- Writer.AddSourceLocation(S->getDoLoc(), Record);
- Writer.AddSourceLocation(S->getWhileLoc(), Record);
- Writer.AddSourceLocation(S->getRParenLoc(), Record);
+ Record.AddStmt(S->getCond());
+ Record.AddStmt(S->getBody());
+ Record.AddSourceLocation(S->getDoLoc());
+ Record.AddSourceLocation(S->getWhileLoc());
+ Record.AddSourceLocation(S->getRParenLoc());
Code = serialization::STMT_DO;
}
void ASTStmtWriter::VisitForStmt(ForStmt *S) {
VisitStmt(S);
- Writer.AddStmt(S->getInit());
- Writer.AddStmt(S->getCond());
- Writer.AddDeclRef(S->getConditionVariable(), Record);
- Writer.AddStmt(S->getInc());
- Writer.AddStmt(S->getBody());
- Writer.AddSourceLocation(S->getForLoc(), Record);
- Writer.AddSourceLocation(S->getLParenLoc(), Record);
- Writer.AddSourceLocation(S->getRParenLoc(), Record);
+ Record.AddStmt(S->getInit());
+ Record.AddStmt(S->getCond());
+ Record.AddDeclRef(S->getConditionVariable());
+ Record.AddStmt(S->getInc());
+ Record.AddStmt(S->getBody());
+ Record.AddSourceLocation(S->getForLoc());
+ Record.AddSourceLocation(S->getLParenLoc());
+ Record.AddSourceLocation(S->getRParenLoc());
Code = serialization::STMT_FOR;
}
void ASTStmtWriter::VisitGotoStmt(GotoStmt *S) {
VisitStmt(S);
- Writer.AddDeclRef(S->getLabel(), Record);
- Writer.AddSourceLocation(S->getGotoLoc(), Record);
- Writer.AddSourceLocation(S->getLabelLoc(), Record);
+ Record.AddDeclRef(S->getLabel());
+ Record.AddSourceLocation(S->getGotoLoc());
+ Record.AddSourceLocation(S->getLabelLoc());
Code = serialization::STMT_GOTO;
}
void ASTStmtWriter::VisitIndirectGotoStmt(IndirectGotoStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getGotoLoc(), Record);
- Writer.AddSourceLocation(S->getStarLoc(), Record);
- Writer.AddStmt(S->getTarget());
+ Record.AddSourceLocation(S->getGotoLoc());
+ Record.AddSourceLocation(S->getStarLoc());
+ Record.AddStmt(S->getTarget());
Code = serialization::STMT_INDIRECT_GOTO;
}
void ASTStmtWriter::VisitContinueStmt(ContinueStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getContinueLoc(), Record);
+ Record.AddSourceLocation(S->getContinueLoc());
Code = serialization::STMT_CONTINUE;
}
void ASTStmtWriter::VisitBreakStmt(BreakStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getBreakLoc(), Record);
+ Record.AddSourceLocation(S->getBreakLoc());
Code = serialization::STMT_BREAK;
}
void ASTStmtWriter::VisitReturnStmt(ReturnStmt *S) {
VisitStmt(S);
- Writer.AddStmt(S->getRetValue());
- Writer.AddSourceLocation(S->getReturnLoc(), Record);
- Writer.AddDeclRef(S->getNRVOCandidate(), Record);
+ Record.AddStmt(S->getRetValue());
+ Record.AddSourceLocation(S->getReturnLoc());
+ Record.AddDeclRef(S->getNRVOCandidate());
Code = serialization::STMT_RETURN;
}
void ASTStmtWriter::VisitDeclStmt(DeclStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getStartLoc(), Record);
- Writer.AddSourceLocation(S->getEndLoc(), Record);
+ Record.AddSourceLocation(S->getStartLoc());
+ Record.AddSourceLocation(S->getEndLoc());
DeclGroupRef DG = S->getDeclGroup();
for (DeclGroupRef::iterator D = DG.begin(), DEnd = DG.end(); D != DEnd; ++D)
- Writer.AddDeclRef(*D, Record);
+ Record.AddDeclRef(*D);
Code = serialization::STMT_DECL;
}
@@ -225,64 +236,65 @@ void ASTStmtWriter::VisitAsmStmt(AsmStmt *S) {
Record.push_back(S->getNumOutputs());
Record.push_back(S->getNumInputs());
Record.push_back(S->getNumClobbers());
- Writer.AddSourceLocation(S->getAsmLoc(), Record);
+ Record.AddSourceLocation(S->getAsmLoc());
Record.push_back(S->isVolatile());
Record.push_back(S->isSimple());
}
void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
VisitAsmStmt(S);
- Writer.AddSourceLocation(S->getRParenLoc(), Record);
- Writer.AddStmt(S->getAsmString());
+ Record.AddSourceLocation(S->getRParenLoc());
+ Record.AddStmt(S->getAsmString());
// Outputs
for (unsigned I = 0, N = S->getNumOutputs(); I != N; ++I) {
- Writer.AddIdentifierRef(S->getOutputIdentifier(I), Record);
- Writer.AddStmt(S->getOutputConstraintLiteral(I));
- Writer.AddStmt(S->getOutputExpr(I));
+ Record.AddIdentifierRef(S->getOutputIdentifier(I));
+ Record.AddStmt(S->getOutputConstraintLiteral(I));
+ Record.AddStmt(S->getOutputExpr(I));
}
// Inputs
for (unsigned I = 0, N = S->getNumInputs(); I != N; ++I) {
- Writer.AddIdentifierRef(S->getInputIdentifier(I), Record);
- Writer.AddStmt(S->getInputConstraintLiteral(I));
- Writer.AddStmt(S->getInputExpr(I));
+ Record.AddIdentifierRef(S->getInputIdentifier(I));
+ Record.AddStmt(S->getInputConstraintLiteral(I));
+ Record.AddStmt(S->getInputExpr(I));
}
// Clobbers
for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
- Writer.AddStmt(S->getClobberStringLiteral(I));
+ Record.AddStmt(S->getClobberStringLiteral(I));
Code = serialization::STMT_GCCASM;
}
void ASTStmtWriter::VisitMSAsmStmt(MSAsmStmt *S) {
VisitAsmStmt(S);
- Writer.AddSourceLocation(S->getLBraceLoc(), Record);
- Writer.AddSourceLocation(S->getEndLoc(), Record);
+ Record.AddSourceLocation(S->getLBraceLoc());
+ Record.AddSourceLocation(S->getEndLoc());
Record.push_back(S->getNumAsmToks());
- Writer.AddString(S->getAsmString(), Record);
+ Record.AddString(S->getAsmString());
// Tokens
for (unsigned I = 0, N = S->getNumAsmToks(); I != N; ++I) {
- Writer.AddToken(S->getAsmToks()[I], Record);
+ // FIXME: Move this to ASTRecordWriter?
+ Writer.AddToken(S->getAsmToks()[I], Record.getRecordData());
}
// Clobbers
for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I) {
- Writer.AddString(S->getClobber(I), Record);
+ Record.AddString(S->getClobber(I));
}
// Outputs
for (unsigned I = 0, N = S->getNumOutputs(); I != N; ++I) {
- Writer.AddStmt(S->getOutputExpr(I));
- Writer.AddString(S->getOutputConstraint(I), Record);
+ Record.AddStmt(S->getOutputExpr(I));
+ Record.AddString(S->getOutputConstraint(I));
}
// Inputs
for (unsigned I = 0, N = S->getNumInputs(); I != N; ++I) {
- Writer.AddStmt(S->getInputExpr(I));
- Writer.AddString(S->getInputConstraint(I), Record);
+ Record.AddStmt(S->getInputExpr(I));
+ Record.AddString(S->getInputConstraint(I));
}
Code = serialization::STMT_MSASM;
@@ -314,26 +326,26 @@ void ASTStmtWriter::VisitCapturedStmt(CapturedStmt *S) {
Record.push_back(std::distance(S->capture_begin(), S->capture_end()));
// CapturedDecl and captured region kind
- Writer.AddDeclRef(S->getCapturedDecl(), Record);
+ Record.AddDeclRef(S->getCapturedDecl());
Record.push_back(S->getCapturedRegionKind());
- Writer.AddDeclRef(S->getCapturedRecordDecl(), Record);
+ Record.AddDeclRef(S->getCapturedRecordDecl());
// Capture inits
for (auto *I : S->capture_inits())
- Writer.AddStmt(I);
+ Record.AddStmt(I);
// Body
- Writer.AddStmt(S->getCapturedStmt());
+ Record.AddStmt(S->getCapturedStmt());
// Captures
for (const auto &I : S->captures()) {
if (I.capturesThis() || I.capturesVariableArrayType())
- Writer.AddDeclRef(nullptr, Record);
+ Record.AddDeclRef(nullptr);
else
- Writer.AddDeclRef(I.getCapturedVar(), Record);
+ Record.AddDeclRef(I.getCapturedVar());
Record.push_back(I.getCaptureKind());
- Writer.AddSourceLocation(I.getLocation(), Record);
+ Record.AddSourceLocation(I.getLocation());
}
Code = serialization::STMT_CAPTURED;
@@ -341,7 +353,7 @@ void ASTStmtWriter::VisitCapturedStmt(CapturedStmt *S) {
void ASTStmtWriter::VisitExpr(Expr *E) {
VisitStmt(E);
- Writer.AddTypeRef(E->getType(), Record);
+ Record.AddTypeRef(E->getType());
Record.push_back(E->isTypeDependent());
Record.push_back(E->isValueDependent());
Record.push_back(E->isInstantiationDependent());
@@ -352,9 +364,9 @@ void ASTStmtWriter::VisitExpr(Expr *E) {
void ASTStmtWriter::VisitPredefinedExpr(PredefinedExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
Record.push_back(E->getIdentType()); // FIXME: stable encoding
- Writer.AddStmt(E->getFunctionName());
+ Record.AddStmt(E->getFunctionName());
Code = serialization::EXPR_PREDEFINED;
}
@@ -381,25 +393,25 @@ void ASTStmtWriter::VisitDeclRefExpr(DeclRefExpr *E) {
}
if (E->hasQualifier())
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
if (E->getDecl() != E->getFoundDecl())
- Writer.AddDeclRef(E->getFoundDecl(), Record);
+ Record.AddDeclRef(E->getFoundDecl());
if (E->hasTemplateKWAndArgsInfo())
AddTemplateKWAndArgsInfo(*E->getTrailingObjects<ASTTemplateKWAndArgsInfo>(),
E->getTrailingObjects<TemplateArgumentLoc>());
- Writer.AddDeclRef(E->getDecl(), Record);
- Writer.AddSourceLocation(E->getLocation(), Record);
- Writer.AddDeclarationNameLoc(E->DNLoc, E->getDecl()->getDeclName(), Record);
+ Record.AddDeclRef(E->getDecl());
+ Record.AddSourceLocation(E->getLocation());
+ Record.AddDeclarationNameLoc(E->DNLoc, E->getDecl()->getDeclName());
Code = serialization::EXPR_DECL_REF;
}
void ASTStmtWriter::VisitIntegerLiteral(IntegerLiteral *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getLocation(), Record);
- Writer.AddAPInt(E->getValue(), Record);
+ Record.AddSourceLocation(E->getLocation());
+ Record.AddAPInt(E->getValue());
if (E->getValue().getBitWidth() == 32) {
AbbrevToUse = Writer.getIntegerLiteralAbbrev();
@@ -412,14 +424,14 @@ void ASTStmtWriter::VisitFloatingLiteral(FloatingLiteral *E) {
VisitExpr(E);
Record.push_back(E->getRawSemantics());
Record.push_back(E->isExact());
- Writer.AddAPFloat(E->getValue(), Record);
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddAPFloat(E->getValue());
+ Record.AddSourceLocation(E->getLocation());
Code = serialization::EXPR_FLOATING_LITERAL;
}
void ASTStmtWriter::VisitImaginaryLiteral(ImaginaryLiteral *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddStmt(E->getSubExpr());
Code = serialization::EXPR_IMAGINARY_LITERAL;
}
@@ -435,14 +447,14 @@ void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
// the AST file during deserialization.
Record.append(E->getBytes().begin(), E->getBytes().end());
for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
- Writer.AddSourceLocation(E->getStrTokenLoc(I), Record);
+ Record.AddSourceLocation(E->getStrTokenLoc(I));
Code = serialization::EXPR_STRING_LITERAL;
}
void ASTStmtWriter::VisitCharacterLiteral(CharacterLiteral *E) {
VisitExpr(E);
Record.push_back(E->getValue());
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
Record.push_back(E->getKind());
AbbrevToUse = Writer.getCharacterLiteralAbbrev();
@@ -452,9 +464,9 @@ void ASTStmtWriter::VisitCharacterLiteral(CharacterLiteral *E) {
void ASTStmtWriter::VisitParenExpr(ParenExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getLParen(), Record);
- Writer.AddSourceLocation(E->getRParen(), Record);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddSourceLocation(E->getLParen());
+ Record.AddSourceLocation(E->getRParen());
+ Record.AddStmt(E->getSubExpr());
Code = serialization::EXPR_PAREN;
}
@@ -462,17 +474,17 @@ void ASTStmtWriter::VisitParenListExpr(ParenListExpr *E) {
VisitExpr(E);
Record.push_back(E->NumExprs);
for (unsigned i=0; i != E->NumExprs; ++i)
- Writer.AddStmt(E->Exprs[i]);
- Writer.AddSourceLocation(E->LParenLoc, Record);
- Writer.AddSourceLocation(E->RParenLoc, Record);
+ Record.AddStmt(E->Exprs[i]);
+ Record.AddSourceLocation(E->LParenLoc);
+ Record.AddSourceLocation(E->RParenLoc);
Code = serialization::EXPR_PAREN_LIST;
}
void ASTStmtWriter::VisitUnaryOperator(UnaryOperator *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddStmt(E->getSubExpr());
Record.push_back(E->getOpcode()); // FIXME: stable encoding
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
Code = serialization::EXPR_UNARY_OPERATOR;
}
@@ -480,34 +492,34 @@ void ASTStmtWriter::VisitOffsetOfExpr(OffsetOfExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumComponents());
Record.push_back(E->getNumExpressions());
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
- Writer.AddTypeSourceInfo(E->getTypeSourceInfo(), Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
+ Record.AddTypeSourceInfo(E->getTypeSourceInfo());
for (unsigned I = 0, N = E->getNumComponents(); I != N; ++I) {
const OffsetOfNode &ON = E->getComponent(I);
Record.push_back(ON.getKind()); // FIXME: Stable encoding
- Writer.AddSourceLocation(ON.getSourceRange().getBegin(), Record);
- Writer.AddSourceLocation(ON.getSourceRange().getEnd(), Record);
+ Record.AddSourceLocation(ON.getSourceRange().getBegin());
+ Record.AddSourceLocation(ON.getSourceRange().getEnd());
switch (ON.getKind()) {
case OffsetOfNode::Array:
Record.push_back(ON.getArrayExprIndex());
break;
case OffsetOfNode::Field:
- Writer.AddDeclRef(ON.getField(), Record);
+ Record.AddDeclRef(ON.getField());
break;
case OffsetOfNode::Identifier:
- Writer.AddIdentifierRef(ON.getFieldName(), Record);
+ Record.AddIdentifierRef(ON.getFieldName());
break;
case OffsetOfNode::Base:
- Writer.AddCXXBaseSpecifier(*ON.getBase(), Record);
+ Record.AddCXXBaseSpecifier(*ON.getBase());
break;
}
}
for (unsigned I = 0, N = E->getNumExpressions(); I != N; ++I)
- Writer.AddStmt(E->getIndexExpr(I));
+ Record.AddStmt(E->getIndexExpr(I));
Code = serialization::EXPR_OFFSETOF;
}
@@ -515,42 +527,42 @@ void ASTStmtWriter::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E) {
VisitExpr(E);
Record.push_back(E->getKind());
if (E->isArgumentType())
- Writer.AddTypeSourceInfo(E->getArgumentTypeInfo(), Record);
+ Record.AddTypeSourceInfo(E->getArgumentTypeInfo());
else {
Record.push_back(0);
- Writer.AddStmt(E->getArgumentExpr());
+ Record.AddStmt(E->getArgumentExpr());
}
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_SIZEOF_ALIGN_OF;
}
void ASTStmtWriter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getLHS());
- Writer.AddStmt(E->getRHS());
- Writer.AddSourceLocation(E->getRBracketLoc(), Record);
+ Record.AddStmt(E->getLHS());
+ Record.AddStmt(E->getRHS());
+ Record.AddSourceLocation(E->getRBracketLoc());
Code = serialization::EXPR_ARRAY_SUBSCRIPT;
}
void ASTStmtWriter::VisitOMPArraySectionExpr(OMPArraySectionExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getBase());
- Writer.AddStmt(E->getLowerBound());
- Writer.AddStmt(E->getLength());
- Writer.AddSourceLocation(E->getColonLoc(), Record);
- Writer.AddSourceLocation(E->getRBracketLoc(), Record);
+ Record.AddStmt(E->getBase());
+ Record.AddStmt(E->getLowerBound());
+ Record.AddStmt(E->getLength());
+ Record.AddSourceLocation(E->getColonLoc());
+ Record.AddSourceLocation(E->getRBracketLoc());
Code = serialization::EXPR_OMP_ARRAY_SECTION;
}
void ASTStmtWriter::VisitCallExpr(CallExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumArgs());
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
- Writer.AddStmt(E->getCallee());
+ Record.AddSourceLocation(E->getRParenLoc());
+ Record.AddStmt(E->getCallee());
for (CallExpr::arg_iterator Arg = E->arg_begin(), ArgEnd = E->arg_end();
Arg != ArgEnd; ++Arg)
- Writer.AddStmt(*Arg);
+ Record.AddStmt(*Arg);
Code = serialization::EXPR_CALL;
}
@@ -559,43 +571,43 @@ void ASTStmtWriter::VisitMemberExpr(MemberExpr *E) {
Record.push_back(E->hasQualifier());
if (E->hasQualifier())
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
Record.push_back(E->HasTemplateKWAndArgsInfo);
if (E->HasTemplateKWAndArgsInfo) {
- Writer.AddSourceLocation(E->getTemplateKeywordLoc(), Record);
+ Record.AddSourceLocation(E->getTemplateKeywordLoc());
unsigned NumTemplateArgs = E->getNumTemplateArgs();
Record.push_back(NumTemplateArgs);
- Writer.AddSourceLocation(E->getLAngleLoc(), Record);
- Writer.AddSourceLocation(E->getRAngleLoc(), Record);
+ Record.AddSourceLocation(E->getLAngleLoc());
+ Record.AddSourceLocation(E->getRAngleLoc());
for (unsigned i=0; i != NumTemplateArgs; ++i)
- Writer.AddTemplateArgumentLoc(E->getTemplateArgs()[i], Record);
+ Record.AddTemplateArgumentLoc(E->getTemplateArgs()[i]);
}
Record.push_back(E->hadMultipleCandidates());
DeclAccessPair FoundDecl = E->getFoundDecl();
- Writer.AddDeclRef(FoundDecl.getDecl(), Record);
+ Record.AddDeclRef(FoundDecl.getDecl());
Record.push_back(FoundDecl.getAccess());
- Writer.AddTypeRef(E->getType(), Record);
+ Record.AddTypeRef(E->getType());
Record.push_back(E->getValueKind());
Record.push_back(E->getObjectKind());
- Writer.AddStmt(E->getBase());
- Writer.AddDeclRef(E->getMemberDecl(), Record);
- Writer.AddSourceLocation(E->getMemberLoc(), Record);
+ Record.AddStmt(E->getBase());
+ Record.AddDeclRef(E->getMemberDecl());
+ Record.AddSourceLocation(E->getMemberLoc());
Record.push_back(E->isArrow());
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
- Writer.AddDeclarationNameLoc(E->MemberDNLoc,
- E->getMemberDecl()->getDeclName(), Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
+ Record.AddDeclarationNameLoc(E->MemberDNLoc,
+ E->getMemberDecl()->getDeclName());
Code = serialization::EXPR_MEMBER;
}
void ASTStmtWriter::VisitObjCIsaExpr(ObjCIsaExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getBase());
- Writer.AddSourceLocation(E->getIsaMemberLoc(), Record);
- Writer.AddSourceLocation(E->getOpLoc(), Record);
+ Record.AddStmt(E->getBase());
+ Record.AddSourceLocation(E->getIsaMemberLoc());
+ Record.AddSourceLocation(E->getOpLoc());
Record.push_back(E->isArrow());
Code = serialization::EXPR_OBJC_ISA;
}
@@ -603,15 +615,15 @@ void ASTStmtWriter::VisitObjCIsaExpr(ObjCIsaExpr *E) {
void ASTStmtWriter::
VisitObjCIndirectCopyRestoreExpr(ObjCIndirectCopyRestoreExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddStmt(E->getSubExpr());
Record.push_back(E->shouldCopy());
Code = serialization::EXPR_OBJC_INDIRECT_COPY_RESTORE;
}
void ASTStmtWriter::VisitObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
VisitExplicitCastExpr(E);
- Writer.AddSourceLocation(E->getLParenLoc(), Record);
- Writer.AddSourceLocation(E->getBridgeKeywordLoc(), Record);
+ Record.AddSourceLocation(E->getLParenLoc());
+ Record.AddSourceLocation(E->getBridgeKeywordLoc());
Record.push_back(E->getBridgeKind()); // FIXME: Stable encoding
Code = serialization::EXPR_OBJC_BRIDGED_CAST;
}
@@ -619,51 +631,51 @@ void ASTStmtWriter::VisitObjCBridgedCastExpr(ObjCBridgedCastExpr *E) {
void ASTStmtWriter::VisitCastExpr(CastExpr *E) {
VisitExpr(E);
Record.push_back(E->path_size());
- Writer.AddStmt(E->getSubExpr());
+ Record.AddStmt(E->getSubExpr());
Record.push_back(E->getCastKind()); // FIXME: stable encoding
for (CastExpr::path_iterator
PI = E->path_begin(), PE = E->path_end(); PI != PE; ++PI)
- Writer.AddCXXBaseSpecifier(**PI, Record);
+ Record.AddCXXBaseSpecifier(**PI);
}
void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) {
VisitExpr(E);
- Writer.AddStmt(E->getLHS());
- Writer.AddStmt(E->getRHS());
+ Record.AddStmt(E->getLHS());
+ Record.AddStmt(E->getRHS());
Record.push_back(E->getOpcode()); // FIXME: stable encoding
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
Record.push_back(E->isFPContractable());
Code = serialization::EXPR_BINARY_OPERATOR;
}
void ASTStmtWriter::VisitCompoundAssignOperator(CompoundAssignOperator *E) {
VisitBinaryOperator(E);
- Writer.AddTypeRef(E->getComputationLHSType(), Record);
- Writer.AddTypeRef(E->getComputationResultType(), Record);
+ Record.AddTypeRef(E->getComputationLHSType());
+ Record.AddTypeRef(E->getComputationResultType());
Code = serialization::EXPR_COMPOUND_ASSIGN_OPERATOR;
}
void ASTStmtWriter::VisitConditionalOperator(ConditionalOperator *E) {
VisitExpr(E);
- Writer.AddStmt(E->getCond());
- Writer.AddStmt(E->getLHS());
- Writer.AddStmt(E->getRHS());
- Writer.AddSourceLocation(E->getQuestionLoc(), Record);
- Writer.AddSourceLocation(E->getColonLoc(), Record);
+ Record.AddStmt(E->getCond());
+ Record.AddStmt(E->getLHS());
+ Record.AddStmt(E->getRHS());
+ Record.AddSourceLocation(E->getQuestionLoc());
+ Record.AddSourceLocation(E->getColonLoc());
Code = serialization::EXPR_CONDITIONAL_OPERATOR;
}
void
ASTStmtWriter::VisitBinaryConditionalOperator(BinaryConditionalOperator *E) {
VisitExpr(E);
- Writer.AddStmt(E->getOpaqueValue());
- Writer.AddStmt(E->getCommon());
- Writer.AddStmt(E->getCond());
- Writer.AddStmt(E->getTrueExpr());
- Writer.AddStmt(E->getFalseExpr());
- Writer.AddSourceLocation(E->getQuestionLoc(), Record);
- Writer.AddSourceLocation(E->getColonLoc(), Record);
+ Record.AddStmt(E->getOpaqueValue());
+ Record.AddStmt(E->getCommon());
+ Record.AddStmt(E->getCond());
+ Record.AddStmt(E->getTrueExpr());
+ Record.AddStmt(E->getFalseExpr());
+ Record.AddSourceLocation(E->getQuestionLoc());
+ Record.AddSourceLocation(E->getColonLoc());
Code = serialization::EXPR_BINARY_CONDITIONAL_OPERATOR;
}
@@ -678,30 +690,30 @@ void ASTStmtWriter::VisitImplicitCastExpr(ImplicitCastExpr *E) {
void ASTStmtWriter::VisitExplicitCastExpr(ExplicitCastExpr *E) {
VisitCastExpr(E);
- Writer.AddTypeSourceInfo(E->getTypeInfoAsWritten(), Record);
+ Record.AddTypeSourceInfo(E->getTypeInfoAsWritten());
}
void ASTStmtWriter::VisitCStyleCastExpr(CStyleCastExpr *E) {
VisitExplicitCastExpr(E);
- Writer.AddSourceLocation(E->getLParenLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddSourceLocation(E->getLParenLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_CSTYLE_CAST;
}
void ASTStmtWriter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getLParenLoc(), Record);
- Writer.AddTypeSourceInfo(E->getTypeSourceInfo(), Record);
- Writer.AddStmt(E->getInitializer());
+ Record.AddSourceLocation(E->getLParenLoc());
+ Record.AddTypeSourceInfo(E->getTypeSourceInfo());
+ Record.AddStmt(E->getInitializer());
Record.push_back(E->isFileScope());
Code = serialization::EXPR_COMPOUND_LITERAL;
}
void ASTStmtWriter::VisitExtVectorElementExpr(ExtVectorElementExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getBase());
- Writer.AddIdentifierRef(&E->getAccessor(), Record);
- Writer.AddSourceLocation(E->getAccessorLoc(), Record);
+ Record.AddStmt(E->getBase());
+ Record.AddIdentifierRef(&E->getAccessor());
+ Record.AddSourceLocation(E->getAccessorLoc());
Code = serialization::EXPR_EXT_VECTOR_ELEMENT;
}
@@ -709,15 +721,15 @@ void ASTStmtWriter::VisitInitListExpr(InitListExpr *E) {
VisitExpr(E);
// NOTE: only add the (possibly null) syntactic form.
// No need to serialize the isSemanticForm flag and the semantic form.
- Writer.AddStmt(E->getSyntacticForm());
- Writer.AddSourceLocation(E->getLBraceLoc(), Record);
- Writer.AddSourceLocation(E->getRBraceLoc(), Record);
+ Record.AddStmt(E->getSyntacticForm());
+ Record.AddSourceLocation(E->getLBraceLoc());
+ Record.AddSourceLocation(E->getRBraceLoc());
bool isArrayFiller = E->ArrayFillerOrUnionFieldInit.is<Expr*>();
Record.push_back(isArrayFiller);
if (isArrayFiller)
- Writer.AddStmt(E->getArrayFiller());
+ Record.AddStmt(E->getArrayFiller());
else
- Writer.AddDeclRef(E->getInitializedFieldInUnion(), Record);
+ Record.AddDeclRef(E->getInitializedFieldInUnion());
Record.push_back(E->hadArrayRangeDesignator());
Record.push_back(E->getNumInits());
if (isArrayFiller) {
@@ -725,10 +737,10 @@ void ASTStmtWriter::VisitInitListExpr(InitListExpr *E) {
// Replace them by 0 to indicate that the filler goes in that place.
Expr *filler = E->getArrayFiller();
for (unsigned I = 0, N = E->getNumInits(); I != N; ++I)
- Writer.AddStmt(E->getInit(I) != filler ? E->getInit(I) : nullptr);
+ Record.AddStmt(E->getInit(I) != filler ? E->getInit(I) : nullptr);
} else {
for (unsigned I = 0, N = E->getNumInits(); I != N; ++I)
- Writer.AddStmt(E->getInit(I));
+ Record.AddStmt(E->getInit(I));
}
Code = serialization::EXPR_INIT_LIST;
}
@@ -737,34 +749,32 @@ void ASTStmtWriter::VisitDesignatedInitExpr(DesignatedInitExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumSubExprs());
for (unsigned I = 0, N = E->getNumSubExprs(); I != N; ++I)
- Writer.AddStmt(E->getSubExpr(I));
- Writer.AddSourceLocation(E->getEqualOrColonLoc(), Record);
+ Record.AddStmt(E->getSubExpr(I));
+ Record.AddSourceLocation(E->getEqualOrColonLoc());
Record.push_back(E->usesGNUSyntax());
- for (DesignatedInitExpr::designators_iterator D = E->designators_begin(),
- DEnd = E->designators_end();
- D != DEnd; ++D) {
- if (D->isFieldDesignator()) {
- if (FieldDecl *Field = D->getField()) {
+ for (const DesignatedInitExpr::Designator &D : E->designators()) {
+ if (D.isFieldDesignator()) {
+ if (FieldDecl *Field = D.getField()) {
Record.push_back(serialization::DESIG_FIELD_DECL);
- Writer.AddDeclRef(Field, Record);
+ Record.AddDeclRef(Field);
} else {
Record.push_back(serialization::DESIG_FIELD_NAME);
- Writer.AddIdentifierRef(D->getFieldName(), Record);
+ Record.AddIdentifierRef(D.getFieldName());
}
- Writer.AddSourceLocation(D->getDotLoc(), Record);
- Writer.AddSourceLocation(D->getFieldLoc(), Record);
- } else if (D->isArrayDesignator()) {
+ Record.AddSourceLocation(D.getDotLoc());
+ Record.AddSourceLocation(D.getFieldLoc());
+ } else if (D.isArrayDesignator()) {
Record.push_back(serialization::DESIG_ARRAY);
- Record.push_back(D->getFirstExprIndex());
- Writer.AddSourceLocation(D->getLBracketLoc(), Record);
- Writer.AddSourceLocation(D->getRBracketLoc(), Record);
+ Record.push_back(D.getFirstExprIndex());
+ Record.AddSourceLocation(D.getLBracketLoc());
+ Record.AddSourceLocation(D.getRBracketLoc());
} else {
- assert(D->isArrayRangeDesignator() && "Unknown designator");
+ assert(D.isArrayRangeDesignator() && "Unknown designator");
Record.push_back(serialization::DESIG_ARRAY_RANGE);
- Record.push_back(D->getFirstExprIndex());
- Writer.AddSourceLocation(D->getLBracketLoc(), Record);
- Writer.AddSourceLocation(D->getEllipsisLoc(), Record);
- Writer.AddSourceLocation(D->getRBracketLoc(), Record);
+ Record.push_back(D.getFirstExprIndex());
+ Record.AddSourceLocation(D.getLBracketLoc());
+ Record.AddSourceLocation(D.getEllipsisLoc());
+ Record.AddSourceLocation(D.getRBracketLoc());
}
}
Code = serialization::EXPR_DESIGNATED_INIT;
@@ -772,8 +782,8 @@ void ASTStmtWriter::VisitDesignatedInitExpr(DesignatedInitExpr *E) {
void ASTStmtWriter::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getBase());
- Writer.AddStmt(E->getUpdater());
+ Record.AddStmt(E->getBase());
+ Record.AddStmt(E->getUpdater());
Code = serialization::EXPR_DESIGNATED_INIT_UPDATE;
}
@@ -789,44 +799,44 @@ void ASTStmtWriter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
void ASTStmtWriter::VisitVAArgExpr(VAArgExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubExpr());
- Writer.AddTypeSourceInfo(E->getWrittenTypeInfo(), Record);
- Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddStmt(E->getSubExpr());
+ Record.AddTypeSourceInfo(E->getWrittenTypeInfo());
+ Record.AddSourceLocation(E->getBuiltinLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Record.push_back(E->isMicrosoftABI());
Code = serialization::EXPR_VA_ARG;
}
void ASTStmtWriter::VisitAddrLabelExpr(AddrLabelExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getAmpAmpLoc(), Record);
- Writer.AddSourceLocation(E->getLabelLoc(), Record);
- Writer.AddDeclRef(E->getLabel(), Record);
+ Record.AddSourceLocation(E->getAmpAmpLoc());
+ Record.AddSourceLocation(E->getLabelLoc());
+ Record.AddDeclRef(E->getLabel());
Code = serialization::EXPR_ADDR_LABEL;
}
void ASTStmtWriter::VisitStmtExpr(StmtExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubStmt());
- Writer.AddSourceLocation(E->getLParenLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddStmt(E->getSubStmt());
+ Record.AddSourceLocation(E->getLParenLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_STMT;
}
void ASTStmtWriter::VisitChooseExpr(ChooseExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getCond());
- Writer.AddStmt(E->getLHS());
- Writer.AddStmt(E->getRHS());
- Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddStmt(E->getCond());
+ Record.AddStmt(E->getLHS());
+ Record.AddStmt(E->getRHS());
+ Record.AddSourceLocation(E->getBuiltinLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Record.push_back(E->isConditionDependent() ? false : E->isConditionTrue());
Code = serialization::EXPR_CHOOSE;
}
void ASTStmtWriter::VisitGNUNullExpr(GNUNullExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getTokenLocation(), Record);
+ Record.AddSourceLocation(E->getTokenLocation());
Code = serialization::EXPR_GNU_NULL;
}
@@ -834,24 +844,24 @@ void ASTStmtWriter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumSubExprs());
for (unsigned I = 0, N = E->getNumSubExprs(); I != N; ++I)
- Writer.AddStmt(E->getExpr(I));
- Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddStmt(E->getExpr(I));
+ Record.AddSourceLocation(E->getBuiltinLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_SHUFFLE_VECTOR;
}
void ASTStmtWriter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
- Writer.AddTypeSourceInfo(E->getTypeSourceInfo(), Record);
- Writer.AddStmt(E->getSrcExpr());
+ Record.AddSourceLocation(E->getBuiltinLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
+ Record.AddTypeSourceInfo(E->getTypeSourceInfo());
+ Record.AddStmt(E->getSrcExpr());
Code = serialization::EXPR_CONVERT_VECTOR;
}
void ASTStmtWriter::VisitBlockExpr(BlockExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getBlockDecl(), Record);
+ Record.AddDeclRef(E->getBlockDecl());
Code = serialization::EXPR_BLOCK;
}
@@ -859,16 +869,16 @@ void ASTStmtWriter::VisitGenericSelectionExpr(GenericSelectionExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumAssocs());
- Writer.AddStmt(E->getControllingExpr());
+ Record.AddStmt(E->getControllingExpr());
for (unsigned I = 0, N = E->getNumAssocs(); I != N; ++I) {
- Writer.AddTypeSourceInfo(E->getAssocTypeSourceInfo(I), Record);
- Writer.AddStmt(E->getAssocExpr(I));
+ Record.AddTypeSourceInfo(E->getAssocTypeSourceInfo(I));
+ Record.AddStmt(E->getAssocExpr(I));
}
Record.push_back(E->isResultDependent() ? -1U : E->getResultIndex());
- Writer.AddSourceLocation(E->getGenericLoc(), Record);
- Writer.AddSourceLocation(E->getDefaultLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddSourceLocation(E->getGenericLoc());
+ Record.AddSourceLocation(E->getDefaultLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_GENERIC_SELECTION;
}
@@ -882,10 +892,10 @@ void ASTStmtWriter::VisitPseudoObjectExpr(PseudoObjectExpr *E) {
result = (result == PseudoObjectExpr::NoResult ? 0 : result + 1);
Record.push_back(result);
- Writer.AddStmt(E->getSyntacticForm());
+ Record.AddStmt(E->getSyntacticForm());
for (PseudoObjectExpr::semantics_iterator
i = E->semantics_begin(), e = E->semantics_end(); i != e; ++i) {
- Writer.AddStmt(*i);
+ Record.AddStmt(*i);
}
Code = serialization::EXPR_PSEUDO_OBJECT;
}
@@ -894,9 +904,9 @@ void ASTStmtWriter::VisitAtomicExpr(AtomicExpr *E) {
VisitExpr(E);
Record.push_back(E->getOp());
for (unsigned I = 0, N = E->getNumSubExprs(); I != N; ++I)
- Writer.AddStmt(E->getSubExprs()[I]);
- Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddStmt(E->getSubExprs()[I]);
+ Record.AddSourceLocation(E->getBuiltinLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_ATOMIC;
}
@@ -906,16 +916,16 @@ void ASTStmtWriter::VisitAtomicExpr(AtomicExpr *E) {
void ASTStmtWriter::VisitObjCStringLiteral(ObjCStringLiteral *E) {
VisitExpr(E);
- Writer.AddStmt(E->getString());
- Writer.AddSourceLocation(E->getAtLoc(), Record);
+ Record.AddStmt(E->getString());
+ Record.AddSourceLocation(E->getAtLoc());
Code = serialization::EXPR_OBJC_STRING_LITERAL;
}
void ASTStmtWriter::VisitObjCBoxedExpr(ObjCBoxedExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubExpr());
- Writer.AddDeclRef(E->getBoxingMethod(), Record);
- Writer.AddSourceRange(E->getSourceRange(), Record);
+ Record.AddStmt(E->getSubExpr());
+ Record.AddDeclRef(E->getBoxingMethod());
+ Record.AddSourceRange(E->getSourceRange());
Code = serialization::EXPR_OBJC_BOXED_EXPRESSION;
}
@@ -923,9 +933,9 @@ void ASTStmtWriter::VisitObjCArrayLiteral(ObjCArrayLiteral *E) {
VisitExpr(E);
Record.push_back(E->getNumElements());
for (unsigned i = 0; i < E->getNumElements(); i++)
- Writer.AddStmt(E->getElement(i));
- Writer.AddDeclRef(E->getArrayWithObjectsMethod(), Record);
- Writer.AddSourceRange(E->getSourceRange(), Record);
+ Record.AddStmt(E->getElement(i));
+ Record.AddDeclRef(E->getArrayWithObjectsMethod());
+ Record.AddSourceRange(E->getSourceRange());
Code = serialization::EXPR_OBJC_ARRAY_LITERAL;
}
@@ -935,10 +945,10 @@ void ASTStmtWriter::VisitObjCDictionaryLiteral(ObjCDictionaryLiteral *E) {
Record.push_back(E->HasPackExpansions);
for (unsigned i = 0; i < E->getNumElements(); i++) {
ObjCDictionaryElement Element = E->getKeyValueElement(i);
- Writer.AddStmt(Element.Key);
- Writer.AddStmt(Element.Value);
+ Record.AddStmt(Element.Key);
+ Record.AddStmt(Element.Value);
if (E->HasPackExpansions) {
- Writer.AddSourceLocation(Element.EllipsisLoc, Record);
+ Record.AddSourceLocation(Element.EllipsisLoc);
unsigned NumExpansions = 0;
if (Element.NumExpansions)
NumExpansions = *Element.NumExpansions + 1;
@@ -946,42 +956,42 @@ void ASTStmtWriter::VisitObjCDictionaryLiteral(ObjCDictionaryLiteral *E) {
}
}
- Writer.AddDeclRef(E->getDictWithObjectsMethod(), Record);
- Writer.AddSourceRange(E->getSourceRange(), Record);
+ Record.AddDeclRef(E->getDictWithObjectsMethod());
+ Record.AddSourceRange(E->getSourceRange());
Code = serialization::EXPR_OBJC_DICTIONARY_LITERAL;
}
void ASTStmtWriter::VisitObjCEncodeExpr(ObjCEncodeExpr *E) {
VisitExpr(E);
- Writer.AddTypeSourceInfo(E->getEncodedTypeSourceInfo(), Record);
- Writer.AddSourceLocation(E->getAtLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddTypeSourceInfo(E->getEncodedTypeSourceInfo());
+ Record.AddSourceLocation(E->getAtLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_OBJC_ENCODE;
}
void ASTStmtWriter::VisitObjCSelectorExpr(ObjCSelectorExpr *E) {
VisitExpr(E);
- Writer.AddSelectorRef(E->getSelector(), Record);
- Writer.AddSourceLocation(E->getAtLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddSelectorRef(E->getSelector());
+ Record.AddSourceLocation(E->getAtLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_OBJC_SELECTOR_EXPR;
}
void ASTStmtWriter::VisitObjCProtocolExpr(ObjCProtocolExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getProtocol(), Record);
- Writer.AddSourceLocation(E->getAtLoc(), Record);
- Writer.AddSourceLocation(E->ProtoLoc, Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddDeclRef(E->getProtocol());
+ Record.AddSourceLocation(E->getAtLoc());
+ Record.AddSourceLocation(E->ProtoLoc);
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_OBJC_PROTOCOL_EXPR;
}
void ASTStmtWriter::VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getDecl(), Record);
- Writer.AddSourceLocation(E->getLocation(), Record);
- Writer.AddSourceLocation(E->getOpLoc(), Record);
- Writer.AddStmt(E->getBase());
+ Record.AddDeclRef(E->getDecl());
+ Record.AddSourceLocation(E->getLocation());
+ Record.AddSourceLocation(E->getOpLoc());
+ Record.AddStmt(E->getBase());
Record.push_back(E->isArrow());
Record.push_back(E->isFreeIvar());
Code = serialization::EXPR_OBJC_IVAR_REF_EXPR;
@@ -992,22 +1002,22 @@ void ASTStmtWriter::VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
Record.push_back(E->SetterAndMethodRefFlags.getInt());
Record.push_back(E->isImplicitProperty());
if (E->isImplicitProperty()) {
- Writer.AddDeclRef(E->getImplicitPropertyGetter(), Record);
- Writer.AddDeclRef(E->getImplicitPropertySetter(), Record);
+ Record.AddDeclRef(E->getImplicitPropertyGetter());
+ Record.AddDeclRef(E->getImplicitPropertySetter());
} else {
- Writer.AddDeclRef(E->getExplicitProperty(), Record);
+ Record.AddDeclRef(E->getExplicitProperty());
}
- Writer.AddSourceLocation(E->getLocation(), Record);
- Writer.AddSourceLocation(E->getReceiverLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
+ Record.AddSourceLocation(E->getReceiverLocation());
if (E->isObjectReceiver()) {
Record.push_back(0);
- Writer.AddStmt(E->getBase());
+ Record.AddStmt(E->getBase());
} else if (E->isSuperReceiver()) {
Record.push_back(1);
- Writer.AddTypeRef(E->getSuperReceiverType(), Record);
+ Record.AddTypeRef(E->getSuperReceiverType());
} else {
Record.push_back(2);
- Writer.AddDeclRef(E->getClassReceiver(), Record);
+ Record.AddDeclRef(E->getClassReceiver());
}
Code = serialization::EXPR_OBJC_PROPERTY_REF_EXPR;
@@ -1015,11 +1025,11 @@ void ASTStmtWriter::VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *E) {
void ASTStmtWriter::VisitObjCSubscriptRefExpr(ObjCSubscriptRefExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getRBracket(), Record);
- Writer.AddStmt(E->getBaseExpr());
- Writer.AddStmt(E->getKeyExpr());
- Writer.AddDeclRef(E->getAtIndexMethodDecl(), Record);
- Writer.AddDeclRef(E->setAtIndexMethodDecl(), Record);
+ Record.AddSourceLocation(E->getRBracket());
+ Record.AddStmt(E->getBaseExpr());
+ Record.AddStmt(E->getKeyExpr());
+ Record.AddDeclRef(E->getAtIndexMethodDecl());
+ Record.AddDeclRef(E->setAtIndexMethodDecl());
Code = serialization::EXPR_OBJC_SUBSCRIPT_REF_EXPR;
}
@@ -1034,155 +1044,163 @@ void ASTStmtWriter::VisitObjCMessageExpr(ObjCMessageExpr *E) {
Record.push_back((unsigned)E->getReceiverKind()); // FIXME: stable encoding
switch (E->getReceiverKind()) {
case ObjCMessageExpr::Instance:
- Writer.AddStmt(E->getInstanceReceiver());
+ Record.AddStmt(E->getInstanceReceiver());
break;
case ObjCMessageExpr::Class:
- Writer.AddTypeSourceInfo(E->getClassReceiverTypeInfo(), Record);
+ Record.AddTypeSourceInfo(E->getClassReceiverTypeInfo());
break;
case ObjCMessageExpr::SuperClass:
case ObjCMessageExpr::SuperInstance:
- Writer.AddTypeRef(E->getSuperType(), Record);
- Writer.AddSourceLocation(E->getSuperLoc(), Record);
+ Record.AddTypeRef(E->getSuperType());
+ Record.AddSourceLocation(E->getSuperLoc());
break;
}
if (E->getMethodDecl()) {
Record.push_back(1);
- Writer.AddDeclRef(E->getMethodDecl(), Record);
+ Record.AddDeclRef(E->getMethodDecl());
} else {
Record.push_back(0);
- Writer.AddSelectorRef(E->getSelector(), Record);
+ Record.AddSelectorRef(E->getSelector());
}
- Writer.AddSourceLocation(E->getLeftLoc(), Record);
- Writer.AddSourceLocation(E->getRightLoc(), Record);
+ Record.AddSourceLocation(E->getLeftLoc());
+ Record.AddSourceLocation(E->getRightLoc());
for (CallExpr::arg_iterator Arg = E->arg_begin(), ArgEnd = E->arg_end();
Arg != ArgEnd; ++Arg)
- Writer.AddStmt(*Arg);
+ Record.AddStmt(*Arg);
SourceLocation *Locs = E->getStoredSelLocs();
for (unsigned i = 0, e = E->getNumStoredSelLocs(); i != e; ++i)
- Writer.AddSourceLocation(Locs[i], Record);
+ Record.AddSourceLocation(Locs[i]);
Code = serialization::EXPR_OBJC_MESSAGE_EXPR;
}
void ASTStmtWriter::VisitObjCForCollectionStmt(ObjCForCollectionStmt *S) {
VisitStmt(S);
- Writer.AddStmt(S->getElement());
- Writer.AddStmt(S->getCollection());
- Writer.AddStmt(S->getBody());
- Writer.AddSourceLocation(S->getForLoc(), Record);
- Writer.AddSourceLocation(S->getRParenLoc(), Record);
+ Record.AddStmt(S->getElement());
+ Record.AddStmt(S->getCollection());
+ Record.AddStmt(S->getBody());
+ Record.AddSourceLocation(S->getForLoc());
+ Record.AddSourceLocation(S->getRParenLoc());
Code = serialization::STMT_OBJC_FOR_COLLECTION;
}
void ASTStmtWriter::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) {
- Writer.AddStmt(S->getCatchBody());
- Writer.AddDeclRef(S->getCatchParamDecl(), Record);
- Writer.AddSourceLocation(S->getAtCatchLoc(), Record);
- Writer.AddSourceLocation(S->getRParenLoc(), Record);
+ Record.AddStmt(S->getCatchBody());
+ Record.AddDeclRef(S->getCatchParamDecl());
+ Record.AddSourceLocation(S->getAtCatchLoc());
+ Record.AddSourceLocation(S->getRParenLoc());
Code = serialization::STMT_OBJC_CATCH;
}
void ASTStmtWriter::VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
- Writer.AddStmt(S->getFinallyBody());
- Writer.AddSourceLocation(S->getAtFinallyLoc(), Record);
+ Record.AddStmt(S->getFinallyBody());
+ Record.AddSourceLocation(S->getAtFinallyLoc());
Code = serialization::STMT_OBJC_FINALLY;
}
void ASTStmtWriter::VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S) {
- Writer.AddStmt(S->getSubStmt());
- Writer.AddSourceLocation(S->getAtLoc(), Record);
+ Record.AddStmt(S->getSubStmt());
+ Record.AddSourceLocation(S->getAtLoc());
Code = serialization::STMT_OBJC_AUTORELEASE_POOL;
}
void ASTStmtWriter::VisitObjCAtTryStmt(ObjCAtTryStmt *S) {
Record.push_back(S->getNumCatchStmts());
Record.push_back(S->getFinallyStmt() != nullptr);
- Writer.AddStmt(S->getTryBody());
+ Record.AddStmt(S->getTryBody());
for (unsigned I = 0, N = S->getNumCatchStmts(); I != N; ++I)
- Writer.AddStmt(S->getCatchStmt(I));
+ Record.AddStmt(S->getCatchStmt(I));
if (S->getFinallyStmt())
- Writer.AddStmt(S->getFinallyStmt());
- Writer.AddSourceLocation(S->getAtTryLoc(), Record);
+ Record.AddStmt(S->getFinallyStmt());
+ Record.AddSourceLocation(S->getAtTryLoc());
Code = serialization::STMT_OBJC_AT_TRY;
}
void ASTStmtWriter::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S) {
- Writer.AddStmt(S->getSynchExpr());
- Writer.AddStmt(S->getSynchBody());
- Writer.AddSourceLocation(S->getAtSynchronizedLoc(), Record);
+ Record.AddStmt(S->getSynchExpr());
+ Record.AddStmt(S->getSynchBody());
+ Record.AddSourceLocation(S->getAtSynchronizedLoc());
Code = serialization::STMT_OBJC_AT_SYNCHRONIZED;
}
void ASTStmtWriter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
- Writer.AddStmt(S->getThrowExpr());
- Writer.AddSourceLocation(S->getThrowLoc(), Record);
+ Record.AddStmt(S->getThrowExpr());
+ Record.AddSourceLocation(S->getThrowLoc());
Code = serialization::STMT_OBJC_AT_THROW;
}
void ASTStmtWriter::VisitObjCBoolLiteralExpr(ObjCBoolLiteralExpr *E) {
VisitExpr(E);
Record.push_back(E->getValue());
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
Code = serialization::EXPR_OBJC_BOOL_LITERAL;
}
+void ASTStmtWriter::VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) {
+ VisitExpr(E);
+ Record.AddSourceRange(E->getSourceRange());
+ Record.AddVersionTuple(E->getVersion());
+ Code = serialization::EXPR_OBJC_AVAILABILITY_CHECK;
+}
+
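// Record layout for the new EXPR_OBJC_AVAILABILITY_CHECK, as written above:
// the common Expr fields, then the @available expression's source range, then
// the version tuple being tested. The matching reader in ASTReaderStmt.cpp
// must consume these fields in the same order.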
//===----------------------------------------------------------------------===//
// C++ Expressions and Statements.
//===----------------------------------------------------------------------===//
void ASTStmtWriter::VisitCXXCatchStmt(CXXCatchStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getCatchLoc(), Record);
- Writer.AddDeclRef(S->getExceptionDecl(), Record);
- Writer.AddStmt(S->getHandlerBlock());
+ Record.AddSourceLocation(S->getCatchLoc());
+ Record.AddDeclRef(S->getExceptionDecl());
+ Record.AddStmt(S->getHandlerBlock());
Code = serialization::STMT_CXX_CATCH;
}
void ASTStmtWriter::VisitCXXTryStmt(CXXTryStmt *S) {
VisitStmt(S);
Record.push_back(S->getNumHandlers());
- Writer.AddSourceLocation(S->getTryLoc(), Record);
- Writer.AddStmt(S->getTryBlock());
+ Record.AddSourceLocation(S->getTryLoc());
+ Record.AddStmt(S->getTryBlock());
for (unsigned i = 0, e = S->getNumHandlers(); i != e; ++i)
- Writer.AddStmt(S->getHandler(i));
+ Record.AddStmt(S->getHandler(i));
Code = serialization::STMT_CXX_TRY;
}
void ASTStmtWriter::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getForLoc(), Record);
- Writer.AddSourceLocation(S->getCoawaitLoc(), Record);
- Writer.AddSourceLocation(S->getColonLoc(), Record);
- Writer.AddSourceLocation(S->getRParenLoc(), Record);
- Writer.AddStmt(S->getRangeStmt());
- Writer.AddStmt(S->getBeginEndStmt());
- Writer.AddStmt(S->getCond());
- Writer.AddStmt(S->getInc());
- Writer.AddStmt(S->getLoopVarStmt());
- Writer.AddStmt(S->getBody());
+ Record.AddSourceLocation(S->getForLoc());
+ Record.AddSourceLocation(S->getCoawaitLoc());
+ Record.AddSourceLocation(S->getColonLoc());
+ Record.AddSourceLocation(S->getRParenLoc());
+ Record.AddStmt(S->getRangeStmt());
+ Record.AddStmt(S->getBeginStmt());
+ Record.AddStmt(S->getEndStmt());
+ Record.AddStmt(S->getCond());
+ Record.AddStmt(S->getInc());
+ Record.AddStmt(S->getLoopVarStmt());
+ Record.AddStmt(S->getBody());
Code = serialization::STMT_CXX_FOR_RANGE;
}
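// The range-for serializer now writes getBeginStmt() and getEndStmt()
// separately where the old code wrote a single combined getBeginEndStmt();
// this tracks the AST-level split of the __begin/__end declaration, which
// allows the two iterators to have different types (C++17 range-based for).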
void ASTStmtWriter::VisitMSDependentExistsStmt(MSDependentExistsStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getKeywordLoc(), Record);
+ Record.AddSourceLocation(S->getKeywordLoc());
Record.push_back(S->isIfExists());
- Writer.AddNestedNameSpecifierLoc(S->getQualifierLoc(), Record);
- Writer.AddDeclarationNameInfo(S->getNameInfo(), Record);
- Writer.AddStmt(S->getSubStmt());
+ Record.AddNestedNameSpecifierLoc(S->getQualifierLoc());
+ Record.AddDeclarationNameInfo(S->getNameInfo());
+ Record.AddStmt(S->getSubStmt());
Code = serialization::STMT_MS_DEPENDENT_EXISTS;
}
void ASTStmtWriter::VisitCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
VisitCallExpr(E);
Record.push_back(E->getOperator());
- Writer.AddSourceRange(E->Range, Record);
+ Record.AddSourceRange(E->Range);
Record.push_back(E->isFPContractable());
Code = serialization::EXPR_CXX_OPERATOR_CALL;
}
@@ -1196,22 +1214,31 @@ void ASTStmtWriter::VisitCXXConstructExpr(CXXConstructExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumArgs());
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
- Writer.AddStmt(E->getArg(I));
- Writer.AddDeclRef(E->getConstructor(), Record);
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddStmt(E->getArg(I));
+ Record.AddDeclRef(E->getConstructor());
+ Record.AddSourceLocation(E->getLocation());
Record.push_back(E->isElidable());
Record.push_back(E->hadMultipleCandidates());
Record.push_back(E->isListInitialization());
Record.push_back(E->isStdInitListInitialization());
Record.push_back(E->requiresZeroInitialization());
Record.push_back(E->getConstructionKind()); // FIXME: stable encoding
- Writer.AddSourceRange(E->getParenOrBraceRange(), Record);
+ Record.AddSourceRange(E->getParenOrBraceRange());
Code = serialization::EXPR_CXX_CONSTRUCT;
}
+void ASTStmtWriter::VisitCXXInheritedCtorInitExpr(CXXInheritedCtorInitExpr *E) {
+ VisitExpr(E);
+ Record.AddDeclRef(E->getConstructor());
+ Record.AddSourceLocation(E->getLocation());
+ Record.push_back(E->constructsVBase());
+ Record.push_back(E->inheritedFromVBase());
+ Code = serialization::EXPR_CXX_INHERITED_CTOR_INIT;
+}
+
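// CXXInheritedCtorInitExpr is new in this import: it represents the implicit
// call a C++11 inheriting constructor makes to the constructor it inherits,
// so the record only needs the constructor reference, the use location, and
// the two virtual-base flags written above.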
void ASTStmtWriter::VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *E) {
VisitCXXConstructExpr(E);
- Writer.AddTypeSourceInfo(E->getTypeSourceInfo(), Record);
+ Record.AddTypeSourceInfo(E->getTypeSourceInfo());
Code = serialization::EXPR_CXX_TEMPORARY_OBJECT;
}
@@ -1222,18 +1249,18 @@ void ASTStmtWriter::VisitLambdaExpr(LambdaExpr *E) {
if (E->HasArrayIndexVars)
NumArrayIndexVars = E->getArrayIndexStarts()[E->NumCaptures];
Record.push_back(NumArrayIndexVars);
- Writer.AddSourceRange(E->IntroducerRange, Record);
+ Record.AddSourceRange(E->IntroducerRange);
Record.push_back(E->CaptureDefault); // FIXME: stable encoding
- Writer.AddSourceLocation(E->CaptureDefaultLoc, Record);
+ Record.AddSourceLocation(E->CaptureDefaultLoc);
Record.push_back(E->ExplicitParams);
Record.push_back(E->ExplicitResultType);
- Writer.AddSourceLocation(E->ClosingBrace, Record);
+ Record.AddSourceLocation(E->ClosingBrace);
// Add capture initializers.
for (LambdaExpr::capture_init_iterator C = E->capture_init_begin(),
CEnd = E->capture_init_end();
C != CEnd; ++C) {
- Writer.AddStmt(*C);
+ Record.AddStmt(*C);
}
// Add array index variables, if any.
@@ -1242,7 +1269,7 @@ void ASTStmtWriter::VisitLambdaExpr(LambdaExpr *E) {
E->getArrayIndexStarts() + E->NumCaptures + 1);
VarDecl **ArrayIndexVars = E->getArrayIndexVars();
for (unsigned I = 0; I != NumArrayIndexVars; ++I)
- Writer.AddDeclRef(ArrayIndexVars[I], Record);
+ Record.AddDeclRef(ArrayIndexVars[I]);
}
Code = serialization::EXPR_LAMBDA;
@@ -1250,15 +1277,14 @@ void ASTStmtWriter::VisitLambdaExpr(LambdaExpr *E) {
void ASTStmtWriter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddStmt(E->getSubExpr());
Code = serialization::EXPR_CXX_STD_INITIALIZER_LIST;
}
void ASTStmtWriter::VisitCXXNamedCastExpr(CXXNamedCastExpr *E) {
VisitExplicitCastExpr(E);
- Writer.AddSourceRange(SourceRange(E->getOperatorLoc(), E->getRParenLoc()),
- Record);
- Writer.AddSourceRange(E->getAngleBrackets(), Record);
+ Record.AddSourceRange(SourceRange(E->getOperatorLoc(), E->getRParenLoc()));
+ Record.AddSourceRange(E->getAngleBrackets());
}
void ASTStmtWriter::VisitCXXStaticCastExpr(CXXStaticCastExpr *E) {
@@ -1283,82 +1309,82 @@ void ASTStmtWriter::VisitCXXConstCastExpr(CXXConstCastExpr *E) {
void ASTStmtWriter::VisitCXXFunctionalCastExpr(CXXFunctionalCastExpr *E) {
VisitExplicitCastExpr(E);
- Writer.AddSourceLocation(E->getLParenLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddSourceLocation(E->getLParenLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_CXX_FUNCTIONAL_CAST;
}
void ASTStmtWriter::VisitUserDefinedLiteral(UserDefinedLiteral *E) {
VisitCallExpr(E);
- Writer.AddSourceLocation(E->UDSuffixLoc, Record);
+ Record.AddSourceLocation(E->UDSuffixLoc);
Code = serialization::EXPR_USER_DEFINED_LITERAL;
}
void ASTStmtWriter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) {
VisitExpr(E);
Record.push_back(E->getValue());
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
Code = serialization::EXPR_CXX_BOOL_LITERAL;
}
void ASTStmtWriter::VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
Code = serialization::EXPR_CXX_NULL_PTR_LITERAL;
}
void ASTStmtWriter::VisitCXXTypeidExpr(CXXTypeidExpr *E) {
VisitExpr(E);
- Writer.AddSourceRange(E->getSourceRange(), Record);
+ Record.AddSourceRange(E->getSourceRange());
if (E->isTypeOperand()) {
- Writer.AddTypeSourceInfo(E->getTypeOperandSourceInfo(), Record);
+ Record.AddTypeSourceInfo(E->getTypeOperandSourceInfo());
Code = serialization::EXPR_CXX_TYPEID_TYPE;
} else {
- Writer.AddStmt(E->getExprOperand());
+ Record.AddStmt(E->getExprOperand());
Code = serialization::EXPR_CXX_TYPEID_EXPR;
}
}
void ASTStmtWriter::VisitCXXThisExpr(CXXThisExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddSourceLocation(E->getLocation());
Record.push_back(E->isImplicit());
Code = serialization::EXPR_CXX_THIS;
}
void ASTStmtWriter::VisitCXXThrowExpr(CXXThrowExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getThrowLoc(), Record);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddSourceLocation(E->getThrowLoc());
+ Record.AddStmt(E->getSubExpr());
Record.push_back(E->isThrownVariableInScope());
Code = serialization::EXPR_CXX_THROW;
}
void ASTStmtWriter::VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getParam(), Record);
- Writer.AddSourceLocation(E->getUsedLocation(), Record);
+ Record.AddDeclRef(E->getParam());
+ Record.AddSourceLocation(E->getUsedLocation());
Code = serialization::EXPR_CXX_DEFAULT_ARG;
}
void ASTStmtWriter::VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getField(), Record);
- Writer.AddSourceLocation(E->getExprLoc(), Record);
+ Record.AddDeclRef(E->getField());
+ Record.AddSourceLocation(E->getExprLoc());
Code = serialization::EXPR_CXX_DEFAULT_INIT;
}
void ASTStmtWriter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
VisitExpr(E);
- Writer.AddCXXTemporary(E->getTemporary(), Record);
- Writer.AddStmt(E->getSubExpr());
+ Record.AddCXXTemporary(E->getTemporary());
+ Record.AddStmt(E->getSubExpr());
Code = serialization::EXPR_CXX_BIND_TEMPORARY;
}
void ASTStmtWriter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) {
VisitExpr(E);
- Writer.AddTypeSourceInfo(E->getTypeSourceInfo(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddTypeSourceInfo(E->getTypeSourceInfo());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_CXX_SCALAR_VALUE_INIT;
}
@@ -1369,15 +1395,15 @@ void ASTStmtWriter::VisitCXXNewExpr(CXXNewExpr *E) {
Record.push_back(E->doesUsualArrayDeleteWantSize());
Record.push_back(E->getNumPlacementArgs());
Record.push_back(E->StoredInitializationStyle);
- Writer.AddDeclRef(E->getOperatorNew(), Record);
- Writer.AddDeclRef(E->getOperatorDelete(), Record);
- Writer.AddTypeSourceInfo(E->getAllocatedTypeSourceInfo(), Record);
- Writer.AddSourceRange(E->getTypeIdParens(), Record);
- Writer.AddSourceRange(E->getSourceRange(), Record);
- Writer.AddSourceRange(E->getDirectInitRange(), Record);
+ Record.AddDeclRef(E->getOperatorNew());
+ Record.AddDeclRef(E->getOperatorDelete());
+ Record.AddTypeSourceInfo(E->getAllocatedTypeSourceInfo());
+ Record.AddSourceRange(E->getTypeIdParens());
+ Record.AddSourceRange(E->getSourceRange());
+ Record.AddSourceRange(E->getDirectInitRange());
for (CXXNewExpr::arg_iterator I = E->raw_arg_begin(), e = E->raw_arg_end();
I != e; ++I)
- Writer.AddStmt(*I);
+ Record.AddStmt(*I);
Code = serialization::EXPR_CXX_NEW;
}
@@ -1388,9 +1414,9 @@ void ASTStmtWriter::VisitCXXDeleteExpr(CXXDeleteExpr *E) {
Record.push_back(E->isArrayForm());
Record.push_back(E->isArrayFormAsWritten());
Record.push_back(E->doesUsualArrayDeleteWantSize());
- Writer.AddDeclRef(E->getOperatorDelete(), Record);
- Writer.AddStmt(E->getArgument());
- Writer.AddSourceLocation(E->getSourceRange().getBegin(), Record);
+ Record.AddDeclRef(E->getOperatorDelete());
+ Record.AddStmt(E->getArgument());
+ Record.AddSourceLocation(E->getSourceRange().getBegin());
Code = serialization::EXPR_CXX_DELETE;
}
@@ -1398,20 +1424,20 @@ void ASTStmtWriter::VisitCXXDeleteExpr(CXXDeleteExpr *E) {
void ASTStmtWriter::VisitCXXPseudoDestructorExpr(CXXPseudoDestructorExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getBase());
+ Record.AddStmt(E->getBase());
Record.push_back(E->isArrow());
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
- Writer.AddTypeSourceInfo(E->getScopeTypeInfo(), Record);
- Writer.AddSourceLocation(E->getColonColonLoc(), Record);
- Writer.AddSourceLocation(E->getTildeLoc(), Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
+ Record.AddTypeSourceInfo(E->getScopeTypeInfo());
+ Record.AddSourceLocation(E->getColonColonLoc());
+ Record.AddSourceLocation(E->getTildeLoc());
// PseudoDestructorTypeStorage.
- Writer.AddIdentifierRef(E->getDestroyedTypeIdentifier(), Record);
+ Record.AddIdentifierRef(E->getDestroyedTypeIdentifier());
if (E->getDestroyedTypeIdentifier())
- Writer.AddSourceLocation(E->getDestroyedTypeLoc(), Record);
+ Record.AddSourceLocation(E->getDestroyedTypeLoc());
else
- Writer.AddTypeSourceInfo(E->getDestroyedTypeInfo(), Record);
+ Record.AddTypeSourceInfo(E->getDestroyedTypeInfo());
Code = serialization::EXPR_CXX_PSEUDO_DESTRUCTOR;
}
@@ -1420,9 +1446,10 @@ void ASTStmtWriter::VisitExprWithCleanups(ExprWithCleanups *E) {
VisitExpr(E);
Record.push_back(E->getNumObjects());
for (unsigned i = 0, e = E->getNumObjects(); i != e; ++i)
- Writer.AddDeclRef(E->getObject(i), Record);
-
- Writer.AddStmt(E->getSubExpr());
+ Record.AddDeclRef(E->getObject(i));
+
+ Record.push_back(E->cleanupsHaveSideEffects());
+ Record.AddStmt(E->getSubExpr());
Code = serialization::EXPR_EXPR_WITH_CLEANUPS;
}
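// Note the added cleanupsHaveSideEffects() bit changes the on-disk layout of
// EXPR_EXPR_WITH_CLEANUPS; that is safe because AST files are only readable
// by the exact compiler version that produced them.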
@@ -1443,15 +1470,15 @@ ASTStmtWriter::VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E){
}
if (!E->isImplicitAccess())
- Writer.AddStmt(E->getBase());
+ Record.AddStmt(E->getBase());
else
- Writer.AddStmt(nullptr);
- Writer.AddTypeRef(E->getBaseType(), Record);
+ Record.AddStmt(nullptr);
+ Record.AddTypeRef(E->getBaseType());
Record.push_back(E->isArrow());
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
- Writer.AddDeclRef(E->getFirstQualifierFoundInScope(), Record);
- Writer.AddDeclarationNameInfo(E->MemberNameInfo, Record);
+ Record.AddSourceLocation(E->getOperatorLoc());
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
+ Record.AddDeclRef(E->getFirstQualifierFoundInScope());
+ Record.AddDeclarationNameInfo(E->MemberNameInfo);
Code = serialization::EXPR_CXX_DEPENDENT_SCOPE_MEMBER;
}
@@ -1471,8 +1498,8 @@ ASTStmtWriter::VisitDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *E) {
E->getTrailingObjects<TemplateArgumentLoc>());
}
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
- Writer.AddDeclarationNameInfo(E->NameInfo, Record);
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
+ Record.AddDeclarationNameInfo(E->NameInfo);
Code = serialization::EXPR_CXX_DEPENDENT_SCOPE_DECL_REF;
}
@@ -1482,10 +1509,10 @@ ASTStmtWriter::VisitCXXUnresolvedConstructExpr(CXXUnresolvedConstructExpr *E) {
Record.push_back(E->arg_size());
for (CXXUnresolvedConstructExpr::arg_iterator
ArgI = E->arg_begin(), ArgE = E->arg_end(); ArgI != ArgE; ++ArgI)
- Writer.AddStmt(*ArgI);
- Writer.AddTypeSourceInfo(E->getTypeSourceInfo(), Record);
- Writer.AddSourceLocation(E->getLParenLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
+ Record.AddStmt(*ArgI);
+ Record.AddTypeSourceInfo(E->getTypeSourceInfo());
+ Record.AddSourceLocation(E->getLParenLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
Code = serialization::EXPR_CXX_UNRESOLVED_CONSTRUCT;
}
@@ -1506,21 +1533,21 @@ void ASTStmtWriter::VisitOverloadExpr(OverloadExpr *E) {
Record.push_back(E->getNumDecls());
for (OverloadExpr::decls_iterator
OvI = E->decls_begin(), OvE = E->decls_end(); OvI != OvE; ++OvI) {
- Writer.AddDeclRef(OvI.getDecl(), Record);
+ Record.AddDeclRef(OvI.getDecl());
Record.push_back(OvI.getAccess());
}
- Writer.AddDeclarationNameInfo(E->NameInfo, Record);
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
+ Record.AddDeclarationNameInfo(E->NameInfo);
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
}
void ASTStmtWriter::VisitUnresolvedMemberExpr(UnresolvedMemberExpr *E) {
VisitOverloadExpr(E);
Record.push_back(E->isArrow());
Record.push_back(E->hasUnresolvedUsing());
- Writer.AddStmt(!E->isImplicitAccess() ? E->getBase() : nullptr);
- Writer.AddTypeRef(E->getBaseType(), Record);
- Writer.AddSourceLocation(E->getOperatorLoc(), Record);
+ Record.AddStmt(!E->isImplicitAccess() ? E->getBase() : nullptr);
+ Record.AddTypeRef(E->getBaseType());
+ Record.AddSourceLocation(E->getOperatorLoc());
Code = serialization::EXPR_CXX_UNRESOLVED_MEMBER;
}
@@ -1528,7 +1555,7 @@ void ASTStmtWriter::VisitUnresolvedLookupExpr(UnresolvedLookupExpr *E) {
VisitOverloadExpr(E);
Record.push_back(E->requiresADL());
Record.push_back(E->isOverloaded());
- Writer.AddDeclRef(E->getNamingClass(), Record);
+ Record.AddDeclRef(E->getNamingClass());
Code = serialization::EXPR_CXX_UNRESOLVED_LOOKUP;
}
@@ -1537,9 +1564,9 @@ void ASTStmtWriter::VisitTypeTraitExpr(TypeTraitExpr *E) {
Record.push_back(E->TypeTraitExprBits.NumArgs);
Record.push_back(E->TypeTraitExprBits.Kind); // FIXME: Stable encoding
Record.push_back(E->TypeTraitExprBits.Value);
- Writer.AddSourceRange(E->getSourceRange(), Record);
+ Record.AddSourceRange(E->getSourceRange());
for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
- Writer.AddTypeSourceInfo(E->getArg(I), Record);
+ Record.AddTypeSourceInfo(E->getArg(I));
Code = serialization::EXPR_TYPE_TRAIT;
}
@@ -1547,8 +1574,8 @@ void ASTStmtWriter::VisitArrayTypeTraitExpr(ArrayTypeTraitExpr *E) {
VisitExpr(E);
Record.push_back(E->getTrait());
Record.push_back(E->getValue());
- Writer.AddSourceRange(E->getSourceRange(), Record);
- Writer.AddTypeSourceInfo(E->getQueriedTypeSourceInfo(), Record);
+ Record.AddSourceRange(E->getSourceRange());
+ Record.AddTypeSourceInfo(E->getQueriedTypeSourceInfo());
Code = serialization::EXPR_ARRAY_TYPE_TRAIT;
}
@@ -1556,24 +1583,24 @@ void ASTStmtWriter::VisitExpressionTraitExpr(ExpressionTraitExpr *E) {
VisitExpr(E);
Record.push_back(E->getTrait());
Record.push_back(E->getValue());
- Writer.AddSourceRange(E->getSourceRange(), Record);
- Writer.AddStmt(E->getQueriedExpression());
+ Record.AddSourceRange(E->getSourceRange());
+ Record.AddStmt(E->getQueriedExpression());
Code = serialization::EXPR_CXX_EXPRESSION_TRAIT;
}
void ASTStmtWriter::VisitCXXNoexceptExpr(CXXNoexceptExpr *E) {
VisitExpr(E);
Record.push_back(E->getValue());
- Writer.AddSourceRange(E->getSourceRange(), Record);
- Writer.AddStmt(E->getOperand());
+ Record.AddSourceRange(E->getSourceRange());
+ Record.AddStmt(E->getOperand());
Code = serialization::EXPR_CXX_NOEXCEPT;
}
void ASTStmtWriter::VisitPackExpansionExpr(PackExpansionExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getEllipsisLoc(), Record);
+ Record.AddSourceLocation(E->getEllipsisLoc());
Record.push_back(E->NumExpansions);
- Writer.AddStmt(E->getPattern());
+ Record.AddStmt(E->getPattern());
Code = serialization::EXPR_PACK_EXPANSION;
}
@@ -1581,13 +1608,13 @@ void ASTStmtWriter::VisitSizeOfPackExpr(SizeOfPackExpr *E) {
VisitExpr(E);
Record.push_back(E->isPartiallySubstituted() ? E->getPartialArguments().size()
: 0);
- Writer.AddSourceLocation(E->OperatorLoc, Record);
- Writer.AddSourceLocation(E->PackLoc, Record);
- Writer.AddSourceLocation(E->RParenLoc, Record);
- Writer.AddDeclRef(E->Pack, Record);
+ Record.AddSourceLocation(E->OperatorLoc);
+ Record.AddSourceLocation(E->PackLoc);
+ Record.AddSourceLocation(E->RParenLoc);
+ Record.AddDeclRef(E->Pack);
if (E->isPartiallySubstituted()) {
for (const auto &TA : E->getPartialArguments())
- Writer.AddTemplateArgument(TA, Record);
+ Record.AddTemplateArgument(TA);
} else if (!E->isValueDependent()) {
Record.push_back(E->getPackLength());
}
@@ -1597,62 +1624,62 @@ void ASTStmtWriter::VisitSizeOfPackExpr(SizeOfPackExpr *E) {
void ASTStmtWriter::VisitSubstNonTypeTemplateParmExpr(
SubstNonTypeTemplateParmExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getParameter(), Record);
- Writer.AddSourceLocation(E->getNameLoc(), Record);
- Writer.AddStmt(E->getReplacement());
+ Record.AddDeclRef(E->getParameter());
+ Record.AddSourceLocation(E->getNameLoc());
+ Record.AddStmt(E->getReplacement());
Code = serialization::EXPR_SUBST_NON_TYPE_TEMPLATE_PARM;
}
void ASTStmtWriter::VisitSubstNonTypeTemplateParmPackExpr(
SubstNonTypeTemplateParmPackExpr *E) {
VisitExpr(E);
- Writer.AddDeclRef(E->getParameterPack(), Record);
- Writer.AddTemplateArgument(E->getArgumentPack(), Record);
- Writer.AddSourceLocation(E->getParameterPackLocation(), Record);
+ Record.AddDeclRef(E->getParameterPack());
+ Record.AddTemplateArgument(E->getArgumentPack());
+ Record.AddSourceLocation(E->getParameterPackLocation());
Code = serialization::EXPR_SUBST_NON_TYPE_TEMPLATE_PARM_PACK;
}
void ASTStmtWriter::VisitFunctionParmPackExpr(FunctionParmPackExpr *E) {
VisitExpr(E);
Record.push_back(E->getNumExpansions());
- Writer.AddDeclRef(E->getParameterPack(), Record);
- Writer.AddSourceLocation(E->getParameterPackLocation(), Record);
+ Record.AddDeclRef(E->getParameterPack());
+ Record.AddSourceLocation(E->getParameterPackLocation());
for (FunctionParmPackExpr::iterator I = E->begin(), End = E->end();
I != End; ++I)
- Writer.AddDeclRef(*I, Record);
+ Record.AddDeclRef(*I);
Code = serialization::EXPR_FUNCTION_PARM_PACK;
}
void ASTStmtWriter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getTemporary());
- Writer.AddDeclRef(E->getExtendingDecl(), Record);
+ Record.AddStmt(E->getTemporary());
+ Record.AddDeclRef(E->getExtendingDecl());
Record.push_back(E->getManglingNumber());
Code = serialization::EXPR_MATERIALIZE_TEMPORARY;
}
void ASTStmtWriter::VisitCXXFoldExpr(CXXFoldExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->LParenLoc, Record);
- Writer.AddSourceLocation(E->EllipsisLoc, Record);
- Writer.AddSourceLocation(E->RParenLoc, Record);
- Writer.AddStmt(E->SubExprs[0]);
- Writer.AddStmt(E->SubExprs[1]);
+ Record.AddSourceLocation(E->LParenLoc);
+ Record.AddSourceLocation(E->EllipsisLoc);
+ Record.AddSourceLocation(E->RParenLoc);
+ Record.AddStmt(E->SubExprs[0]);
+ Record.AddStmt(E->SubExprs[1]);
Record.push_back(E->Opcode);
Code = serialization::EXPR_CXX_FOLD;
}
void ASTStmtWriter::VisitOpaqueValueExpr(OpaqueValueExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getSourceExpr());
- Writer.AddSourceLocation(E->getLocation(), Record);
+ Record.AddStmt(E->getSourceExpr());
+ Record.AddSourceLocation(E->getLocation());
Code = serialization::EXPR_OPAQUE_VALUE;
}
void ASTStmtWriter::VisitTypoExpr(TypoExpr *E) {
VisitExpr(E);
// TODO: Figure out sane writer behavior for a TypoExpr, if necessary
- assert(false && "Cannot write TypoExpr nodes");
+ llvm_unreachable("Cannot write TypoExpr nodes");
}
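// llvm_unreachable is preferred over assert(false) in LLVM: it states intent,
// still fires with a message in asserts-enabled builds, and in release builds
// becomes a compiler hint that the path cannot be reached.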
//===----------------------------------------------------------------------===//
@@ -1661,7 +1688,7 @@ void ASTStmtWriter::VisitTypoExpr(TypoExpr *E) {
void ASTStmtWriter::VisitCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
VisitCallExpr(E);
- Writer.AddStmt(E->getConfig());
+ Record.AddStmt(E->getConfig());
Code = serialization::EXPR_CUDA_KERNEL_CALL;
}
@@ -1670,9 +1697,9 @@ void ASTStmtWriter::VisitCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
//===----------------------------------------------------------------------===//
void ASTStmtWriter::VisitAsTypeExpr(AsTypeExpr *E) {
VisitExpr(E);
- Writer.AddSourceLocation(E->getBuiltinLoc(), Record);
- Writer.AddSourceLocation(E->getRParenLoc(), Record);
- Writer.AddStmt(E->getSrcExpr());
+ Record.AddSourceLocation(E->getBuiltinLoc());
+ Record.AddSourceLocation(E->getRParenLoc());
+ Record.AddStmt(E->getSrcExpr());
Code = serialization::EXPR_ASTYPE;
}
@@ -1682,60 +1709,61 @@ void ASTStmtWriter::VisitAsTypeExpr(AsTypeExpr *E) {
void ASTStmtWriter::VisitMSPropertyRefExpr(MSPropertyRefExpr *E) {
VisitExpr(E);
Record.push_back(E->isArrow());
- Writer.AddStmt(E->getBaseExpr());
- Writer.AddNestedNameSpecifierLoc(E->getQualifierLoc(), Record);
- Writer.AddSourceLocation(E->getMemberLoc(), Record);
- Writer.AddDeclRef(E->getPropertyDecl(), Record);
+ Record.AddStmt(E->getBaseExpr());
+ Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
+ Record.AddSourceLocation(E->getMemberLoc());
+ Record.AddDeclRef(E->getPropertyDecl());
Code = serialization::EXPR_CXX_PROPERTY_REF_EXPR;
}
void ASTStmtWriter::VisitMSPropertySubscriptExpr(MSPropertySubscriptExpr *E) {
VisitExpr(E);
- Writer.AddStmt(E->getBase());
- Writer.AddStmt(E->getIdx());
- Writer.AddSourceLocation(E->getRBracketLoc(), Record);
+ Record.AddStmt(E->getBase());
+ Record.AddStmt(E->getIdx());
+ Record.AddSourceLocation(E->getRBracketLoc());
Code = serialization::EXPR_CXX_PROPERTY_SUBSCRIPT_EXPR;
}
void ASTStmtWriter::VisitCXXUuidofExpr(CXXUuidofExpr *E) {
VisitExpr(E);
- Writer.AddSourceRange(E->getSourceRange(), Record);
+ Record.AddSourceRange(E->getSourceRange());
+ Record.AddString(E->getUuidStr());
if (E->isTypeOperand()) {
- Writer.AddTypeSourceInfo(E->getTypeOperandSourceInfo(), Record);
+ Record.AddTypeSourceInfo(E->getTypeOperandSourceInfo());
Code = serialization::EXPR_CXX_UUIDOF_TYPE;
} else {
- Writer.AddStmt(E->getExprOperand());
+ Record.AddStmt(E->getExprOperand());
Code = serialization::EXPR_CXX_UUIDOF_EXPR;
}
}
void ASTStmtWriter::VisitSEHExceptStmt(SEHExceptStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getExceptLoc(), Record);
- Writer.AddStmt(S->getFilterExpr());
- Writer.AddStmt(S->getBlock());
+ Record.AddSourceLocation(S->getExceptLoc());
+ Record.AddStmt(S->getFilterExpr());
+ Record.AddStmt(S->getBlock());
Code = serialization::STMT_SEH_EXCEPT;
}
void ASTStmtWriter::VisitSEHFinallyStmt(SEHFinallyStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getFinallyLoc(), Record);
- Writer.AddStmt(S->getBlock());
+ Record.AddSourceLocation(S->getFinallyLoc());
+ Record.AddStmt(S->getBlock());
Code = serialization::STMT_SEH_FINALLY;
}
void ASTStmtWriter::VisitSEHTryStmt(SEHTryStmt *S) {
VisitStmt(S);
Record.push_back(S->getIsCXXTry());
- Writer.AddSourceLocation(S->getTryLoc(), Record);
- Writer.AddStmt(S->getTryBlock());
- Writer.AddStmt(S->getHandler());
+ Record.AddSourceLocation(S->getTryLoc());
+ Record.AddStmt(S->getTryBlock());
+ Record.AddStmt(S->getHandler());
Code = serialization::STMT_SEH_TRY;
}
void ASTStmtWriter::VisitSEHLeaveStmt(SEHLeaveStmt *S) {
VisitStmt(S);
- Writer.AddSourceLocation(S->getLeaveLoc(), Record);
+ Record.AddSourceLocation(S->getLeaveLoc());
Code = serialization::STMT_SEH_LEAVE;
}
@@ -1745,86 +1773,95 @@ void ASTStmtWriter::VisitSEHLeaveStmt(SEHLeaveStmt *S) {
namespace clang {
class OMPClauseWriter : public OMPClauseVisitor<OMPClauseWriter> {
- ASTStmtWriter *Writer;
- ASTWriter::RecordData &Record;
+ ASTRecordWriter &Record;
public:
- OMPClauseWriter(ASTStmtWriter *W, ASTWriter::RecordData &Record)
- : Writer(W), Record(Record) { }
+ OMPClauseWriter(ASTRecordWriter &Record) : Record(Record) {}
#define OPENMP_CLAUSE(Name, Class) \
void Visit##Class(Class *S);
#include "clang/Basic/OpenMPKinds.def"
void writeClause(OMPClause *C);
+ void VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C);
+ void VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C);
};
}
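// A minimal sketch of how this visitor is driven from the statement writer
// (illustrative; the actual call sites are elsewhere in this file):
//   OMPClauseWriter ClauseWriter(Record);
//   for (OMPClause *C : D->clauses())
//     ClauseWriter.writeClause(C);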
void OMPClauseWriter::writeClause(OMPClause *C) {
Record.push_back(C->getClauseKind());
Visit(C);
- Writer->Writer.AddSourceLocation(C->getLocStart(), Record);
- Writer->Writer.AddSourceLocation(C->getLocEnd(), Record);
+ Record.AddSourceLocation(C->getLocStart());
+ Record.AddSourceLocation(C->getLocEnd());
+}
+
+void OMPClauseWriter::VisitOMPClauseWithPreInit(OMPClauseWithPreInit *C) {
+ Record.AddStmt(C->getPreInitStmt());
+}
+
+void OMPClauseWriter::VisitOMPClauseWithPostUpdate(OMPClauseWithPostUpdate *C) {
+ VisitOMPClauseWithPreInit(C);
+ Record.AddStmt(C->getPostUpdateExpr());
}
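// These two helpers factor out state shared by several clause kinds: a helper
// statement to run before the directive (pre-init) and, building on that, an
// expression to evaluate afterwards (post-update). Clause visitors opt in
// explicitly: VisitOMPScheduleClause and VisitOMPFirstprivateClause call the
// pre-init variant, while the lastprivate, reduction and linear clause
// visitors call the post-update variant.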
void OMPClauseWriter::VisitOMPIfClause(OMPIfClause *C) {
Record.push_back(C->getNameModifier());
- Writer->Writer.AddSourceLocation(C->getNameModifierLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
- Writer->Writer.AddStmt(C->getCondition());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getNameModifierLoc());
+ Record.AddSourceLocation(C->getColonLoc());
+ Record.AddStmt(C->getCondition());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPFinalClause(OMPFinalClause *C) {
- Writer->Writer.AddStmt(C->getCondition());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getCondition());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
- Writer->Writer.AddStmt(C->getNumThreads());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getNumThreads());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPSafelenClause(OMPSafelenClause *C) {
- Writer->Writer.AddStmt(C->getSafelen());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getSafelen());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPSimdlenClause(OMPSimdlenClause *C) {
- Writer->Writer.AddStmt(C->getSimdlen());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getSimdlen());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPCollapseClause(OMPCollapseClause *C) {
- Writer->Writer.AddStmt(C->getNumForLoops());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getNumForLoops());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) {
Record.push_back(C->getDefaultKind());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getDefaultKindKwLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getDefaultKindKwLoc());
}
void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) {
Record.push_back(C->getProcBindKind());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getProcBindKindKwLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getProcBindKindKwLoc());
}
void OMPClauseWriter::VisitOMPScheduleClause(OMPScheduleClause *C) {
+ VisitOMPClauseWithPreInit(C);
Record.push_back(C->getScheduleKind());
Record.push_back(C->getFirstScheduleModifier());
Record.push_back(C->getSecondScheduleModifier());
- Writer->Writer.AddStmt(C->getChunkSize());
- Writer->Writer.AddStmt(C->getHelperChunkSize());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getFirstScheduleModifierLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getSecondScheduleModifierLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getScheduleKindLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getCommaLoc(), Record);
+ Record.AddStmt(C->getChunkSize());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getFirstScheduleModifierLoc());
+ Record.AddSourceLocation(C->getSecondScheduleModifierLoc());
+ Record.AddSourceLocation(C->getScheduleKindLoc());
+ Record.AddSourceLocation(C->getCommaLoc());
}
void OMPClauseWriter::VisitOMPOrderedClause(OMPOrderedClause *C) {
- Writer->Writer.AddStmt(C->getNumForLoops());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getNumForLoops());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPNowaitClause(OMPNowaitClause *) {}
@@ -1851,204 +1888,295 @@ void OMPClauseWriter::VisitOMPNogroupClause(OMPNogroupClause *) {}
void OMPClauseWriter::VisitOMPPrivateClause(OMPPrivateClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->private_copies()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
}
void OMPClauseWriter::VisitOMPFirstprivateClause(OMPFirstprivateClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ VisitOMPClauseWithPreInit(C);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->private_copies()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->inits()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
}
void OMPClauseWriter::VisitOMPLastprivateClause(OMPLastprivateClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ VisitOMPClauseWithPostUpdate(C);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
for (auto *E : C->private_copies())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->source_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->destination_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->assignment_ops())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
}
void OMPClauseWriter::VisitOMPSharedClause(OMPSharedClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
void OMPClauseWriter::VisitOMPReductionClause(OMPReductionClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
- Writer->Writer.AddNestedNameSpecifierLoc(C->getQualifierLoc(), Record);
- Writer->Writer.AddDeclarationNameInfo(C->getNameInfo(), Record);
+ VisitOMPClauseWithPostUpdate(C);
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getColonLoc());
+ Record.AddNestedNameSpecifierLoc(C->getQualifierLoc());
+ Record.AddDeclarationNameInfo(C->getNameInfo());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
for (auto *VE : C->privates())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
for (auto *E : C->lhs_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->rhs_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->reduction_ops())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
}
void OMPClauseWriter::VisitOMPLinearClause(OMPLinearClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
+ VisitOMPClauseWithPostUpdate(C);
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getColonLoc());
Record.push_back(C->getModifier());
- Writer->Writer.AddSourceLocation(C->getModifierLoc(), Record);
+ Record.AddSourceLocation(C->getModifierLoc());
for (auto *VE : C->varlists()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->privates()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->inits()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->updates()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
for (auto *VE : C->finals()) {
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
- Writer->Writer.AddStmt(C->getStep());
- Writer->Writer.AddStmt(C->getCalcStep());
+ Record.AddStmt(C->getStep());
+ Record.AddStmt(C->getCalcStep());
}
void OMPClauseWriter::VisitOMPAlignedClause(OMPAlignedClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getColonLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
- Writer->Writer.AddStmt(C->getAlignment());
+ Record.AddStmt(VE);
+ Record.AddStmt(C->getAlignment());
}
void OMPClauseWriter::VisitOMPCopyinClause(OMPCopyinClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
for (auto *E : C->source_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->destination_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->assignment_ops())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
}
void OMPClauseWriter::VisitOMPCopyprivateClause(OMPCopyprivateClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
for (auto *E : C->source_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->destination_exprs())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
for (auto *E : C->assignment_ops())
- Writer->Writer.AddStmt(E);
+ Record.AddStmt(E);
}
void OMPClauseWriter::VisitOMPFlushClause(OMPFlushClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
}
void OMPClauseWriter::VisitOMPDependClause(OMPDependClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddSourceLocation(C->getLParenLoc());
Record.push_back(C->getDependencyKind());
- Writer->Writer.AddSourceLocation(C->getDependencyLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
+ Record.AddSourceLocation(C->getDependencyLoc());
+ Record.AddSourceLocation(C->getColonLoc());
for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddStmt(VE);
+ Record.AddStmt(C->getCounterValue());
}
void OMPClauseWriter::VisitOMPDeviceClause(OMPDeviceClause *C) {
- Writer->Writer.AddStmt(C->getDevice());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getDevice());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPMapClause(OMPMapClause *C) {
Record.push_back(C->varlist_size());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.push_back(C->getUniqueDeclarationsNum());
+ Record.push_back(C->getTotalComponentListNum());
+ Record.push_back(C->getTotalComponentsNum());
+ Record.AddSourceLocation(C->getLParenLoc());
Record.push_back(C->getMapTypeModifier());
Record.push_back(C->getMapType());
- Writer->Writer.AddSourceLocation(C->getMapLoc(), Record);
- Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
- for (auto *VE : C->varlists())
- Writer->Writer.AddStmt(VE);
+ Record.AddSourceLocation(C->getMapLoc());
+ Record.AddSourceLocation(C->getColonLoc());
+ for (auto *E : C->varlists())
+ Record.AddStmt(E);
+ for (auto *D : C->all_decls())
+ Record.AddDeclRef(D);
+ for (auto N : C->all_num_lists())
+ Record.push_back(N);
+ for (auto N : C->all_lists_sizes())
+ Record.push_back(N);
+ for (auto &M : C->all_components()) {
+ Record.AddStmt(M.getAssociatedExpression());
+ Record.AddDeclRef(M.getAssociatedDeclaration());
+ }
}
void OMPClauseWriter::VisitOMPNumTeamsClause(OMPNumTeamsClause *C) {
- Writer->Writer.AddStmt(C->getNumTeams());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getNumTeams());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPThreadLimitClause(OMPThreadLimitClause *C) {
- Writer->Writer.AddStmt(C->getThreadLimit());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getThreadLimit());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPPriorityClause(OMPPriorityClause *C) {
- Writer->Writer.AddStmt(C->getPriority());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getPriority());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPGrainsizeClause(OMPGrainsizeClause *C) {
- Writer->Writer.AddStmt(C->getGrainsize());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getGrainsize());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPNumTasksClause(OMPNumTasksClause *C) {
- Writer->Writer.AddStmt(C->getNumTasks());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getNumTasks());
+ Record.AddSourceLocation(C->getLParenLoc());
}
void OMPClauseWriter::VisitOMPHintClause(OMPHintClause *C) {
- Writer->Writer.AddStmt(C->getHint());
- Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+ Record.AddStmt(C->getHint());
+ Record.AddSourceLocation(C->getLParenLoc());
+}
+
+void OMPClauseWriter::VisitOMPDistScheduleClause(OMPDistScheduleClause *C) {
+ VisitOMPClauseWithPreInit(C);
+ Record.push_back(C->getDistScheduleKind());
+ Record.AddStmt(C->getChunkSize());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getDistScheduleKindLoc());
+ Record.AddSourceLocation(C->getCommaLoc());
+}
+
+void OMPClauseWriter::VisitOMPDefaultmapClause(OMPDefaultmapClause *C) {
+ Record.push_back(C->getDefaultmapKind());
+ Record.push_back(C->getDefaultmapModifier());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getDefaultmapModifierLoc());
+ Record.AddSourceLocation(C->getDefaultmapKindLoc());
+}
+
+void OMPClauseWriter::VisitOMPToClause(OMPToClause *C) {
+ Record.push_back(C->varlist_size());
+ Record.push_back(C->getUniqueDeclarationsNum());
+ Record.push_back(C->getTotalComponentListNum());
+ Record.push_back(C->getTotalComponentsNum());
+ Record.AddSourceLocation(C->getLParenLoc());
+ for (auto *E : C->varlists())
+ Record.AddStmt(E);
+ for (auto *D : C->all_decls())
+ Record.AddDeclRef(D);
+ for (auto N : C->all_num_lists())
+ Record.push_back(N);
+ for (auto N : C->all_lists_sizes())
+ Record.push_back(N);
+ for (auto &M : C->all_components()) {
+ Record.AddStmt(M.getAssociatedExpression());
+ Record.AddDeclRef(M.getAssociatedDeclaration());
+ }
+}
+
+void OMPClauseWriter::VisitOMPFromClause(OMPFromClause *C) {
+ Record.push_back(C->varlist_size());
+ Record.push_back(C->getUniqueDeclarationsNum());
+ Record.push_back(C->getTotalComponentListNum());
+ Record.push_back(C->getTotalComponentsNum());
+ Record.AddSourceLocation(C->getLParenLoc());
+ for (auto *E : C->varlists())
+ Record.AddStmt(E);
+ for (auto *D : C->all_decls())
+ Record.AddDeclRef(D);
+ for (auto N : C->all_num_lists())
+ Record.push_back(N);
+ for (auto N : C->all_lists_sizes())
+ Record.push_back(N);
+ for (auto &M : C->all_components()) {
+ Record.AddStmt(M.getAssociatedExpression());
+ Record.AddDeclRef(M.getAssociatedDeclaration());
+ }
+}
+
+void OMPClauseWriter::VisitOMPUseDevicePtrClause(OMPUseDevicePtrClause *C) {
+ Record.push_back(C->varlist_size());
+ Record.AddSourceLocation(C->getLParenLoc());
+ for (auto *VE : C->varlists()) {
+ Record.AddStmt(VE);
+ }
+}
+
+void OMPClauseWriter::VisitOMPIsDevicePtrClause(OMPIsDevicePtrClause *C) {
+ Record.push_back(C->varlist_size());
+ Record.AddSourceLocation(C->getLParenLoc());
+ for (auto *VE : C->varlists()) {
+ Record.AddStmt(VE);
+ }
}
//===----------------------------------------------------------------------===//
// OpenMP Directives.
//===----------------------------------------------------------------------===//
void ASTStmtWriter::VisitOMPExecutableDirective(OMPExecutableDirective *E) {
- Writer.AddSourceLocation(E->getLocStart(), Record);
- Writer.AddSourceLocation(E->getLocEnd(), Record);
- OMPClauseWriter ClauseWriter(this, Record);
+ Record.AddSourceLocation(E->getLocStart());
+ Record.AddSourceLocation(E->getLocEnd());
+ OMPClauseWriter ClauseWriter(Record);
for (unsigned i = 0; i < E->getNumClauses(); ++i) {
ClauseWriter.writeClause(E->getClause(i));
}
if (E->hasAssociatedStmt())
- Writer.AddStmt(E->getAssociatedStmt());
+ Record.AddStmt(E->getAssociatedStmt());
}
void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) {
@@ -2056,36 +2184,44 @@ void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) {
Record.push_back(D->getNumClauses());
Record.push_back(D->getCollapsedNumber());
VisitOMPExecutableDirective(D);
- Writer.AddStmt(D->getIterationVariable());
- Writer.AddStmt(D->getLastIteration());
- Writer.AddStmt(D->getCalcLastIteration());
- Writer.AddStmt(D->getPreCond());
- Writer.AddStmt(D->getCond());
- Writer.AddStmt(D->getInit());
- Writer.AddStmt(D->getInc());
- if (isOpenMPWorksharingDirective(D->getDirectiveKind())) {
- Writer.AddStmt(D->getIsLastIterVariable());
- Writer.AddStmt(D->getLowerBoundVariable());
- Writer.AddStmt(D->getUpperBoundVariable());
- Writer.AddStmt(D->getStrideVariable());
- Writer.AddStmt(D->getEnsureUpperBound());
- Writer.AddStmt(D->getNextLowerBound());
- Writer.AddStmt(D->getNextUpperBound());
+ Record.AddStmt(D->getIterationVariable());
+ Record.AddStmt(D->getLastIteration());
+ Record.AddStmt(D->getCalcLastIteration());
+ Record.AddStmt(D->getPreCond());
+ Record.AddStmt(D->getCond());
+ Record.AddStmt(D->getInit());
+ Record.AddStmt(D->getInc());
+ Record.AddStmt(D->getPreInits());
+ if (isOpenMPWorksharingDirective(D->getDirectiveKind()) ||
+ isOpenMPTaskLoopDirective(D->getDirectiveKind()) ||
+ isOpenMPDistributeDirective(D->getDirectiveKind())) {
+ Record.AddStmt(D->getIsLastIterVariable());
+ Record.AddStmt(D->getLowerBoundVariable());
+ Record.AddStmt(D->getUpperBoundVariable());
+ Record.AddStmt(D->getStrideVariable());
+ Record.AddStmt(D->getEnsureUpperBound());
+ Record.AddStmt(D->getNextLowerBound());
+ Record.AddStmt(D->getNextUpperBound());
+ Record.AddStmt(D->getNumIterations());
+ }
+ if (isOpenMPLoopBoundSharingDirective(D->getDirectiveKind())) {
+ Record.AddStmt(D->getPrevLowerBoundVariable());
+ Record.AddStmt(D->getPrevUpperBoundVariable());
}
for (auto I : D->counters()) {
- Writer.AddStmt(I);
+ Record.AddStmt(I);
}
for (auto I : D->private_counters()) {
- Writer.AddStmt(I);
+ Record.AddStmt(I);
}
for (auto I : D->inits()) {
- Writer.AddStmt(I);
+ Record.AddStmt(I);
}
for (auto I : D->updates()) {
- Writer.AddStmt(I);
+ Record.AddStmt(I);
}
for (auto I : D->finals()) {
- Writer.AddStmt(I);
+ Record.AddStmt(I);
}
}
@@ -2145,7 +2281,7 @@ void ASTStmtWriter::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
- Writer.AddDeclarationNameInfo(D->getDirectiveName(), Record);
+ Record.AddDeclarationNameInfo(D->getDirectiveName());
Code = serialization::STMT_OMP_CRITICAL_DIRECTIVE;
}
@@ -2182,10 +2318,10 @@ void ASTStmtWriter::VisitOMPAtomicDirective(OMPAtomicDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
- Writer.AddStmt(D->getX());
- Writer.AddStmt(D->getV());
- Writer.AddStmt(D->getExpr());
- Writer.AddStmt(D->getUpdateExpr());
+ Record.AddStmt(D->getX());
+ Record.AddStmt(D->getV());
+ Record.AddStmt(D->getExpr());
+ Record.AddStmt(D->getUpdateExpr());
Record.push_back(D->isXLHSInRHSPart() ? 1 : 0);
Record.push_back(D->isPostfixUpdate() ? 1 : 0);
Code = serialization::STMT_OMP_ATOMIC_DIRECTIVE;
@@ -2205,6 +2341,37 @@ void ASTStmtWriter::VisitOMPTargetDataDirective(OMPTargetDataDirective *D) {
Code = serialization::STMT_OMP_TARGET_DATA_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPTargetEnterDataDirective(
+ OMPTargetEnterDataDirective *D) {
+ VisitStmt(D);
+ Record.push_back(D->getNumClauses());
+ VisitOMPExecutableDirective(D);
+ Code = serialization::STMT_OMP_TARGET_ENTER_DATA_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPTargetExitDataDirective(
+ OMPTargetExitDataDirective *D) {
+ VisitStmt(D);
+ Record.push_back(D->getNumClauses());
+ VisitOMPExecutableDirective(D);
+ Code = serialization::STMT_OMP_TARGET_EXIT_DATA_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPTargetParallelDirective(
+ OMPTargetParallelDirective *D) {
+ VisitStmt(D);
+ Record.push_back(D->getNumClauses());
+ VisitOMPExecutableDirective(D);
+ Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPTargetParallelForDirective(
+ OMPTargetParallelForDirective *D) {
+ VisitOMPLoopDirective(D);
+ Record.push_back(D->hasCancel() ? 1 : 0);
+ Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE;
+}
+
void ASTStmtWriter::VisitOMPTaskyieldDirective(OMPTaskyieldDirective *D) {
VisitStmt(D);
VisitOMPExecutableDirective(D);
@@ -2281,6 +2448,37 @@ void ASTStmtWriter::VisitOMPDistributeDirective(OMPDistributeDirective *D) {
Code = serialization::STMT_OMP_DISTRIBUTE_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
+ VisitStmt(D);
+ Record.push_back(D->getNumClauses());
+ VisitOMPExecutableDirective(D);
+ Code = serialization::STMT_OMP_TARGET_UPDATE_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPDistributeParallelForDirective(
+ OMPDistributeParallelForDirective *D) {
+ VisitOMPLoopDirective(D);
+ Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPDistributeParallelForSimdDirective(
+ OMPDistributeParallelForSimdDirective *D) {
+ VisitOMPLoopDirective(D);
+ Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPDistributeSimdDirective(
+ OMPDistributeSimdDirective *D) {
+ VisitOMPLoopDirective(D);
+ Code = serialization::STMT_OMP_DISTRIBUTE_SIMD_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPTargetParallelForSimdDirective(
+ OMPTargetParallelForSimdDirective *D) {
+ VisitOMPLoopDirective(D);
+ Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_SIMD_DIRECTIVE;
+}
+
//===----------------------------------------------------------------------===//
// ASTWriter Implementation
//===----------------------------------------------------------------------===//
@@ -2305,9 +2503,7 @@ void ASTWriter::ClearSwitchCaseIDs() {
/// \brief Write the given substatement or subexpression to the
/// bitstream.
-void ASTWriter::WriteSubStmt(Stmt *S,
- llvm::DenseMap<Stmt *, uint64_t> &SubStmtEntries,
- llvm::DenseSet<Stmt *> &ParentStmts) {
+void ASTWriter::WriteSubStmt(Stmt *S) {
RecordData Record;
ASTStmtWriter Writer(*this, Record);
++NumStatements;
@@ -2343,61 +2539,44 @@ void ASTWriter::WriteSubStmt(Stmt *S,
ParentStmtInserterRAII ParentStmtInserter(S, ParentStmts);
#endif
- // Redirect ASTWriter::AddStmt to collect sub-stmts.
- SmallVector<Stmt *, 16> SubStmts;
- CollectedStmts = &SubStmts;
-
- Writer.Code = serialization::STMT_NULL_PTR;
- Writer.AbbrevToUse = 0;
Writer.Visit(S);
-#ifndef NDEBUG
- if (Writer.Code == serialization::STMT_NULL_PTR) {
- SourceManager &SrcMgr
- = DeclIDs.begin()->first->getASTContext().getSourceManager();
- S->dump(SrcMgr);
- llvm_unreachable("Unhandled sub-statement writing AST file");
- }
-#endif
-
- // Revert ASTWriter::AddStmt.
- CollectedStmts = &StmtsToEmit;
-
- // Write the sub-stmts in reverse order, last to first. When reading them back
- // we will read them in correct order by "pop"ing them from the Stmts stack.
- // This simplifies reading and allows to store a variable number of sub-stmts
- // without knowing it in advance.
- while (!SubStmts.empty())
- WriteSubStmt(SubStmts.pop_back_val(), SubStmtEntries, ParentStmts);
-
- Stream.EmitRecord(Writer.Code, Record, Writer.AbbrevToUse);
-
- SubStmtEntries[S] = Stream.GetCurrentBitNo();
+ uint64_t Offset = Writer.Emit();
+ SubStmtEntries[S] = Offset;
}
/// \brief Flush all of the statements that have been added to the
/// queue via AddStmt().
-void ASTWriter::FlushStmts() {
- RecordData Record;
-
+void ASTRecordWriter::FlushStmts() {
// We expect to be the only consumer of the two temporary statement maps,
// assert that they are empty.
- assert(SubStmtEntries.empty() && "unexpected entries in sub-stmt map");
- assert(ParentStmts.empty() && "unexpected entries in parent stmt map");
+ assert(Writer->SubStmtEntries.empty() && "unexpected entries in sub-stmt map");
+ assert(Writer->ParentStmts.empty() && "unexpected entries in parent stmt map");
for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
- WriteSubStmt(StmtsToEmit[I], SubStmtEntries, ParentStmts);
+ Writer->WriteSubStmt(StmtsToEmit[I]);
- assert(N == StmtsToEmit.size() &&
- "Substatement written via AddStmt rather than WriteSubStmt!");
+ assert(N == StmtsToEmit.size() && "record modified while being written!");
// Note that we are at the end of a full expression. Any
// expression records that follow this one are part of a different
// expression.
- Stream.EmitRecord(serialization::STMT_STOP, Record);
+ Writer->Stream.EmitRecord(serialization::STMT_STOP, ArrayRef<uint32_t>());
- SubStmtEntries.clear();
- ParentStmts.clear();
+ Writer->SubStmtEntries.clear();
+ Writer->ParentStmts.clear();
+ }
+
+ StmtsToEmit.clear();
+}
+
+void ASTRecordWriter::FlushSubStmts() {
+ // For a nested statement, write out the substatements in reverse order (so
+ // that a simple stack machine can be used when loading), and don't emit a
+ // STMT_STOP after each one.
+ for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
+ Writer->WriteSubStmt(StmtsToEmit[N - I - 1]);
+ assert(N == StmtsToEmit.size() && "record modified while being written!");
}
StmtsToEmit.clear();
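The comment above describes the on-disk protocol: substatements are written last-sibling-first so the reader can rebuild the tree with a plain stack and no length prefixes. A self-contained illustration of that idea (sketch only, not clang's actual reader):

    #include <stack>
    #include <string>
    #include <vector>

    struct Node { char Kind; std::vector<Node> Kids; };

    // Writer side: children in reverse sibling order, then the parent's tag.
    void write(const Node &N, std::string &Out) {
      for (auto It = N.Kids.rbegin(); It != N.Kids.rend(); ++It)
        write(*It, Out);
      Out += N.Kind;
    }

    // Reader side: a plain stack machine. For the sketch, assume 'P' is the
    // only tag with children and always has exactly two.
    Node read(const std::string &In) {
      std::stack<Node> S;
      for (char C : In) {
        Node N{C, {}};
        if (C == 'P') {
          N.Kids.push_back(S.top()); S.pop();  // pops yield original order
          N.Kids.push_back(S.top()); S.pop();
        }
        S.push(N);
      }
      return S.top();
    }
    // Writing P(a, b) emits "baP"; reading "baP" pops 'a' then 'b', restoring
    // the children in their original order.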
diff --git a/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp b/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp
index 4a2255ab6d39..308fde8b1dd7 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/GeneratePCH.cpp
@@ -51,7 +51,10 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
// Emit the PCH file to the Buffer.
assert(SemaPtr && "No Sema?");
Buffer->Signature =
- Writer.WriteAST(*SemaPtr, OutputFile, Module, isysroot, hasErrors);
+ Writer.WriteAST(*SemaPtr, OutputFile, Module, isysroot,
+ // For serialization we are lenient if the errors were
+ // only warn-as-error kind.
+ PP.getDiagnostics().hasUncompilableErrorOccurred());
Buffer->IsComplete = true;
}
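The leniency being encoded here: DiagnosticsEngine distinguishes plain `hasErrorOccurred()`, which is also set by warnings promoted via -Werror, from `hasUncompilableErrorOccurred()`, which is not. Only the latter is passed down as the hasErrors flag, so a translation unit whose only errors were promoted warnings can still produce a PCH. A minimal sketch of the distinction, assuming a configured DiagnosticsEngine:

    #include "clang/Basic/Diagnostic.h"

    // Returns true when the AST is still structurally sound enough to
    // serialize: warn-as-error diagnostics flip hasErrorOccurred() but
    // leave hasUncompilableErrorOccurred() false.
    static bool astIsSerializable(const clang::DiagnosticsEngine &Diags) {
      return !Diags.hasUncompilableErrorOccurred();
    }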
diff --git a/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp b/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp
index af5f94a5cdc4..581e894d9150 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -354,7 +354,7 @@ void GlobalModuleIndex::printStats() {
std::fprintf(stderr, "\n");
}
-void GlobalModuleIndex::dump() {
+LLVM_DUMP_METHOD void GlobalModuleIndex::dump() {
llvm::errs() << "*** Global Module Index Dump:\n";
llvm::errs() << "Module files:\n";
for (auto &MI : Modules) {
diff --git a/contrib/llvm/tools/clang/lib/Serialization/Module.cpp b/contrib/llvm/tools/clang/lib/Serialization/Module.cpp
index 4884f0b09480..ca0cb3c8ea17 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/Module.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/Module.cpp
@@ -37,8 +37,6 @@ ModuleFile::ModuleFile(ModuleKind Kind, unsigned Generation)
LocalNumSelectors(0), SelectorOffsets(nullptr), BaseSelectorID(0),
SelectorLookupTableData(nullptr), SelectorLookupTable(nullptr),
LocalNumDecls(0), DeclOffsets(nullptr), BaseDeclID(0),
- LocalNumCXXBaseSpecifiers(0), CXXBaseSpecifiersOffsets(nullptr),
- LocalNumCXXCtorInitializers(0), CXXCtorInitializersOffsets(nullptr),
FileSortedDecls(nullptr), NumFileSortedDecls(0),
ObjCCategoriesMap(nullptr), LocalNumObjCCategoriesInMap(0),
LocalNumTypes(0), TypeOffsets(nullptr), BaseTypeIndex(0)
@@ -65,7 +63,7 @@ dumpLocalRemap(StringRef Name,
}
}
-void ModuleFile::dump() {
+LLVM_DUMP_METHOD void ModuleFile::dump() {
llvm::errs() << "\nModule: " << FileName << "\n";
if (!Imports.empty()) {
llvm::errs() << " Imports: ";
diff --git a/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp b/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp
index 74f75a103f7a..292f36dfeb2a 100644
--- a/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp
+++ b/contrib/llvm/tools/clang/lib/Serialization/ModuleManager.cpp
@@ -320,11 +320,11 @@ void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
Queue.reserve(N);
llvm::SmallVector<unsigned, 4> UnusedIncomingEdges;
UnusedIncomingEdges.resize(size());
- for (auto M = rbegin(), MEnd = rend(); M != MEnd; ++M) {
- unsigned Size = (*M)->ImportedBy.size();
- UnusedIncomingEdges[(*M)->Index] = Size;
+ for (ModuleFile *M : llvm::reverse(*this)) {
+ unsigned Size = M->ImportedBy.size();
+ UnusedIncomingEdges[M->Index] = Size;
if (!Size)
- Queue.push_back(*M);
+ Queue.push_back(M);
}
// Traverse the graph, making sure to visit a module before visiting any
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
index a052d83f5afa..64c30e7a82c1 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
@@ -43,7 +43,7 @@ void AnalyzerStatsChecker::checkEndAnalysis(ExplodedGraph &G,
ExprEngine &Eng) const {
const CFG *C = nullptr;
const SourceManager &SM = B.getSourceManager();
- llvm::SmallPtrSet<const CFGBlock*, 256> reachable;
+ llvm::SmallPtrSet<const CFGBlock*, 32> reachable;
// Root node should have the location context of the top most function.
const ExplodedNode *GraphRoot = *G.roots_begin();
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index f4de733bd794..13f0f655b89c 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -211,7 +211,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
llvm::make_unique<BugReport>(*BT, os.str(), errorNode));
}
-void RegionRawOffsetV2::dump() const {
+LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
dumpToStream(llvm::errs());
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index 26d42ba59c22..6239c5507a4b 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -132,7 +132,7 @@ namespace {
void checkPostStmt(const ObjCArrayLiteral *AL,
CheckerContext &C) const;
};
-}
+} // end anonymous namespace
void NilArgChecker::warnIfNilExpr(const Expr *E,
const char *Msg,
@@ -143,7 +143,6 @@ void NilArgChecker::warnIfNilExpr(const Expr *E,
if (ExplodedNode *N = C.generateErrorNode()) {
generateBugReport(N, Msg, E->getSourceRange(), E, C);
}
-
}
}
@@ -530,6 +529,7 @@ namespace {
class CFRetainReleaseChecker : public Checker< check::PreStmt<CallExpr> > {
mutable std::unique_ptr<APIMisuse> BT;
mutable IdentifierInfo *Retain, *Release, *MakeCollectable, *Autorelease;
+
public:
CFRetainReleaseChecker()
: Retain(nullptr), Release(nullptr), MakeCollectable(nullptr),
@@ -538,7 +538,6 @@ public:
};
} // end anonymous namespace
-
void CFRetainReleaseChecker::checkPreStmt(const CallExpr *CE,
CheckerContext &C) const {
// If the CallExpr doesn't have exactly 1 argument just give up checking.
@@ -631,11 +630,10 @@ class ClassReleaseChecker : public Checker<check::PreObjCMessage> {
public:
void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const;
};
-}
+} // end anonymous namespace
void ClassReleaseChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
CheckerContext &C) const {
-
if (!BT) {
BT.reset(new APIMisuse(
this, "message incorrectly sent to class instead of class instance"));
@@ -692,7 +690,7 @@ class VariadicMethodTypeChecker : public Checker<check::PreObjCMessage> {
public:
void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const;
};
-}
+} // end anonymous namespace
/// isVariadicMessage - Returns whether the given message is a variadic message,
/// where all arguments must be Objective-C types.
@@ -855,7 +853,7 @@ public:
const CallEvent *Call,
PointerEscapeKind Kind) const;
};
-}
+} // end anonymous namespace
static bool isKnownNonNilCollectionType(QualType T) {
const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>();
@@ -983,7 +981,6 @@ assumeCollectionNonEmpty(CheckerContext &C, ProgramStateRef State,
return assumeCollectionNonEmpty(C, State, CollectionS, Assumption);
}
-
/// If the first block edge is a back edge, we are reentering the loop.
static bool alreadyExecutedAtLeastOneLoopIteration(const ExplodedNode *N,
const ObjCForCollectionStmt *FCS) {
@@ -1080,7 +1077,6 @@ void ObjCLoopChecker::checkPostObjCMessage(const ObjCMethodCall &M,
C.addTransition(State);
}
- return;
}
static SymbolRef getMethodReceiverIfKnownImmutable(const CallEvent *Call) {
@@ -1203,7 +1199,7 @@ public:
void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
};
-}
+} // end anonymous namespace
ProgramStateRef
ObjCNonNilReturnValueChecker::assumeExprIsNonNull(const Expr *NonNullExpr,
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 17537445d66c..e9512977fa6d 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -118,6 +118,10 @@ public:
void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
+ void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
+ void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
+ void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
+
// Utility methods
std::pair<ProgramStateRef , ProgramStateRef >
static assumeZero(CheckerContext &C,
@@ -916,7 +920,7 @@ ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
// Invalidate and escape only indirect regions accessible through the source
// buffer.
if (IsSourceBuffer) {
- ITraits.setTrait(R,
+ ITraits.setTrait(R->getBaseRegion(),
RegionAndSymbolInvalidationTraits::TK_PreserveContents);
ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
CausesPointerEscape = true;
@@ -1833,6 +1837,8 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
bool canComputeResult = false;
+ SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
+ C.blockCount());
if (s1StrLiteral && s2StrLiteral) {
StringRef s1StrRef = s1StrLiteral->getString();
@@ -1866,28 +1872,29 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
s2StrRef = s2StrRef.substr(0, s2Term);
// Use StringRef's comparison methods to compute the actual result.
- int result;
+ int compareRes = ignoreCase ? s1StrRef.compare_lower(s2StrRef)
+ : s1StrRef.compare(s2StrRef);
- if (ignoreCase) {
- // Compare string 1 to string 2 the same way strcasecmp() does.
- result = s1StrRef.compare_lower(s2StrRef);
- } else {
- // Compare string 1 to string 2 the same way strcmp() does.
- result = s1StrRef.compare(s2StrRef);
+ // The strcmp function returns an integer greater than, equal to, or less
+ // than zero, [c11, p7.24.4.2].
+ if (compareRes == 0) {
+ resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
+ } else {
+ DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
+ // Constrain strcmp's result range based on the result of StringRef's
+ // comparison methods.
+ BinaryOperatorKind op = (compareRes == 1) ? BO_GT : BO_LT;
+ SVal compareWithZero =
+ svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
+ svalBuilder.getConditionType());
+ DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
+ state = state->assume(compareWithZeroVal, true);
}
-
- // Build the SVal of the comparison and bind the return value.
- SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
- state = state->BindExpr(CE, LCtx, resultVal);
}
}
- if (!canComputeResult) {
- // Conjure a symbolic value. It's the best we can do.
- SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
- C.blockCount());
- state = state->BindExpr(CE, LCtx, resultVal);
- }
+ state = state->BindExpr(CE, LCtx, resultVal);
// Record this as a possible path.
C.addTransition(state);
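The rewrite above stops binding strcmp's result to StringRef's normalized -1/0/1 and instead constrains a conjured symbol's sign, which is all C11 7.24.4.2 guarantees. A standalone illustration of why the exact magnitude must not be assumed:

    #include <cstring>

    int main() {
      // The standard only promises a sign: this value may be 2 ('c' - 'a')
      // on one libc and 1 on another, so an analyzer should model the
      // result as "> 0" rather than as exactly 1.
      int r = std::strcmp("c", "a");
      return r > 0 ? 0 : 1;  // portable; testing r == 1 would not be
    }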
@@ -1950,7 +1957,57 @@ void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
C.addTransition(State);
}
+// These should probably be moved into a C++ standard library checker.
+void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
+ evalStdCopyCommon(C, CE);
+}
+
+void CStringChecker::evalStdCopyBackward(CheckerContext &C,
+ const CallExpr *CE) const {
+ evalStdCopyCommon(C, CE);
+}
+
+void CStringChecker::evalStdCopyCommon(CheckerContext &C,
+ const CallExpr *CE) const {
+ if (CE->getNumArgs() < 3)
+ return;
+
+ ProgramStateRef State = C.getState();
+
+ const LocationContext *LCtx = C.getLocationContext();
+ // template <class _InputIterator, class _OutputIterator>
+ // _OutputIterator
+ // copy(_InputIterator __first, _InputIterator __last,
+ // _OutputIterator __result)
+
+ // Invalidate the destination buffer
+ const Expr *Dst = CE->getArg(2);
+ SVal DstVal = State->getSVal(Dst, LCtx);
+ State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
+ /*Size=*/nullptr);
+
+ SValBuilder &SVB = C.getSValBuilder();
+
+ SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
+ State = State->BindExpr(CE, LCtx, ResultVal);
+
+ C.addTransition(State);
+}
+
+static bool isCPPStdLibraryFunction(const FunctionDecl *FD, StringRef Name) {
+ IdentifierInfo *II = FD->getIdentifier();
+ if (!II)
+ return false;
+
+ if (!AnalysisDeclContext::isInStdNamespace(FD))
+ return false;
+
+ if (II->getName().equals(Name))
+ return true;
+
+ return false;
+}
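With the dispatch added below, calls to std::copy and std::copy_backward are evaluated by invalidating the destination buffer and conjuring a symbol for the returned iterator, rather than being left opaque. The kind of caller code this now models (illustrative only):

    #include <algorithm>

    void consume(int *p);

    void caller(int *src, int *dst, int n) {
      // After evalStdCopyCommon, the analyzer considers dst's contents
      // unknown (invalidated) and binds a fresh symbol to 'end'.
      int *end = std::copy(src, src + n, dst);
      consume(end);
    }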
//===----------------------------------------------------------------------===//
// The driver method, and other Checker callbacks.
//===----------------------------------------------------------------------===//
@@ -1999,6 +2056,10 @@ bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
evalFunction = &CStringChecker::evalBcopy;
else if (C.isCLibraryFunction(FDecl, "bcmp"))
evalFunction = &CStringChecker::evalMemcmp;
+ else if (isCPPStdLibraryFunction(FDecl, "copy"))
+ evalFunction = &CStringChecker::evalStdCopy;
+ else if (isCPPStdLibraryFunction(FDecl, "copy_backward"))
+ evalFunction = &CStringChecker::evalStdCopyBackward;
// If the callee isn't a string function, let another checker handle it.
if (!evalFunction)
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
index 145908376996..5126716fcded 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
@@ -313,7 +313,7 @@ void CallAndMessageChecker::checkPreStmt(const CallExpr *CE,
if (L.isUndef()) {
if (!BT_call_undef)
BT_call_undef.reset(new BuiltinBug(
- this, "Called function pointer is an uninitalized pointer value"));
+ this, "Called function pointer is an uninitialized pointer value"));
emitBadCall(BT_call_undef.get(), C, Callee);
return;
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
index 25caa0002598..9e863e79e41f 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
@@ -7,9 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a CheckObjCDealloc, a checker that
-// analyzes an Objective-C class's implementation to determine if it
-// correctly implements -dealloc.
+// This checker analyzes Objective-C -dealloc methods and their callees
+// to warn about improper releasing of instance variables that back synthesized
+// properties. It warns about missing releases in the following cases:
+// - When a class has a synthesized instance variable for a 'retain' or 'copy'
+// property and lacks a -dealloc method in its implementation.
+// - When a class has a synthesized instance variable for a 'retain'/'copy'
+// property but the ivar is not released in -dealloc by either -release
+// or by nilling out the property.
+//
+// It warns about extra releases in -dealloc (but not in callees) when a
+// synthesized instance variable is released in the following cases:
+// - When the property is 'assign' and is not 'readonly'.
+// - When the property is 'weak'.
+//
+// This checker only warns for instance variables synthesized to back
+// properties. Handling the more general case would require inferring whether
+// an instance variable is stored retained or not. For synthesized properties,
+// this is specified in the property declaration itself.
//
//===----------------------------------------------------------------------===//
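Under manual retain/release, the warning classes enumerated above look like this (hypothetical class, illustrative only):

    @interface Person : NSObject
    @property (retain) NSString *name; // backing ivar must be released
    @property (assign) id delegate;    // backing ivar must not be released
    @end

    @implementation Person
    - (void)dealloc {
      // Missing release: '_name' was retained by its synthesized property
      // but is never sent -release and never nilled out via the setter.
      [_delegate release]; // extra release: 'delegate' is 'assign'
      [super dealloc];
    }
    @end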
@@ -20,229 +35,1035 @@
#include "clang/AST/ExprObjC.h"
#include "clang/Basic/LangOptions.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace ento;
-static bool scan_ivar_release(Stmt *S, ObjCIvarDecl *ID,
- const ObjCPropertyDecl *PD,
- Selector Release,
- IdentifierInfo* SelfII,
- ASTContext &Ctx) {
-
- // [mMyIvar release]
- if (ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(S))
- if (ME->getSelector() == Release)
- if (ME->getInstanceReceiver())
- if (Expr *Receiver = ME->getInstanceReceiver()->IgnoreParenCasts())
- if (ObjCIvarRefExpr *E = dyn_cast<ObjCIvarRefExpr>(Receiver))
- if (E->getDecl() == ID)
- return true;
-
- // [self setMyIvar:nil];
- if (ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(S))
- if (ME->getInstanceReceiver())
- if (Expr *Receiver = ME->getInstanceReceiver()->IgnoreParenCasts())
- if (DeclRefExpr *E = dyn_cast<DeclRefExpr>(Receiver))
- if (E->getDecl()->getIdentifier() == SelfII)
- if (ME->getMethodDecl() == PD->getSetterMethodDecl() &&
- ME->getNumArgs() == 1 &&
- ME->getArg(0)->isNullPointerConstant(Ctx,
- Expr::NPC_ValueDependentIsNull))
- return true;
-
- // self.myIvar = nil;
- if (BinaryOperator* BO = dyn_cast<BinaryOperator>(S))
- if (BO->isAssignmentOp())
- if (ObjCPropertyRefExpr *PRE =
- dyn_cast<ObjCPropertyRefExpr>(BO->getLHS()->IgnoreParenCasts()))
- if (PRE->isExplicitProperty() && PRE->getExplicitProperty() == PD)
- if (BO->getRHS()->isNullPointerConstant(Ctx,
- Expr::NPC_ValueDependentIsNull)) {
- // This is only a 'release' if the property kind is not
- // 'assign'.
- return PD->getSetterKind() != ObjCPropertyDecl::Assign;
- }
-
- // Recurse to children.
- for (Stmt *SubStmt : S->children())
- if (SubStmt && scan_ivar_release(SubStmt, ID, PD, Release, SelfII, Ctx))
- return true;
+/// Indicates whether an instance variable is required to be released in
+/// -dealloc.
+enum class ReleaseRequirement {
+ /// The instance variable must be released, either by calling
+ /// -release on it directly or by nilling it out with a property setter.
+ MustRelease,
- return false;
+ /// The instance variable must not be directly released with -release.
+ MustNotReleaseDirectly,
+
+ /// The requirement for the instance variable could not be determined.
+ Unknown
+};
+
+/// Returns true if the property implementation is synthesized and the
+/// type of the property is retainable.
+static bool isSynthesizedRetainableProperty(const ObjCPropertyImplDecl *I,
+ const ObjCIvarDecl **ID,
+ const ObjCPropertyDecl **PD) {
+
+ if (I->getPropertyImplementation() != ObjCPropertyImplDecl::Synthesize)
+ return false;
+
+ (*ID) = I->getPropertyIvarDecl();
+ if (!(*ID))
+ return false;
+
+ QualType T = (*ID)->getType();
+ if (!T->isObjCRetainableType())
+ return false;
+
+ (*PD) = I->getPropertyDecl();
+ // Shouldn't be able to synthesize a property that doesn't exist.
+ assert(*PD);
+
+ return true;
}
-static void checkObjCDealloc(const CheckerBase *Checker,
- const ObjCImplementationDecl *D,
- const LangOptions &LOpts, BugReporter &BR) {
+namespace {
- assert (LOpts.getGC() != LangOptions::GCOnly);
+class ObjCDeallocChecker
+ : public Checker<check::ASTDecl<ObjCImplementationDecl>,
+ check::PreObjCMessage, check::PostObjCMessage,
+ check::PreCall,
+ check::BeginFunction, check::EndFunction,
+ eval::Assume,
+ check::PointerEscape,
+ check::PreStmt<ReturnStmt>> {
- ASTContext &Ctx = BR.getContext();
- const ObjCInterfaceDecl *ID = D->getClassInterface();
+ mutable IdentifierInfo *NSObjectII, *SenTestCaseII, *XCTestCaseII,
+ *Block_releaseII, *CIFilterII;
- // Does the class contain any ivars that are pointers (or id<...>)?
- // If not, skip the check entirely.
- // NOTE: This is motivated by PR 2517:
- // http://llvm.org/bugs/show_bug.cgi?id=2517
+ mutable Selector DeallocSel, ReleaseSel;
- bool containsPointerIvar = false;
+ std::unique_ptr<BugType> MissingReleaseBugType;
+ std::unique_ptr<BugType> ExtraReleaseBugType;
+ std::unique_ptr<BugType> MistakenDeallocBugType;
- for (const auto *Ivar : ID->ivars()) {
- QualType T = Ivar->getType();
+public:
+ ObjCDeallocChecker();
- if (!T->isObjCObjectPointerType() ||
- Ivar->hasAttr<IBOutletAttr>() || // Skip IBOutlets.
- Ivar->hasAttr<IBOutletCollectionAttr>()) // Skip IBOutletCollections.
- continue;
+ void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& Mgr,
+ BugReporter &BR) const;
+ void checkBeginFunction(CheckerContext &Ctx) const;
+ void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
+ void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+ void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
- containsPointerIvar = true;
- break;
- }
+ ProgramStateRef evalAssume(ProgramStateRef State, SVal Cond,
+ bool Assumption) const;
- if (!containsPointerIvar)
- return;
+ ProgramStateRef checkPointerEscape(ProgramStateRef State,
+ const InvalidatedSymbols &Escaped,
+ const CallEvent *Call,
+ PointerEscapeKind Kind) const;
+ void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const;
+ void checkEndFunction(CheckerContext &Ctx) const;
- // Determine if the class subclasses NSObject.
- IdentifierInfo* NSObjectII = &Ctx.Idents.get("NSObject");
- IdentifierInfo* SenTestCaseII = &Ctx.Idents.get("SenTestCase");
+private:
+ void diagnoseMissingReleases(CheckerContext &C) const;
+ bool diagnoseExtraRelease(SymbolRef ReleasedValue, const ObjCMethodCall &M,
+ CheckerContext &C) const;
- for ( ; ID ; ID = ID->getSuperClass()) {
- IdentifierInfo *II = ID->getIdentifier();
+ bool diagnoseMistakenDealloc(SymbolRef DeallocedValue,
+ const ObjCMethodCall &M,
+ CheckerContext &C) const;
- if (II == NSObjectII)
- break;
+ SymbolRef getValueReleasedByNillingOut(const ObjCMethodCall &M,
+ CheckerContext &C) const;
- // FIXME: For now, ignore classes that subclass SenTestCase, as these don't
- // need to implement -dealloc. They implement tear down in another way,
- // which we should try and catch later.
- // http://llvm.org/bugs/show_bug.cgi?id=3187
- if (II == SenTestCaseII)
- return;
+ const ObjCIvarRegion *getIvarRegionForIvarSymbol(SymbolRef IvarSym) const;
+ SymbolRef getInstanceSymbolFromIvarSymbol(SymbolRef IvarSym) const;
+
+ const ObjCPropertyImplDecl*
+ findPropertyOnDeallocatingInstance(SymbolRef IvarSym,
+ CheckerContext &C) const;
+
+ ReleaseRequirement
+ getDeallocReleaseRequirement(const ObjCPropertyImplDecl *PropImpl) const;
+
+ bool isInInstanceDealloc(const CheckerContext &C, SVal &SelfValOut) const;
+ bool isInInstanceDealloc(const CheckerContext &C, const LocationContext *LCtx,
+ SVal &SelfValOut) const;
+ bool instanceDeallocIsOnStack(const CheckerContext &C,
+ SVal &InstanceValOut) const;
+
+ bool isSuperDeallocMessage(const ObjCMethodCall &M) const;
+
+ const ObjCImplDecl *getContainingObjCImpl(const LocationContext *LCtx) const;
+
+ const ObjCPropertyDecl *
+ findShadowedPropertyDecl(const ObjCPropertyImplDecl *PropImpl) const;
+
+ void transitionToReleaseValue(CheckerContext &C, SymbolRef Value) const;
+ ProgramStateRef removeValueRequiringRelease(ProgramStateRef State,
+ SymbolRef InstanceSym,
+ SymbolRef ValueSym) const;
+
+ void initIdentifierInfoAndSelectors(ASTContext &Ctx) const;
+
+ bool classHasSeparateTeardown(const ObjCInterfaceDecl *ID) const;
+
+ bool isReleasedByCIFilterDealloc(const ObjCPropertyImplDecl *PropImpl) const;
+};
+} // End anonymous namespace.
+
+typedef llvm::ImmutableSet<SymbolRef> SymbolSet;
+
+/// Maps from the symbol for a class instance to the set of
+/// symbols remaining that must be released in -dealloc.
+REGISTER_MAP_WITH_PROGRAMSTATE(UnreleasedIvarMap, SymbolRef, SymbolSet)
+
+namespace clang {
+namespace ento {
+template<> struct ProgramStateTrait<SymbolSet>
+: public ProgramStatePartialTrait<SymbolSet> {
+ static void *GDMIndex() { static int index = 0; return &index; }
+};
+}
+}
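REGISTER_MAP_WITH_PROGRAMSTATE introduces an immutable symbol-to-set map threaded through the analyzer's program state, and the ProgramStateTrait specialization gives the bare SymbolSet its own slot in the generic data map. The access pattern, exactly as the handlers below use it:

    // Read the obligations recorded for one instance:
    //   const SymbolSet *Unreleased = State->get<UnreleasedIvarMap>(SelfSym);
    // Publish an updated set (states are immutable; each call yields a new one):
    //   State = State->set<UnreleasedIvarMap>(SelfSym, NewUnreleased);
    // Drop an instance's entry entirely:
    //   State = State->remove<UnreleasedIvarMap>(SelfSym);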
+
+/// An AST check that diagnoses when a class requires a -dealloc method and
+/// is missing one.
+void ObjCDeallocChecker::checkASTDecl(const ObjCImplementationDecl *D,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+ assert(Mgr.getLangOpts().getGC() != LangOptions::GCOnly);
+ assert(!Mgr.getLangOpts().ObjCAutoRefCount);
+ initIdentifierInfoAndSelectors(Mgr.getASTContext());
+
+ const ObjCInterfaceDecl *ID = D->getClassInterface();
+ // If the class is known to have a lifecycle with a separate teardown method
+ // then it may not require a -dealloc method.
+ if (classHasSeparateTeardown(ID))
+ return;
+
+ // Does the class contain any synthesized properties that are retainable?
+ // If not, skip the check entirely.
+ const ObjCPropertyImplDecl *PropImplRequiringRelease = nullptr;
+ bool HasOthers = false;
+ for (const auto *I : D->property_impls()) {
+ if (getDeallocReleaseRequirement(I) == ReleaseRequirement::MustRelease) {
+ if (!PropImplRequiringRelease)
+ PropImplRequiringRelease = I;
+ else {
+ HasOthers = true;
+ break;
+ }
+ }
}
- if (!ID)
+ if (!PropImplRequiringRelease)
return;
- // Get the "dealloc" selector.
- IdentifierInfo* II = &Ctx.Idents.get("dealloc");
- Selector S = Ctx.Selectors.getSelector(0, &II);
const ObjCMethodDecl *MD = nullptr;
// Scan the instance methods for "dealloc".
for (const auto *I : D->instance_methods()) {
- if (I->getSelector() == S) {
+ if (I->getSelector() == DeallocSel) {
MD = I;
break;
}
}
- PathDiagnosticLocation DLoc =
- PathDiagnosticLocation::createBegin(D, BR.getSourceManager());
-
if (!MD) { // No dealloc found.
+ const char* Name = "Missing -dealloc";
- const char* name = LOpts.getGC() == LangOptions::NonGC
- ? "missing -dealloc"
- : "missing -dealloc (Hybrid MM, non-GC)";
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+ OS << "'" << *D << "' lacks a 'dealloc' instance method but "
+ << "must release '" << *PropImplRequiringRelease->getPropertyIvarDecl()
+ << "'";
- std::string buf;
- llvm::raw_string_ostream os(buf);
- os << "Objective-C class '" << *D << "' lacks a 'dealloc' instance method";
+ if (HasOthers)
+ OS << " and others";
+ PathDiagnosticLocation DLoc =
+ PathDiagnosticLocation::createBegin(D, BR.getSourceManager());
- BR.EmitBasicReport(D, Checker, name, categories::CoreFoundationObjectiveC,
- os.str(), DLoc);
+ BR.EmitBasicReport(D, this, Name, categories::CoreFoundationObjectiveC,
+ OS.str(), DLoc);
return;
}
+}
- // Get the "release" selector.
- IdentifierInfo* RII = &Ctx.Idents.get("release");
- Selector RS = Ctx.Selectors.getSelector(0, &RII);
+/// If this is the beginning of -dealloc, mark the values initially stored in
+/// instance variables that must be released by the end of -dealloc
+/// as unreleased in the state.
+void ObjCDeallocChecker::checkBeginFunction(
+ CheckerContext &C) const {
+ initIdentifierInfoAndSelectors(C.getASTContext());
- // Get the "self" identifier
- IdentifierInfo* SelfII = &Ctx.Idents.get("self");
+ // Only do this if the current method is -dealloc.
+ SVal SelfVal;
+ if (!isInInstanceDealloc(C, SelfVal))
+ return;
- // Scan for missing and extra releases of ivars used by implementations
- // of synthesized properties
- for (const auto *I : D->property_impls()) {
- // We can only check the synthesized properties
- if (I->getPropertyImplementation() != ObjCPropertyImplDecl::Synthesize)
+ SymbolRef SelfSymbol = SelfVal.getAsSymbol();
+
+ const LocationContext *LCtx = C.getLocationContext();
+ ProgramStateRef InitialState = C.getState();
+
+ ProgramStateRef State = InitialState;
+
+ SymbolSet::Factory &F = State->getStateManager().get_context<SymbolSet>();
+
+ // Symbols that must be released by the end of -dealloc.
+ SymbolSet RequiredReleases = F.getEmptySet();
+
+ // If we're an inlined -dealloc, we should add our symbols to the existing
+ // set from our subclass.
+ if (const SymbolSet *CurrSet = State->get<UnreleasedIvarMap>(SelfSymbol))
+ RequiredReleases = *CurrSet;
+
+ for (auto *PropImpl : getContainingObjCImpl(LCtx)->property_impls()) {
+ ReleaseRequirement Requirement = getDeallocReleaseRequirement(PropImpl);
+ if (Requirement != ReleaseRequirement::MustRelease)
continue;
- ObjCIvarDecl *ID = I->getPropertyIvarDecl();
- if (!ID)
+ SVal LVal = State->getLValue(PropImpl->getPropertyIvarDecl(), SelfVal);
+ Optional<Loc> LValLoc = LVal.getAs<Loc>();
+ if (!LValLoc)
continue;
- QualType T = ID->getType();
- if (!T->isObjCObjectPointerType()) // Skip non-pointer ivars
+ SVal InitialVal = State->getSVal(LValLoc.getValue());
+ SymbolRef Symbol = InitialVal.getAsSymbol();
+ if (!Symbol || !isa<SymbolRegionValue>(Symbol))
continue;
- const ObjCPropertyDecl *PD = I->getPropertyDecl();
- if (!PD)
+ // Mark the value as requiring a release.
+ RequiredReleases = F.add(RequiredReleases, Symbol);
+ }
+
+ if (!RequiredReleases.isEmpty()) {
+ State = State->set<UnreleasedIvarMap>(SelfSymbol, RequiredReleases);
+ }
+
+ if (State != InitialState) {
+ C.addTransition(State);
+ }
+}
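The merge with any existing set matters when the analyzer inlines a call to [super dealloc]: the superclass's checkBeginFunction runs against the same instance symbol and must extend, not replace, the subclass's obligations. A hypothetical shape of that situation:

    @implementation Child // Child : Parent, both with retain properties
    - (void)dealloc {
      [_childIvar release];
      [super dealloc]; // if Parent's -dealloc is inlined, its ivars are
                       // added to the set already tracking _childIvar
    }
    @end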
+
+/// Given a symbol for an ivar, return the ivar region it was loaded from.
+/// Returns nullptr if the ivar region cannot be determined.
+const ObjCIvarRegion *
+ObjCDeallocChecker::getIvarRegionForIvarSymbol(SymbolRef IvarSym) const {
+ return dyn_cast_or_null<ObjCIvarRegion>(IvarSym->getOriginRegion());
+}
+
+/// Given a symbol for an ivar, return a symbol for the instance containing
+/// the ivar. Returns nullptr if the instance symbol cannot be found.
+SymbolRef
+ObjCDeallocChecker::getInstanceSymbolFromIvarSymbol(SymbolRef IvarSym) const {
+
+ const ObjCIvarRegion *IvarRegion = getIvarRegionForIvarSymbol(IvarSym);
+ if (!IvarRegion)
+ return nullptr;
+
+ return IvarRegion->getSymbolicBase()->getSymbol();
+}
+
+/// If we are in -dealloc or -dealloc is on the stack, handle the call if it is
+/// a release or a nilling-out property setter.
+void ObjCDeallocChecker::checkPreObjCMessage(
+ const ObjCMethodCall &M, CheckerContext &C) const {
+ // Only run if -dealloc is on the stack.
+ SVal DeallocedInstance;
+ if (!instanceDeallocIsOnStack(C, DeallocedInstance))
+ return;
+
+ SymbolRef ReleasedValue = nullptr;
+
+ if (M.getSelector() == ReleaseSel) {
+ ReleasedValue = M.getReceiverSVal().getAsSymbol();
+ } else if (M.getSelector() == DeallocSel && !M.isReceiverSelfOrSuper()) {
+ if (diagnoseMistakenDealloc(M.getReceiverSVal().getAsSymbol(), M, C))
+ return;
+ }
+
+ if (ReleasedValue) {
+ // An instance variable symbol was released with -release:
+ // [_property release];
+ if (diagnoseExtraRelease(ReleasedValue, M, C))
+ return;
+ } else {
+ // An instance variable symbol was released nilling out its property:
+ // self.property = nil;
+ ReleasedValue = getValueReleasedByNillingOut(M, C);
+ }
+
+ if (!ReleasedValue)
+ return;
+
+ transitionToReleaseValue(C, ReleasedValue);
+}
+
+/// If we are in -dealloc or -dealloc is on the stack, handle the call if it
+/// is a call to Block_release().
+void ObjCDeallocChecker::checkPreCall(const CallEvent &Call,
+ CheckerContext &C) const {
+ const IdentifierInfo *II = Call.getCalleeIdentifier();
+ if (II != Block_releaseII)
+ return;
+
+ if (Call.getNumArgs() != 1)
+ return;
+
+ SymbolRef ReleasedValue = Call.getArgSVal(0).getAsSymbol();
+ if (!ReleasedValue)
+ return;
+
+ transitionToReleaseValue(C, ReleasedValue);
+}
+
+/// If the message was a call to '[super dealloc]', diagnose any missing
+/// releases.
+void ObjCDeallocChecker::checkPostObjCMessage(
+ const ObjCMethodCall &M, CheckerContext &C) const {
+ // We perform this check post-message so that if the super -dealloc
+ // calls a helper method that this class overrides, any ivars released in
+ // the helper method will be recorded before checking.
+ if (isSuperDeallocMessage(M))
+ diagnoseMissingReleases(C);
+}
+
+/// Check for missing releases even when -dealloc does not call
+/// '[super dealloc]'.
+void ObjCDeallocChecker::checkEndFunction(
+ CheckerContext &C) const {
+ diagnoseMissingReleases(C);
+}
+
+/// Check for missing releases on early return.
+void ObjCDeallocChecker::checkPreStmt(
+ const ReturnStmt *RS, CheckerContext &C) const {
+ diagnoseMissingReleases(C);
+}
+
+/// When a symbol is assumed to be nil, remove it from the set of symbols
+/// required to be released, since releasing nil is a no-op.
+ProgramStateRef ObjCDeallocChecker::evalAssume(ProgramStateRef State, SVal Cond,
+ bool Assumption) const {
+ if (State->get<UnreleasedIvarMap>().isEmpty())
+ return State;
+
+ auto *CondBSE = dyn_cast_or_null<BinarySymExpr>(Cond.getAsSymExpr());
+ if (!CondBSE)
+ return State;
+
+ BinaryOperator::Opcode OpCode = CondBSE->getOpcode();
+ if (Assumption) {
+ if (OpCode != BO_EQ)
+ return State;
+ } else {
+ if (OpCode != BO_NE)
+ return State;
+ }
+
+ SymbolRef NullSymbol = nullptr;
+ if (auto *SIE = dyn_cast<SymIntExpr>(CondBSE)) {
+ const llvm::APInt &RHS = SIE->getRHS();
+ if (RHS != 0)
+ return State;
+ NullSymbol = SIE->getLHS();
+ } else if (auto *SIE = dyn_cast<IntSymExpr>(CondBSE)) {
+ const llvm::APInt &LHS = SIE->getLHS();
+ if (LHS != 0)
+ return State;
+ NullSymbol = SIE->getRHS();
+ } else {
+ return State;
+ }
+
+ SymbolRef InstanceSymbol = getInstanceSymbolFromIvarSymbol(NullSymbol);
+ if (!InstanceSymbol)
+ return State;
+
+ State = removeValueRequiringRelease(State, InstanceSymbol, NullSymbol);
+
+ return State;
+}
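In effect, obligations are dropped on paths where the ivar is known to be nil. The guarded-release idiom this accepts (hypothetical ivar name):

    - (void)dealloc {
      if (_ivar != nil)   // the false branch assumes '_ivar == nil', which
        [_ivar release];  // evalAssume turns into removing the obligation
      [super dealloc];    // neither branch is reported
    }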
+
+/// If a symbol escapes, conservatively assume unseen code released it.
+ProgramStateRef ObjCDeallocChecker::checkPointerEscape(
+ ProgramStateRef State, const InvalidatedSymbols &Escaped,
+ const CallEvent *Call, PointerEscapeKind Kind) const {
+
+ if (State->get<UnreleasedIvarMap>().isEmpty())
+ return State;
+
+ // Don't treat calls to '[super dealloc]' as escaping for the purposes
+ // of this checker. Because the checker diagnoses missing releases in the
+ // post-message handler for '[super dealloc]', escaping here would cause
+ // the checker to never warn.
+ auto *OMC = dyn_cast_or_null<ObjCMethodCall>(Call);
+ if (OMC && isSuperDeallocMessage(*OMC))
+ return State;
+
+ for (const auto &Sym : Escaped) {
+ if (!Call || (Call && !Call->isInSystemHeader())) {
+ // If Sym is a symbol for an object with instance variables that
+ // must be released, remove these obligations when the object escapes
+ // unless via a call to a system function. System functions are
+ // very unlikely to release instance variables on objects passed to them,
+ // and are frequently called on 'self' in -dealloc (e.g., to remove
+ // observers) -- we want to avoid false negatives from escaping on
+ // them.
+ State = State->remove<UnreleasedIvarMap>(Sym);
+ }
+
+ SymbolRef InstanceSymbol = getInstanceSymbolFromIvarSymbol(Sym);
+ if (!InstanceSymbol)
+ continue;
+
+ State = removeValueRequiringRelease(State, InstanceSymbol, Sym);
+ }
+
+ return State;
+}
+
+/// Report any unreleased instance variables for the current instance being
+/// dealloced.
+void ObjCDeallocChecker::diagnoseMissingReleases(CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+
+ SVal SelfVal;
+ if (!isInInstanceDealloc(C, SelfVal))
+ return;
+
+ const MemRegion *SelfRegion = SelfVal.castAs<loc::MemRegionVal>().getRegion();
+ const LocationContext *LCtx = C.getLocationContext();
+
+ ExplodedNode *ErrNode = nullptr;
+
+ SymbolRef SelfSym = SelfVal.getAsSymbol();
+ if (!SelfSym)
+ return;
+
+ const SymbolSet *OldUnreleased = State->get<UnreleasedIvarMap>(SelfSym);
+ if (!OldUnreleased)
+ return;
+
+ SymbolSet NewUnreleased = *OldUnreleased;
+ SymbolSet::Factory &F = State->getStateManager().get_context<SymbolSet>();
+
+ ProgramStateRef InitialState = State;
+
+ for (auto *IvarSymbol : *OldUnreleased) {
+ const TypedValueRegion *TVR =
+ cast<SymbolRegionValue>(IvarSymbol)->getRegion();
+ const ObjCIvarRegion *IvarRegion = cast<ObjCIvarRegion>(TVR);
+
+ // Don't warn if the ivar is not for this instance.
+ if (SelfRegion != IvarRegion->getSuperRegion())
continue;
- // ivars cannot be set via read-only properties, so we'll skip them
- if (PD->isReadOnly())
+ const ObjCIvarDecl *IvarDecl = IvarRegion->getDecl();
+ // Prevent an inlined call to -dealloc in a super class from warning
+ // about the values the subclass's -dealloc should release.
+ if (IvarDecl->getContainingInterface() !=
+ cast<ObjCMethodDecl>(LCtx->getDecl())->getClassInterface())
continue;
- // ivar must be released if and only if the kind of setter was not 'assign'
- bool requiresRelease = PD->getSetterKind() != ObjCPropertyDecl::Assign;
- if (scan_ivar_release(MD->getBody(), ID, PD, RS, SelfII, Ctx)
- != requiresRelease) {
- const char *name = nullptr;
- std::string buf;
- llvm::raw_string_ostream os(buf);
-
- if (requiresRelease) {
- name = LOpts.getGC() == LangOptions::NonGC
- ? "missing ivar release (leak)"
- : "missing ivar release (Hybrid MM, non-GC)";
-
- os << "The '" << *ID
- << "' instance variable was retained by a synthesized property but "
- "wasn't released in 'dealloc'";
- } else {
- name = LOpts.getGC() == LangOptions::NonGC
- ? "extra ivar release (use-after-release)"
- : "extra ivar release (Hybrid MM, non-GC)";
-
- os << "The '" << *ID
- << "' instance variable was not retained by a synthesized property "
- "but was released in 'dealloc'";
- }
+ // Prevents diagnosing multiple times for the same instance variable
+ // at, for example, both a return and the end of the function.
+ NewUnreleased = F.remove(NewUnreleased, IvarSymbol);
+
+ if (State->getStateManager()
+ .getConstraintManager()
+ .isNull(State, IvarSymbol)
+ .isConstrainedTrue()) {
+ continue;
+ }
+
+ // A missing release manifests as a leak, so treat as a non-fatal error.
+ if (!ErrNode)
+ ErrNode = C.generateNonFatalErrorNode();
+ // If we've already reached this node on another path, return without
+ // diagnosing.
+ if (!ErrNode)
+ return;
+
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+
+ const ObjCInterfaceDecl *Interface = IvarDecl->getContainingInterface();
+ // If the class is known to have a lifecycle with teardown that is
+ // separate from -dealloc, do not warn about missing releases. We
+ // suppress here (rather than not tracking for instance variables in
+ // such classes) because these classes are rare.
+ if (classHasSeparateTeardown(Interface))
+ return;
+
+ ObjCImplDecl *ImplDecl = Interface->getImplementation();
+
+ const ObjCPropertyImplDecl *PropImpl =
+ ImplDecl->FindPropertyImplIvarDecl(IvarDecl->getIdentifier());
- PathDiagnosticLocation SDLoc =
- PathDiagnosticLocation::createBegin(I, BR.getSourceManager());
+ const ObjCPropertyDecl *PropDecl = PropImpl->getPropertyDecl();
- BR.EmitBasicReport(MD, Checker, name,
- categories::CoreFoundationObjectiveC, os.str(), SDLoc);
+ assert(PropDecl->getSetterKind() == ObjCPropertyDecl::Copy ||
+ PropDecl->getSetterKind() == ObjCPropertyDecl::Retain);
+
+ OS << "The '" << *IvarDecl << "' ivar in '" << *ImplDecl
+ << "' was ";
+
+ if (PropDecl->getSetterKind() == ObjCPropertyDecl::Retain)
+ OS << "retained";
+ else
+ OS << "copied";
+
+ OS << " by a synthesized property but not released"
+ " before '[super dealloc]'";
+
+ std::unique_ptr<BugReport> BR(
+ new BugReport(*MissingReleaseBugType, OS.str(), ErrNode));
+
+ C.emitReport(std::move(BR));
+ }
+
+ if (NewUnreleased.isEmpty()) {
+ State = State->remove<UnreleasedIvarMap>(SelfSym);
+ } else {
+ State = State->set<UnreleasedIvarMap>(SelfSym, NewUnreleased);
+ }
+
+ if (ErrNode) {
+ C.addTransition(State, ErrNode);
+ } else if (State != InitialState) {
+ C.addTransition(State);
+ }
+
+ // Make sure that after checking in the top-most frame the list of
+ // tracked ivars is empty. This is intended to detect accidental leaks in
+ // the UnreleasedIvarMap program state.
+ assert(!LCtx->inTopFrame() || State->get<UnreleasedIvarMap>().isEmpty());
+}
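+
+// Illustrative sketch (not part of this change) of the pattern the
+// path-sensitive diagnostic above is meant to catch; 'Foo' and '_obj'
+// are hypothetical names:
+//
+//   @interface Foo : NSObject
+//   @property (retain) NSObject *obj; // backed by synthesized ivar '_obj'
+//   @end
+//
+//   @implementation Foo
+//   - (void)dealloc {
+//     [super dealloc]; // flagged: '_obj' was retained by a synthesized
+//                      // property but not released before '[super dealloc]'
+//   }
+//   @end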
+
+/// Given a symbol, determine whether the symbol refers to an ivar on
+/// the top-most deallocating instance. If so, find the property for that
+/// ivar, if one exists. Otherwise return null.
+const ObjCPropertyImplDecl *
+ObjCDeallocChecker::findPropertyOnDeallocatingInstance(
+ SymbolRef IvarSym, CheckerContext &C) const {
+ SVal DeallocedInstance;
+ if (!isInInstanceDealloc(C, DeallocedInstance))
+ return nullptr;
+
+ // Try to get the region from which the ivar value was loaded.
+ auto *IvarRegion = getIvarRegionForIvarSymbol(IvarSym);
+ if (!IvarRegion)
+ return nullptr;
+
+ // Don't try to find the property if the ivar was not loaded from the
+ // given instance.
+ if (DeallocedInstance.castAs<loc::MemRegionVal>().getRegion() !=
+ IvarRegion->getSuperRegion())
+ return nullptr;
+
+ const LocationContext *LCtx = C.getLocationContext();
+ const ObjCIvarDecl *IvarDecl = IvarRegion->getDecl();
+
+ const ObjCImplDecl *Container = getContainingObjCImpl(LCtx);
+ const ObjCPropertyImplDecl *PropImpl =
+ Container->FindPropertyImplIvarDecl(IvarDecl->getIdentifier());
+ return PropImpl;
+}
+
+/// Emits a warning if the current context is -dealloc and ReleasedValue
+/// must not be directly released in a -dealloc. Returns true if a diagnostic
+/// was emitted.
+bool ObjCDeallocChecker::diagnoseExtraRelease(SymbolRef ReleasedValue,
+ const ObjCMethodCall &M,
+ CheckerContext &C) const {
+  // Try to get the region from which the released value was loaded.
+ // Note that, unlike diagnosing for missing releases, here we don't track
+ // values that must not be released in the state. This is because even if
+ // these values escape, it is still an error under the rules of MRR to
+ // release them in -dealloc.
+ const ObjCPropertyImplDecl *PropImpl =
+ findPropertyOnDeallocatingInstance(ReleasedValue, C);
+
+ if (!PropImpl)
+ return false;
+
+ // If the ivar belongs to a property that must not be released directly
+ // in dealloc, emit a warning.
+ if (getDeallocReleaseRequirement(PropImpl) !=
+ ReleaseRequirement::MustNotReleaseDirectly) {
+ return false;
+ }
+
+ // If the property is readwrite but it shadows a read-only property in its
+  // external interface, treat the property as read-only. If the outside
+ // world cannot write to a property then the internal implementation is free
+ // to make its own convention about whether the value is stored retained
+ // or not. We look up the shadow here rather than in
+ // getDeallocReleaseRequirement() because doing so can be expensive.
+ const ObjCPropertyDecl *PropDecl = findShadowedPropertyDecl(PropImpl);
+ if (PropDecl) {
+ if (PropDecl->isReadOnly())
+ return false;
+ } else {
+ PropDecl = PropImpl->getPropertyDecl();
+ }
+
+ ExplodedNode *ErrNode = C.generateNonFatalErrorNode();
+ if (!ErrNode)
+ return false;
+
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+
+ assert(PropDecl->getSetterKind() == ObjCPropertyDecl::Weak ||
+ (PropDecl->getSetterKind() == ObjCPropertyDecl::Assign &&
+ !PropDecl->isReadOnly()) ||
+ isReleasedByCIFilterDealloc(PropImpl)
+ );
+
+ const ObjCImplDecl *Container = getContainingObjCImpl(C.getLocationContext());
+ OS << "The '" << *PropImpl->getPropertyIvarDecl()
+ << "' ivar in '" << *Container;
+
+ if (isReleasedByCIFilterDealloc(PropImpl)) {
+ OS << "' will be released by '-[CIFilter dealloc]' but also released here";
+ } else {
+ OS << "' was synthesized for ";
+
+ if (PropDecl->getSetterKind() == ObjCPropertyDecl::Weak)
+ OS << "a weak";
+ else
+ OS << "an assign, readwrite";
+
+ OS << " property but was released in 'dealloc'";
+ }
+
+ std::unique_ptr<BugReport> BR(
+ new BugReport(*ExtraReleaseBugType, OS.str(), ErrNode));
+ BR->addRange(M.getOriginExpr()->getSourceRange());
+
+ C.emitReport(std::move(BR));
+
+ return true;
+}
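+
+// Illustrative sketch (not part of this change): under MRR an assign setter
+// does not retain, so releasing the backing ivar is an over-release.
+// 'delegate' is a hypothetical property name:
+//
+//   @property (assign) id delegate; // backed by '_delegate'
+//
+//   - (void)dealloc {
+//     [_delegate release]; // flagged: synthesized for an assign, readwrite
+//                          // property but released in 'dealloc'
+//     [super dealloc];
+//   }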
+
+/// Emits a warning if the current context is -dealloc and DeallocedValue
+/// must not be directly dealloced in a -dealloc. Returns true if a diagnostic
+/// was emitted.
+bool ObjCDeallocChecker::diagnoseMistakenDealloc(SymbolRef DeallocedValue,
+ const ObjCMethodCall &M,
+ CheckerContext &C) const {
+ // Find the property backing the instance variable that M
+ // is dealloc'ing.
+ const ObjCPropertyImplDecl *PropImpl =
+ findPropertyOnDeallocatingInstance(DeallocedValue, C);
+ if (!PropImpl)
+ return false;
+
+ if (getDeallocReleaseRequirement(PropImpl) !=
+ ReleaseRequirement::MustRelease) {
+ return false;
+ }
+
+ ExplodedNode *ErrNode = C.generateErrorNode();
+ if (!ErrNode)
+ return false;
+
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+
+ OS << "'" << *PropImpl->getPropertyIvarDecl()
+ << "' should be released rather than deallocated";
+
+ std::unique_ptr<BugReport> BR(
+ new BugReport(*MistakenDeallocBugType, OS.str(), ErrNode));
+ BR->addRange(M.getOriginExpr()->getSourceRange());
+
+ C.emitReport(std::move(BR));
+
+ return true;
+}
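+
+// Illustrative sketch (not part of this change); '_ivar' is hypothetical:
+//
+//   - (void)dealloc {
+//     [_ivar dealloc]; // flagged: '_ivar' should be released rather than
+//                      // deallocated
+//     [super dealloc];
+//   }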
+
+ObjCDeallocChecker::ObjCDeallocChecker()
+ : NSObjectII(nullptr), SenTestCaseII(nullptr), XCTestCaseII(nullptr),
+ CIFilterII(nullptr) {
+
+ MissingReleaseBugType.reset(
+ new BugType(this, "Missing ivar release (leak)",
+ categories::MemoryCoreFoundationObjectiveC));
+
+ ExtraReleaseBugType.reset(
+ new BugType(this, "Extra ivar release",
+ categories::MemoryCoreFoundationObjectiveC));
+
+ MistakenDeallocBugType.reset(
+ new BugType(this, "Mistaken dealloc",
+ categories::MemoryCoreFoundationObjectiveC));
+}
+
+void ObjCDeallocChecker::initIdentifierInfoAndSelectors(
+ ASTContext &Ctx) const {
+ if (NSObjectII)
+ return;
+
+ NSObjectII = &Ctx.Idents.get("NSObject");
+ SenTestCaseII = &Ctx.Idents.get("SenTestCase");
+ XCTestCaseII = &Ctx.Idents.get("XCTestCase");
+ Block_releaseII = &Ctx.Idents.get("_Block_release");
+ CIFilterII = &Ctx.Idents.get("CIFilter");
+
+ IdentifierInfo *DeallocII = &Ctx.Idents.get("dealloc");
+ IdentifierInfo *ReleaseII = &Ctx.Idents.get("release");
+ DeallocSel = Ctx.Selectors.getSelector(0, &DeallocII);
+ ReleaseSel = Ctx.Selectors.getSelector(0, &ReleaseII);
+}
+
+/// Returns true if M is a call to '[super dealloc]'.
+bool ObjCDeallocChecker::isSuperDeallocMessage(
+ const ObjCMethodCall &M) const {
+ if (M.getOriginExpr()->getReceiverKind() != ObjCMessageExpr::SuperInstance)
+ return false;
+
+ return M.getSelector() == DeallocSel;
+}
+
+/// Returns the ObjCImplDecl containing the method declaration in LCtx.
+const ObjCImplDecl *
+ObjCDeallocChecker::getContainingObjCImpl(const LocationContext *LCtx) const {
+ auto *MD = cast<ObjCMethodDecl>(LCtx->getDecl());
+ return cast<ObjCImplDecl>(MD->getDeclContext());
+}
+
+/// Returns the property shadowed by PropImpl if one exists and
+/// nullptr otherwise.
+const ObjCPropertyDecl *ObjCDeallocChecker::findShadowedPropertyDecl(
+ const ObjCPropertyImplDecl *PropImpl) const {
+ const ObjCPropertyDecl *PropDecl = PropImpl->getPropertyDecl();
+
+ // Only readwrite properties can shadow.
+ if (PropDecl->isReadOnly())
+ return nullptr;
+
+ auto *CatDecl = dyn_cast<ObjCCategoryDecl>(PropDecl->getDeclContext());
+
+ // Only class extensions can contain shadowing properties.
+ if (!CatDecl || !CatDecl->IsClassExtension())
+ return nullptr;
+
+ IdentifierInfo *ID = PropDecl->getIdentifier();
+ DeclContext::lookup_result R = CatDecl->getClassInterface()->lookup(ID);
+ for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
+ auto *ShadowedPropDecl = dyn_cast<ObjCPropertyDecl>(*I);
+ if (!ShadowedPropDecl)
+ continue;
+
+ if (ShadowedPropDecl->isInstanceProperty()) {
+ assert(ShadowedPropDecl->isReadOnly());
+ return ShadowedPropDecl;
}
}
+
+ return nullptr;
}
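+
+// Illustrative sketch (not part of this change) of the shadowing pattern the
+// lookup above detects; 'Foo' and 'bar' are hypothetical names:
+//
+//   @interface Foo : NSObject
+//   @property (readonly, retain) id bar; // shadowed read-only declaration
+//   @end
+//
+//   @interface Foo () // class extension
+//   @property (readwrite, retain) id bar; // shadowing readwrite declaration
+//   @end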
-//===----------------------------------------------------------------------===//
-// ObjCDeallocChecker
-//===----------------------------------------------------------------------===//
+/// Add a transition noting the release of the given value.
+void ObjCDeallocChecker::transitionToReleaseValue(CheckerContext &C,
+ SymbolRef Value) const {
+ assert(Value);
+ SymbolRef InstanceSym = getInstanceSymbolFromIvarSymbol(Value);
+ if (!InstanceSym)
+ return;
+ ProgramStateRef InitialState = C.getState();
-namespace {
-class ObjCDeallocChecker : public Checker<
- check::ASTDecl<ObjCImplementationDecl> > {
-public:
- void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& mgr,
- BugReporter &BR) const {
- if (mgr.getLangOpts().getGC() == LangOptions::GCOnly)
- return;
- checkObjCDealloc(this, cast<ObjCImplementationDecl>(D), mgr.getLangOpts(),
- BR);
+ ProgramStateRef ReleasedState =
+ removeValueRequiringRelease(InitialState, InstanceSym, Value);
+
+ if (ReleasedState != InitialState) {
+ C.addTransition(ReleasedState);
}
-};
}
-void ento::registerObjCDeallocChecker(CheckerManager &mgr) {
- mgr.registerChecker<ObjCDeallocChecker>();
+/// Remove the Value requiring a release from the tracked set for
+/// Instance and return the resultant state.
+ProgramStateRef ObjCDeallocChecker::removeValueRequiringRelease(
+ ProgramStateRef State, SymbolRef Instance, SymbolRef Value) const {
+ assert(Instance);
+ assert(Value);
+ const ObjCIvarRegion *RemovedRegion = getIvarRegionForIvarSymbol(Value);
+ if (!RemovedRegion)
+ return State;
+
+ const SymbolSet *Unreleased = State->get<UnreleasedIvarMap>(Instance);
+ if (!Unreleased)
+ return State;
+
+ // Mark the value as no longer requiring a release.
+ SymbolSet::Factory &F = State->getStateManager().get_context<SymbolSet>();
+ SymbolSet NewUnreleased = *Unreleased;
+ for (auto &Sym : *Unreleased) {
+ const ObjCIvarRegion *UnreleasedRegion = getIvarRegionForIvarSymbol(Sym);
+ assert(UnreleasedRegion);
+ if (RemovedRegion->getDecl() == UnreleasedRegion->getDecl()) {
+ NewUnreleased = F.remove(NewUnreleased, Sym);
+ }
+ }
+
+ if (NewUnreleased.isEmpty()) {
+ return State->remove<UnreleasedIvarMap>(Instance);
+ }
+
+ return State->set<UnreleasedIvarMap>(Instance, NewUnreleased);
+}
+
+/// Determines whether the instance variable for \p PropImpl must or must not be
+/// released in -dealloc or whether it cannot be determined.
+ReleaseRequirement ObjCDeallocChecker::getDeallocReleaseRequirement(
+ const ObjCPropertyImplDecl *PropImpl) const {
+ const ObjCIvarDecl *IvarDecl;
+ const ObjCPropertyDecl *PropDecl;
+ if (!isSynthesizedRetainableProperty(PropImpl, &IvarDecl, &PropDecl))
+ return ReleaseRequirement::Unknown;
+
+ ObjCPropertyDecl::SetterKind SK = PropDecl->getSetterKind();
+
+ switch (SK) {
+ // Retain and copy setters retain/copy their values before storing and so
+ // the value in their instance variables must be released in -dealloc.
+ case ObjCPropertyDecl::Retain:
+ case ObjCPropertyDecl::Copy:
+ if (isReleasedByCIFilterDealloc(PropImpl))
+ return ReleaseRequirement::MustNotReleaseDirectly;
+
+ return ReleaseRequirement::MustRelease;
+
+ case ObjCPropertyDecl::Weak:
+ return ReleaseRequirement::MustNotReleaseDirectly;
+
+ case ObjCPropertyDecl::Assign:
+    // It is common for the ivars backing read-only assign properties to
+    // always be stored retained, so their release requirement cannot be
+    // determined.
+ if (PropDecl->isReadOnly())
+ return ReleaseRequirement::Unknown;
+
+ return ReleaseRequirement::MustNotReleaseDirectly;
+ }
+ llvm_unreachable("Unrecognized setter kind");
+}
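+
+// Summary of the mapping above (informal):
+//   retain/copy        -> MustRelease (MustNotReleaseDirectly when the ivar
+//                         is released by -[CIFilter dealloc])
+//   weak               -> MustNotReleaseDirectly
+//   assign, readwrite  -> MustNotReleaseDirectly
+//   assign, readonly   -> Unknown (often stored retained by convention)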
+
+/// Returns the released value if M is a call to a setter that releases
+/// and nils out its underlying instance variable.
+SymbolRef
+ObjCDeallocChecker::getValueReleasedByNillingOut(const ObjCMethodCall &M,
+ CheckerContext &C) const {
+ SVal ReceiverVal = M.getReceiverSVal();
+ if (!ReceiverVal.isValid())
+ return nullptr;
+
+ if (M.getNumArgs() == 0)
+ return nullptr;
+
+ if (!M.getArgExpr(0)->getType()->isObjCRetainableType())
+ return nullptr;
+
+ // Is the first argument nil?
+ SVal Arg = M.getArgSVal(0);
+ ProgramStateRef notNilState, nilState;
+ std::tie(notNilState, nilState) =
+ M.getState()->assume(Arg.castAs<DefinedOrUnknownSVal>());
+ if (!(nilState && !notNilState))
+ return nullptr;
+
+ const ObjCPropertyDecl *Prop = M.getAccessedProperty();
+ if (!Prop)
+ return nullptr;
+
+ ObjCIvarDecl *PropIvarDecl = Prop->getPropertyIvarDecl();
+ if (!PropIvarDecl)
+ return nullptr;
+
+ ProgramStateRef State = C.getState();
+
+ SVal LVal = State->getLValue(PropIvarDecl, ReceiverVal);
+ Optional<Loc> LValLoc = LVal.getAs<Loc>();
+ if (!LValLoc)
+ return nullptr;
+
+ SVal CurrentValInIvar = State->getSVal(LValLoc.getValue());
+ return CurrentValInIvar.getAsSymbol();
+}
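+
+// Illustrative sketch (not part of this change): nilling out through the
+// setter releases the old value, so the checker counts it as a release.
+// 'obj' is a hypothetical property:
+//
+//   - (void)dealloc {
+//     self.obj = nil; // setter releases '_obj'
+//     [super dealloc];
+//   }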
+
+/// Returns true if the current context is a call to -dealloc and false
+/// otherwise. If true, it also sets SelfValOut to the value of
+/// 'self'.
+bool ObjCDeallocChecker::isInInstanceDealloc(const CheckerContext &C,
+ SVal &SelfValOut) const {
+ return isInInstanceDealloc(C, C.getLocationContext(), SelfValOut);
+}
+
+/// Returns true if LCtx is a call to -dealloc and false
+/// otherwise. If true, it also sets SelfValOut to the value of
+/// 'self'.
+bool ObjCDeallocChecker::isInInstanceDealloc(const CheckerContext &C,
+ const LocationContext *LCtx,
+ SVal &SelfValOut) const {
+ auto *MD = dyn_cast<ObjCMethodDecl>(LCtx->getDecl());
+ if (!MD || !MD->isInstanceMethod() || MD->getSelector() != DeallocSel)
+ return false;
+
+ const ImplicitParamDecl *SelfDecl = LCtx->getSelfDecl();
+ assert(SelfDecl && "No self in -dealloc?");
+
+ ProgramStateRef State = C.getState();
+ SelfValOut = State->getSVal(State->getRegion(SelfDecl, LCtx));
+ return true;
+}
+
+/// Returns true if there is a call to -dealloc anywhere on the stack and false
+/// otherwise. If true, it also sets InstanceValOut to the value of
+/// 'self' in the frame for -dealloc.
+bool ObjCDeallocChecker::instanceDeallocIsOnStack(const CheckerContext &C,
+ SVal &InstanceValOut) const {
+ const LocationContext *LCtx = C.getLocationContext();
+
+ while (LCtx) {
+ if (isInInstanceDealloc(C, LCtx, InstanceValOut))
+ return true;
+
+ LCtx = LCtx->getParent();
+ }
+
+ return false;
+}
+
+/// Returns true if ID is a class that is known to have a separate teardown
+/// lifecycle. In this case, -dealloc warnings about missing releases
+/// should be suppressed.
+bool ObjCDeallocChecker::classHasSeparateTeardown(
+ const ObjCInterfaceDecl *ID) const {
+ // Suppress if the class is not a subclass of NSObject.
+ for ( ; ID ; ID = ID->getSuperClass()) {
+ IdentifierInfo *II = ID->getIdentifier();
+
+ if (II == NSObjectII)
+ return false;
+
+ // FIXME: For now, ignore classes that subclass SenTestCase and XCTestCase,
+ // as these don't need to implement -dealloc. They implement tear down in
+  // another way, which we should try to catch later.
+ // http://llvm.org/bugs/show_bug.cgi?id=3187
+ if (II == XCTestCaseII || II == SenTestCaseII)
+ return true;
+ }
+
+ return true;
+}
+
+/// The -dealloc method in CIFilter is highly unusual in that it will release
+/// instance variables belonging to its *subclasses* if the variable name
+/// starts with "input" or backs a property whose name starts with "input".
+/// Subclasses should not release these ivars in their own -dealloc method --
+/// doing so could result in an over-release.
+///
+/// This method returns true if the property will be released by
+/// -[CIFilter dealloc].
+bool ObjCDeallocChecker::isReleasedByCIFilterDealloc(
+ const ObjCPropertyImplDecl *PropImpl) const {
+ assert(PropImpl->getPropertyIvarDecl());
+ StringRef PropName = PropImpl->getPropertyDecl()->getName();
+ StringRef IvarName = PropImpl->getPropertyIvarDecl()->getName();
+
+ const char *ReleasePrefix = "input";
+ if (!(PropName.startswith(ReleasePrefix) ||
+ IvarName.startswith(ReleasePrefix))) {
+ return false;
+ }
+
+ const ObjCInterfaceDecl *ID =
+ PropImpl->getPropertyIvarDecl()->getContainingInterface();
+ for ( ; ID ; ID = ID->getSuperClass()) {
+ IdentifierInfo *II = ID->getIdentifier();
+ if (II == CIFilterII)
+ return true;
+ }
+
+ return false;
+}
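+
+// Illustrative sketch (not part of this change); 'MyFilter' is hypothetical:
+//
+//   @interface MyFilter : CIFilter
+//   @property (retain) CIImage *inputImage; // released by -[CIFilter dealloc]
+//   @end
+//
+// MyFilter's own -dealloc must not release '_inputImage' again.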
+
+void ento::registerObjCDeallocChecker(CheckerManager &Mgr) {
+ const LangOptions &LangOpts = Mgr.getLangOpts();
+  // This checker only makes sense under MRR.
+ if (LangOpts.getGC() == LangOptions::GCOnly || LangOpts.ObjCAutoRefCount)
+ return;
+
+ Mgr.registerChecker<ObjCDeallocChecker>();
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp
index 37b84480f892..74d05e27e8eb 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/CheckerDocumentation.cpp
@@ -1,4 +1,4 @@
-//= CheckerDocumentation.cpp - Documentation checker ---------------*- C++ -*-//
+//===- CheckerDocumentation.cpp - Documentation checker ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -57,7 +57,6 @@ class CheckerDocumentation : public Checker< check::PreStmt<ReturnStmt>,
check::Event<ImplicitNullDerefEvent>,
check::ASTDecl<FunctionDecl> > {
public:
-
/// \brief Pre-visit the Statement.
///
/// The method will be called before the analyzer core processes the
@@ -147,7 +146,6 @@ public:
/// check::Bind
void checkBind(SVal Loc, SVal Val, const Stmt *S, CheckerContext &) const {}
-
/// \brief Called whenever a symbol becomes dead.
///
/// This callback should be used by the checkers to aggressively clean
@@ -164,8 +162,16 @@ public:
/// check::DeadSymbols
void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const {}
+
+ /// \brief Called when the analyzer core starts analyzing a function,
+ /// regardless of whether it is analyzed at the top level or is inlined.
+ ///
+ /// check::BeginFunction
+ void checkBeginFunction(CheckerContext &Ctx) const {}
+
/// \brief Called when the analyzer core reaches the end of a
- /// function being analyzed.
+  /// function being analyzed, regardless of whether it is analyzed at the top
+ /// level or is inlined.
///
/// check::EndFunction
void checkEndFunction(CheckerContext &Ctx) const {}
@@ -190,7 +196,6 @@ public:
AnalysisManager &Mgr,
BugReporter &BR) const {}
-
/// \brief Evaluates function call.
///
/// The analysis core treats all function calls in the same way. However, some
@@ -310,12 +315,10 @@ public:
void checkASTDecl(const FunctionDecl *D,
AnalysisManager &Mgr,
BugReporter &BR) const {}
-
};
void CheckerDocumentation::checkPostStmt(const DeclStmt *DS,
CheckerContext &C) const {
- return;
}
} // end namespace ento
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/Checkers.td b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/Checkers.td
deleted file mode 100644
index 8133d290d886..000000000000
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/Checkers.td
+++ /dev/null
@@ -1,647 +0,0 @@
-//===--- Checkers.td - Static Analyzer Checkers -===-----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-include "clang/StaticAnalyzer/Checkers/CheckerBase.td"
-
-//===----------------------------------------------------------------------===//
-// Packages.
-//===----------------------------------------------------------------------===//
-
-// The Alpha package is for checkers that have too many false positives to be
-// turned on by default. The hierarchy under Alpha should be organized in the
-// hierarchy checkers would have had if they were truly at the top level.
-// (For example, a Cocoa-specific checker that is alpha should be in
-// alpha.osx.cocoa).
-def Alpha : Package<"alpha">;
-
-def Core : Package<"core">;
-def CoreBuiltin : Package<"builtin">, InPackage<Core>;
-def CoreUninitialized : Package<"uninitialized">, InPackage<Core>;
-def CoreAlpha : Package<"core">, InPackage<Alpha>, Hidden;
-
-// The OptIn package is for checkers that are not alpha and that would normally
-// be on by default but where the driver does not have enough information to
-// determine when they are applicable. For example, localizability checkers fit
-// this criterion because the driver cannot determine whether a project is
-// localized or not -- this is best determined at the IDE or build-system level.
-//
-// The checker hierarchy under OptIn should mirror that in Alpha: checkers
-// should be organized as if they were at the top level.
-//
-// Note: OptIn is *not* intended for checkers that are too noisy to be on by
-// default. Such checkers belong in the alpha package.
-def OptIn : Package<"optin">;
-
-def Nullability : Package<"nullability">;
-
-def Cplusplus : Package<"cplusplus">;
-def CplusplusAlpha : Package<"cplusplus">, InPackage<Alpha>, Hidden;
-
-def DeadCode : Package<"deadcode">;
-def DeadCodeAlpha : Package<"deadcode">, InPackage<Alpha>, Hidden;
-
-def Performance : Package<"performance">, InPackage<OptIn>;
-
-def Security : Package <"security">;
-def InsecureAPI : Package<"insecureAPI">, InPackage<Security>;
-def SecurityAlpha : Package<"security">, InPackage<Alpha>, Hidden;
-def Taint : Package<"taint">, InPackage<SecurityAlpha>, Hidden;
-
-def Unix : Package<"unix">;
-def UnixAlpha : Package<"unix">, InPackage<Alpha>, Hidden;
-def CString : Package<"cstring">, InPackage<Unix>, Hidden;
-def CStringAlpha : Package<"cstring">, InPackage<UnixAlpha>, Hidden;
-
-def OSX : Package<"osx">;
-def OSXAlpha : Package<"osx">, InPackage<Alpha>, Hidden;
-def OSXOptIn : Package<"osx">, InPackage<OptIn>;
-
-def Cocoa : Package<"cocoa">, InPackage<OSX>;
-def CocoaAlpha : Package<"cocoa">, InPackage<OSXAlpha>, Hidden;
-def CocoaOptIn : Package<"cocoa">, InPackage<OSXOptIn>;
-
-def CoreFoundation : Package<"coreFoundation">, InPackage<OSX>;
-def Containers : Package<"containers">, InPackage<CoreFoundation>;
-
-def LocalizabilityAlpha : Package<"localizability">, InPackage<CocoaAlpha>;
-def LocalizabilityOptIn : Package<"localizability">, InPackage<CocoaOptIn>;
-
-def LLVM : Package<"llvm">;
-def Debug : Package<"debug">;
-
-//===----------------------------------------------------------------------===//
-// Core Checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = Core in {
-
-def DereferenceChecker : Checker<"NullDereference">,
- HelpText<"Check for dereferences of null pointers">,
- DescFile<"DereferenceChecker.cpp">;
-
-def CallAndMessageChecker : Checker<"CallAndMessage">,
- HelpText<"Check for logical errors for function calls and Objective-C message expressions (e.g., uninitialized arguments, null function pointers)">,
- DescFile<"CallAndMessageChecker.cpp">;
-
-def NonNullParamChecker : Checker<"NonNullParamChecker">,
- HelpText<"Check for null pointers passed as arguments to a function whose arguments are references or marked with the 'nonnull' attribute">,
- DescFile<"NonNullParamChecker.cpp">;
-
-def VLASizeChecker : Checker<"VLASize">,
- HelpText<"Check for declarations of VLA of undefined or zero size">,
- DescFile<"VLASizeChecker.cpp">;
-
-def DivZeroChecker : Checker<"DivideZero">,
- HelpText<"Check for division by zero">,
- DescFile<"DivZeroChecker.cpp">;
-
-def UndefResultChecker : Checker<"UndefinedBinaryOperatorResult">,
- HelpText<"Check for undefined results of binary operators">,
- DescFile<"UndefResultChecker.cpp">;
-
-def StackAddrEscapeChecker : Checker<"StackAddressEscape">,
- HelpText<"Check that addresses to stack memory do not escape the function">,
- DescFile<"StackAddrEscapeChecker.cpp">;
-
-def DynamicTypePropagation : Checker<"DynamicTypePropagation">,
- HelpText<"Generate dynamic type information">,
- DescFile<"DynamicTypePropagation.cpp">;
-
-} // end "core"
-
-let ParentPackage = CoreAlpha in {
-
-def BoolAssignmentChecker : Checker<"BoolAssignment">,
- HelpText<"Warn about assigning non-{0,1} values to Boolean variables">,
- DescFile<"BoolAssignmentChecker.cpp">;
-
-def CastSizeChecker : Checker<"CastSize">,
- HelpText<"Check when casting a malloc'ed type T, whether the size is a multiple of the size of T">,
- DescFile<"CastSizeChecker.cpp">;
-
-def CastToStructChecker : Checker<"CastToStruct">,
- HelpText<"Check for cast from non-struct pointer to struct pointer">,
- DescFile<"CastToStructChecker.cpp">;
-
-def IdenticalExprChecker : Checker<"IdenticalExpr">,
- HelpText<"Warn about unintended use of identical expressions in operators">,
- DescFile<"IdenticalExprChecker.cpp">;
-
-def FixedAddressChecker : Checker<"FixedAddr">,
- HelpText<"Check for assignment of a fixed address to a pointer">,
- DescFile<"FixedAddressChecker.cpp">;
-
-def PointerArithChecker : Checker<"PointerArithm">,
- HelpText<"Check for pointer arithmetic on locations other than array elements">,
- DescFile<"PointerArithChecker">;
-
-def PointerSubChecker : Checker<"PointerSub">,
- HelpText<"Check for pointer subtractions on two pointers pointing to different memory chunks">,
- DescFile<"PointerSubChecker">;
-
-def SizeofPointerChecker : Checker<"SizeofPtr">,
- HelpText<"Warn about unintended use of sizeof() on pointer expressions">,
- DescFile<"CheckSizeofPointer.cpp">;
-
-def CallAndMessageUnInitRefArg : Checker<"CallAndMessageUnInitRefArg">,
- HelpText<"Check for logical errors for function calls and Objective-C message expressions (e.g., uninitialized arguments, null function pointers, and pointer to undefined variables)">,
- DescFile<"CallAndMessageChecker.cpp">;
-
-def TestAfterDivZeroChecker : Checker<"TestAfterDivZero">,
- HelpText<"Check for division by variable that is later compared against 0. Either the comparison is useless or there is division by zero.">,
- DescFile<"TestAfterDivZeroChecker.cpp">;
-
-def DynamicTypeChecker : Checker<"DynamicTypeChecker">,
- HelpText<"Check for cases where the dynamic and the static type of an object are unrelated.">,
- DescFile<"DynamicTypeChecker.cpp">;
-
-} // end "alpha.core"
-
-let ParentPackage = Nullability in {
-
-def NullPassedToNonnullChecker : Checker<"NullPassedToNonnull">,
- HelpText<"Warns when a null pointer is passed to a pointer which has a _Nonnull type.">,
- DescFile<"NullabilityChecker.cpp">;
-
-def NullReturnedFromNonnullChecker : Checker<"NullReturnedFromNonnull">,
- HelpText<"Warns when a null pointer is returned from a function that has _Nonnull return type.">,
- DescFile<"NullabilityChecker.cpp">;
-
-def NullableDereferencedChecker : Checker<"NullableDereferenced">,
- HelpText<"Warns when a nullable pointer is dereferenced.">,
- DescFile<"NullabilityChecker.cpp">;
-
-def NullablePassedToNonnullChecker : Checker<"NullablePassedToNonnull">,
- HelpText<"Warns when a nullable pointer is passed to a pointer which has a _Nonnull type.">,
- DescFile<"NullabilityChecker.cpp">;
-
-def NullableReturnedFromNonnullChecker : Checker<"NullablePassedToNonnull">,
- HelpText<"Warns when a nullable pointer is returned from a function that has _Nonnull return type.">,
- DescFile<"NullabilityChecker.cpp">;
-
-} // end "nullability"
-
-//===----------------------------------------------------------------------===//
-// Evaluate "builtin" functions.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = CoreBuiltin in {
-
-def NoReturnFunctionChecker : Checker<"NoReturnFunctions">,
- HelpText<"Evaluate \"panic\" functions that are known to not return to the caller">,
- DescFile<"NoReturnFunctionChecker.cpp">;
-
-def BuiltinFunctionChecker : Checker<"BuiltinFunctions">,
- HelpText<"Evaluate compiler builtin functions (e.g., alloca())">,
- DescFile<"BuiltinFunctionChecker.cpp">;
-
-} // end "core.builtin"
-
-//===----------------------------------------------------------------------===//
-// Uninitialized values checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = CoreUninitialized in {
-
-def UndefinedArraySubscriptChecker : Checker<"ArraySubscript">,
- HelpText<"Check for uninitialized values used as array subscripts">,
- DescFile<"UndefinedArraySubscriptChecker.cpp">;
-
-def UndefinedAssignmentChecker : Checker<"Assign">,
- HelpText<"Check for assigning uninitialized values">,
- DescFile<"UndefinedAssignmentChecker.cpp">;
-
-def UndefBranchChecker : Checker<"Branch">,
- HelpText<"Check for uninitialized values used as branch conditions">,
- DescFile<"UndefBranchChecker.cpp">;
-
-def UndefCapturedBlockVarChecker : Checker<"CapturedBlockVariable">,
- HelpText<"Check for blocks that capture uninitialized values">,
- DescFile<"UndefCapturedBlockVarChecker.cpp">;
-
-def ReturnUndefChecker : Checker<"UndefReturn">,
- HelpText<"Check for uninitialized values being returned to the caller">,
- DescFile<"ReturnUndefChecker.cpp">;
-
-} // end "core.uninitialized"
-
-//===----------------------------------------------------------------------===//
-// C++ checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = Cplusplus in {
-
-def NewDeleteChecker : Checker<"NewDelete">,
- HelpText<"Check for double-free and use-after-free problems. Traces memory managed by new/delete.">,
- DescFile<"MallocChecker.cpp">;
-
-def NewDeleteLeaksChecker : Checker<"NewDeleteLeaks">,
- HelpText<"Check for memory leaks. Traces memory managed by new/delete.">,
- DescFile<"MallocChecker.cpp">;
-
-} // end: "cplusplus"
-
-let ParentPackage = CplusplusAlpha in {
-
-def VirtualCallChecker : Checker<"VirtualCall">,
- HelpText<"Check virtual function calls during construction or destruction">,
- DescFile<"VirtualCallChecker.cpp">;
-
-} // end: "alpha.cplusplus"
-
-//===----------------------------------------------------------------------===//
-// Deadcode checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = DeadCode in {
-
-def DeadStoresChecker : Checker<"DeadStores">,
- HelpText<"Check for values stored to variables that are never read afterwards">,
- DescFile<"DeadStoresChecker.cpp">;
-} // end DeadCode
-
-let ParentPackage = DeadCodeAlpha in {
-
-def UnreachableCodeChecker : Checker<"UnreachableCode">,
- HelpText<"Check unreachable code">,
- DescFile<"UnreachableCodeChecker.cpp">;
-
-} // end "alpha.deadcode"
-
-//===----------------------------------------------------------------------===//
-// Performance checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = Performance in {
-
-def PaddingChecker : Checker<"Padding">,
- HelpText<"Check for excessively padded structs.">,
- DescFile<"PaddingChecker.cpp">;
-
-} // end: "padding"
-
-//===----------------------------------------------------------------------===//
-// Security checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = InsecureAPI in {
- def gets : Checker<"gets">,
- HelpText<"Warn on uses of the 'gets' function">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def getpw : Checker<"getpw">,
- HelpText<"Warn on uses of the 'getpw' function">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def mktemp : Checker<"mktemp">,
- HelpText<"Warn on uses of the 'mktemp' function">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def mkstemp : Checker<"mkstemp">,
- HelpText<"Warn when 'mkstemp' is passed fewer than 6 X's in the format string">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def rand : Checker<"rand">,
- HelpText<"Warn on uses of the 'rand', 'random', and related functions">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def strcpy : Checker<"strcpy">,
- HelpText<"Warn on uses of the 'strcpy' and 'strcat' functions">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def vfork : Checker<"vfork">,
- HelpText<"Warn on uses of the 'vfork' function">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
- def UncheckedReturn : Checker<"UncheckedReturn">,
- HelpText<"Warn on uses of functions whose return values must be always checked">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
-}
-let ParentPackage = Security in {
- def FloatLoopCounter : Checker<"FloatLoopCounter">,
- HelpText<"Warn on using a floating point value as a loop counter (CERT: FLP30-C, FLP30-CPP)">,
- DescFile<"CheckSecuritySyntaxOnly.cpp">;
-}
-
-let ParentPackage = SecurityAlpha in {
-
-def ArrayBoundChecker : Checker<"ArrayBound">,
- HelpText<"Warn about buffer overflows (older checker)">,
- DescFile<"ArrayBoundChecker.cpp">;
-
-def ArrayBoundCheckerV2 : Checker<"ArrayBoundV2">,
- HelpText<"Warn about buffer overflows (newer checker)">,
- DescFile<"ArrayBoundCheckerV2.cpp">;
-
-def ReturnPointerRangeChecker : Checker<"ReturnPtrRange">,
- HelpText<"Check for an out-of-bound pointer being returned to callers">,
- DescFile<"ReturnPointerRangeChecker.cpp">;
-
-def MallocOverflowSecurityChecker : Checker<"MallocOverflow">,
- HelpText<"Check for overflows in the arguments to malloc()">,
- DescFile<"MallocOverflowSecurityChecker.cpp">;
-
-} // end "alpha.security"
-
-//===----------------------------------------------------------------------===//
-// Taint checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = Taint in {
-
-def GenericTaintChecker : Checker<"TaintPropagation">,
- HelpText<"Generate taint information used by other checkers">,
- DescFile<"GenericTaintChecker.cpp">;
-
-} // end "alpha.security.taint"
-
-//===----------------------------------------------------------------------===//
-// Unix API checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = Unix in {
-
-def UnixAPIChecker : Checker<"API">,
- HelpText<"Check calls to various UNIX/Posix functions">,
- DescFile<"UnixAPIChecker.cpp">;
-
-def MallocChecker: Checker<"Malloc">,
- HelpText<"Check for memory leaks, double free, and use-after-free problems. Traces memory managed by malloc()/free().">,
- DescFile<"MallocChecker.cpp">;
-
-def MallocSizeofChecker : Checker<"MallocSizeof">,
- HelpText<"Check for dubious malloc arguments involving sizeof">,
- DescFile<"MallocSizeofChecker.cpp">;
-
-def MismatchedDeallocatorChecker : Checker<"MismatchedDeallocator">,
- HelpText<"Check for mismatched deallocators.">,
- DescFile<"MallocChecker.cpp">;
-
-def VforkChecker : Checker<"Vfork">,
- HelpText<"Check for proper usage of vfork">,
- DescFile<"VforkChecker.cpp">;
-
-} // end "unix"
-
-let ParentPackage = UnixAlpha in {
-
-def ChrootChecker : Checker<"Chroot">,
- HelpText<"Check improper use of chroot">,
- DescFile<"ChrootChecker.cpp">;
-
-def PthreadLockChecker : Checker<"PthreadLock">,
- HelpText<"Simple lock -> unlock checker">,
- DescFile<"PthreadLockChecker.cpp">;
-
-def StreamChecker : Checker<"Stream">,
- HelpText<"Check stream handling functions">,
- DescFile<"StreamChecker.cpp">;
-
-def SimpleStreamChecker : Checker<"SimpleStream">,
- HelpText<"Check for misuses of stream APIs">,
- DescFile<"SimpleStreamChecker.cpp">;
-
-} // end "alpha.unix"
-
-let ParentPackage = CString in {
-
-def CStringNullArg : Checker<"NullArg">,
- HelpText<"Check for null pointers being passed as arguments to C string functions">,
- DescFile<"CStringChecker.cpp">;
-
-def CStringSyntaxChecker : Checker<"BadSizeArg">,
- HelpText<"Check the size argument passed into C string functions for common erroneous patterns">,
- DescFile<"CStringSyntaxChecker.cpp">;
-}
-
-let ParentPackage = CStringAlpha in {
-
-def CStringOutOfBounds : Checker<"OutOfBounds">,
- HelpText<"Check for out-of-bounds access in string functions">,
- DescFile<"CStringChecker.cpp">;
-
-def CStringBufferOverlap : Checker<"BufferOverlap">,
- HelpText<"Checks for overlap in two buffer arguments">,
- DescFile<"CStringChecker.cpp">;
-
-def CStringNotNullTerm : Checker<"NotNullTerminated">,
- HelpText<"Check for arguments which are not null-terminating strings">,
- DescFile<"CStringChecker.cpp">;
-}
-
-//===----------------------------------------------------------------------===//
-// Mac OS X, Cocoa, and Core Foundation checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = OSX in {
-
-def MacOSXAPIChecker : Checker<"API">,
- InPackage<OSX>,
- HelpText<"Check for proper uses of various Apple APIs">,
- DescFile<"MacOSXAPIChecker.cpp">;
-
-def MacOSKeychainAPIChecker : Checker<"SecKeychainAPI">,
- InPackage<OSX>,
- HelpText<"Check for proper uses of Secure Keychain APIs">,
- DescFile<"MacOSKeychainAPIChecker.cpp">;
-
-} // end "osx"
-
-let ParentPackage = Cocoa in {
-
-def ObjCAtSyncChecker : Checker<"AtSync">,
- HelpText<"Check for nil pointers used as mutexes for @synchronized">,
- DescFile<"ObjCAtSyncChecker.cpp">;
-
-def NilArgChecker : Checker<"NilArg">,
- HelpText<"Check for prohibited nil arguments to ObjC method calls">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def ClassReleaseChecker : Checker<"ClassRelease">,
- HelpText<"Check for sending 'retain', 'release', or 'autorelease' directly to a Class">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def VariadicMethodTypeChecker : Checker<"VariadicMethodTypes">,
- HelpText<"Check for passing non-Objective-C types to variadic collection "
- "initialization methods that expect only Objective-C types">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def NSAutoreleasePoolChecker : Checker<"NSAutoreleasePool">,
- HelpText<"Warn for suboptimal uses of NSAutoreleasePool in Objective-C GC mode">,
- DescFile<"NSAutoreleasePoolChecker.cpp">;
-
-def ObjCMethSigsChecker : Checker<"IncompatibleMethodTypes">,
- HelpText<"Warn about Objective-C method signatures with type incompatibilities">,
- DescFile<"CheckObjCInstMethSignature.cpp">;
-
-def ObjCUnusedIvarsChecker : Checker<"UnusedIvars">,
- HelpText<"Warn about private ivars that are never used">,
- DescFile<"ObjCUnusedIVarsChecker.cpp">;
-
-def ObjCSelfInitChecker : Checker<"SelfInit">,
- HelpText<"Check that 'self' is properly initialized inside an initializer method">,
- DescFile<"ObjCSelfInitChecker.cpp">;
-
-def ObjCLoopChecker : Checker<"Loops">,
- HelpText<"Improved modeling of loops using Cocoa collection types">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def ObjCNonNilReturnValueChecker : Checker<"NonNilReturnValue">,
- HelpText<"Model the APIs that are guaranteed to return a non-nil value">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def ObjCSuperCallChecker : Checker<"MissingSuperCall">,
- HelpText<"Warn about Objective-C methods that lack a necessary call to super">,
- DescFile<"ObjCMissingSuperCallChecker.cpp">;
-
-def NSErrorChecker : Checker<"NSError">,
- HelpText<"Check usage of NSError** parameters">,
- DescFile<"NSErrorChecker.cpp">;
-
-def RetainCountChecker : Checker<"RetainCount">,
- HelpText<"Check for leaks and improper reference count management">,
- DescFile<"RetainCountChecker.cpp">;
-
-def ObjCGenericsChecker : Checker<"ObjCGenerics">,
- HelpText<"Check for type errors when using Objective-C generics">,
- DescFile<"DynamicTypePropagation.cpp">;
-
-} // end "osx.cocoa"
-
-let ParentPackage = CocoaAlpha in {
-
-def ObjCDeallocChecker : Checker<"Dealloc">,
- HelpText<"Warn about Objective-C classes that lack a correct implementation of -dealloc">,
- DescFile<"CheckObjCDealloc.cpp">;
-
-def InstanceVariableInvalidation : Checker<"InstanceVariableInvalidation">,
- HelpText<"Check that the invalidatable instance variables are invalidated in the methods annotated with objc_instance_variable_invalidator">,
- DescFile<"IvarInvalidationChecker.cpp">;
-
-def MissingInvalidationMethod : Checker<"MissingInvalidationMethod">,
- HelpText<"Check that the invalidation methods are present in classes that contain invalidatable instance variables">,
- DescFile<"IvarInvalidationChecker.cpp">;
-
-def DirectIvarAssignment : Checker<"DirectIvarAssignment">,
- HelpText<"Check for direct assignments to instance variables">,
- DescFile<"DirectIvarAssignment.cpp">;
-
-def DirectIvarAssignmentForAnnotatedFunctions : Checker<"DirectIvarAssignmentForAnnotatedFunctions">,
- HelpText<"Check for direct assignments to instance variables in the methods annotated with objc_no_direct_instance_variable_assignment">,
- DescFile<"DirectIvarAssignment.cpp">;
-
-} // end "alpha.osx.cocoa"
-
-let ParentPackage = CoreFoundation in {
-
-def CFNumberCreateChecker : Checker<"CFNumber">,
- HelpText<"Check for proper uses of CFNumberCreate">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def CFRetainReleaseChecker : Checker<"CFRetainRelease">,
- HelpText<"Check for null arguments to CFRetain/CFRelease/CFMakeCollectable">,
- DescFile<"BasicObjCFoundationChecks.cpp">;
-
-def CFErrorChecker : Checker<"CFError">,
- HelpText<"Check usage of CFErrorRef* parameters">,
- DescFile<"NSErrorChecker.cpp">;
-}
-
-let ParentPackage = Containers in {
-def ObjCContainersASTChecker : Checker<"PointerSizedValues">,
- HelpText<"Warns if 'CFArray', 'CFDictionary', 'CFSet' are created with non-pointer-size values">,
- DescFile<"ObjCContainersASTChecker.cpp">;
-
-def ObjCContainersChecker : Checker<"OutOfBounds">,
- HelpText<"Checks for index out-of-bounds when using 'CFArray' API">,
- DescFile<"ObjCContainersChecker.cpp">;
-
-}
-
-let ParentPackage = LocalizabilityOptIn in {
-def NonLocalizedStringChecker : Checker<"NonLocalizedStringChecker">,
- HelpText<"Warns about uses of non-localized NSStrings passed to UI methods expecting localized NSStrings">,
- DescFile<"LocalizationChecker.cpp">;
-
-def EmptyLocalizationContextChecker : Checker<"EmptyLocalizationContextChecker">,
- HelpText<"Check that NSLocalizedString macros include a comment for context">,
- DescFile<"LocalizationChecker.cpp">;
-}
-
-let ParentPackage = LocalizabilityAlpha in {
-def PluralMisuseChecker : Checker<"PluralMisuseChecker">,
- HelpText<"Warns against using one vs. many plural pattern in code when generating localized strings.">,
- DescFile<"LocalizationChecker.cpp">;
-}
-
-//===----------------------------------------------------------------------===//
-// Checkers for LLVM development.
-//===----------------------------------------------------------------------===//
-
-def LLVMConventionsChecker : Checker<"Conventions">,
- InPackage<LLVM>,
- HelpText<"Check code for LLVM codebase conventions">,
- DescFile<"LLVMConventionsChecker.cpp">;
-
-//===----------------------------------------------------------------------===//
-// Debugging checkers (for analyzer development).
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = Debug in {
-
-def DominatorsTreeDumper : Checker<"DumpDominators">,
- HelpText<"Print the dominance tree for a given CFG">,
- DescFile<"DebugCheckers.cpp">;
-
-def LiveVariablesDumper : Checker<"DumpLiveVars">,
- HelpText<"Print results of live variable analysis">,
- DescFile<"DebugCheckers.cpp">;
-
-def CFGViewer : Checker<"ViewCFG">,
- HelpText<"View Control-Flow Graphs using GraphViz">,
- DescFile<"DebugCheckers.cpp">;
-
-def CFGDumper : Checker<"DumpCFG">,
- HelpText<"Display Control-Flow Graphs">,
- DescFile<"DebugCheckers.cpp">;
-
-def CallGraphViewer : Checker<"ViewCallGraph">,
- HelpText<"View Call Graph using GraphViz">,
- DescFile<"DebugCheckers.cpp">;
-
-def CallGraphDumper : Checker<"DumpCallGraph">,
- HelpText<"Display Call Graph">,
- DescFile<"DebugCheckers.cpp">;
-
-def ConfigDumper : Checker<"ConfigDumper">,
- HelpText<"Dump config table">,
- DescFile<"DebugCheckers.cpp">;
-
-def TraversalDumper : Checker<"DumpTraversal">,
- HelpText<"Print branch conditions as they are traversed by the engine">,
- DescFile<"TraversalChecker.cpp">;
-
-def CallDumper : Checker<"DumpCalls">,
- HelpText<"Print calls as they are traversed by the engine">,
- DescFile<"TraversalChecker.cpp">;
-
-def AnalyzerStatsChecker : Checker<"Stats">,
- HelpText<"Emit warnings with analyzer statistics">,
- DescFile<"AnalyzerStatsChecker.cpp">;
-
-def TaintTesterChecker : Checker<"TaintTest">,
- HelpText<"Mark tainted symbols as such.">,
- DescFile<"TaintTesterChecker.cpp">;
-
-def ExprInspectionChecker : Checker<"ExprInspection">,
- HelpText<"Check the analyzer's understanding of expressions">,
- DescFile<"ExprInspectionChecker.cpp">;
-
-def ExplodedGraphViewer : Checker<"ViewExplodedGraph">,
- HelpText<"View Exploded Graphs using GraphViz">,
- DescFile<"DebugCheckers.cpp">;
-
-def BugHashDumper : Checker<"DumpBugHash">,
- HelpText<"Dump the bug hash for all statements.">,
- DescFile<"DebugCheckers.cpp">;
-
-} // end "debug"
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
index 3ad1996db893..14587fb5163b 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
@@ -1,4 +1,4 @@
-//===- Chrootchecker.cpp -------- Basic security checks ----------*- C++ -*-==//
+//===- Chrootchecker.cpp -------- Basic security checks ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,6 +20,7 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/ADT/ImmutableMap.h"
+
using namespace clang;
using namespace ento;
@@ -148,8 +149,6 @@ void ChrootChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const {
C.emitReport(llvm::make_unique<BugReport>(
*BT_BreakJail, BT_BreakJail->getDescription(), N));
}
-
- return;
}
void ento::registerChrootChecker(CheckerManager &mgr) {
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp
index 77a5a7226453..fb9e366c3de0 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangCheckers.cpp
@@ -27,6 +27,6 @@ void ento::registerBuiltinCheckers(CheckerRegistry &registry) {
#define GET_CHECKERS
#define CHECKER(FULLNAME,CLASS,DESCFILE,HELPTEXT,GROUPINDEX,HIDDEN) \
registry.addChecker(register##CLASS, FULLNAME, HELPTEXT);
-#include "Checkers.inc"
+#include "clang/StaticAnalyzer/Checkers/Checkers.inc"
#undef GET_CHECKERS
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h
index 05b4a61c5af1..d6e96f27a75e 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ClangSACheckers.h
@@ -26,7 +26,7 @@ class CheckerRegistry;
#define GET_CHECKERS
#define CHECKER(FULLNAME,CLASS,CXXFILE,HELPTEXT,GROUPINDEX,HIDDEN) \
void register##CLASS(CheckerManager &mgr);
-#include "Checkers.inc"
+#include "clang/StaticAnalyzer/Checkers/Checkers.inc"
#undef CHECKER
#undef GET_CHECKERS
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp
index f2a269a3335c..8ca2a24cffe7 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp
@@ -278,6 +278,8 @@ public:
RHS = RHS->IgnoreParenCasts();
QualType T = VD->getType();
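+      // Stores to volatile-qualified variables are observable side effects
+      // and therefore never dead; e.g. (hypothetical):
+      //   volatile int sink; ... sink = compute(); // not a dead store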
+ if (T.isVolatileQualified())
+ return;
if (T->isPointerType() || T->isObjCObjectPointerType()) {
if (RHS->isNullPointerConstant(Ctx, Expr::NPC_ValueDependentIsNull))
return;
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp
index f216f696ef65..152b937bb03f 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp
@@ -230,7 +230,7 @@ void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
// dereference.
if (ExplodedNode *N = C.generateSink(nullState, C.getPredecessor())) {
ImplicitNullDerefEvent event = {l, isLoad, N, &C.getBugReporter(),
- /*IsDirectDereference=*/false};
+ /*IsDirectDereference=*/true};
dispatchEvent(event);
}
}
@@ -272,7 +272,7 @@ void DereferenceChecker::checkBind(SVal L, SVal V, const Stmt *S,
if (ExplodedNode *N = C.generateSink(StNull, C.getPredecessor())) {
ImplicitNullDerefEvent event = {V, /*isLoad=*/true, N,
&C.getBugReporter(),
- /*IsDirectDereference=*/false};
+ /*IsDirectDereference=*/true};
dispatchEvent(event);
}
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
index ad478cbf7829..5efb9096f2ff 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
@@ -123,7 +123,7 @@ void DirectIvarAssignment::checkASTDecl(const ObjCImplementationDecl *D,
IvarToPropertyMapTy IvarToPropMap;
// Find all properties for this class.
- for (const auto *PD : InterD->properties()) {
+ for (const auto *PD : InterD->instance_properties()) {
// Find the corresponding IVar.
const ObjCIvarDecl *ID = findPropertyBackingIvar(PD, InterD,
Mgr.getASTContext());
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 30f629830c61..b8e43325da04 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -1,4 +1,4 @@
-//== DynamicTypePropagation.cpp -------------------------------- -*- C++ -*--=//
+//===- DynamicTypePropagation.cpp ------------------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -97,6 +97,7 @@ class DynamicTypePropagation:
const ObjCObjectPointerType *To, ExplodedNode *N,
SymbolRef Sym, CheckerContext &C,
const Stmt *ReportedNode = nullptr) const;
+
public:
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
@@ -109,7 +110,7 @@ public:
/// This value is set to true, when the Generics checker is turned on.
DefaultBool CheckGenerics;
};
-}
+} // end anonymous namespace
void DynamicTypePropagation::checkDeadSymbols(SymbolReaper &SR,
CheckerContext &C) const {
@@ -151,7 +152,6 @@ static void recordFixedType(const MemRegion *Region, const CXXMethodDecl *MD,
ProgramStateRef State = C.getState();
State = setDynamicTypeInfo(State, Region, Ty, /*CanBeSubclass=*/false);
C.addTransition(State);
- return;
}
void DynamicTypePropagation::checkPreCall(const CallEvent &Call,
@@ -387,6 +387,14 @@ static const ObjCObjectPointerType *getMostInformativeDerivedClassImpl(
}
return From;
}
+
+ if (To->getObjectType()->getSuperClassType().isNull()) {
+    // If To has no superclass and From and To aren't the same, then To was
+    // not actually a descendant of From. In this case the best we can
+    // do is 'From'.
+ return From;
+ }
+
const auto *SuperOfTo =
To->getObjectType()->getSuperClassType()->getAs<ObjCObjectType>();
assert(SuperOfTo);
@@ -444,6 +452,23 @@ storeWhenMoreInformative(ProgramStateRef &State, SymbolRef Sym,
const ObjCObjectPointerType *StaticLowerBound,
const ObjCObjectPointerType *StaticUpperBound,
ASTContext &C) {
+ // TODO: The above 4 cases are not exhaustive. In particular, it is possible
+ // for Current to be incomparable with StaticLowerBound, StaticUpperBound,
+ // or both.
+ //
+ // For example, suppose Foo<T> and Bar<T> are unrelated types.
+ //
+ // Foo<T> *f = ...
+ // Bar<T> *b = ...
+ //
+ // id t1 = b;
+ // f = t1;
+ // id t2 = f; // StaticLowerBound is Foo<T>, Current is Bar<T>
+ //
+ // We should either constrain the callers of this function so that the stated
+  // preconditions hold (and assert it) or rewrite the function to explicitly
+ // handle the additional cases.
+
// Precondition
assert(StaticUpperBound->isSpecialized() ||
StaticLowerBound->isSpecialized());
@@ -772,7 +797,6 @@ void DynamicTypePropagation::checkPostObjCMessage(const ObjCMethodCall &M,
// class. This method is provided by the runtime and available on all classes.
if (MessageExpr->getReceiverKind() == ObjCMessageExpr::Class &&
Sel.getAsString() == "class") {
-
QualType ReceiverType = MessageExpr->getClassReceiver();
const auto *ReceiverClassType = ReceiverType->getAs<ObjCObjectType>();
QualType ReceiverClassPointerType =
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
index 8f6c20ab1906..31e9150cc15b 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
@@ -11,6 +11,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Checkers/SValExplainer.h"
#include "llvm/ADT/StringSwitch.h"
using namespace clang;
@@ -25,17 +26,21 @@ class ExprInspectionChecker : public Checker<eval::Call, check::DeadSymbols> {
void analyzerWarnIfReached(const CallExpr *CE, CheckerContext &C) const;
void analyzerCrash(const CallExpr *CE, CheckerContext &C) const;
void analyzerWarnOnDeadSymbol(const CallExpr *CE, CheckerContext &C) const;
+ void analyzerExplain(const CallExpr *CE, CheckerContext &C) const;
+ void analyzerGetExtent(const CallExpr *CE, CheckerContext &C) const;
typedef void (ExprInspectionChecker::*FnCheck)(const CallExpr *,
CheckerContext &C) const;
+ void reportBug(llvm::StringRef Msg, CheckerContext &C) const;
+
public:
bool evalCall(const CallExpr *CE, CheckerContext &C) const;
void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
};
}
-REGISTER_SET_WITH_PROGRAMSTATE(MarkedSymbols, const void *)
+REGISTER_SET_WITH_PROGRAMSTATE(MarkedSymbols, SymbolRef)
bool ExprInspectionChecker::evalCall(const CallExpr *CE,
CheckerContext &C) const {
@@ -50,6 +55,8 @@ bool ExprInspectionChecker::evalCall(const CallExpr *CE,
&ExprInspectionChecker::analyzerWarnIfReached)
.Case("clang_analyzer_warnOnDeadSymbol",
&ExprInspectionChecker::analyzerWarnOnDeadSymbol)
+ .Case("clang_analyzer_explain", &ExprInspectionChecker::analyzerExplain)
+ .Case("clang_analyzer_getExtent", &ExprInspectionChecker::analyzerGetExtent)
.Default(nullptr);
if (!Handler)
@@ -91,6 +98,18 @@ static const char *getArgumentValueString(const CallExpr *CE,
}
}
+void ExprInspectionChecker::reportBug(llvm::StringRef Msg,
+ CheckerContext &C) const {
+ if (!BT)
+ BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
+
+ ExplodedNode *N = C.generateNonFatalErrorNode();
+ if (!N)
+ return;
+
+ C.emitReport(llvm::make_unique<BugReport>(*BT, Msg, N));
+}
+
void ExprInspectionChecker::analyzerEval(const CallExpr *CE,
CheckerContext &C) const {
const LocationContext *LC = C.getPredecessor()->getLocationContext();
@@ -100,26 +119,12 @@ void ExprInspectionChecker::analyzerEval(const CallExpr *CE,
if (LC->getCurrentStackFrame()->getParent() != nullptr)
return;
- if (!BT)
- BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
-
- ExplodedNode *N = C.generateNonFatalErrorNode();
- if (!N)
- return;
- C.emitReport(
- llvm::make_unique<BugReport>(*BT, getArgumentValueString(CE, C), N));
+ reportBug(getArgumentValueString(CE, C), C);
}
void ExprInspectionChecker::analyzerWarnIfReached(const CallExpr *CE,
CheckerContext &C) const {
-
- if (!BT)
- BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
-
- ExplodedNode *N = C.generateNonFatalErrorNode();
- if (!N)
- return;
- C.emitReport(llvm::make_unique<BugReport>(*BT, "REACHABLE", N));
+ reportBug("REACHABLE", C);
}
void ExprInspectionChecker::analyzerCheckInlined(const CallExpr *CE,
@@ -134,14 +139,32 @@ void ExprInspectionChecker::analyzerCheckInlined(const CallExpr *CE,
if (LC->getCurrentStackFrame()->getParent() == nullptr)
return;
- if (!BT)
- BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
+ reportBug(getArgumentValueString(CE, C), C);
+}
- ExplodedNode *N = C.generateNonFatalErrorNode();
- if (!N)
- return;
- C.emitReport(
- llvm::make_unique<BugReport>(*BT, getArgumentValueString(CE, C), N));
+void ExprInspectionChecker::analyzerExplain(const CallExpr *CE,
+ CheckerContext &C) const {
+ if (CE->getNumArgs() == 0) {
+ reportBug("Missing argument for explaining", C);
+ return;
+ }
+
+ SVal V = C.getSVal(CE->getArg(0));
+ SValExplainer Ex(C.getASTContext());
+ reportBug(Ex.Visit(V), C);
+}
+
+void ExprInspectionChecker::analyzerGetExtent(const CallExpr *CE,
+ CheckerContext &C) const {
+ if (CE->getNumArgs() == 0) {
+ reportBug("Missing region for obtaining extent", C);
+ return;
+ }
+
+ auto MR = dyn_cast_or_null<SubRegion>(C.getSVal(CE->getArg(0)).getAsRegion());
+ if (!MR) {
+ reportBug("Obtaining extent of a non-region", C);
+ return;
+ }
+
+ ProgramStateRef State = C.getState();
+ State = State->BindExpr(CE, C.getLocationContext(),
+ MR->getExtent(C.getSValBuilder()));
+ C.addTransition(State);
}
void ExprInspectionChecker::analyzerWarnOnDeadSymbol(const CallExpr *CE,
@@ -163,20 +186,14 @@ void ExprInspectionChecker::checkDeadSymbols(SymbolReaper &SymReaper,
ProgramStateRef State = C.getState();
const MarkedSymbolsTy &Syms = State->get<MarkedSymbols>();
for (auto I = Syms.begin(), E = Syms.end(); I != E; ++I) {
- SymbolRef Sym = static_cast<SymbolRef>(*I);
+ SymbolRef Sym = *I;
if (!SymReaper.isDead(Sym))
continue;
- if (!BT)
- BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
-
- ExplodedNode *N = C.generateNonFatalErrorNode();
- if (!N)
- return;
-
- C.emitReport(llvm::make_unique<BugReport>(*BT, "SYMBOL DEAD", N));
- C.addTransition(State->remove<MarkedSymbols>(Sym), N);
+ reportBug("SYMBOL DEAD", C);
+ State = State->remove<MarkedSymbols>(Sym);
}
+ C.addTransition(State);
}
void ExprInspectionChecker::analyzerCrash(const CallExpr *CE,
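
For context, the two new debug builtins added above are driven from analyzer test sources through bare declarations that evalCall intercepts by name. A minimal sketch of such a test (the declarations follow the test-suite convention and are not a real header; the exact expected output depends on SValExplainer):

    // Hypothetical test snippet exercising the new builtins.
    void clang_analyzer_explain(int);                // report carries SValExplainer's text
    unsigned long clang_analyzer_getExtent(void *);  // binds the region's extent to the call
    void clang_analyzer_eval(int);                   // prints TRUE/FALSE/UNKNOWN

    void testExtent() {
      int buf[4];
      // Assuming the extent of the array region is reported in bytes:
      clang_analyzer_eval(clang_analyzer_getExtent(buf) == sizeof(buf)); // expected: TRUE
      clang_analyzer_explain(buf[0]); // expected: a textual explanation of the SVal
    }
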
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
index dffff38c91a2..8076ca09591f 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
@@ -1,4 +1,4 @@
-//=- IvarInvalidationChecker.cpp - -*- C++ -------------------------------*-==//
+//===- IvarInvalidationChecker.cpp ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -43,7 +43,6 @@ using namespace clang;
using namespace ento;
namespace {
-
struct ChecksFilter {
/// Check for missing invalidation method declarations.
DefaultBool check_MissingInvalidationMethod;
@@ -55,7 +54,6 @@ struct ChecksFilter {
};
class IvarInvalidationCheckerImpl {
-
typedef llvm::SmallSetVector<const ObjCMethodDecl*, 2> MethodSet;
typedef llvm::DenseMap<const ObjCMethodDecl*,
const ObjCIvarDecl*> MethToIvarMapTy;
@@ -64,7 +62,6 @@ class IvarInvalidationCheckerImpl {
typedef llvm::DenseMap<const ObjCIvarDecl*,
const ObjCPropertyDecl*> IvarToPropMapTy;
-
struct InvalidationInfo {
/// Has the ivar been invalidated?
bool IsInvalidated;
@@ -167,7 +164,7 @@ class IvarInvalidationCheckerImpl {
void VisitObjCMessageExpr(const ObjCMessageExpr *ME);
void VisitChildren(const Stmt *S) {
- for (const Stmt *Child : S->children()) {
+ for (const auto *Child : S->children()) {
if (Child)
this->Visit(Child);
if (CalledAnotherInvalidationMethod)
@@ -208,6 +205,7 @@ class IvarInvalidationCheckerImpl {
const IvarToPropMapTy &IvarToPopertyMap,
const ObjCInterfaceDecl *InterfaceD,
bool MissingDeclaration) const;
+
void reportIvarNeedsInvalidation(const ObjCIvarDecl *IvarD,
const IvarToPropMapTy &IvarToPopertyMap,
const ObjCMethodDecl *MethodD) const;
@@ -276,8 +274,6 @@ void IvarInvalidationCheckerImpl::containsInvalidationMethod(
}
return;
}
-
- return;
}
bool IvarInvalidationCheckerImpl::trackIvar(const ObjCIvarDecl *Iv,
@@ -390,6 +386,8 @@ visit(const ObjCImplementationDecl *ImplD) const {
for (ObjCInterfaceDecl::PropertyMap::iterator
I = PropMap.begin(), E = PropMap.end(); I != E; ++I) {
const ObjCPropertyDecl *PD = I->second;
+ if (PD->isClassProperty())
+ continue;
const ObjCIvarDecl *ID = findPropertyBackingIvar(PD, InterfaceD, Ivars,
&FirstIvarDecl);
@@ -584,8 +582,7 @@ void IvarInvalidationCheckerImpl::MethodCrawler::markInvalidated(
// If InvalidationMethod is present, we are processing the message send and
// should ensure we are invalidating with the appropriate method,
// otherwise, we are processing setting to 'nil'.
- if (!InvalidationMethod ||
- (InvalidationMethod && I->second.hasMethod(InvalidationMethod)))
+ if (!InvalidationMethod || I->second.hasMethod(InvalidationMethod))
IVars.erase(I);
}
}
@@ -722,11 +719,10 @@ void IvarInvalidationCheckerImpl::MethodCrawler::VisitObjCMessageExpr(
VisitStmt(ME);
}
-}
+} // end anonymous namespace
// Register the checkers.
namespace {
-
class IvarInvalidationChecker :
public Checker<check::ASTDecl<ObjCImplementationDecl> > {
public:
@@ -738,7 +734,7 @@ public:
Walker.visit(D);
}
};
-}
+} // end anonymous namespace
#define REGISTER_CHECKER(name) \
void ento::register##name(CheckerManager &mgr) { \
@@ -750,4 +746,3 @@ public:
REGISTER_CHECKER(InstanceVariableInvalidation)
REGISTER_CHECKER(MissingInvalidationMethod)
-
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
index 56346cd4f706..7be2f574f0e9 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
@@ -111,6 +111,30 @@ NonLocalizedStringChecker::NonLocalizedStringChecker() {
"Localizability Issue (Apple)"));
}
+namespace {
+class NonLocalizedStringBRVisitor final
+ : public BugReporterVisitorImpl<NonLocalizedStringBRVisitor> {
+
+ const MemRegion *NonLocalizedString;
+ bool Satisfied;
+
+public:
+ NonLocalizedStringBRVisitor(const MemRegion *NonLocalizedString)
+ : NonLocalizedString(NonLocalizedString), Satisfied(false) {
+ assert(NonLocalizedString);
+ }
+
+ PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ,
+ const ExplodedNode *Pred,
+ BugReporterContext &BRC,
+ BugReport &BR) override;
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ ID.Add(NonLocalizedString);
+ }
+};
+} // End anonymous namespace.
+
#define NEW_RECEIVER(receiver) \
llvm::DenseMap<Selector, uint8_t> &receiver##M = \
UIMethods.insert({&Ctx.Idents.get(#receiver), \
@@ -619,11 +643,46 @@ void NonLocalizedStringChecker::setNonLocalizedState(const SVal S,
}
}
+
+static bool isDebuggingName(std::string name) {
+ return StringRef(name).lower().find("debug") != StringRef::npos;
+}
+
+/// Returns true when, heuristically, the analyzer may be analyzing debugging
+/// code. We use this to suppress localization diagnostics in un-localized user
+/// interfaces that are only used for debugging and are therefore not user
+/// facing.
+static bool isDebuggingContext(CheckerContext &C) {
+ const Decl *D = C.getCurrentAnalysisDeclContext()->getDecl();
+ if (!D)
+ return false;
+
+ if (auto *ND = dyn_cast<NamedDecl>(D)) {
+ if (isDebuggingName(ND->getNameAsString()))
+ return true;
+ }
+
+ const DeclContext *DC = D->getDeclContext();
+
+ if (auto *CD = dyn_cast<ObjCContainerDecl>(DC)) {
+ if (isDebuggingName(CD->getNameAsString()))
+ return true;
+ }
+
+ return false;
+}
+
/// Reports a localization error for the passed in method call and SVal
void NonLocalizedStringChecker::reportLocalizationError(
SVal S, const ObjCMethodCall &M, CheckerContext &C,
int argumentNumber) const {
+ // Don't warn about localization errors in classes and methods that
+ // may be debug code.
+ if (isDebuggingContext(C))
+ return;
+
ExplodedNode *ErrNode = C.getPredecessor();
static CheckerProgramPointTag Tag("NonLocalizedStringChecker",
"UnlocalizedString");
@@ -641,6 +700,11 @@ void NonLocalizedStringChecker::reportLocalizationError(
R->addRange(M.getSourceRange());
}
R->markInteresting(S);
+
+ const MemRegion *StringRegion = S.getAsRegion();
+ if (StringRegion)
+ R->addVisitor(llvm::make_unique<NonLocalizedStringBRVisitor>(StringRegion));
+
C.emitReport(std::move(R));
}
@@ -831,6 +895,41 @@ void NonLocalizedStringChecker::checkPostStmt(const ObjCStringLiteral *SL,
setNonLocalizedState(sv, C);
}
+PathDiagnosticPiece *
+NonLocalizedStringBRVisitor::VisitNode(const ExplodedNode *Succ,
+ const ExplodedNode *Pred,
+ BugReporterContext &BRC, BugReport &BR) {
+ if (Satisfied)
+ return nullptr;
+
+ Optional<StmtPoint> Point = Succ->getLocation().getAs<StmtPoint>();
+ if (!Point.hasValue())
+ return nullptr;
+
+ auto *LiteralExpr = dyn_cast<ObjCStringLiteral>(Point->getStmt());
+ if (!LiteralExpr)
+ return nullptr;
+
+ ProgramStateRef State = Succ->getState();
+ SVal LiteralSVal = State->getSVal(LiteralExpr, Succ->getLocationContext());
+ if (LiteralSVal.getAsRegion() != NonLocalizedString)
+ return nullptr;
+
+ Satisfied = true;
+
+ PathDiagnosticLocation L =
+ PathDiagnosticLocation::create(*Point, BRC.getSourceManager());
+
+ if (!L.isValid() || !L.asLocation().isValid())
+ return nullptr;
+
+ auto *Piece = new PathDiagnosticEventPiece(L,
+ "Non-localized string literal here");
+ Piece->addRange(LiteralExpr->getSourceRange());
+
+ return Piece;
+}
+
namespace {
class EmptyLocalizationContextChecker
: public Checker<check::ASTDecl<ObjCImplementationDecl>> {
@@ -965,7 +1064,7 @@ void EmptyLocalizationContextChecker::MethodCrawler::VisitObjCMessageExpr(
return;
StringRef Comment =
- StringRef(Result.getLiteralData(), Result.getLength()).trim("\"");
+ StringRef(Result.getLiteralData(), Result.getLength()).trim('"');
if ((Comment.trim().size() == 0 && Comment.size() > 0) || // Is Whitespace
Comment.empty()) {
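
The debugging-context suppression added above is a plain case-insensitive substring heuristic. A standalone sketch of its behavior (hypothetical helper name, mirroring isDebuggingName):

    #include "llvm/ADT/StringRef.h"

    // Mirrors isDebuggingName: lower-case the name, look for "debug".
    static bool looksLikeDebugName(llvm::StringRef Name) {
      return Name.lower().find("debug") != std::string::npos;
    }
    // looksLikeDebugName("debugDescription") -> true  (diagnostics suppressed)
    // looksLikeDebugName("description")      -> false (diagnostics emitted)
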
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
new file mode 100644
index 000000000000..d56ea6d689d3
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
@@ -0,0 +1,115 @@
+//===-- MPIBugReporter.cpp - bug reporter -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines prefabricated reports which are emitted in
+/// case of MPI related bugs, detected by path-sensitive analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MPIBugReporter.h"
+#include "MPIChecker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+void MPIBugReporter::reportDoubleNonblocking(
+ const CallEvent &MPICallEvent, const ento::mpi::Request &Req,
+ const MemRegion *const RequestRegion,
+ const ExplodedNode *const ExplNode,
+ BugReporter &BReporter) const {
+
+ std::string ErrorText;
+ ErrorText = "Double nonblocking on request " +
+ RequestRegion->getDescriptiveName() + ". ";
+
+ auto Report = llvm::make_unique<BugReport>(*DoubleNonblockingBugType,
+ ErrorText, ExplNode);
+
+ Report->addRange(MPICallEvent.getSourceRange());
+ SourceRange Range = RequestRegion->sourceRange();
+
+ if (Range.isValid())
+ Report->addRange(Range);
+
+ Report->addVisitor(llvm::make_unique<RequestNodeVisitor>(
+ RequestRegion, "Request is previously used by nonblocking call here. "));
+ Report->markInteresting(RequestRegion);
+
+ BReporter.emitReport(std::move(Report));
+}
+
+void MPIBugReporter::reportMissingWait(
+ const ento::mpi::Request &Req, const MemRegion *const RequestRegion,
+ const ExplodedNode *const ExplNode,
+ BugReporter &BReporter) const {
+ std::string ErrorText{"Request " + RequestRegion->getDescriptiveName() +
+ " has no matching wait. "};
+
+ auto Report =
+ llvm::make_unique<BugReport>(*MissingWaitBugType, ErrorText, ExplNode);
+
+ SourceRange Range = RequestRegion->sourceRange();
+ if (Range.isValid())
+ Report->addRange(Range);
+ Report->addVisitor(llvm::make_unique<RequestNodeVisitor>(
+ RequestRegion, "Request is previously used by nonblocking call here. "));
+ Report->markInteresting(RequestRegion);
+
+ BReporter.emitReport(std::move(Report));
+}
+
+void MPIBugReporter::reportUnmatchedWait(
+ const CallEvent &CE, const clang::ento::MemRegion *const RequestRegion,
+ const ExplodedNode *const ExplNode,
+ BugReporter &BReporter) const {
+ std::string ErrorText{"Request " + RequestRegion->getDescriptiveName() +
+ " has no matching nonblocking call. "};
+
+ auto Report =
+ llvm::make_unique<BugReport>(*UnmatchedWaitBugType, ErrorText, ExplNode);
+
+ Report->addRange(CE.getSourceRange());
+ SourceRange Range = RequestRegion->sourceRange();
+ if (Range.isValid())
+ Report->addRange(Range);
+
+ BReporter.emitReport(std::move(Report));
+}
+
+PathDiagnosticPiece *MPIBugReporter::RequestNodeVisitor::VisitNode(
+ const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC,
+ BugReport &BR) {
+
+ if (IsNodeFound)
+ return nullptr;
+
+ const Request *const Req = N->getState()->get<RequestMap>(RequestRegion);
+ const Request *const PrevReq =
+ PrevN->getState()->get<RequestMap>(RequestRegion);
+
+ // Check if request was previously unused or in a different state.
+ if (Req && (!PrevReq || Req->CurrentState != PrevReq->CurrentState)) {
+ IsNodeFound = true;
+
+ ProgramPoint P = PrevN->getLocation();
+ PathDiagnosticLocation L =
+ PathDiagnosticLocation::create(P, BRC.getSourceManager());
+
+ return new PathDiagnosticEventPiece(L, ErrorText);
+ }
+
+ return nullptr;
+}
+
+} // end of namespace: mpi
+} // end of namespace: ento
+} // end of namespace: clang
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h
new file mode 100644
index 000000000000..22fbf4c5b303
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h
@@ -0,0 +1,111 @@
+//===-- MPIBugReporter.h - bug reporter -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines prefabricated reports which are emitted in
+/// case of MPI related bugs, detected by path-sensitive analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIBUGREPORTER_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIBUGREPORTER_H
+
+#include "MPITypes.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+class MPIBugReporter {
+public:
+ MPIBugReporter(const CheckerBase &CB) {
+ UnmatchedWaitBugType.reset(new BugType(&CB, "Unmatched wait", MPIError));
+ DoubleNonblockingBugType.reset(
+ new BugType(&CB, "Double nonblocking", MPIError));
+ MissingWaitBugType.reset(new BugType(&CB, "Missing wait", MPIError));
+ }
+
+ /// Report duplicate request use by nonblocking calls without intermediate
+ /// wait.
+ ///
+ /// \param MPICallEvent MPI call that caused the double nonblocking
+ /// \param Req request that was used by two nonblocking calls in sequence
+ /// \param RequestRegion memory region of the request
+ /// \param ExplNode node in the graph the bug appeared at
+ /// \param BReporter bug reporter for current context
+ void reportDoubleNonblocking(const CallEvent &MPICallEvent,
+ const Request &Req,
+ const MemRegion *const RequestRegion,
+ const ExplodedNode *const ExplNode,
+ BugReporter &BReporter) const;
+
+ /// Report a missing wait for a nonblocking call. A missing wait report
+ /// is emitted if a nonblocking call is not matched in the scope of a
+ /// function.
+ ///
+ /// \param Req request that is not matched by a wait
+ /// \param RequestRegion memory region of the request
+ /// \param ExplNode node in the graph the bug appeared at
+ /// \param BReporter bug reporter for current context
+ void reportMissingWait(const Request &Req,
+ const MemRegion *const RequestRegion,
+ const ExplodedNode *const ExplNode,
+ BugReporter &BReporter) const;
+
+ /// Report a wait on a request that has not been used at all before.
+ ///
+ /// \param CE wait call that uses the request
+ /// \param RequestRegion memory region of the request
+ /// \param ExplNode node in the graph the bug appeared at
+ /// \param BReporter bug reporter for current context
+ void reportUnmatchedWait(const CallEvent &CE,
+ const MemRegion *const RequestRegion,
+ const ExplodedNode *const ExplNode,
+ BugReporter &BReporter) const;
+
+private:
+ const std::string MPIError = "MPI Error";
+
+ // path-sensitive bug types
+ std::unique_ptr<BugType> UnmatchedWaitBugType;
+ std::unique_ptr<BugType> MissingWaitBugType;
+ std::unique_ptr<BugType> DoubleNonblockingBugType;
+
+ /// Bug visitor class to find the node where the request region was previously
+ /// used in order to include it into the BugReport path.
+ class RequestNodeVisitor : public BugReporterVisitorImpl<RequestNodeVisitor> {
+ public:
+ RequestNodeVisitor(const MemRegion *const MemoryRegion,
+ const std::string &ErrText)
+ : RequestRegion(MemoryRegion), ErrorText(ErrText) {}
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ static int X = 0;
+ ID.AddPointer(&X);
+ ID.AddPointer(RequestRegion);
+ }
+
+ PathDiagnosticPiece *VisitNode(const ExplodedNode *N,
+ const ExplodedNode *PrevN,
+ BugReporterContext &BRC,
+ BugReport &BR) override;
+
+ private:
+ const MemRegion *const RequestRegion;
+ bool IsNodeFound = false;
+ std::string ErrorText;
+ };
+};
+
+} // end of namespace: mpi
+} // end of namespace: ento
+} // end of namespace: clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp
new file mode 100644
index 000000000000..c3d0f8f2a129
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.cpp
@@ -0,0 +1,190 @@
+//===-- MPIChecker.cpp - Checker Entry Point Class --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the main class of MPI-Checker, which serves as an entry
+/// point. It is created once for each translation unit analyzed.
+/// The checker defines path-sensitive checks to verify correct usage of the
+/// MPI API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MPIChecker.h"
+#include "../ClangSACheckers.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+void MPIChecker::checkDoubleNonblocking(const CallEvent &PreCallEvent,
+ CheckerContext &Ctx) const {
+ if (!FuncClassifier->isNonBlockingType(PreCallEvent.getCalleeIdentifier())) {
+ return;
+ }
+ const MemRegion *const MR =
+ PreCallEvent.getArgSVal(PreCallEvent.getNumArgs() - 1).getAsRegion();
+ if (!MR)
+ return;
+ const ElementRegion *const ER = dyn_cast<ElementRegion>(MR);
+
+ // The region must be typed in order to reason about it.
+ if (!isa<TypedRegion>(MR) || (ER && !isa<TypedRegion>(ER->getSuperRegion())))
+ return;
+
+ ProgramStateRef State = Ctx.getState();
+ const Request *const Req = State->get<RequestMap>(MR);
+
+ // double nonblocking detected
+ if (Req && Req->CurrentState == Request::State::Nonblocking) {
+ ExplodedNode *ErrorNode = Ctx.generateNonFatalErrorNode();
+ BReporter.reportDoubleNonblocking(PreCallEvent, *Req, MR, ErrorNode, Ctx.getBugReporter());
+ Ctx.addTransition(ErrorNode->getState(), ErrorNode);
+ }
+ // no error
+ else {
+ State = State->set<RequestMap>(MR, Request::State::Nonblocking);
+ Ctx.addTransition(State);
+ }
+}
+
+void MPIChecker::checkUnmatchedWaits(const CallEvent &PreCallEvent,
+ CheckerContext &Ctx) const {
+ if (!FuncClassifier->isWaitType(PreCallEvent.getCalleeIdentifier()))
+ return;
+ const MemRegion *const MR = topRegionUsedByWait(PreCallEvent);
+ if (!MR)
+ return;
+ const ElementRegion *const ER = dyn_cast<ElementRegion>(MR);
+
+ // The region must be typed in order to reason about it.
+ if (!isa<TypedRegion>(MR) || (ER && !isa<TypedRegion>(ER->getSuperRegion())))
+ return;
+
+ llvm::SmallVector<const MemRegion *, 2> ReqRegions;
+ allRegionsUsedByWait(ReqRegions, MR, PreCallEvent, Ctx);
+ if (ReqRegions.empty())
+ return;
+
+ ProgramStateRef State = Ctx.getState();
+ static CheckerProgramPointTag Tag("MPI-Checker", "UnmatchedWait");
+ ExplodedNode *ErrorNode{nullptr};
+
+ // Check all request regions used by the wait function.
+ for (const auto &ReqRegion : ReqRegions) {
+ const Request *const Req = State->get<RequestMap>(ReqRegion);
+ State = State->set<RequestMap>(ReqRegion, Request::State::Wait);
+ if (!Req) {
+ if (!ErrorNode) {
+ ErrorNode = Ctx.generateNonFatalErrorNode(State, &Tag);
+ State = ErrorNode->getState();
+ }
+ // A wait has no matching nonblocking call.
+ BReporter.reportUnmatchedWait(PreCallEvent, ReqRegion, ErrorNode, Ctx.getBugReporter());
+ }
+ }
+
+ if (!ErrorNode) {
+ Ctx.addTransition(State);
+ } else {
+ Ctx.addTransition(State, ErrorNode);
+ }
+}
+
+void MPIChecker::checkMissingWaits(SymbolReaper &SymReaper,
+ CheckerContext &Ctx) const {
+ if (!SymReaper.hasDeadSymbols())
+ return;
+
+ ProgramStateRef State = Ctx.getState();
+ const auto &Requests = State->get<RequestMap>();
+ if (Requests.isEmpty())
+ return;
+
+ static CheckerProgramPointTag Tag("MPI-Checker", "MissingWait");
+ ExplodedNode *ErrorNode{nullptr};
+
+ auto ReqMap = State->get<RequestMap>();
+ for (const auto &Req : ReqMap) {
+ if (!SymReaper.isLiveRegion(Req.first)) {
+ if (Req.second.CurrentState == Request::State::Nonblocking) {
+
+ if (!ErrorNode) {
+ ErrorNode = Ctx.generateNonFatalErrorNode(State, &Tag);
+ State = ErrorNode->getState();
+ }
+ BReporter.reportMissingWait(Req.second, Req.first, ErrorNode, Ctx.getBugReporter());
+ }
+ State = State->remove<RequestMap>(Req.first);
+ }
+ }
+
+ // Transition to update the state regarding removed requests.
+ if (!ErrorNode) {
+ Ctx.addTransition(State);
+ } else {
+ Ctx.addTransition(State, ErrorNode);
+ }
+}
+
+const MemRegion *MPIChecker::topRegionUsedByWait(const CallEvent &CE) const {
+
+ if (FuncClassifier->isMPI_Wait(CE.getCalleeIdentifier())) {
+ return CE.getArgSVal(0).getAsRegion();
+ } else if (FuncClassifier->isMPI_Waitall(CE.getCalleeIdentifier())) {
+ return CE.getArgSVal(1).getAsRegion();
+ } else {
+ return nullptr;
+ }
+}
+
+void MPIChecker::allRegionsUsedByWait(
+ llvm::SmallVector<const MemRegion *, 2> &ReqRegions,
+ const MemRegion *const MR, const CallEvent &CE, CheckerContext &Ctx) const {
+
+ MemRegionManager *const RegionManager = MR->getMemRegionManager();
+
+ if (FuncClassifier->isMPI_Waitall(CE.getCalleeIdentifier())) {
+ const MemRegion *SuperRegion{nullptr};
+ if (const ElementRegion *const ER = MR->getAs<ElementRegion>()) {
+ SuperRegion = ER->getSuperRegion();
+ }
+
+ // A single request is passed to MPI_Waitall.
+ if (!SuperRegion) {
+ ReqRegions.push_back(MR);
+ return;
+ }
+
+ const auto &Size = Ctx.getStoreManager().getSizeInElements(
+ Ctx.getState(), SuperRegion,
+ CE.getArgExpr(1)->getType()->getPointeeType());
+ const llvm::APSInt &ArrSize = Size.getAs<nonloc::ConcreteInt>()->getValue();
+
+ for (size_t i = 0; i < ArrSize; ++i) {
+ const NonLoc Idx = Ctx.getSValBuilder().makeArrayIndex(i);
+
+ const ElementRegion *const ER = RegionManager->getElementRegion(
+ CE.getArgExpr(1)->getType()->getPointeeType(), Idx, SuperRegion,
+ Ctx.getASTContext());
+
+ ReqRegions.push_back(ER->getAs<MemRegion>());
+ }
+ } else if (FuncClassifier->isMPI_Wait(CE.getCalleeIdentifier())) {
+ ReqRegions.push_back(MR);
+ }
+}
+
+} // end of namespace: mpi
+} // end of namespace: ento
+} // end of namespace: clang
+
+// Registers the checker for static analysis.
+void clang::ento::registerMPIChecker(CheckerManager &MGR) {
+ MGR.registerChecker<clang::ento::mpi::MPIChecker>();
+}
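
To see what these callbacks catch, here is a sketch of user code the checker is designed to flag (assuming <mpi.h>; the quoted diagnostic strings come from MPIBugReporter):

    #include <mpi.h>

    void doubleNonblocking(int *Buf) {
      MPI_Request Req;
      MPI_Irecv(Buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &Req);
      MPI_Irecv(Buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &Req); // "Double nonblocking on request ..."
      MPI_Wait(&Req, MPI_STATUS_IGNORE);
    }

    void missingWait(int *Buf) {
      MPI_Request Req;
      MPI_Irecv(Buf, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &Req);
    } // Req dies unmatched -> "Request ... has no matching wait."
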
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h
new file mode 100644
index 000000000000..20c60ad076a2
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIChecker.h
@@ -0,0 +1,107 @@
+//===-- MPIChecker.h - Verify MPI API usage- --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the main class of MPI-Checker, which serves as an entry
+/// point. It is created once for each translation unit analyzed.
+/// The checker defines path-sensitive checks to verify correct usage of the
+/// MPI API.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPICHECKER_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPICHECKER_H
+
+#include "MPIBugReporter.h"
+#include "MPIFunctionClassifier.h"
+#include "MPITypes.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+class MPIChecker : public Checker<check::PreCall, check::DeadSymbols> {
+public:
+ MPIChecker() : BReporter(*this) { }
+
+ // path-sensitive callbacks
+ void checkPreCall(const CallEvent &CE, CheckerContext &Ctx) const {
+ dynamicInit(Ctx);
+ checkUnmatchedWaits(CE, Ctx);
+ checkDoubleNonblocking(CE, Ctx);
+ }
+
+ void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &Ctx) const {
+ dynamicInit(Ctx);
+ checkMissingWaits(SymReaper, Ctx);
+ }
+
+ void dynamicInit(CheckerContext &Ctx) const {
+ if (FuncClassifier)
+ return;
+ const_cast<std::unique_ptr<MPIFunctionClassifier> &>(FuncClassifier)
+ .reset(new MPIFunctionClassifier{Ctx.getASTContext()});
+ }
+
+ /// Checks if a request is used by nonblocking calls multiple times
+ /// in sequence without an intermediate wait. The check is guarded so that
+ /// only nonblocking functions are inspected.
+ ///
+ /// \param PreCallEvent MPI call to verify
+ void checkDoubleNonblocking(const clang::ento::CallEvent &PreCallEvent,
+ clang::ento::CheckerContext &Ctx) const;
+
+ /// Checks if a request is used by a wait multiple times in sequence without
+ /// an intermediate nonblocking call, or if the request used by the wait
+ /// function was not used at all before. The check is guarded so that
+ /// only wait functions are inspected.
+ ///
+ /// \param PreCallEvent MPI call to verify
+ void checkUnmatchedWaits(const clang::ento::CallEvent &PreCallEvent,
+ clang::ento::CheckerContext &Ctx) const;
+
+ /// Check if a nonblocking call is not matched by a wait.
+ /// If a memory region is not alive and the last function using the
+ /// request was a nonblocking call, this is rated as a missing wait.
+ void checkMissingWaits(clang::ento::SymbolReaper &SymReaper,
+ clang::ento::CheckerContext &Ctx) const;
+
+private:
+ /// Collects all memory regions of a request (or request array) used by a wait
+ /// function. If the wait function uses a single request, this is a single
+ /// region. For wait functions using multiple requests, multiple regions
+ /// representing elements in the array are collected.
+ ///
+ /// \param ReqRegions vector the regions get pushed into
+ /// \param MR top most region to iterate
+ /// \param CE MPI wait call using the request(s)
+ void allRegionsUsedByWait(
+ llvm::SmallVector<const clang::ento::MemRegion *, 2> &ReqRegions,
+ const clang::ento::MemRegion *const MR, const clang::ento::CallEvent &CE,
+ clang::ento::CheckerContext &Ctx) const;
+
+ /// Returns the memory region used by a wait function.
+ /// Distinguishes between MPI_Wait and MPI_Waitall.
+ ///
+ /// \param CE MPI wait call
+ const clang::ento::MemRegion *
+ topRegionUsedByWait(const clang::ento::CallEvent &CE) const;
+
+ const std::unique_ptr<MPIFunctionClassifier> FuncClassifier;
+ MPIBugReporter BReporter;
+};
+
+} // end of namespace: mpi
+} // end of namespace: ento
+} // end of namespace: clang
+
+#endif
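
A note on dynamicInit above: checker objects are constructed before any translation-unit-specific ASTContext exists, so the classifier is built lazily from a const callback, hence the const_cast on the const unique_ptr member. The pattern in isolation (hypothetical holder type; a slightly more idiomatic equivalent uses a mutable member instead of const_cast):

    // Sketch of the lazy-init pattern used by MPIChecker::dynamicInit.
    class LazyClassifier {
      mutable std::unique_ptr<MPIFunctionClassifier> FC; // filled on first use
    public:
      MPIFunctionClassifier &get(clang::ASTContext &Ctx) const {
        if (!FC)
          FC.reset(new MPIFunctionClassifier(Ctx));
        return *FC;
      }
    };
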
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp
new file mode 100644
index 000000000000..ad937f683d30
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.cpp
@@ -0,0 +1,284 @@
+//===-- MPIFunctionClassifier.cpp - classifies MPI functions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functionality to identify and classify MPI functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MPIFunctionClassifier.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+void MPIFunctionClassifier::identifierInit(ASTContext &ASTCtx) {
+ // Initialize function identifiers.
+ initPointToPointIdentifiers(ASTCtx);
+ initCollectiveIdentifiers(ASTCtx);
+ initAdditionalIdentifiers(ASTCtx);
+}
+
+void MPIFunctionClassifier::initPointToPointIdentifiers(ASTContext &ASTCtx) {
+ // Copy identifiers into the correct classification containers.
+ IdentInfo_MPI_Send = &ASTCtx.Idents.get("MPI_Send");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Send);
+ MPIType.push_back(IdentInfo_MPI_Send);
+ assert(IdentInfo_MPI_Send);
+
+ IdentInfo_MPI_Isend = &ASTCtx.Idents.get("MPI_Isend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Isend);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Isend);
+ MPIType.push_back(IdentInfo_MPI_Isend);
+ assert(IdentInfo_MPI_Isend);
+
+ IdentInfo_MPI_Ssend = &ASTCtx.Idents.get("MPI_Ssend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Ssend);
+ MPIType.push_back(IdentInfo_MPI_Ssend);
+ assert(IdentInfo_MPI_Ssend);
+
+ IdentInfo_MPI_Issend = &ASTCtx.Idents.get("MPI_Issend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Issend);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Issend);
+ MPIType.push_back(IdentInfo_MPI_Issend);
+ assert(IdentInfo_MPI_Issend);
+
+ IdentInfo_MPI_Bsend = &ASTCtx.Idents.get("MPI_Bsend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Bsend);
+ MPIType.push_back(IdentInfo_MPI_Bsend);
+ assert(IdentInfo_MPI_Bsend);
+
+ IdentInfo_MPI_Ibsend = &ASTCtx.Idents.get("MPI_Ibsend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Ibsend);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Ibsend);
+ MPIType.push_back(IdentInfo_MPI_Ibsend);
+ assert(IdentInfo_MPI_Ibsend);
+
+ IdentInfo_MPI_Rsend = &ASTCtx.Idents.get("MPI_Rsend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Rsend);
+ MPIType.push_back(IdentInfo_MPI_Rsend);
+ assert(IdentInfo_MPI_Rsend);
+
+ IdentInfo_MPI_Irsend = &ASTCtx.Idents.get("MPI_Irsend");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Irsend);
+ MPIType.push_back(IdentInfo_MPI_Irsend);
+ assert(IdentInfo_MPI_Irsend);
+
+ IdentInfo_MPI_Recv = &ASTCtx.Idents.get("MPI_Recv");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Recv);
+ MPIType.push_back(IdentInfo_MPI_Recv);
+ assert(IdentInfo_MPI_Recv);
+
+ IdentInfo_MPI_Irecv = &ASTCtx.Idents.get("MPI_Irecv");
+ MPIPointToPointTypes.push_back(IdentInfo_MPI_Irecv);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Irecv);
+ MPIType.push_back(IdentInfo_MPI_Irecv);
+ assert(IdentInfo_MPI_Irecv);
+}
+
+void MPIFunctionClassifier::initCollectiveIdentifiers(ASTContext &ASTCtx) {
+ // Copy identifiers into the correct classification containers.
+ IdentInfo_MPI_Scatter = &ASTCtx.Idents.get("MPI_Scatter");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Scatter);
+ MPIPointToCollTypes.push_back(IdentInfo_MPI_Scatter);
+ MPIType.push_back(IdentInfo_MPI_Scatter);
+ assert(IdentInfo_MPI_Scatter);
+
+ IdentInfo_MPI_Iscatter = &ASTCtx.Idents.get("MPI_Iscatter");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Iscatter);
+ MPIPointToCollTypes.push_back(IdentInfo_MPI_Iscatter);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Iscatter);
+ MPIType.push_back(IdentInfo_MPI_Iscatter);
+ assert(IdentInfo_MPI_Iscatter);
+
+ IdentInfo_MPI_Gather = &ASTCtx.Idents.get("MPI_Gather");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Gather);
+ MPICollToPointTypes.push_back(IdentInfo_MPI_Gather);
+ MPIType.push_back(IdentInfo_MPI_Gather);
+ assert(IdentInfo_MPI_Gather);
+
+ IdentInfo_MPI_Igather = &ASTCtx.Idents.get("MPI_Igather");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Igather);
+ MPICollToPointTypes.push_back(IdentInfo_MPI_Igather);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Igather);
+ MPIType.push_back(IdentInfo_MPI_Igather);
+ assert(IdentInfo_MPI_Igather);
+
+ IdentInfo_MPI_Allgather = &ASTCtx.Idents.get("MPI_Allgather");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Allgather);
+ MPICollToCollTypes.push_back(IdentInfo_MPI_Allgather);
+ MPIType.push_back(IdentInfo_MPI_Allgather);
+ assert(IdentInfo_MPI_Allgather);
+
+ IdentInfo_MPI_Iallgather = &ASTCtx.Idents.get("MPI_Iallgather");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Iallgather);
+ MPICollToCollTypes.push_back(IdentInfo_MPI_Iallgather);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Iallgather);
+ MPIType.push_back(IdentInfo_MPI_Iallgather);
+ assert(IdentInfo_MPI_Iallgather);
+
+ IdentInfo_MPI_Bcast = &ASTCtx.Idents.get("MPI_Bcast");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Bcast);
+ MPIPointToCollTypes.push_back(IdentInfo_MPI_Bcast);
+ MPIType.push_back(IdentInfo_MPI_Bcast);
+ assert(IdentInfo_MPI_Bcast);
+
+ IdentInfo_MPI_Ibcast = &ASTCtx.Idents.get("MPI_Ibcast");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Ibcast);
+ MPIPointToCollTypes.push_back(IdentInfo_MPI_Ibcast);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Ibcast);
+ MPIType.push_back(IdentInfo_MPI_Ibcast);
+ assert(IdentInfo_MPI_Ibcast);
+
+ IdentInfo_MPI_Reduce = &ASTCtx.Idents.get("MPI_Reduce");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Reduce);
+ MPICollToPointTypes.push_back(IdentInfo_MPI_Reduce);
+ MPIType.push_back(IdentInfo_MPI_Reduce);
+ assert(IdentInfo_MPI_Reduce);
+
+ IdentInfo_MPI_Ireduce = &ASTCtx.Idents.get("MPI_Ireduce");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Ireduce);
+ MPICollToPointTypes.push_back(IdentInfo_MPI_Ireduce);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Ireduce);
+ MPIType.push_back(IdentInfo_MPI_Ireduce);
+ assert(IdentInfo_MPI_Ireduce);
+
+ IdentInfo_MPI_Allreduce = &ASTCtx.Idents.get("MPI_Allreduce");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Allreduce);
+ MPICollToCollTypes.push_back(IdentInfo_MPI_Allreduce);
+ MPIType.push_back(IdentInfo_MPI_Allreduce);
+ assert(IdentInfo_MPI_Allreduce);
+
+ IdentInfo_MPI_Iallreduce = &ASTCtx.Idents.get("MPI_Iallreduce");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Iallreduce);
+ MPICollToCollTypes.push_back(IdentInfo_MPI_Iallreduce);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Iallreduce);
+ MPIType.push_back(IdentInfo_MPI_Iallreduce);
+ assert(IdentInfo_MPI_Iallreduce);
+
+ IdentInfo_MPI_Alltoall = &ASTCtx.Idents.get("MPI_Alltoall");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Alltoall);
+ MPICollToCollTypes.push_back(IdentInfo_MPI_Alltoall);
+ MPIType.push_back(IdentInfo_MPI_Alltoall);
+ assert(IdentInfo_MPI_Alltoall);
+
+ IdentInfo_MPI_Ialltoall = &ASTCtx.Idents.get("MPI_Ialltoall");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Ialltoall);
+ MPICollToCollTypes.push_back(IdentInfo_MPI_Ialltoall);
+ MPINonBlockingTypes.push_back(IdentInfo_MPI_Ialltoall);
+ MPIType.push_back(IdentInfo_MPI_Ialltoall);
+ assert(IdentInfo_MPI_Ialltoall);
+}
+
+void MPIFunctionClassifier::initAdditionalIdentifiers(ASTContext &ASTCtx) {
+ IdentInfo_MPI_Comm_rank = &ASTCtx.Idents.get("MPI_Comm_rank");
+ MPIType.push_back(IdentInfo_MPI_Comm_rank);
+ assert(IdentInfo_MPI_Comm_rank);
+
+ IdentInfo_MPI_Comm_size = &ASTCtx.Idents.get("MPI_Comm_size");
+ MPIType.push_back(IdentInfo_MPI_Comm_size);
+ assert(IdentInfo_MPI_Comm_size);
+
+ IdentInfo_MPI_Wait = &ASTCtx.Idents.get("MPI_Wait");
+ MPIType.push_back(IdentInfo_MPI_Wait);
+ assert(IdentInfo_MPI_Wait);
+
+ IdentInfo_MPI_Waitall = &ASTCtx.Idents.get("MPI_Waitall");
+ MPIType.push_back(IdentInfo_MPI_Waitall);
+ assert(IdentInfo_MPI_Waitall);
+
+ IdentInfo_MPI_Barrier = &ASTCtx.Idents.get("MPI_Barrier");
+ MPICollectiveTypes.push_back(IdentInfo_MPI_Barrier);
+ MPIType.push_back(IdentInfo_MPI_Barrier);
+ assert(IdentInfo_MPI_Barrier);
+}
+
+// general identifiers
+bool MPIFunctionClassifier::isMPIType(const IdentifierInfo *IdentInfo) const {
+ return llvm::is_contained(MPIType, IdentInfo);
+}
+
+bool MPIFunctionClassifier::isNonBlockingType(
+ const IdentifierInfo *IdentInfo) const {
+ return llvm::is_contained(MPINonBlockingTypes, IdentInfo);
+}
+
+// point-to-point identifiers
+bool MPIFunctionClassifier::isPointToPointType(
+ const IdentifierInfo *IdentInfo) const {
+ return llvm::is_contained(MPIPointToPointTypes, IdentInfo);
+}
+
+// collective identifiers
+bool MPIFunctionClassifier::isCollectiveType(
+ const IdentifierInfo *IdentInfo) const {
+ return llvm::is_contained(MPICollectiveTypes, IdentInfo);
+}
+
+bool MPIFunctionClassifier::isCollToColl(
+ const IdentifierInfo *IdentInfo) const {
+ return llvm::is_contained(MPICollToCollTypes, IdentInfo);
+}
+
+bool MPIFunctionClassifier::isScatterType(
+ const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Scatter ||
+ IdentInfo == IdentInfo_MPI_Iscatter;
+}
+
+bool MPIFunctionClassifier::isGatherType(
+ const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Gather ||
+ IdentInfo == IdentInfo_MPI_Igather ||
+ IdentInfo == IdentInfo_MPI_Allgather ||
+ IdentInfo == IdentInfo_MPI_Iallgather;
+}
+
+bool MPIFunctionClassifier::isAllgatherType(
+ const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Allgather ||
+ IdentInfo == IdentInfo_MPI_Iallgather;
+}
+
+bool MPIFunctionClassifier::isAlltoallType(
+ const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Alltoall ||
+ IdentInfo == IdentInfo_MPI_Ialltoall;
+}
+
+bool MPIFunctionClassifier::isBcastType(const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Bcast || IdentInfo == IdentInfo_MPI_Ibcast;
+}
+
+bool MPIFunctionClassifier::isReduceType(
+ const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Reduce ||
+ IdentInfo == IdentInfo_MPI_Ireduce ||
+ IdentInfo == IdentInfo_MPI_Allreduce ||
+ IdentInfo == IdentInfo_MPI_Iallreduce;
+}
+
+// additional identifiers
+bool MPIFunctionClassifier::isMPI_Wait(const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Wait;
+}
+
+bool MPIFunctionClassifier::isMPI_Waitall(
+ const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Waitall;
+}
+
+bool MPIFunctionClassifier::isWaitType(const IdentifierInfo *IdentInfo) const {
+ return IdentInfo == IdentInfo_MPI_Wait || IdentInfo == IdentInfo_MPI_Waitall;
+}
+
+} // end of namespace: mpi
+} // end of namespace: ento
+} // end of namespace: clang
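
Usage sketch: inside a checker callback the classifier is queried with the callee's IdentifierInfo (a CheckerContext Ctx and CallEvent Call, as in MPIChecker.cpp, are assumed):

    MPIFunctionClassifier Classifier(Ctx.getASTContext());
    const IdentifierInfo *Id = Call.getCalleeIdentifier();
    if (Classifier.isNonBlockingType(Id)) {
      // MPI_Isend, MPI_Irecv, MPI_Ibcast, ...: start tracking the request argument.
    } else if (Classifier.isWaitType(Id)) {
      // MPI_Wait / MPI_Waitall: the tracked request transitions to the Wait state.
    }
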
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.h b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.h
new file mode 100644
index 000000000000..65e908912c54
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIFunctionClassifier.h
@@ -0,0 +1,97 @@
+//===-- MPIFunctionClassifier.h - classifies MPI functions ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functionality to identify and classify MPI functions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIFUNCTIONCLASSIFIER_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPIFUNCTIONCLASSIFIER_H
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+class MPIFunctionClassifier {
+public:
+ MPIFunctionClassifier(ASTContext &ASTCtx) { identifierInit(ASTCtx); }
+
+ // general identifiers
+ bool isMPIType(const IdentifierInfo *const IdentInfo) const;
+ bool isNonBlockingType(const IdentifierInfo *const IdentInfo) const;
+
+ // point-to-point identifiers
+ bool isPointToPointType(const IdentifierInfo *const IdentInfo) const;
+
+ // collective identifiers
+ bool isCollectiveType(const IdentifierInfo *const IdentInfo) const;
+ bool isCollToColl(const IdentifierInfo *const IdentInfo) const;
+ bool isScatterType(const IdentifierInfo *const IdentInfo) const;
+ bool isGatherType(const IdentifierInfo *const IdentInfo) const;
+ bool isAllgatherType(const IdentifierInfo *const IdentInfo) const;
+ bool isAlltoallType(const IdentifierInfo *const IdentInfo) const;
+ bool isReduceType(const IdentifierInfo *const IdentInfo) const;
+ bool isBcastType(const IdentifierInfo *const IdentInfo) const;
+
+ // additional identifiers
+ bool isMPI_Wait(const IdentifierInfo *const IdentInfo) const;
+ bool isMPI_Waitall(const IdentifierInfo *const IdentInfo) const;
+ bool isWaitType(const IdentifierInfo *const IdentInfo) const;
+
+private:
+ // Initializes function identifiers so they can be recognized during analysis.
+ void identifierInit(ASTContext &ASTCtx);
+ void initPointToPointIdentifiers(ASTContext &ASTCtx);
+ void initCollectiveIdentifiers(ASTContext &ASTCtx);
+ void initAdditionalIdentifiers(ASTContext &ASTCtx);
+
+ // These containers enable classification of MPI functions during analysis.
+ llvm::SmallVector<IdentifierInfo *, 12> MPINonBlockingTypes;
+
+ llvm::SmallVector<IdentifierInfo *, 10> MPIPointToPointTypes;
+ llvm::SmallVector<IdentifierInfo *, 16> MPICollectiveTypes;
+
+ llvm::SmallVector<IdentifierInfo *, 4> MPIPointToCollTypes;
+ llvm::SmallVector<IdentifierInfo *, 4> MPICollToPointTypes;
+ llvm::SmallVector<IdentifierInfo *, 6> MPICollToCollTypes;
+
+ llvm::SmallVector<IdentifierInfo *, 32> MPIType;
+
+ // point-to-point functions
+ IdentifierInfo *IdentInfo_MPI_Send = nullptr, *IdentInfo_MPI_Isend = nullptr,
+ *IdentInfo_MPI_Ssend = nullptr, *IdentInfo_MPI_Issend = nullptr,
+ *IdentInfo_MPI_Bsend = nullptr, *IdentInfo_MPI_Ibsend = nullptr,
+ *IdentInfo_MPI_Rsend = nullptr, *IdentInfo_MPI_Irsend = nullptr,
+ *IdentInfo_MPI_Recv = nullptr, *IdentInfo_MPI_Irecv = nullptr;
+
+ // collective functions
+ IdentifierInfo *IdentInfo_MPI_Scatter = nullptr,
+ *IdentInfo_MPI_Iscatter = nullptr, *IdentInfo_MPI_Gather = nullptr,
+ *IdentInfo_MPI_Igather = nullptr, *IdentInfo_MPI_Allgather = nullptr,
+ *IdentInfo_MPI_Iallgather = nullptr, *IdentInfo_MPI_Bcast = nullptr,
+ *IdentInfo_MPI_Ibcast = nullptr, *IdentInfo_MPI_Reduce = nullptr,
+ *IdentInfo_MPI_Ireduce = nullptr, *IdentInfo_MPI_Allreduce = nullptr,
+ *IdentInfo_MPI_Iallreduce = nullptr, *IdentInfo_MPI_Alltoall = nullptr,
+ *IdentInfo_MPI_Ialltoall = nullptr, *IdentInfo_MPI_Barrier = nullptr;
+
+ // additional functions
+ IdentifierInfo *IdentInfo_MPI_Comm_rank = nullptr,
+ *IdentInfo_MPI_Comm_size = nullptr, *IdentInfo_MPI_Wait = nullptr,
+ *IdentInfo_MPI_Waitall = nullptr;
+};
+
+} // end of namespace: mpi
+} // end of namespace: ento
+} // end of namespace: clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h
new file mode 100644
index 000000000000..27ec950d31eb
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPITypes.h
@@ -0,0 +1,68 @@
+//===-- MPITypes.h - Functionality to model MPI concepts --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides definitions to model concepts of MPI. The mpi::Request
+/// class defines a wrapper that makes MPI requests trackable for
+/// path-sensitive analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPITYPES_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_MPICHECKER_MPITYPES_H
+
+#include "MPIFunctionClassifier.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace clang {
+namespace ento {
+namespace mpi {
+
+class Request {
+public:
+ enum State : unsigned char { Nonblocking, Wait };
+
+ Request(State S) : CurrentState{S} {}
+
+ void Profile(llvm::FoldingSetNodeID &Id) const {
+ Id.AddInteger(CurrentState);
+ }
+
+ bool operator==(const Request &ToCompare) const {
+ return CurrentState == ToCompare.CurrentState;
+ }
+
+ const State CurrentState;
+};
+
+// The RequestMap stores MPI requests which are identified by their memory
+// region. Requests are used in MPI to complete nonblocking operations with wait
+// operations. A custom map implementation is used in order to make it
+// available in an arbitrary number of translation units.
+struct RequestMap {};
+typedef llvm::ImmutableMap<const clang::ento::MemRegion *,
+ clang::ento::mpi::Request>
+ RequestMapImpl;
+
+} // end of namespace: mpi
+
+template <>
+struct ProgramStateTrait<mpi::RequestMap>
+ : public ProgramStatePartialTrait<mpi::RequestMapImpl> {
+ static void *GDMIndex() {
+ static int index = 0;
+ return &index;
+ }
+};
+
+} // end of namespace: ento
+} // end of namespace: clang
+#endif
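
Spelling the ProgramStateTrait specialization out by hand, rather than using the REGISTER_MAP_WITH_PROGRAMSTATE macro (which also defines the trait, but only for one translation unit), is what lets the map be shared across the checker's source files; access goes through the usual generic-data-map API. A sketch, assuming a ProgramStateRef State and a const MemRegion *MR inside namespace ento:

    const mpi::Request *Req = State->get<mpi::RequestMap>(MR);           // lookup
    State = State->set<mpi::RequestMap>(MR, mpi::Request::State::Wait);  // insert/update
    State = State->remove<mpi::RequestMap>(MR);                          // erase
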
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
index 4cbe97b26075..c038a2649e15 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
@@ -75,7 +75,7 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
// _dispatch_once is then a function which then calls the real dispatch_once.
// Users do not care; they just want the warning at the top-level call.
if (CE->getLocStart().isMacroID()) {
- StringRef TrimmedFName = FName.ltrim("_");
+ StringRef TrimmedFName = FName.ltrim('_');
if (TrimmedFName != FName)
FName = TrimmedFName;
}
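
The quoting change here is not cosmetic: StringRef::ltrim(char) strips consecutive copies of one character, while ltrim(StringRef) strips any character from a set. With a single underscore both behave the same, but the char overload states the intent. Behavior sketch (ltrim returns a new StringRef):

    llvm::StringRef FName = "__dispatch_once";
    FName.ltrim('_');   // -> "dispatch_once"  (strip leading '_' characters)
    FName.ltrim("_d");  // -> "ispatch_once"   (strip any leading '_' or 'd')
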
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index fee030feb6d2..e06662b16934 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -31,6 +31,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include <climits>
+#include <utility>
using namespace clang;
using namespace ento;
@@ -169,11 +170,12 @@ class MallocChecker : public Checker<check::DeadSymbols,
{
public:
MallocChecker()
- : II_alloca(nullptr), II_malloc(nullptr), II_free(nullptr),
- II_realloc(nullptr), II_calloc(nullptr), II_valloc(nullptr),
- II_reallocf(nullptr), II_strndup(nullptr), II_strdup(nullptr),
- II_kmalloc(nullptr), II_if_nameindex(nullptr),
- II_if_freenameindex(nullptr) {}
+ : II_alloca(nullptr), II_win_alloca(nullptr), II_malloc(nullptr),
+ II_free(nullptr), II_realloc(nullptr), II_calloc(nullptr),
+ II_valloc(nullptr), II_reallocf(nullptr), II_strndup(nullptr),
+ II_strdup(nullptr), II_win_strdup(nullptr), II_kmalloc(nullptr),
+ II_if_nameindex(nullptr), II_if_freenameindex(nullptr),
+ II_wcsdup(nullptr), II_win_wcsdup(nullptr) {}
/// In pessimistic mode, the checker assumes that it does not know which
/// functions might free the memory.
@@ -231,10 +233,11 @@ private:
mutable std::unique_ptr<BugType> BT_MismatchedDealloc;
mutable std::unique_ptr<BugType> BT_OffsetFree[CK_NumCheckKinds];
mutable std::unique_ptr<BugType> BT_UseZerroAllocated[CK_NumCheckKinds];
- mutable IdentifierInfo *II_alloca, *II_malloc, *II_free, *II_realloc,
- *II_calloc, *II_valloc, *II_reallocf, *II_strndup,
- *II_strdup, *II_kmalloc, *II_if_nameindex,
- *II_if_freenameindex;
+ mutable IdentifierInfo *II_alloca, *II_win_alloca, *II_malloc, *II_free,
+ *II_realloc, *II_calloc, *II_valloc, *II_reallocf,
+ *II_strndup, *II_strdup, *II_win_strdup, *II_kmalloc,
+ *II_if_nameindex, *II_if_freenameindex, *II_wcsdup,
+ *II_win_wcsdup;
mutable Optional<uint64_t> KernelZeroFlagVal;
void initIdentifierInfo(ASTContext &C) const;
@@ -518,7 +521,7 @@ namespace {
class StopTrackingCallback final : public SymbolVisitor {
ProgramStateRef state;
public:
- StopTrackingCallback(ProgramStateRef st) : state(st) {}
+ StopTrackingCallback(ProgramStateRef st) : state(std::move(st)) {}
ProgramStateRef getState() const { return state; }
bool VisitSymbol(SymbolRef sym) override {
@@ -540,9 +543,15 @@ void MallocChecker::initIdentifierInfo(ASTContext &Ctx) const {
II_valloc = &Ctx.Idents.get("valloc");
II_strdup = &Ctx.Idents.get("strdup");
II_strndup = &Ctx.Idents.get("strndup");
+ II_wcsdup = &Ctx.Idents.get("wcsdup");
II_kmalloc = &Ctx.Idents.get("kmalloc");
II_if_nameindex = &Ctx.Idents.get("if_nameindex");
II_if_freenameindex = &Ctx.Idents.get("if_freenameindex");
+
+ // MSVC uses _-prefixed names for these functions, so check for those too.
+ II_win_strdup = &Ctx.Idents.get("_strdup");
+ II_win_wcsdup = &Ctx.Idents.get("_wcsdup");
+ II_win_alloca = &Ctx.Idents.get("_alloca");
}
bool MallocChecker::isMemFunction(const FunctionDecl *FD, ASTContext &C) const {
@@ -585,7 +594,8 @@ bool MallocChecker::isCMemFunction(const FunctionDecl *FD,
if (Family == AF_Malloc && CheckAlloc) {
if (FunI == II_malloc || FunI == II_realloc || FunI == II_reallocf ||
FunI == II_calloc || FunI == II_valloc || FunI == II_strdup ||
- FunI == II_strndup || FunI == II_kmalloc)
+ FunI == II_win_strdup || FunI == II_strndup || FunI == II_wcsdup ||
+ FunI == II_win_wcsdup || FunI == II_kmalloc)
return true;
}
@@ -600,7 +610,7 @@ bool MallocChecker::isCMemFunction(const FunctionDecl *FD,
}
if (Family == AF_Alloca && CheckAlloc) {
- if (FunI == II_alloca)
+ if (FunI == II_alloca || FunI == II_win_alloca)
return true;
}
}
@@ -789,11 +799,12 @@ void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const {
State = ProcessZeroAllocation(C, CE, 1, State);
} else if (FunI == II_free) {
State = FreeMemAux(C, CE, State, 0, false, ReleasedAllocatedMemory);
- } else if (FunI == II_strdup) {
+ } else if (FunI == II_strdup || FunI == II_win_strdup ||
+ FunI == II_wcsdup || FunI == II_win_wcsdup) {
State = MallocUpdateRefState(C, CE, State);
} else if (FunI == II_strndup) {
State = MallocUpdateRefState(C, CE, State);
- } else if (FunI == II_alloca) {
+ } else if (FunI == II_alloca || FunI == II_win_alloca) {
State = MallocMemAux(C, CE, CE->getArg(0), UndefinedVal(), State,
AF_Alloca);
State = ProcessZeroAllocation(C, CE, 0, State);
@@ -933,7 +944,7 @@ static bool treatUnusedNewEscaped(const CXXNewExpr *NE) {
const CXXConstructorDecl *CtorD = ConstructE->getConstructor();
// Iterate over the constructor parameters.
- for (const auto *CtorParam : CtorD->params()) {
+ for (const auto *CtorParam : CtorD->parameters()) {
QualType CtorParamPointeeT = CtorParam->getType()->getPointeeType();
if (CtorParamPointeeT.isNull())
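
With the underscore-prefixed aliases registered, MSVC-style sources get the same malloc-family modeling as their POSIX counterparts; a hypothetical example the checker can now diagnose:

    extern "C" char *_strdup(const char *); // MSVC CRT spelling of strdup

    void leak(const char *S) {
      char *Copy = _strdup(S); // modeled like strdup: allocates with malloc semantics
    }                          // Copy leaks -> malloc-family leak report
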
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
index 99ba90d7a2d9..fc2ab1d6e3f7 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
@@ -25,10 +25,10 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
+#include <utility>
using namespace clang;
using namespace ento;
-using llvm::APInt;
using llvm::APSInt;
namespace {
@@ -38,7 +38,7 @@ struct MallocOverflowCheck {
APSInt maxVal;
MallocOverflowCheck(const BinaryOperator *m, const Expr *v, APSInt val)
- : mulop(m), variable(v), maxVal(val) {}
+ : mulop(m), variable(v), maxVal(std::move(val)) {}
};
class MallocOverflowSecurityChecker : public Checker<check::ASTCodeBody> {
@@ -141,25 +141,25 @@ private:
return false;
}
- const Decl *getDecl(const DeclRefExpr *DR) { return DR->getDecl(); }
-
- const Decl *getDecl(const MemberExpr *ME) { return ME->getMemberDecl(); }
+ static const Decl *getDecl(const DeclRefExpr *DR) { return DR->getDecl(); }
+ static const Decl *getDecl(const MemberExpr *ME) {
+ return ME->getMemberDecl();
+ }
template <typename T1>
- void Erase(const T1 *DR, std::function<bool(theVecType::iterator)> pred) {
- theVecType::iterator i = toScanFor.end();
- theVecType::iterator e = toScanFor.begin();
- while (i != e) {
- --i;
- if (const T1 *DR_i = dyn_cast<T1>(i->variable)) {
- if ((getDecl(DR_i) == getDecl(DR)) && pred(i))
- i = toScanFor.erase(i);
- }
- }
+ void Erase(const T1 *DR,
+ llvm::function_ref<bool(const MallocOverflowCheck &)> Pred) {
+ auto P = [DR, Pred](const MallocOverflowCheck &Check) {
+ if (const auto *CheckDR = dyn_cast<T1>(Check.variable))
+ return getDecl(CheckDR) == getDecl(DR) && Pred(Check);
+ return false;
+ };
+ toScanFor.erase(std::remove_if(toScanFor.begin(), toScanFor.end(), P),
+ toScanFor.end());
}
void CheckExpr(const Expr *E_p) {
- auto PredTrue = [](theVecType::iterator) -> bool { return true; };
+ auto PredTrue = [](const MallocOverflowCheck &) { return true; };
const Expr *E = E_p->IgnoreParenImpCasts();
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
Erase<DeclRefExpr>(DR, PredTrue);
@@ -210,9 +210,9 @@ private:
const Expr *E = lhs->IgnoreParenImpCasts();
auto pred = [assignKnown, numeratorKnown,
- denomExtVal](theVecType::iterator i) {
+ denomExtVal](const MallocOverflowCheck &Check) {
return assignKnown ||
- (numeratorKnown && (denomExtVal >= i->maxVal.getExtValue()));
+ (numeratorKnown && (denomExtVal >= Check.maxVal.getExtValue()));
};
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
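
The rewritten Erase is the standard erase-remove idiom in place of the old manual backwards iteration; in isolation:

    #include <algorithm>
    #include <vector>

    void dropMatching(std::vector<int> &V) {
      // remove_if compacts the kept elements; erase trims the tail.
      V.erase(std::remove_if(V.begin(), V.end(),
                             [](int X) { return X % 2 == 0; }),
              V.end());
    }
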
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
index dab068b27e80..559c75d7a5b0 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
@@ -61,7 +61,7 @@ void NSErrorMethodChecker::checkASTDecl(const ObjCMethodDecl *D,
II = &D->getASTContext().Idents.get("NSError");
bool hasNSError = false;
- for (const auto *I : D->params()) {
+ for (const auto *I : D->parameters()) {
if (IsNSError(I->getType(), II)) {
hasNSError = true;
break;
@@ -108,7 +108,7 @@ void CFErrorFunctionChecker::checkASTDecl(const FunctionDecl *D,
II = &D->getASTContext().Idents.get("CFErrorRef");
bool hasCFError = false;
- for (auto I : D->params()) {
+ for (auto I : D->parameters()) {
if (IsCFError(I->getType(), II)) {
hasCFError = true;
break;
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
index bb86ea401df5..d7ec6b10c6f7 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
@@ -26,13 +26,16 @@
//===----------------------------------------------------------------------===//
#include "ClangSACheckers.h"
-#include "llvm/Support/Path.h"
+
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Path.h"
+
using namespace clang;
using namespace ento;
@@ -89,18 +92,6 @@ enum class ErrorKind : int {
NullablePassedToNonnull
};
-const char *const ErrorMessages[] = {
- "Null is assigned to a pointer which is expected to have non-null value",
- "Null passed to a callee that requires a non-null argument",
- "Null is returned from a function that is expected to return a non-null "
- "value",
- "Nullable pointer is assigned to a pointer which is expected to have "
- "non-null value",
- "Nullable pointer is returned from a function that is expected to return a "
- "non-null value",
- "Nullable pointer is dereferenced",
- "Nullable pointer is passed to a callee that requires a non-null argument"};
-
class NullabilityChecker
: public Checker<check::Bind, check::PreCall, check::PreStmt<ReturnStmt>,
check::PostCall, check::PostStmt<ExplicitCastExpr>,
@@ -109,6 +100,14 @@ class NullabilityChecker
mutable std::unique_ptr<BugType> BT;
public:
+ // If true, the checker will not diagnose nullability issues for calls
+ // to system headers. This option is motivated by the observation that large
+ // projects may have many nullability warnings. These projects may consider
+ // warnings about nullability annotations that they have explicitly added
+ // themselves a higher priority to fix than warnings on calls to system
+ // libraries.
+ DefaultBool NoDiagnoseCallsToSystemHeaders;
+
void checkBind(SVal L, SVal V, const Stmt *S, CheckerContext &C) const;
void checkPostStmt(const ExplicitCastExpr *CE, CheckerContext &C) const;
void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
@@ -169,17 +168,19 @@ private:
///
/// When \p SuppressPath is set to true, no more bugs will be reported on this
/// path by this checker.
- void reportBugIfPreconditionHolds(ErrorKind Error, ExplodedNode *N,
- const MemRegion *Region, CheckerContext &C,
- const Stmt *ValueExpr = nullptr,
- bool SuppressPath = false) const;
-
- void reportBug(ErrorKind Error, ExplodedNode *N, const MemRegion *Region,
- BugReporter &BR, const Stmt *ValueExpr = nullptr) const {
+ void reportBugIfInvariantHolds(StringRef Msg, ErrorKind Error,
+ ExplodedNode *N, const MemRegion *Region,
+ CheckerContext &C,
+ const Stmt *ValueExpr = nullptr,
+ bool SuppressPath = false) const;
+
+ void reportBug(StringRef Msg, ErrorKind Error, ExplodedNode *N,
+ const MemRegion *Region, BugReporter &BR,
+ const Stmt *ValueExpr = nullptr) const {
if (!BT)
BT.reset(new BugType(this, "Nullability", "Memory error"));
- const char *Msg = ErrorMessages[static_cast<int>(Error)];
- std::unique_ptr<BugReport> R(new BugReport(*BT, Msg, N));
+
+ auto R = llvm::make_unique<BugReport>(*BT, Msg, N);
if (Region) {
R->markInteresting(Region);
R->addVisitor(llvm::make_unique<NullabilityBugVisitor>(Region));
@@ -198,6 +199,15 @@ private:
/// to the wrapped region. Otherwise it will return a nullptr.
const SymbolicRegion *getTrackRegion(SVal Val,
bool CheckSuperRegion = false) const;
+
+ /// Returns true if the call is diagnosable in the current analyzer
+ /// configuration.
+ bool isDiagnosableCall(const CallEvent &Call) const {
+ if (NoDiagnoseCallsToSystemHeaders && Call.isInSystemHeader())
+ return false;
+
+ return true;
+ }
};
class NullabilityState {
@@ -237,12 +247,31 @@ bool operator==(NullabilityState Lhs, NullabilityState Rhs) {
REGISTER_MAP_WITH_PROGRAMSTATE(NullabilityMap, const MemRegion *,
NullabilityState)
-// If the nullability precondition of a function is violated, we should not
-// report nullability related issues on that path. For this reason once a
-// precondition is not met on a path, this checker will be esentially turned off
-// for the rest of the analysis. We do not want to generate a sink node however,
-// so this checker would not lead to reduced coverage.
-REGISTER_TRAIT_WITH_PROGRAMSTATE(PreconditionViolated, bool)
+// We say "the nullability type invariant is violated" when a location with a
+// non-null type contains NULL or a function with a non-null return type returns
+// NULL. Violations of the nullability type invariant can be detected either
+// directly (for example, when NULL is passed as an argument to a nonnull
+// parameter) or indirectly (for example, when, inside a function, the
+// programmer defensively checks whether a nonnull parameter contains NULL and
+// finds that it does).
+//
+// As a matter of policy, the nullability checker typically warns on direct
+// violations of the nullability invariant (although it uses various
+// heuristics to suppress warnings in some cases) but will not warn if the
+// invariant has already been violated along the path (either directly or
+// indirectly). As a practical matter, this prevents the analyzer from
+// (1) warning on defensive code paths where a nullability precondition is
+// determined to have been violated, (2) warning additional times after an
+// initial direct violation has been discovered, and (3) warning after a direct
+// violation that has been implicitly or explicitly suppressed (for
+// example, with a cast of NULL to _Nonnull). In essence, once an invariant
+// violation is detected on a path, this checker is essentially turned off
+// for the rest of the analysis.
+//
+// The analyzer takes this approach (rather than generating a sink node) to
+// ensure coverage of defensive paths, which may be important for backwards
+// compatibility in codebases that were developed without nullability in mind.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(InvariantViolated, bool)
enum class NullConstraint { IsNull, IsNotNull, Unknown };
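[Note] To make the policy described in the new comment block concrete, here is an illustrative snippet (Clang C++ with the _Nonnull extension; hypothetical code, not part of the patch). The call in caller() is a direct violation and is reported once; the defensive check in callee() indirectly reveals an already-violated invariant, after which the checker stays quiet on that path rather than sinking it:

    void use(int * _Nonnull P);

    void callee(int * _Nonnull Q) {
      if (!Q)    // Defensive check: reaching this branch means the invariant
        return;  // was already violated, so no further warnings on this path.
      use(Q);
    }

    void caller() {
      use(0);    // Direct violation: one warning is emitted here.
    }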
@@ -327,38 +356,79 @@ static Nullability getNullabilityAnnotation(QualType Type) {
return Nullability::Unspecified;
}
-template <typename ParamVarDeclRange>
+/// Returns true when the value stored at the given location is null
+/// and the passed-in type is nonnull.
+static bool checkValueAtLValForInvariantViolation(ProgramStateRef State,
+ SVal LV, QualType T) {
+ if (getNullabilityAnnotation(T) != Nullability::Nonnull)
+ return false;
+
+ auto RegionVal = LV.getAs<loc::MemRegionVal>();
+ if (!RegionVal)
+ return false;
+
+ auto StoredVal =
+ State->getSVal(RegionVal->getRegion()).getAs<DefinedOrUnknownSVal>();
+ if (!StoredVal)
+ return false;
+
+ if (getNullConstraint(*StoredVal, State) == NullConstraint::IsNull)
+ return true;
+
+ return false;
+}
+
static bool
-checkParamsForPreconditionViolation(const ParamVarDeclRange &Params,
+checkParamsForPreconditionViolation(ArrayRef<ParmVarDecl *> Params,
ProgramStateRef State,
const LocationContext *LocCtxt) {
for (const auto *ParamDecl : Params) {
if (ParamDecl->isParameterPack())
break;
- if (getNullabilityAnnotation(ParamDecl->getType()) != Nullability::Nonnull)
- continue;
+ SVal LV = State->getLValue(ParamDecl, LocCtxt);
+ if (checkValueAtLValForInvariantViolation(State, LV,
+ ParamDecl->getType())) {
+ return true;
+ }
+ }
+ return false;
+}
- auto RegVal = State->getLValue(ParamDecl, LocCtxt)
- .template getAs<loc::MemRegionVal>();
- if (!RegVal)
- continue;
+static bool
+checkSelfIvarsForInvariantViolation(ProgramStateRef State,
+ const LocationContext *LocCtxt) {
+ auto *MD = dyn_cast<ObjCMethodDecl>(LocCtxt->getDecl());
+ if (!MD || !MD->isInstanceMethod())
+ return false;
- auto ParamValue = State->getSVal(RegVal->getRegion())
- .template getAs<DefinedOrUnknownSVal>();
- if (!ParamValue)
- continue;
+ const ImplicitParamDecl *SelfDecl = LocCtxt->getSelfDecl();
+ if (!SelfDecl)
+ return false;
- if (getNullConstraint(*ParamValue, State) == NullConstraint::IsNull) {
+ SVal SelfVal = State->getSVal(State->getRegion(SelfDecl, LocCtxt));
+
+ const ObjCObjectPointerType *SelfType =
+ dyn_cast<ObjCObjectPointerType>(SelfDecl->getType());
+ if (!SelfType)
+ return false;
+
+ const ObjCInterfaceDecl *ID = SelfType->getInterfaceDecl();
+ if (!ID)
+ return false;
+
+ for (const auto *IvarDecl : ID->ivars()) {
+ SVal LV = State->getLValue(IvarDecl, SelfVal);
+ if (checkValueAtLValForInvariantViolation(State, LV, IvarDecl->getType())) {
return true;
}
}
return false;
}
-static bool checkPreconditionViolation(ProgramStateRef State, ExplodedNode *N,
- CheckerContext &C) {
- if (State->get<PreconditionViolated>())
+static bool checkInvariantViolation(ProgramStateRef State, ExplodedNode *N,
+ CheckerContext &C) {
+ if (State->get<InvariantViolated>())
return true;
const LocationContext *LocCtxt = C.getLocationContext();
@@ -366,41 +436,38 @@ static bool checkPreconditionViolation(ProgramStateRef State, ExplodedNode *N,
if (!D)
return false;
- if (const auto *BlockD = dyn_cast<BlockDecl>(D)) {
- if (checkParamsForPreconditionViolation(BlockD->parameters(), State,
- LocCtxt)) {
- if (!N->isSink())
- C.addTransition(State->set<PreconditionViolated>(true), N);
- return true;
- }
+ ArrayRef<ParmVarDecl*> Params;
+ if (const auto *BD = dyn_cast<BlockDecl>(D))
+ Params = BD->parameters();
+ else if (const auto *FD = dyn_cast<FunctionDecl>(D))
+ Params = FD->parameters();
+ else if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
+ Params = MD->parameters();
+ else
return false;
- }
- if (const auto *FuncDecl = dyn_cast<FunctionDecl>(D)) {
- if (checkParamsForPreconditionViolation(FuncDecl->parameters(), State,
- LocCtxt)) {
- if (!N->isSink())
- C.addTransition(State->set<PreconditionViolated>(true), N);
- return true;
- }
- return false;
+ if (checkParamsForPreconditionViolation(Params, State, LocCtxt) ||
+ checkSelfIvarsForInvariantViolation(State, LocCtxt)) {
+ if (!N->isSink())
+ C.addTransition(State->set<InvariantViolated>(true), N);
+ return true;
}
return false;
}
-void NullabilityChecker::reportBugIfPreconditionHolds(
+void NullabilityChecker::reportBugIfInvariantHolds(StringRef Msg,
ErrorKind Error, ExplodedNode *N, const MemRegion *Region,
CheckerContext &C, const Stmt *ValueExpr, bool SuppressPath) const {
ProgramStateRef OriginalState = N->getState();
- if (checkPreconditionViolation(OriginalState, N, C))
+ if (checkInvariantViolation(OriginalState, N, C))
return;
if (SuppressPath) {
- OriginalState = OriginalState->set<PreconditionViolated>(true);
+ OriginalState = OriginalState->set<InvariantViolated>(true);
N = C.addTransition(OriginalState, N);
}
- reportBug(Error, N, Region, C.getBugReporter(), ValueExpr);
+ reportBug(Msg, Error, N, Region, C.getBugReporter(), ValueExpr);
}
/// Cleaning up the program state.
@@ -424,7 +491,7 @@ void NullabilityChecker::checkDeadSymbols(SymbolReaper &SR,
// preconditions are violated. It is not enough to check this only when we
// actually report an error, because at that time interesting symbols might be
// reaped.
- if (checkPreconditionViolation(State, C.getPredecessor(), C))
+ if (checkInvariantViolation(State, C.getPredecessor(), C))
return;
C.addTransition(State);
}
@@ -433,7 +500,7 @@ void NullabilityChecker::checkDeadSymbols(SymbolReaper &SR,
/// not know anything about the value of that pointer. When that pointer is
/// nullable, this code emits a warning.
void NullabilityChecker::checkEvent(ImplicitNullDerefEvent Event) const {
- if (Event.SinkNode->getState()->get<PreconditionViolated>())
+ if (Event.SinkNode->getState()->get<InvariantViolated>())
return;
const MemRegion *Region =
@@ -454,18 +521,32 @@ void NullabilityChecker::checkEvent(ImplicitNullDerefEvent Event) const {
// Do not suppress errors on defensive code paths, because dereferencing
// a nullable pointer is always an error.
if (Event.IsDirectDereference)
- reportBug(ErrorKind::NullableDereferenced, Event.SinkNode, Region, BR);
- else
- reportBug(ErrorKind::NullablePassedToNonnull, Event.SinkNode, Region, BR);
+ reportBug("Nullable pointer is dereferenced",
+ ErrorKind::NullableDereferenced, Event.SinkNode, Region, BR);
+ else {
+ reportBug("Nullable pointer is passed to a callee that requires a "
+ "non-null", ErrorKind::NullablePassedToNonnull,
+ Event.SinkNode, Region, BR);
+ }
}
}
+/// Find the outermost subexpression of E that is not an implicit cast.
+/// This looks through the implicit casts to _Nonnull that ARC adds to
+/// return expressions of ObjC types when the return type of the function or
+/// method is non-null but the expression is not.
+static const Expr *lookThroughImplicitCasts(const Expr *E) {
+ assert(E);
+
+ while (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) {
+ E = ICE->getSubExpr();
+ }
+
+ return E;
+}
+
/// This method check when nullable pointer or null value is returned from a
/// function that has nonnull return type.
-///
-/// TODO: when nullability preconditons are violated, it is ok to violate the
-/// nullability postconditons (i.e.: when one of the nonnull parameters are null
-/// this check should not report any nullability related issue).
void NullabilityChecker::checkPreStmt(const ReturnStmt *S,
CheckerContext &C) const {
auto RetExpr = S->getRetValue();
@@ -476,7 +557,7 @@ void NullabilityChecker::checkPreStmt(const ReturnStmt *S,
return;
ProgramStateRef State = C.getState();
- if (State->get<PreconditionViolated>())
+ if (State->get<InvariantViolated>())
return;
auto RetSVal =
@@ -484,16 +565,31 @@ void NullabilityChecker::checkPreStmt(const ReturnStmt *S,
if (!RetSVal)
return;
+ bool InSuppressedMethodFamily = false;
+
+ QualType RequiredRetType;
AnalysisDeclContext *DeclCtxt =
C.getLocationContext()->getAnalysisDeclContext();
- const FunctionType *FuncType = DeclCtxt->getDecl()->getFunctionType();
- if (!FuncType)
+ const Decl *D = DeclCtxt->getDecl();
+ if (auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
+ // HACK: This is a big hammer to avoid warning when there are defensive
+ // nil checks in -init and -copy methods. We should add more sophisticated
+ // logic here to suppress on common defensive idioms but still
+ // warn when there is a likely problem.
+ ObjCMethodFamily Family = MD->getMethodFamily();
+ if (OMF_init == Family || OMF_copy == Family || OMF_mutableCopy == Family)
+ InSuppressedMethodFamily = true;
+
+ RequiredRetType = MD->getReturnType();
+ } else if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+ RequiredRetType = FD->getReturnType();
+ } else {
return;
+ }
NullConstraint Nullness = getNullConstraint(*RetSVal, State);
- Nullability RequiredNullability =
- getNullabilityAnnotation(FuncType->getReturnType());
+ Nullability RequiredNullability = getNullabilityAnnotation(RequiredRetType);
// If the returned value is null but the type of the expression
// generating it is nonnull then we will suppress the diagnostic.
@@ -501,18 +597,36 @@ void NullabilityChecker::checkPreStmt(const ReturnStmt *S,
// function with a _Nonnull return type:
// return (NSString * _Nonnull)0;
Nullability RetExprTypeLevelNullability =
- getNullabilityAnnotation(RetExpr->getType());
+ getNullabilityAnnotation(lookThroughImplicitCasts(RetExpr)->getType());
+ bool NullReturnedFromNonNull = (RequiredNullability == Nullability::Nonnull &&
+ Nullness == NullConstraint::IsNull);
if (Filter.CheckNullReturnedFromNonnull &&
- Nullness == NullConstraint::IsNull &&
+ NullReturnedFromNonNull &&
RetExprTypeLevelNullability != Nullability::Nonnull &&
- RequiredNullability == Nullability::Nonnull) {
+ !InSuppressedMethodFamily &&
+ C.getLocationContext()->inTopFrame()) {
static CheckerProgramPointTag Tag(this, "NullReturnedFromNonnull");
ExplodedNode *N = C.generateErrorNode(State, &Tag);
if (!N)
return;
- reportBugIfPreconditionHolds(ErrorKind::NilReturnedToNonnull, N, nullptr, C,
- RetExpr);
+
+ SmallString<256> SBuf;
+ llvm::raw_svector_ostream OS(SBuf);
+ OS << "Null is returned from a " << C.getDeclDescription(D) <<
+ " that is expected to return a non-null value";
+
+ reportBugIfInvariantHolds(OS.str(),
+ ErrorKind::NilReturnedToNonnull, N, nullptr, C,
+ RetExpr);
+ return;
+ }
+
+ // If null was returned from a non-null function, mark the nullability
+ // invariant as violated even if the diagnostic was suppressed.
+ if (NullReturnedFromNonNull) {
+ State = State->set<InvariantViolated>(true);
+ C.addTransition(State);
return;
}
@@ -530,8 +644,15 @@ void NullabilityChecker::checkPreStmt(const ReturnStmt *S,
RequiredNullability == Nullability::Nonnull) {
static CheckerProgramPointTag Tag(this, "NullableReturnedFromNonnull");
ExplodedNode *N = C.addTransition(State, C.getPredecessor(), &Tag);
- reportBugIfPreconditionHolds(ErrorKind::NullableReturnedToNonnull, N,
- Region, C);
+
+ SmallString<256> SBuf;
+ llvm::raw_svector_ostream OS(SBuf);
+ OS << "Nullable pointer is returned from a " << C.getDeclDescription(D) <<
+ " that is expected to return a non-null value";
+
+ reportBugIfInvariantHolds(OS.str(),
+ ErrorKind::NullableReturnedToNonnull, N,
+ Region, C);
}
return;
}
@@ -551,7 +672,7 @@ void NullabilityChecker::checkPreCall(const CallEvent &Call,
return;
ProgramStateRef State = C.getState();
- if (State->get<PreconditionViolated>())
+ if (State->get<InvariantViolated>())
return;
ProgramStateRef OrigState = State;
@@ -579,14 +700,22 @@ void NullabilityChecker::checkPreCall(const CallEvent &Call,
Nullability ArgExprTypeLevelNullability =
getNullabilityAnnotation(ArgExpr->getType());
+ unsigned ParamIdx = Param->getFunctionScopeIndex() + 1;
+
if (Filter.CheckNullPassedToNonnull && Nullness == NullConstraint::IsNull &&
ArgExprTypeLevelNullability != Nullability::Nonnull &&
- RequiredNullability == Nullability::Nonnull) {
+ RequiredNullability == Nullability::Nonnull &&
+ isDiagnosableCall(Call)) {
ExplodedNode *N = C.generateErrorNode(State);
if (!N)
return;
- reportBugIfPreconditionHolds(ErrorKind::NilPassedToNonnull, N, nullptr, C,
- ArgExpr);
+ SmallString<256> SBuf;
+ llvm::raw_svector_ostream OS(SBuf);
+ OS << "Null passed to a callee that requires a non-null " << ParamIdx
+ << llvm::getOrdinalSuffix(ParamIdx) << " parameter";
+ reportBugIfInvariantHolds(OS.str(), ErrorKind::NilPassedToNonnull, N,
+ nullptr, C,
+ ArgExpr, /*SuppressPath=*/false);
return;
}
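[Note] The new diagnostics above spell out the parameter position with llvm::getOrdinalSuffix from StringExtras.h, which is why that header is now included at the top of the file. A quick illustrative use:

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/Support/raw_ostream.h"

    void printOrdinal(unsigned ParamIdx) {
      // getOrdinalSuffix maps 1 -> "st", 2 -> "nd", 3 -> "rd",
      // 4 -> "th", and handles 11/12/13 -> "th" as English expects.
      llvm::outs() << ParamIdx << llvm::getOrdinalSuffix(ParamIdx) << '\n';
    }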
@@ -603,17 +732,24 @@ void NullabilityChecker::checkPreCall(const CallEvent &Call,
continue;
if (Filter.CheckNullablePassedToNonnull &&
- RequiredNullability == Nullability::Nonnull) {
+ RequiredNullability == Nullability::Nonnull &&
+ isDiagnosableCall(Call)) {
ExplodedNode *N = C.addTransition(State);
- reportBugIfPreconditionHolds(ErrorKind::NullablePassedToNonnull, N,
- Region, C, ArgExpr, /*SuppressPath=*/true);
+ SmallString<256> SBuf;
+ llvm::raw_svector_ostream OS(SBuf);
+ OS << "Nullable pointer is passed to a callee that requires a non-null "
+ << ParamIdx << llvm::getOrdinalSuffix(ParamIdx) << " parameter";
+ reportBugIfInvariantHolds(OS.str(),
+ ErrorKind::NullablePassedToNonnull, N,
+ Region, C, ArgExpr, /*SuppressPath=*/true);
return;
}
if (Filter.CheckNullableDereferenced &&
Param->getType()->isReferenceType()) {
ExplodedNode *N = C.addTransition(State);
- reportBugIfPreconditionHolds(ErrorKind::NullableDereferenced, N, Region,
- C, ArgExpr, /*SuppressPath=*/true);
+ reportBugIfInvariantHolds("Nullable pointer is dereferenced",
+ ErrorKind::NullableDereferenced, N, Region,
+ C, ArgExpr, /*SuppressPath=*/true);
return;
}
continue;
@@ -644,7 +780,7 @@ void NullabilityChecker::checkPostCall(const CallEvent &Call,
if (!ReturnType->isAnyPointerType())
return;
ProgramStateRef State = C.getState();
- if (State->get<PreconditionViolated>())
+ if (State->get<InvariantViolated>())
return;
const MemRegion *Region = getTrackRegion(Call.getReturnValue());
@@ -713,7 +849,7 @@ void NullabilityChecker::checkPostObjCMessage(const ObjCMethodCall &M,
return;
ProgramStateRef State = C.getState();
- if (State->get<PreconditionViolated>())
+ if (State->get<InvariantViolated>())
return;
const MemRegion *ReturnRegion = getTrackRegion(M.getReturnValue());
@@ -828,7 +964,7 @@ void NullabilityChecker::checkPostStmt(const ExplicitCastExpr *CE,
return;
ProgramStateRef State = C.getState();
- if (State->get<PreconditionViolated>())
+ if (State->get<InvariantViolated>())
return;
Nullability DestNullability = getNullabilityAnnotation(DestType);
@@ -953,7 +1089,7 @@ void NullabilityChecker::checkBind(SVal L, SVal V, const Stmt *S,
return;
ProgramStateRef State = C.getState();
- if (State->get<PreconditionViolated>())
+ if (State->get<InvariantViolated>())
return;
auto ValDefOrUnknown = V.getAs<DefinedOrUnknownSVal>();
@@ -967,24 +1103,48 @@ void NullabilityChecker::checkBind(SVal L, SVal V, const Stmt *S,
ValNullability = getNullabilityAnnotation(Sym->getType());
Nullability LocNullability = getNullabilityAnnotation(LocType);
+
+ // If the type of the RHS expression is nonnull, don't warn. This
+ // enables explicit suppression with a cast to nonnull.
+ Nullability ValueExprTypeLevelNullability = Nullability::Unspecified;
+ const Expr *ValueExpr = matchValueExprForBind(S);
+ if (ValueExpr) {
+ ValueExprTypeLevelNullability =
+ getNullabilityAnnotation(lookThroughImplicitCasts(ValueExpr)->getType());
+ }
+
+ bool NullAssignedToNonNull = (LocNullability == Nullability::Nonnull &&
+ RhsNullness == NullConstraint::IsNull);
if (Filter.CheckNullPassedToNonnull &&
- RhsNullness == NullConstraint::IsNull &&
+ NullAssignedToNonNull &&
ValNullability != Nullability::Nonnull &&
- LocNullability == Nullability::Nonnull &&
+ ValueExprTypeLevelNullability != Nullability::Nonnull &&
!isARCNilInitializedLocal(C, S)) {
static CheckerProgramPointTag Tag(this, "NullPassedToNonnull");
ExplodedNode *N = C.generateErrorNode(State, &Tag);
if (!N)
return;
- const Stmt *ValueExpr = matchValueExprForBind(S);
- if (!ValueExpr)
- ValueExpr = S;
- reportBugIfPreconditionHolds(ErrorKind::NilAssignedToNonnull, N, nullptr, C,
- ValueExpr);
+ const Stmt *ValueStmt = S;
+ if (ValueExpr)
+ ValueStmt = ValueExpr;
+
+ reportBugIfInvariantHolds("Null is assigned to a pointer which is "
+ "expected to have non-null value",
+ ErrorKind::NilAssignedToNonnull, N, nullptr, C,
+ ValueStmt);
return;
}
+
+ // If null was assigned to a nonnull pointer, mark the nullability
+ // invariant as violated even if the diagnostic was suppressed.
+ if (NullAssignedToNonNull) {
+ State = State->set<InvariantViolated>(true);
+ C.addTransition(State);
+ return;
+ }
+
// Intentionally missing case: '0' is bound to a reference. It is handled by
// the DereferenceChecker.
@@ -1003,8 +1163,10 @@ void NullabilityChecker::checkBind(SVal L, SVal V, const Stmt *S,
LocNullability == Nullability::Nonnull) {
static CheckerProgramPointTag Tag(this, "NullablePassedToNonnull");
ExplodedNode *N = C.addTransition(State, C.getPredecessor(), &Tag);
- reportBugIfPreconditionHolds(ErrorKind::NullableAssignedToNonnull, N,
- ValueRegion, C);
+ reportBugIfInvariantHolds("Nullable pointer is assigned to a pointer "
+ "which is expected to have non-null value",
+ ErrorKind::NullableAssignedToNonnull, N,
+ ValueRegion, C);
}
return;
}
@@ -1052,6 +1214,10 @@ void NullabilityChecker::printState(raw_ostream &Out, ProgramStateRef State,
checker->Filter.Check##name = true; \
checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \
checker->NeedTracking = checker->NeedTracking || trackingRequired; \
+ checker->NoDiagnoseCallsToSystemHeaders = \
+ checker->NoDiagnoseCallsToSystemHeaders || \
+ mgr.getAnalyzerOptions().getBooleanOption( \
+ "NoDiagnoseCallsToSystemHeaders", false, checker, true); \
}
// The checks are likely to be turned on by default and it is possible to do
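[Note] The registration macro above reads the new flag through getBooleanOption, defaulting to false and searching parent checker names. Assuming the analyzer's usual <checker-name>:<option> key syntax (the exact key prefix here is an assumption, not confirmed by the patch), enabling it would look roughly like:

    clang --analyze \
      -Xclang -analyzer-config \
      -Xclang nullability:NoDiagnoseCallsToSystemHeaders=true \
      MyFile.m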
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index 0203d79cd00e..58ebf72660b6 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -79,7 +79,6 @@ void ObjCContainersChecker::addSizeInfo(const Expr *Array, const Expr *Size,
C.addTransition(
State->set<ArraySizeMap>(ArraySym, SizeV.castAs<DefinedSVal>()));
- return;
}
void ObjCContainersChecker::checkPostStmt(const CallExpr *CE,
@@ -156,10 +155,7 @@ ObjCContainersChecker::checkPointerEscape(ProgramStateRef State,
const InvalidatedSymbols &Escaped,
const CallEvent *Call,
PointerEscapeKind Kind) const {
- for (InvalidatedSymbols::const_iterator I = Escaped.begin(),
- E = Escaped.end();
- I != E; ++I) {
- SymbolRef Sym = *I;
+ for (const auto &Sym : Escaped) {
// When a symbol for a mutable array escapes, we can't reason precisely
// about its size any more -- so remove it from the map.
// Note that we aren't notified here when a CFMutableArrayRef escapes as a
@@ -169,6 +165,7 @@ ObjCContainersChecker::checkPointerEscape(ProgramStateRef State,
}
return State;
}
+
/// Register checker.
void ento::registerObjCContainersChecker(CheckerManager &mgr) {
mgr.registerChecker<ObjCContainersChecker>();
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
new file mode 100644
index 000000000000..15980c5c5387
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
@@ -0,0 +1,294 @@
+//===- ObjCSuperDeallocChecker.cpp - Check correct use of [super dealloc] -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines ObjCSuperDeallocChecker, a builtin check that warns when
+// self is used after a call to [super dealloc] in MRR mode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class ObjCSuperDeallocChecker
+ : public Checker<check::PostObjCMessage, check::PreObjCMessage,
+ check::PreCall, check::Location> {
+
+ mutable IdentifierInfo *IIdealloc, *IINSObject;
+ mutable Selector SELdealloc;
+
+ std::unique_ptr<BugType> DoubleSuperDeallocBugType;
+
+ void initIdentifierInfoAndSelectors(ASTContext &Ctx) const;
+
+ bool isSuperDeallocMessage(const ObjCMethodCall &M) const;
+
+public:
+ ObjCSuperDeallocChecker();
+ void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
+ void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
+
+ void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+
+ void checkLocation(SVal l, bool isLoad, const Stmt *S,
+ CheckerContext &C) const;
+
+private:
+
+ void diagnoseCallArguments(const CallEvent &CE, CheckerContext &C) const;
+
+ void reportUseAfterDealloc(SymbolRef Sym, StringRef Desc, const Stmt *S,
+ CheckerContext &C) const;
+};
+
+} // End anonymous namespace.
+
+// Remember whether [super dealloc] has previously been called on the
+// SymbolRef for the receiver.
+REGISTER_SET_WITH_PROGRAMSTATE(CalledSuperDealloc, SymbolRef)
+
+namespace {
+class SuperDeallocBRVisitor final
+ : public BugReporterVisitorImpl<SuperDeallocBRVisitor> {
+
+ SymbolRef ReceiverSymbol;
+ bool Satisfied;
+
+public:
+ SuperDeallocBRVisitor(SymbolRef ReceiverSymbol)
+ : ReceiverSymbol(ReceiverSymbol),
+ Satisfied(false) {}
+
+ PathDiagnosticPiece *VisitNode(const ExplodedNode *Succ,
+ const ExplodedNode *Pred,
+ BugReporterContext &BRC,
+ BugReport &BR) override;
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ ID.Add(ReceiverSymbol);
+ }
+};
+} // End anonymous namespace.
+
+void ObjCSuperDeallocChecker::checkPreObjCMessage(const ObjCMethodCall &M,
+ CheckerContext &C) const {
+
+ ProgramStateRef State = C.getState();
+ SymbolRef ReceiverSymbol = M.getReceiverSVal().getAsSymbol();
+ if (!ReceiverSymbol) {
+ diagnoseCallArguments(M, C);
+ return;
+ }
+
+ bool AlreadyCalled = State->contains<CalledSuperDealloc>(ReceiverSymbol);
+ if (!AlreadyCalled)
+ return;
+
+ StringRef Desc;
+
+ if (isSuperDeallocMessage(M)) {
+ Desc = "[super dealloc] should not be called multiple times";
+ } else {
+ Desc = StringRef();
+ }
+
+ reportUseAfterDealloc(ReceiverSymbol, Desc, M.getOriginExpr(), C);
+
+ return;
+}
+
+void ObjCSuperDeallocChecker::checkPreCall(const CallEvent &Call,
+ CheckerContext &C) const {
+ diagnoseCallArguments(Call, C);
+}
+
+void ObjCSuperDeallocChecker::checkPostObjCMessage(const ObjCMethodCall &M,
+ CheckerContext &C) const {
+ // Check for [super dealloc] method call.
+ if (!isSuperDeallocMessage(M))
+ return;
+
+ ProgramStateRef State = C.getState();
+ SymbolRef ReceiverSymbol = M.getSelfSVal().getAsSymbol();
+ assert(ReceiverSymbol && "No receiver symbol at call to [super dealloc]?");
+
+ // We add this transition in checkPostObjCMessage to avoid warning when
+ // we inline a call to [super dealloc] where the inlined call itself
+ // calls [super dealloc].
+ State = State->add<CalledSuperDealloc>(ReceiverSymbol);
+ C.addTransition(State);
+}
+
+void ObjCSuperDeallocChecker::checkLocation(SVal L, bool IsLoad, const Stmt *S,
+ CheckerContext &C) const {
+ SymbolRef BaseSym = L.getLocSymbolInBase();
+ if (!BaseSym)
+ return;
+
+ ProgramStateRef State = C.getState();
+
+ if (!State->contains<CalledSuperDealloc>(BaseSym))
+ return;
+
+ const MemRegion *R = L.getAsRegion();
+ if (!R)
+ return;
+
+ // Climb the super regions to find the base symbol while recording
+ // the second-to-last region for error reporting.
+ const MemRegion *PriorSubRegion = nullptr;
+ while (const SubRegion *SR = dyn_cast<SubRegion>(R)) {
+ if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(SR)) {
+ BaseSym = SymR->getSymbol();
+ break;
+ } else {
+ R = SR->getSuperRegion();
+ PriorSubRegion = SR;
+ }
+ }
+
+ StringRef Desc = StringRef();
+ auto *IvarRegion = dyn_cast_or_null<ObjCIvarRegion>(PriorSubRegion);
+
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+ if (IvarRegion) {
+ OS << "Use of instance variable '" << *IvarRegion->getDecl() <<
+ "' after 'self' has been deallocated";
+ Desc = OS.str();
+ }
+
+ reportUseAfterDealloc(BaseSym, Desc, S, C);
+}
+
+/// Report a use-after-dealloc on Sym. If not empty,
+/// Desc will be used to describe the error; otherwise,
+/// a default warning will be used.
+void ObjCSuperDeallocChecker::reportUseAfterDealloc(SymbolRef Sym,
+ StringRef Desc,
+ const Stmt *S,
+ CheckerContext &C) const {
+ // We have a use of self after free.
+ // This likely causes a crash, so stop exploring the
+ // path by generating a sink.
+ ExplodedNode *ErrNode = C.generateErrorNode();
+ // If we've already reached this node on another path, return.
+ if (!ErrNode)
+ return;
+
+ if (Desc.empty())
+ Desc = "use of 'self' after it has been deallocated";
+
+ // Generate the report.
+ std::unique_ptr<BugReport> BR(
+ new BugReport(*DoubleSuperDeallocBugType, Desc, ErrNode));
+ BR->addRange(S->getSourceRange());
+ BR->addVisitor(llvm::make_unique<SuperDeallocBRVisitor>(Sym));
+ C.emitReport(std::move(BR));
+}
+
+/// Diagnose if any of the arguments to CE have already been
+/// dealloc'd.
+void ObjCSuperDeallocChecker::diagnoseCallArguments(const CallEvent &CE,
+ CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+ unsigned ArgCount = CE.getNumArgs();
+ for (unsigned I = 0; I < ArgCount; I++) {
+ SymbolRef Sym = CE.getArgSVal(I).getAsSymbol();
+ if (!Sym)
+ continue;
+
+ if (State->contains<CalledSuperDealloc>(Sym)) {
+ reportUseAfterDealloc(Sym, StringRef(), CE.getArgExpr(I), C);
+ return;
+ }
+ }
+}
+
+ObjCSuperDeallocChecker::ObjCSuperDeallocChecker()
+ : IIdealloc(nullptr), IINSObject(nullptr) {
+
+ DoubleSuperDeallocBugType.reset(
+ new BugType(this, "[super dealloc] should not be called more than once",
+ categories::CoreFoundationObjectiveC));
+}
+
+void
+ObjCSuperDeallocChecker::initIdentifierInfoAndSelectors(ASTContext &Ctx) const {
+ if (IIdealloc)
+ return;
+
+ IIdealloc = &Ctx.Idents.get("dealloc");
+ IINSObject = &Ctx.Idents.get("NSObject");
+
+ SELdealloc = Ctx.Selectors.getSelector(0, &IIdealloc);
+}
+
+bool
+ObjCSuperDeallocChecker::isSuperDeallocMessage(const ObjCMethodCall &M) const {
+ if (M.getOriginExpr()->getReceiverKind() != ObjCMessageExpr::SuperInstance)
+ return false;
+
+ ASTContext &Ctx = M.getState()->getStateManager().getContext();
+ initIdentifierInfoAndSelectors(Ctx);
+
+ return M.getSelector() == SELdealloc;
+}
+
+PathDiagnosticPiece *SuperDeallocBRVisitor::VisitNode(const ExplodedNode *Succ,
+ const ExplodedNode *Pred,
+ BugReporterContext &BRC,
+ BugReport &BR) {
+ if (Satisfied)
+ return nullptr;
+
+ ProgramStateRef State = Succ->getState();
+
+ bool CalledNow =
+ Succ->getState()->contains<CalledSuperDealloc>(ReceiverSymbol);
+ bool CalledBefore =
+ Pred->getState()->contains<CalledSuperDealloc>(ReceiverSymbol);
+
+ // Is Succ the node on which the analyzer noted that [super dealloc] was
+ // called on ReceiverSymbol?
+ if (CalledNow && !CalledBefore) {
+ Satisfied = true;
+
+ ProgramPoint P = Succ->getLocation();
+ PathDiagnosticLocation L =
+ PathDiagnosticLocation::create(P, BRC.getSourceManager());
+
+ if (!L.isValid() || !L.asLocation().isValid())
+ return nullptr;
+
+ return new PathDiagnosticEventPiece(
+ L, "[super dealloc] called here");
+ }
+
+ return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Checker Registration.
+//===----------------------------------------------------------------------===//
+
+void ento::registerObjCSuperDeallocChecker(CheckerManager &Mgr) {
+ const LangOptions &LangOpts = Mgr.getLangOpts();
+ if (LangOpts.getGC() == LangOptions::GCOnly || LangOpts.ObjCAutoRefCount)
+ return;
+ Mgr.registerChecker<ObjCSuperDeallocChecker>();
+}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
index 8ce37357fe1f..0640d2f49f43 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
@@ -168,7 +168,7 @@ public:
const ASTRecordLayout &RL) {
CharUnits PaddingSum;
CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
- for (const auto &FD : RD->fields()) {
+ for (const FieldDecl *FD : RD->fields()) {
// This checker only cares about the padded size of the
// field, and not the data size. If the field is a record
// with tail padding, then we won't put that number in our
@@ -260,13 +260,13 @@ public:
// We are poorly aligned, and we need to pad in order to layout another
// field. Round up to at least the smallest field alignment that we
// currently have.
- CharUnits NextOffset = NewOffset.RoundUpToAlignment(Fields[0].Align);
+ CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
NewPad += NextOffset - NewOffset;
NewOffset = NextOffset;
}
}
// Calculate tail padding.
- CharUnits NewSize = NewOffset.RoundUpToAlignment(RL.getAlignment());
+ CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
NewPad += NewSize - NewOffset;
return NewPad;
}
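[Note] Both hunks above migrate from the removed CharUnits::RoundUpToAlignment to CharUnits::alignTo; the underlying computation is still round-up-then-subtract. A tiny standalone sketch of the tail-padding math using llvm::alignTo on raw byte counts (illustrative only):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Tail padding is the distance from the end of the last field to the
    // next multiple of the record's alignment.
    uint64_t tailPadding(uint64_t EndOffset, uint64_t Align) {
      return llvm::alignTo(EndOffset, Align) - EndOffset;
    }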
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
index e3369677af72..df5118806bff 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
@@ -13,55 +13,329 @@
//===----------------------------------------------------------------------===//
#include "ClangSACheckers.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/ExprCXX.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/ADT/SmallVector.h"
using namespace clang;
using namespace ento;
namespace {
+enum class AllocKind {
+ SingleObject,
+ Array,
+ Unknown,
+ Reinterpreted // Single object interpreted as an array.
+};
+} // end namespace
+
+namespace llvm {
+template <> struct FoldingSetTrait<AllocKind> {
+ static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
+ ID.AddInteger(static_cast<int>(X));
+ }
+};
+} // end namespace llvm
+
+namespace {
class PointerArithChecker
- : public Checker< check::PreStmt<BinaryOperator> > {
- mutable std::unique_ptr<BuiltinBug> BT;
+ : public Checker<
+ check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
+ check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
+ check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
+ check::PostStmt<CallExpr>, check::DeadSymbols> {
+ AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
+ const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
+ AllocKind &AKind, CheckerContext &C) const;
+ const MemRegion *getPointedRegion(const MemRegion *Region,
+ CheckerContext &C) const;
+ void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
+ bool PointedNeeded = false) const;
+ void initAllocIdentifiers(ASTContext &C) const;
+
+ mutable std::unique_ptr<BuiltinBug> BT_pointerArith;
+ mutable std::unique_ptr<BuiltinBug> BT_polyArray;
+ mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
public:
- void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
+ void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
+ void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
+ void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
+ void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
+ void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
+ void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
+ void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
+ void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
};
+} // end namespace
+
+REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
+
+void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
+ CheckerContext &C) const {
+ // TODO: intentional leak. Some information is garbage collected too early,
+ // see http://reviews.llvm.org/D14203 for further information.
+ /*ProgramStateRef State = C.getState();
+ RegionStateTy RegionStates = State->get<RegionState>();
+ for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end();
+ I != E; ++I) {
+ if (!SR.isLiveRegion(I->first))
+ State = State->remove<RegionState>(I->first);
+ }
+ C.addTransition(State);*/
}
-void PointerArithChecker::checkPreStmt(const BinaryOperator *B,
- CheckerContext &C) const {
- if (B->getOpcode() != BO_Sub && B->getOpcode() != BO_Add)
- return;
+AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
+ const FunctionDecl *FD) const {
+ // This checker tries not to assume anything about placement and overloaded
+ // new to avoid false positives.
+ if (isa<CXXMethodDecl>(FD))
+ return AllocKind::Unknown;
+ if (FD->getNumParams() != 1 || FD->isVariadic())
+ return AllocKind::Unknown;
+ if (NE->isArray())
+ return AllocKind::Array;
+
+ return AllocKind::SingleObject;
+}
+
+const MemRegion *
+PointerArithChecker::getPointedRegion(const MemRegion *Region,
+ CheckerContext &C) const {
+ assert(Region);
+ ProgramStateRef State = C.getState();
+ SVal S = State->getSVal(Region);
+ return S.getAsRegion();
+}
- ProgramStateRef state = C.getState();
- const LocationContext *LCtx = C.getLocationContext();
- SVal LV = state->getSVal(B->getLHS(), LCtx);
- SVal RV = state->getSVal(B->getRHS(), LCtx);
+/// Checks whether a region is part of an array.
+/// In case there is a derived-to-base cast above the array element, the
+/// Polymorphic output value is set to true. AKind output value is set to the
+/// allocation kind of the inspected region.
+const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
+ bool &Polymorphic,
+ AllocKind &AKind,
+ CheckerContext &C) const {
+ assert(Region);
+ while (Region->getKind() == MemRegion::Kind::CXXBaseObjectRegionKind) {
+ Region = Region->getAs<CXXBaseObjectRegion>()->getSuperRegion();
+ Polymorphic = true;
+ }
+ if (Region->getKind() == MemRegion::Kind::ElementRegionKind) {
+ Region = Region->getAs<ElementRegion>()->getSuperRegion();
+ }
- const MemRegion *LR = LV.getAsRegion();
+ ProgramStateRef State = C.getState();
+ if (const AllocKind *Kind = State->get<RegionState>(Region)) {
+ AKind = *Kind;
+ if (*Kind == AllocKind::Array)
+ return Region;
+ else
+ return nullptr;
+ }
+ // When the region is symbolic and we do not have any information about it,
+ // assume that this is an array to avoid false positives.
+ if (Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
+ return Region;
- if (!LR || !RV.isConstant())
+ // No AllocKind stored and not symbolic, assume that it points to a single
+ // object.
+ return nullptr;
+}
+
+void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
+ CheckerContext &C,
+ bool PointedNeeded) const {
+ SourceRange SR = E->getSourceRange();
+ if (SR.isInvalid())
return;
- // If pointer arithmetic is done on variables of non-array type, this often
- // means behavior rely on memory organization, which is dangerous.
- if (isa<VarRegion>(LR) || isa<CodeTextRegion>(LR) ||
- isa<CompoundLiteralRegion>(LR)) {
+ ProgramStateRef State = C.getState();
+ const MemRegion *Region =
+ State->getSVal(E, C.getLocationContext()).getAsRegion();
+ if (!Region)
+ return;
+ if (PointedNeeded)
+ Region = getPointedRegion(Region, C);
+ if (!Region)
+ return;
+ bool IsPolymorphic = false;
+ AllocKind Kind = AllocKind::Unknown;
+ if (const MemRegion *ArrayRegion =
+ getArrayRegion(Region, IsPolymorphic, Kind, C)) {
+ if (!IsPolymorphic)
+ return;
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
- if (!BT)
- BT.reset(
- new BuiltinBug(this, "Dangerous pointer arithmetic",
- "Pointer arithmetic done on non-array variables "
- "means reliance on memory layout, which is "
- "dangerous."));
- auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
- R->addRange(B->getSourceRange());
+ if (!BT_polyArray)
+ BT_polyArray.reset(new BuiltinBug(
+ this, "Dangerous pointer arithmetic",
+ "Pointer arithmetic on a pointer to base class is dangerous "
+ "because derived and base class may have different size."));
+ auto R = llvm::make_unique<BugReport>(*BT_polyArray,
+ BT_polyArray->getDescription(), N);
+ R->addRange(E->getSourceRange());
+ R->markInteresting(ArrayRegion);
C.emitReport(std::move(R));
}
+ return;
+ }
+
+ if (Kind == AllocKind::Reinterpreted)
+ return;
+
+ // We might not have enough information about symbolic regions.
+ if (Kind != AllocKind::SingleObject &&
+ Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
+ return;
+
+ if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
+ if (!BT_pointerArith)
+ BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic",
+ "Pointer arithmetic on non-array "
+ "variables relies on memory layout, "
+ "which is dangerous."));
+ auto R = llvm::make_unique<BugReport>(*BT_pointerArith,
+ BT_pointerArith->getDescription(), N);
+ R->addRange(SR);
+ R->markInteresting(Region);
+ C.emitReport(std::move(R));
+ }
+}
+
+void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
+ if (!AllocFunctions.empty())
+ return;
+ AllocFunctions.insert(&C.Idents.get("alloca"));
+ AllocFunctions.insert(&C.Idents.get("malloc"));
+ AllocFunctions.insert(&C.Idents.get("realloc"));
+ AllocFunctions.insert(&C.Idents.get("calloc"));
+ AllocFunctions.insert(&C.Idents.get("valloc"));
+}
+
+void PointerArithChecker::checkPostStmt(const CallExpr *CE,
+ CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+ const FunctionDecl *FD = C.getCalleeDecl(CE);
+ if (!FD)
+ return;
+ IdentifierInfo *FunI = FD->getIdentifier();
+ initAllocIdentifiers(C.getASTContext());
+ if (AllocFunctions.count(FunI) == 0)
+ return;
+
+ SVal SV = State->getSVal(CE, C.getLocationContext());
+ const MemRegion *Region = SV.getAsRegion();
+ if (!Region)
+ return;
+ // Assume that C allocation functions allocate arrays to avoid false
+ // positives.
+ // TODO: Add heuristics to distinguish alloc calls that allocate single
+ // objects.
+ State = State->set<RegionState>(Region, AllocKind::Array);
+ C.addTransition(State);
+}
+
+void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
+ CheckerContext &C) const {
+ const FunctionDecl *FD = NE->getOperatorNew();
+ if (!FD)
+ return;
+
+ AllocKind Kind = getKindOfNewOp(NE, FD);
+
+ ProgramStateRef State = C.getState();
+ SVal AllocedVal = State->getSVal(NE, C.getLocationContext());
+ const MemRegion *Region = AllocedVal.getAsRegion();
+ if (!Region)
+ return;
+ State = State->set<RegionState>(Region, Kind);
+ C.addTransition(State);
+}
+
+void PointerArithChecker::checkPostStmt(const CastExpr *CE,
+ CheckerContext &C) const {
+ if (CE->getCastKind() != CastKind::CK_BitCast)
+ return;
+
+ const Expr *CastedExpr = CE->getSubExpr();
+ ProgramStateRef State = C.getState();
+ SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
+
+ const MemRegion *Region = CastedVal.getAsRegion();
+ if (!Region)
+ return;
+
+ // Suppress reinterpret casted hits.
+ State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
+ C.addTransition(State);
+}
+
+void PointerArithChecker::checkPreStmt(const CastExpr *CE,
+ CheckerContext &C) const {
+ if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
+ return;
+
+ const Expr *CastedExpr = CE->getSubExpr();
+ ProgramStateRef State = C.getState();
+ SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
+
+ const MemRegion *Region = CastedVal.getAsRegion();
+ if (!Region)
+ return;
+
+ if (const AllocKind *Kind = State->get<RegionState>(Region)) {
+ if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
+ return;
+ }
+ State = State->set<RegionState>(Region, AllocKind::Array);
+ C.addTransition(State);
+}
+
+void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
+ CheckerContext &C) const {
+ if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
+ return;
+ reportPointerArithMisuse(UOp->getSubExpr(), C, true);
+}
+
+void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
+ CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+ SVal Idx = State->getSVal(SubsExpr->getIdx(), C.getLocationContext());
+
+ // Indexing with 0 is OK.
+ if (Idx.isZeroConstant())
+ return;
+ reportPointerArithMisuse(SubsExpr->getBase(), C);
+}
+
+void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
+ CheckerContext &C) const {
+ BinaryOperatorKind OpKind = BOp->getOpcode();
+ if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
+ return;
+
+ const Expr *Lhs = BOp->getLHS();
+ const Expr *Rhs = BOp->getRHS();
+ ProgramStateRef State = C.getState();
+
+ if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
+ SVal RHSVal = State->getSVal(Rhs, C.getLocationContext());
+ if (State->isNull(RHSVal).isConstrainedTrue())
+ return;
+ reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
+ }
+ // The int += ptr; case is not valid C++.
+ if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
+ SVal LHSVal = State->getSVal(Lhs, C.getLocationContext());
+ if (State->isNull(LHSVal).isConstrainedTrue())
+ return;
+ reportPointerArithMisuse(Rhs, C);
}
}
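[Note] For orientation, here is the kind of code the reworked checker is meant to flag (hypothetical example, not part of the patch): arithmetic through a base-class pointer over an array of derived objects, and arithmetic on a pointer recorded as a single-object allocation:

    struct Base { int B; };
    struct Derived : Base { int D; };

    void examples() {
      Derived Arr[4];
      Base *BP = Arr;        // derived-to-base conversion over an array element
      ++BP;                  // warn: steps by sizeof(Base), elements are Derived

      int *Single = new int; // recorded as AllocKind::SingleObject
      Single += 1;           // warn: pointer arithmetic on a non-array object
    }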
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
index f983c3085635..b646127cfae7 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ClangSACheckers.h"
#include "AllocationDiagnostics.h"
+#include "ClangSACheckers.h"
#include "SelectorExtras.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
@@ -39,6 +39,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include <cstdarg>
+#include <utility>
using namespace clang;
using namespace ento;
@@ -2683,7 +2684,7 @@ namespace {
class StopTrackingCallback final : public SymbolVisitor {
ProgramStateRef state;
public:
- StopTrackingCallback(ProgramStateRef st) : state(st) {}
+ StopTrackingCallback(ProgramStateRef st) : state(std::move(st)) {}
ProgramStateRef getState() const { return state; }
bool VisitSymbol(SymbolRef sym) override {
@@ -2832,14 +2833,6 @@ void RetainCountChecker::checkPostStmt(const ObjCBoxedExpr *Ex,
C.addTransition(State);
}
-static bool wasLoadedFromIvar(SymbolRef Sym) {
- if (auto DerivedVal = dyn_cast<SymbolDerived>(Sym))
- return isa<ObjCIvarRegion>(DerivedVal->getRegion());
- if (auto RegionVal = dyn_cast<SymbolRegionValue>(Sym))
- return isa<ObjCIvarRegion>(RegionVal->getRegion());
- return false;
-}
-
void RetainCountChecker::checkPostStmt(const ObjCIvarRefExpr *IRE,
CheckerContext &C) const {
Optional<Loc> IVarLoc = C.getSVal(IRE).getAs<Loc>();
@@ -2848,7 +2841,7 @@ void RetainCountChecker::checkPostStmt(const ObjCIvarRefExpr *IRE,
ProgramStateRef State = C.getState();
SymbolRef Sym = State->getSVal(*IVarLoc).getAsSymbol();
- if (!Sym || !wasLoadedFromIvar(Sym))
+ if (!Sym || !dyn_cast_or_null<ObjCIvarRegion>(Sym->getOriginRegion()))
return;
// Accessing an ivar directly is unusual. If we've done that, be more
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
index 7026a2ec16a1..ab4b4d3bd91b 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
@@ -20,6 +20,7 @@
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include <utility>
using namespace clang;
using namespace ento;
@@ -51,14 +52,11 @@ class SimpleStreamChecker : public Checker<check::PostCall,
check::PreCall,
check::DeadSymbols,
check::PointerEscape> {
-
- mutable IdentifierInfo *IIfopen, *IIfclose;
+ CallDescription OpenFn, CloseFn;
std::unique_ptr<BugType> DoubleCloseBugType;
std::unique_ptr<BugType> LeakBugType;
- void initIdentifierInfo(ASTContext &Ctx) const;
-
void reportDoubleClose(SymbolRef FileDescSym,
const CallEvent &Call,
CheckerContext &C) const;
@@ -95,7 +93,7 @@ namespace {
class StopTrackingCallback final : public SymbolVisitor {
ProgramStateRef state;
public:
- StopTrackingCallback(ProgramStateRef st) : state(st) {}
+ StopTrackingCallback(ProgramStateRef st) : state(std::move(st)) {}
ProgramStateRef getState() const { return state; }
bool VisitSymbol(SymbolRef sym) override {
@@ -106,7 +104,7 @@ public:
} // end anonymous namespace
SimpleStreamChecker::SimpleStreamChecker()
- : IIfopen(nullptr), IIfclose(nullptr) {
+ : OpenFn("fopen"), CloseFn("fclose", 1) {
// Initialize the bug types.
DoubleCloseBugType.reset(
new BugType(this, "Double fclose", "Unix Stream API Error"));
@@ -119,12 +117,10 @@ SimpleStreamChecker::SimpleStreamChecker()
void SimpleStreamChecker::checkPostCall(const CallEvent &Call,
CheckerContext &C) const {
- initIdentifierInfo(C.getASTContext());
-
if (!Call.isGlobalCFunction())
return;
- if (Call.getCalleeIdentifier() != IIfopen)
+ if (!Call.isCalled(OpenFn))
return;
// Get the symbolic value corresponding to the file handle.
@@ -140,15 +136,10 @@ void SimpleStreamChecker::checkPostCall(const CallEvent &Call,
void SimpleStreamChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
- initIdentifierInfo(C.getASTContext());
-
if (!Call.isGlobalCFunction())
return;
- if (Call.getCalleeIdentifier() != IIfclose)
- return;
-
- if (Call.getNumArgs() != 1)
+ if (!Call.isCalled(CloseFn))
return;
// Get the symbolic value corresponding to the file handle.
@@ -275,13 +266,6 @@ SimpleStreamChecker::checkPointerEscape(ProgramStateRef State,
return State;
}
-void SimpleStreamChecker::initIdentifierInfo(ASTContext &Ctx) const {
- if (IIfopen)
- return;
- IIfopen = &Ctx.Idents.get("fopen");
- IIfclose = &Ctx.Idents.get("fclose");
-}
-
void ento::registerSimpleStreamChecker(CheckerManager &mgr) {
mgr.registerChecker<SimpleStreamChecker>();
}
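[Note] The conversion above drops the lazily interned IdentifierInfos in favor of CallDescription matching; the arity argument lets CloseFn subsume the old getNumArgs() != 1 check. A condensed sketch of the pattern, using the same API as the hunk (clang::ento namespace assumed in scope):

    const CallDescription OpenFn("fopen");       // matched by name, any arity
    const CallDescription CloseFn("fclose", 1);  // name plus exactly one argument

    static bool isCloseCall(const CallEvent &Call) {
      return Call.isGlobalCFunction() && Call.isCalled(CloseFn);
    }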
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
index 79fc701d6d58..556274d0edb6 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
@@ -236,7 +236,12 @@ void StackAddrEscapeChecker::checkEndFunction(CheckerContext &Ctx) const {
SmallString<512> buf;
llvm::raw_svector_ostream os(buf);
SourceRange range = genName(os, cb.V[i].second, Ctx.getASTContext());
- os << " is still referred to by the global variable '";
+ os << " is still referred to by the ";
+ if (isa<StaticGlobalSpaceRegion>(cb.V[i].first->getMemorySpace()))
+ os << "static";
+ else
+ os << "global";
+ os << " variable '";
const VarRegion *VR = cast<VarRegion>(cb.V[i].first->getBaseRegion());
os << *VR->getDecl()
<< "' upon returning to the caller. This will be a dangling reference";
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp
index d02d2df1c507..8ad962875b06 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/TraversalChecker.cpp
@@ -25,9 +25,11 @@ using namespace ento;
namespace {
class TraversalDumper : public Checker< check::BranchCondition,
+ check::BeginFunction,
check::EndFunction > {
public:
void checkBranchCondition(const Stmt *Condition, CheckerContext &C) const;
+ void checkBeginFunction(CheckerContext &C) const;
void checkEndFunction(CheckerContext &C) const;
};
}
@@ -50,6 +52,10 @@ void TraversalDumper::checkBranchCondition(const Stmt *Condition,
<< Parent->getStmtClassName() << "\n";
}
+void TraversalDumper::checkBeginFunction(CheckerContext &C) const {
+ llvm::outs() << "--BEGIN FUNCTION--\n";
+}
+
void TraversalDumper::checkEndFunction(CheckerContext &C) const {
llvm::outs() << "--END FUNCTION--\n";
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
index ed17610e4116..0a274292aa39 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include <utility>
using namespace clang;
using namespace ento;
@@ -31,7 +32,7 @@ class UndefBranchChecker : public Checker<check::BranchCondition> {
const LocationContext *LCtx;
FindUndefExpr(ProgramStateRef S, const LocationContext *L)
- : St(S), LCtx(L) {}
+ : St(std::move(S)), LCtx(L) {}
const Expr *FindExpr(const Expr *Ex) {
if (!MatchesCriteria(Ex))
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
index a03abce9626b..892e713d241f 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
@@ -26,10 +26,6 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include "llvm/ADT/SmallSet.h"
-// The number of CFGBlock pointers we want to reserve memory for. This is used
-// once for each function we analyze.
-#define DEFAULT_CFGBLOCKS 256
-
using namespace clang;
using namespace ento;
@@ -39,7 +35,7 @@ public:
void checkEndAnalysis(ExplodedGraph &G, BugReporter &B,
ExprEngine &Eng) const;
private:
- typedef llvm::SmallSet<unsigned, DEFAULT_CFGBLOCKS> CFGBlocksSet;
+ typedef llvm::SmallSet<unsigned, 32> CFGBlocksSet;
static inline const Stmt *getUnreachableStmt(const CFGBlock *CB);
static void FindUnreachableEntryPoints(const CFGBlock *CB,
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index e3b2ed222363..40217bdee892 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -76,7 +76,6 @@ void VLASizeChecker::reportBug(VLASize_Kind Kind,
report->addRange(SizeE->getSourceRange());
bugreporter::trackNullOrUndefValue(N, SizeE, *report);
C.emitReport(std::move(report));
- return;
}
void VLASizeChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
index 26ffee827cff..75aefc0e8384 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
@@ -54,10 +54,10 @@ class VforkChecker : public Checker<check::PreCall, check::PostCall,
bool isCallWhitelisted(const IdentifierInfo *II, CheckerContext &C) const;
void reportBug(const char *What, CheckerContext &C,
- const char *Details = 0) const;
+ const char *Details = nullptr) const;
public:
- VforkChecker() : II_vfork(0) {}
+ VforkChecker() : II_vfork(nullptr) {}
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
@@ -107,7 +107,7 @@ bool VforkChecker::isCallWhitelisted(const IdentifierInfo *II,
"execv",
"execvp",
"execvpe",
- 0,
+ nullptr
};
ASTContext &AC = C.getASTContext();
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 11be764633cf..488126b0088a 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -2922,7 +2922,7 @@ bool TrimmedGraph::popNextReportGraph(ReportGraph &GraphWrapper) {
while (true) {
// Create the equivalent node in the new graph with the same state
// and location.
- ExplodedNode *NewN = GNew->getNode(OrigN->getLocation(), OrigN->getState(),
+ ExplodedNode *NewN = GNew->createUncachedNode(OrigN->getLocation(), OrigN->getState(),
OrigN->isSink());
// Store the mapping to the original node.
@@ -3487,7 +3487,7 @@ LLVM_DUMP_METHOD void PathPieces::dump() const {
}
}
-void PathDiagnosticCallPiece::dump() const {
+LLVM_DUMP_METHOD void PathDiagnosticCallPiece::dump() const {
llvm::errs() << "CALL\n--------------\n";
if (const Stmt *SLoc = getLocStmt(getLocation()))
@@ -3498,26 +3498,26 @@ void PathDiagnosticCallPiece::dump() const {
getLocation().dump();
}
-void PathDiagnosticEventPiece::dump() const {
+LLVM_DUMP_METHOD void PathDiagnosticEventPiece::dump() const {
llvm::errs() << "EVENT\n--------------\n";
llvm::errs() << getString() << "\n";
llvm::errs() << " ---- at ----\n";
getLocation().dump();
}
-void PathDiagnosticControlFlowPiece::dump() const {
+LLVM_DUMP_METHOD void PathDiagnosticControlFlowPiece::dump() const {
llvm::errs() << "CONTROL\n--------------\n";
getStartLocation().dump();
llvm::errs() << " ---- to ----\n";
getEndLocation().dump();
}
-void PathDiagnosticMacroPiece::dump() const {
+LLVM_DUMP_METHOD void PathDiagnosticMacroPiece::dump() const {
llvm::errs() << "MACRO\n--------------\n";
// FIXME: Print which macro is being invoked.
}
-void PathDiagnosticLocation::dump() const {
+LLVM_DUMP_METHOD void PathDiagnosticLocation::dump() const {
if (!isValid()) {
llvm::errs() << "<INVALID>\n";
return;
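
Tagging the out-of-line dump() definitions with LLVM_DUMP_METHOD (from llvm/Support/Compiler.h, roughly noinline-plus-used on compilers that support those attributes) keeps release builds from dead-stripping them, so they stay callable from a debugger. The pattern, as a minimal sketch with a hypothetical type:

    #include "llvm/Support/Compiler.h"
    #include "llvm/Support/raw_ostream.h"

    struct DebugNode { // hypothetical
      int Value = 0;
      LLVM_DUMP_METHOD void dump() const;
    };

    // The attribute keeps the symbol alive even if nothing in the
    // program calls it, so "p N.dump()" works in lldb/gdb.
    LLVM_DUMP_METHOD void DebugNode::dump() const {
      llvm::errs() << "DebugNode(" << Value << ")\n";
    }
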
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index cf1e0a6a656c..0e505463bb5e 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -14,6 +14,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprObjC.h"
+#include "clang/Analysis/CFGStmtMap.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
@@ -323,6 +324,9 @@ public:
}
PathDiagnosticLocation L(Ret, BRC.getSourceManager(), StackFrame);
+ if (!L.isValid() || !L.asLocation().isValid())
+ return nullptr;
+
return new PathDiagnosticEventPiece(L, Out.str());
}
@@ -828,8 +832,53 @@ SuppressInlineDefensiveChecksVisitor::VisitNode(const ExplodedNode *Succ,
// Check if this is inlined defensive checks.
const LocationContext *CurLC = Succ->getLocationContext();
const LocationContext *ReportLC = BR.getErrorNode()->getLocationContext();
- if (CurLC != ReportLC && !CurLC->isParentOf(ReportLC))
+ if (CurLC != ReportLC && !CurLC->isParentOf(ReportLC)) {
BR.markInvalid("Suppress IDC", CurLC);
+ return nullptr;
+ }
+
+ // Treat defensive checks in function-like macros as if they were inlined
+ // defensive checks. If the bug location is not in a macro but the
+ // terminator for the current location is, then suppress the warning.
+ auto BugPoint = BR.getErrorNode()->getLocation().getAs<StmtPoint>();
+
+ if (!BugPoint)
+ return nullptr;
+
+ SourceLocation BugLoc = BugPoint->getStmt()->getLocStart();
+ if (BugLoc.isMacroID())
+ return nullptr;
+
+ ProgramPoint CurPoint = Succ->getLocation();
+ const Stmt *CurTerminatorStmt = nullptr;
+ if (auto BE = CurPoint.getAs<BlockEdge>()) {
+ CurTerminatorStmt = BE->getSrc()->getTerminator().getStmt();
+ } else if (auto SP = CurPoint.getAs<StmtPoint>()) {
+ const Stmt *CurStmt = SP->getStmt();
+ if (!CurStmt->getLocStart().isMacroID())
+ return nullptr;
+
+ CFGStmtMap *Map = CurLC->getAnalysisDeclContext()->getCFGStmtMap();
+ CurTerminatorStmt = Map->getBlock(CurStmt)->getTerminator();
+ } else {
+ return nullptr;
+ }
+
+ if (!CurTerminatorStmt)
+ return nullptr;
+
+ SourceLocation TerminatorLoc = CurTerminatorStmt->getLocStart();
+ if (TerminatorLoc.isMacroID()) {
+ const SourceManager &SMgr = BRC.getSourceManager();
+ std::pair<FileID, unsigned> TLInfo = SMgr.getDecomposedLoc(TerminatorLoc);
+ SrcMgr::SLocEntry SE = SMgr.getSLocEntry(TLInfo.first);
+ const SrcMgr::ExpansionInfo &EInfo = SE.getExpansion();
+ if (EInfo.isFunctionMacroExpansion()) {
+ BR.markInvalid("Suppress Macro IDC", CurLC);
+ return nullptr;
+ }
+ }
}
return nullptr;
}
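
The block above extends inlined-defensive-check suppression to checks hidden in function-like macros: if the bug site itself is not in a macro but the controlling branch's terminator expands from a function-like macro, the report is marked invalid. Roughly the shape of code it targets, as a made-up illustration:

    #include <cstring>

    // The whole 'if' terminator comes from a function-like macro
    // expansion, so TerminatorLoc.isMacroID() is true for it.
    #define RETURN_ZERO_IF_NULL(p)                                      \
      if (!(p))                                                         \
        return 0

    static int length(const char *s) {
      RETURN_ZERO_IF_NULL(s); // defensive check inside the macro
      return (int)strlen(s);
    }

    int use(const char *s) {
      int n = length(s); // inlining this is the only reason to assume
                         // "s may be null"...
      return n + s[0];   // ...so a null-deref report here is suppressed.
    }
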
@@ -862,6 +911,15 @@ static const Expr *peelOffOuterExpr(const Expr *Ex,
return peelOffOuterExpr(EWC->getSubExpr(), N);
if (const OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(Ex))
return peelOffOuterExpr(OVE->getSourceExpr(), N);
+ if (auto *POE = dyn_cast<PseudoObjectExpr>(Ex)) {
+ auto *PropRef = dyn_cast<ObjCPropertyRefExpr>(POE->getSyntacticForm());
+ if (PropRef && PropRef->isMessagingGetter()) {
+ const Expr *GetterMessageSend =
+ POE->getSemanticExpr(POE->getNumSemanticExprs() - 1);
+ assert(isa<ObjCMessageExpr>(GetterMessageSend));
+ return peelOffOuterExpr(GetterMessageSend, N);
+ }
+ }
// Peel off the ternary operator.
if (const ConditionalOperator *CO = dyn_cast<ConditionalOperator>(Ex)) {
@@ -1494,20 +1552,6 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond,
return event;
}
-
-// FIXME: Copied from ExprEngineCallAndReturn.cpp.
-static bool isInStdNamespace(const Decl *D) {
- const DeclContext *DC = D->getDeclContext()->getEnclosingNamespaceContext();
- const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC);
- if (!ND)
- return false;
-
- while (const NamespaceDecl *Parent = dyn_cast<NamespaceDecl>(ND->getParent()))
- ND = Parent;
-
- return ND->isStdNamespace();
-}
-
std::unique_ptr<PathDiagnosticPiece>
LikelyFalsePositiveSuppressionBRVisitor::getEndPath(BugReporterContext &BRC,
const ExplodedNode *N,
@@ -1518,7 +1562,7 @@ LikelyFalsePositiveSuppressionBRVisitor::getEndPath(BugReporterContext &BRC,
AnalyzerOptions &Options = Eng.getAnalysisManager().options;
const Decl *D = N->getLocationContext()->getDecl();
- if (isInStdNamespace(D)) {
+ if (AnalysisDeclContext::isInStdNamespace(D)) {
// Skip reports within the 'std' namespace. Although these can sometimes be
// the user's fault, we currently don't report them very well.
// Note that this will not help for any other data structure libraries, like
@@ -1552,12 +1596,6 @@ LikelyFalsePositiveSuppressionBRVisitor::getEndPath(BugReporterContext &BRC,
}
}
- // The analyzer issues a false positive on
- // std::basic_string<uint8_t> v; v.push_back(1);
- // and
- // std::u16string s; s += u'a';
- // because we cannot reason about the internal invariants of the
- // datastructure.
for (const LocationContext *LCtx = N->getLocationContext(); LCtx;
LCtx = LCtx->getParent()) {
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(LCtx->getDecl());
@@ -1565,10 +1603,24 @@ LikelyFalsePositiveSuppressionBRVisitor::getEndPath(BugReporterContext &BRC,
continue;
const CXXRecordDecl *CD = MD->getParent();
+ // The analyzer issues a false positive on
+ // std::basic_string<uint8_t> v; v.push_back(1);
+ // and
+ // std::u16string s; s += u'a';
+ // because we cannot reason about the internal invariants of the
+ // datastructure.
if (CD->getName() == "basic_string") {
BR.markInvalid(getTag(), nullptr);
return nullptr;
}
+
+ // The analyzer issues a false positive on
+ // std::shared_ptr<int> p(new int(1)); p = nullptr;
+ // because it does not reason properly about temporary destructors.
+ if (CD->getName() == "shared_ptr") {
+ BR.markInvalid(getTag(), nullptr);
+ return nullptr;
+ }
}
}
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 69af09b25b6e..52613186677a 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -177,7 +177,7 @@ ProgramStateRef CallEvent::invalidateRegions(unsigned BlockCount,
// below for efficiency.
if (PreserveArgs.count(Idx))
if (const MemRegion *MR = getArgSVal(Idx).getAsRegion())
- ETraits.setTrait(MR->StripCasts(),
+ ETraits.setTrait(MR->getBaseRegion(),
RegionAndSymbolInvalidationTraits::TK_PreserveContents);
// TODO: Factor this out + handle the lower level const pointers.
@@ -210,6 +210,16 @@ ProgramPoint CallEvent::getProgramPoint(bool IsPreVisit,
return PostImplicitCall(D, Loc, getLocationContext(), Tag);
}
+bool CallEvent::isCalled(const CallDescription &CD) const {
+ assert(getKind() != CE_ObjCMessage && "Obj-C methods are not supported");
+ if (!CD.II)
+ CD.II = &getState()->getStateManager().getContext().Idents.get(CD.FuncName);
+ if (getCalleeIdentifier() != CD.II)
+ return false;
+ return (CD.RequiredArgs == CallDescription::NoArgRequirement ||
+ CD.RequiredArgs == getNumArgs());
+}
+
SVal CallEvent::getArgSVal(unsigned Index) const {
const Expr *ArgE = getArgExpr(Index);
if (!ArgE)
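
CallEvent::isCalled gives checkers a declarative way to match a callee by name and, optionally, arity; the CallDescription lazily caches the IdentifierInfo on first use, so later comparisons are a cheap pointer check. A hedged usage sketch (checker boilerplate omitted; 'Call' is the CallEvent and the matched function is arbitrary):

    // CallDescription{name, arg-count} is the API added above; leaving
    // the count at CallDescription::NoArgRequirement matches any arity.
    static const CallDescription FOpenDesc("fopen", 2);
    if (Call.isCalled(FOpenDesc)) {
      // ... model fopen() here ...
    }
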
@@ -668,9 +678,26 @@ ArrayRef<ParmVarDecl*> ObjCMethodCall::parameters() const {
return D->parameters();
}
-void
-ObjCMethodCall::getExtraInvalidatedValues(ValueList &Values,
- RegionAndSymbolInvalidationTraits *ETraits) const {
+void ObjCMethodCall::getExtraInvalidatedValues(
+ ValueList &Values, RegionAndSymbolInvalidationTraits *ETraits) const {
+
+ // If the method call is a setter for a property known to be backed by
+ // an instance variable, don't invalidate the entire receiver, just
+ // the storage for that instance variable.
+ if (const ObjCPropertyDecl *PropDecl = getAccessedProperty()) {
+ if (const ObjCIvarDecl *PropIvar = PropDecl->getPropertyIvarDecl()) {
+ SVal IvarLVal = getState()->getLValue(PropIvar, getReceiverSVal());
+ const MemRegion *IvarRegion = IvarLVal.getAsRegion();
+ ETraits->setTrait(
+ IvarRegion,
+ RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
+ ETraits->setTrait(IvarRegion,
+ RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
+ Values.push_back(IvarLVal);
+ return;
+ }
+ }
+
Values.push_back(getReceiverSVal());
}
@@ -730,6 +757,18 @@ const PseudoObjectExpr *ObjCMethodCall::getContainingPseudoObjectExpr() const {
return ObjCMessageDataTy::getFromOpaqueValue(Data).getPointer();
}
+static const Expr *
+getSyntacticFromForPseudoObjectExpr(const PseudoObjectExpr *POE) {
+ const Expr *Syntactic = POE->getSyntacticForm();
+
+ // This handles the funny case of assigning to the result of a getter.
+ // This can happen if the getter returns a non-const reference.
+ if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(Syntactic))
+ Syntactic = BO->getLHS();
+
+ return Syntactic;
+}
+
ObjCMessageKind ObjCMethodCall::getMessageKind() const {
if (!Data) {
@@ -739,12 +778,7 @@ ObjCMessageKind ObjCMethodCall::getMessageKind() const {
// Check if parent is a PseudoObjectExpr.
if (const PseudoObjectExpr *POE = dyn_cast_or_null<PseudoObjectExpr>(S)) {
- const Expr *Syntactic = POE->getSyntacticForm();
-
- // This handles the funny case of assigning to the result of a getter.
- // This can happen if the getter returns a non-const reference.
- if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(Syntactic))
- Syntactic = BO->getLHS();
+ const Expr *Syntactic = getSyntacticFromForPseudoObjectExpr(POE);
ObjCMessageKind K;
switch (Syntactic->getStmtClass()) {
@@ -780,6 +814,27 @@ ObjCMessageKind ObjCMethodCall::getMessageKind() const {
return static_cast<ObjCMessageKind>(Info.getInt());
}
+const ObjCPropertyDecl *ObjCMethodCall::getAccessedProperty() const {
+ // Look for properties accessed with property syntax (foo.bar = ...)
+ if (getMessageKind() == OCM_PropertyAccess) {
+ const PseudoObjectExpr *POE = getContainingPseudoObjectExpr();
+ assert(POE && "Property access without PseudoObjectExpr?");
+
+ const Expr *Syntactic = getSyntacticFromForPseudoObjectExpr(POE);
+ auto *RefExpr = cast<ObjCPropertyRefExpr>(Syntactic);
+
+ if (RefExpr->isExplicitProperty())
+ return RefExpr->getExplicitProperty();
+ }
+
+ // Look for properties accessed with method syntax ([foo setBar:...]).
+ const ObjCMethodDecl *MD = getDecl();
+ if (!MD || !MD->isPropertyAccessor())
+ return nullptr;
+
+ // Note: This is potentially quite slow.
+ return MD->findPropertyDecl();
+}
bool ObjCMethodCall::canBeOverridenInSubclass(ObjCInterfaceDecl *IDecl,
Selector Sel) const {
@@ -903,8 +958,30 @@ RuntimeDefinition ObjCMethodCall::getRuntimeDefinition() const {
// even if we don't actually have an implementation.
if (!*Val)
if (const ObjCMethodDecl *CompileTimeMD = E->getMethodDecl())
- if (CompileTimeMD->isPropertyAccessor())
- Val = IDecl->lookupInstanceMethod(Sel);
+ if (CompileTimeMD->isPropertyAccessor()) {
+ if (!CompileTimeMD->getSelfDecl() &&
+ isa<ObjCCategoryDecl>(CompileTimeMD->getDeclContext())) {
+ // If the method is an accessor in a category, and it doesn't have a
+ // self declaration, first try to find the method in a class extension.
+ // This works around a bug in Sema where multiple accessors are
+ // synthesized for properties in class extensions that are redeclared
+ // in a category and the implicit parameters are not filled in for the
+ // method on the category. This ensures we find the accessor in the
+ // extension, which has the implicit parameters filled in.
+ auto *ID = CompileTimeMD->getClassInterface();
+ for (auto *CatDecl : ID->visible_extensions()) {
+ Val = CatDecl->getMethod(Sel,
+ CompileTimeMD->isInstanceMethod());
+ if (*Val)
+ break;
+ }
+ }
+ if (!*Val)
+ Val = IDecl->lookupInstanceMethod(Sel);
+ }
}
const ObjCMethodDecl *MD = Val.getValue();
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index 5ec8bfa80074..548b06ef91fc 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -35,6 +35,13 @@ StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const {
return funI->getName();
}
+StringRef CheckerContext::getDeclDescription(const Decl *D) {
+ if (isa<ObjCMethodDecl>(D) || isa<CXXMethodDecl>(D))
+ return "method";
+ if (isa<BlockDecl>(D))
+ return "anonymous block";
+ return "function";
+}
bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
StringRef Name) {
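
getDeclDescription centralizes the "function" / "method" / "anonymous block" wording so checkers can build uniform diagnostics. A hypothetical use, where both the helper and the message text are made up:

    // 'C' is a CheckerContext, 'D' the declaration being reported on.
    static std::string describeLeak(CheckerContext &C, const Decl *D) {
      return ("Address of stack memory escapes the " +
              C.getDeclDescription(D)).str();
    }
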
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp
index d6aeceb1457d..ed41914ebd05 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp
@@ -75,8 +75,8 @@ bool clang::ento::containsBuiltinOffsetOf(const Stmt *S) {
// Extract lhs and rhs from assignment statement
std::pair<const clang::VarDecl *, const clang::Expr *>
clang::ento::parseAssignment(const Stmt *S) {
- const VarDecl *VD = 0;
- const Expr *RHS = 0;
+ const VarDecl *VD = nullptr;
+ const Expr *RHS = nullptr;
if (auto Assign = dyn_cast_or_null<BinaryOperator>(S)) {
if (Assign->isAssignmentOp()) {
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
index 008e8ef31cda..d8382e88691a 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -377,6 +377,40 @@ void CheckerManager::runCheckersForEndAnalysis(ExplodedGraph &G,
EndAnalysisCheckers[i](G, BR, Eng);
}
+namespace {
+struct CheckBeginFunctionContext {
+ typedef std::vector<CheckerManager::CheckBeginFunctionFunc> CheckersTy;
+ const CheckersTy &Checkers;
+ ExprEngine &Eng;
+ const ProgramPoint &PP;
+
+ CheckersTy::const_iterator checkers_begin() { return Checkers.begin(); }
+ CheckersTy::const_iterator checkers_end() { return Checkers.end(); }
+
+ CheckBeginFunctionContext(const CheckersTy &Checkers, ExprEngine &Eng,
+ const ProgramPoint &PP)
+ : Checkers(Checkers), Eng(Eng), PP(PP) {}
+
+ void runChecker(CheckerManager::CheckBeginFunctionFunc checkFn,
+ NodeBuilder &Bldr, ExplodedNode *Pred) {
+ const ProgramPoint &L = PP.withTag(checkFn.Checker);
+ CheckerContext C(Bldr, Eng, Pred, L);
+
+ checkFn(C);
+ }
+};
+}
+
+void CheckerManager::runCheckersForBeginFunction(ExplodedNodeSet &Dst,
+ const BlockEdge &L,
+ ExplodedNode *Pred,
+ ExprEngine &Eng) {
+ ExplodedNodeSet Src;
+ Src.insert(Pred);
+ CheckBeginFunctionContext C(BeginFunctionCheckers, Eng, L);
+ expandGraphWithCheckers(C, Dst, Src);
+}
+
/// \brief Run checkers for end of path.
// Note, We do not chain the checker output (like in expandGraphWithCheckers)
// for this callback since end of path nodes are expected to be final.
@@ -671,6 +705,10 @@ void CheckerManager::_registerForEndAnalysis(CheckEndAnalysisFunc checkfn) {
EndAnalysisCheckers.push_back(checkfn);
}
+void CheckerManager::_registerForBeginFunction(CheckBeginFunctionFunc checkfn) {
+ BeginFunctionCheckers.push_back(checkfn);
+}
+
void CheckerManager::_registerForEndFunction(CheckEndFunctionFunc checkfn) {
EndFunctionCheckers.push_back(checkfn);
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp
index a15e1573e228..ba03e2f8a3c1 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CheckerRegistry.cpp
@@ -49,12 +49,12 @@ static void collectCheckers(const CheckerRegistry::CheckerInfoList &checkers,
CheckerOptInfo &opt, CheckerInfoSet &collected) {
// Use a binary search to find the possible start of the package.
CheckerRegistry::CheckerInfo packageInfo(nullptr, opt.getName(), "");
- CheckerRegistry::CheckerInfoList::const_iterator e = checkers.end();
+ auto end = checkers.cend();
CheckerRegistry::CheckerInfoList::const_iterator i =
- std::lower_bound(checkers.begin(), e, packageInfo, checkerNameLT);
+ std::lower_bound(checkers.cbegin(), end, packageInfo, checkerNameLT);
// If we didn't even find a possible package, give up.
- if (i == e)
+ if (i == end)
return;
// If what we found doesn't actually start the package, give up.
@@ -73,7 +73,7 @@ static void collectCheckers(const CheckerRegistry::CheckerInfoList &checkers,
size = packageSize->getValue();
// Step through all the checkers in the package.
- for (e = i+size; i != e; ++i) {
+ for (auto checkEnd = i+size; i != checkEnd; ++i) {
if (opt.isEnabled())
collected.insert(&*i);
else
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index 39cf7e771755..da608f6c7558 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -192,14 +192,27 @@ bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned Steps,
WList->setBlockCounter(BCounterFactory.GetEmptyCounter());
if (!InitState)
- // Generate the root.
- generateNode(StartLoc, SubEng.getInitialState(L), nullptr);
- else
- generateNode(StartLoc, InitState, nullptr);
+ InitState = SubEng.getInitialState(L);
+
+ bool IsNew;
+ ExplodedNode *Node = G.getNode(StartLoc, InitState, false, &IsNew);
+ assert(IsNew);
+ G.addRoot(Node);
+
+ NodeBuilderContext BuilderCtx(*this, StartLoc.getDst(), Node);
+ ExplodedNodeSet DstBegin;
+ SubEng.processBeginOfFunction(BuilderCtx, Node, DstBegin, StartLoc);
+
+ enqueue(DstBegin);
}
// Check if we have a steps limit
bool UnlimitedSteps = Steps == 0;
+ // Cap our pre-reservation in the event that the user specifies
+ // a very large number of maximum steps.
+ const unsigned PreReservationCap = 4000000;
+ if (!UnlimitedSteps)
+ G.reserve(std::min(Steps, PreReservationCap));
while (WList->hasWork()) {
if (!UnlimitedSteps) {
@@ -243,8 +256,7 @@ void CoreEngine::dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc,
break;
case ProgramPoint::CallEnterKind: {
- CallEnter CEnter = Loc.castAs<CallEnter>();
- SubEng.processCallEnter(CEnter, Pred);
+ HandleCallEnter(Loc.castAs<CallEnter>(), Pred);
break;
}
@@ -456,6 +468,11 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) {
Pred->State, Pred);
}
+void CoreEngine::HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred) {
+ NodeBuilderContext BuilderCtx(*this, CE.getEntry(), Pred);
+ SubEng.processCallEnter(BuilderCtx, CE, Pred);
+}
+
void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term,
const CFGBlock * B, ExplodedNode *Pred) {
assert(B->succ_size() == 2);
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index 8a09720b2a19..02d382cc4885 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -336,6 +336,14 @@ ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L,
return V;
}
+ExplodedNode *ExplodedGraph::createUncachedNode(const ProgramPoint &L,
+ ProgramStateRef State,
+ bool IsSink) {
+ NodeTy *V = (NodeTy *) getAllocator().Allocate<NodeTy>();
+ new (V) NodeTy(L, State, IsSink);
+ return V;
+}
+
std::unique_ptr<ExplodedGraph>
ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
InterExplodedGraphMap *ForwardMap,
@@ -395,8 +403,7 @@ ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
// Create the corresponding node in the new graph and record the mapping
// from the old node to the new node.
- ExplodedNode *NewN = G->getNode(N->getLocation(), N->State, N->isSink(),
- nullptr);
+ ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State, N->isSink());
Pass2[N] = NewN;
// Also record the reverse mapping from the new node to the old node.
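
getNode() uniquifies nodes through a FoldingSet keyed on (ProgramPoint, State, sink), so copying a graph with it can silently merge two distinct nodes that share a key; createUncachedNode() allocates a fresh node unconditionally, which is what the trim/copy paths above need. The contrast, as a sketch with G an ExplodedGraph, Loc a ProgramPoint and State a ProgramStateRef:

    bool IsNew;
    // May return an already-existing node with the same key:
    ExplodedNode *Cached = G.getNode(Loc, State, /*IsSink=*/false, &IsNew);
    // Always mints a brand-new node, never consults the cache:
    ExplodedNode *Fresh = G.createUncachedNode(Loc, State, /*IsSink=*/false);
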
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 662b0a2dd798..405aecdee032 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/ImmutableList.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SaveAndRestore.h"
#ifndef NDEBUG
#include "llvm/Support/GraphWriter.h"
@@ -754,6 +755,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
// C++ and ARC stuff we don't support yet.
case Expr::ObjCIndirectCopyRestoreExprClass:
case Stmt::CXXDependentScopeMemberExprClass:
+ case Stmt::CXXInheritedCtorInitExprClass:
case Stmt::CXXTryStmtClass:
case Stmt::CXXTypeidExprClass:
case Stmt::CXXUuidofExprClass:
@@ -830,12 +832,21 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPAtomicDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
+ case Stmt::OMPTargetEnterDataDirectiveClass:
+ case Stmt::OMPTargetExitDataDirectiveClass:
+ case Stmt::OMPTargetParallelDirectiveClass:
+ case Stmt::OMPTargetParallelForDirectiveClass:
+ case Stmt::OMPTargetUpdateDirectiveClass:
case Stmt::OMPTeamsDirectiveClass:
case Stmt::OMPCancellationPointDirectiveClass:
case Stmt::OMPCancelDirectiveClass:
case Stmt::OMPTaskLoopDirectiveClass:
case Stmt::OMPTaskLoopSimdDirectiveClass:
case Stmt::OMPDistributeDirectiveClass:
+ case Stmt::OMPDistributeParallelForDirectiveClass:
+ case Stmt::OMPDistributeParallelForSimdDirectiveClass:
+ case Stmt::OMPDistributeSimdDirectiveClass:
+ case Stmt::OMPTargetParallelForSimdDirectiveClass:
llvm_unreachable("Stmt should not be in analyzer evaluation loop");
case Stmt::ObjCSubscriptRefExprClass:
@@ -892,7 +903,6 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::CUDAKernelCallExprClass:
case Stmt::OpaqueValueExprClass:
case Stmt::AsTypeExprClass:
- case Stmt::AtomicExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
@@ -906,6 +916,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::CXXScalarValueInitExprClass:
case Stmt::CXXBoolLiteralExprClass:
case Stmt::ObjCBoolLiteralExprClass:
+ case Stmt::ObjCAvailabilityCheckExprClass:
case Stmt::FloatingLiteralClass:
case Stmt::NoInitExprClass:
case Stmt::SizeOfPackExprClass:
@@ -1237,6 +1248,12 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
Bldr.addNodes(Dst);
break;
+ case Stmt::AtomicExprClass:
+ Bldr.takeNodes(Pred);
+ VisitAtomicExpr(cast<AtomicExpr>(S), Pred, Dst);
+ Bldr.addNodes(Dst);
+ break;
+
case Stmt::ObjCIvarRefExprClass:
Bldr.takeNodes(Pred);
VisitLvalObjCIvarRefExpr(cast<ObjCIvarRefExpr>(S), Pred, Dst);
@@ -1745,6 +1762,14 @@ static bool stackFrameDoesNotContainInitializedTemporaries(ExplodedNode &Pred) {
}
#endif
+void ExprEngine::processBeginOfFunction(NodeBuilderContext &BC,
+ ExplodedNode *Pred,
+ ExplodedNodeSet &Dst,
+ const BlockEdge &L) {
+ SaveAndRestore<const NodeBuilderContext *> NodeContextRAII(currBldrCtx, &BC);
+ getCheckerManager().runCheckersForBeginFunction(Dst, L, Pred, *this);
+}
+
/// ProcessEndPath - Called by CoreEngine. Used to generate end-of-path
/// nodes when the control reaches the end of a function.
void ExprEngine::processEndOfFunction(NodeBuilderContext& BC,
@@ -2052,6 +2077,44 @@ void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, M, *this);
}
+void ExprEngine::VisitAtomicExpr(const AtomicExpr *AE, ExplodedNode *Pred,
+ ExplodedNodeSet &Dst) {
+ ExplodedNodeSet AfterPreSet;
+ getCheckerManager().runCheckersForPreStmt(AfterPreSet, Pred, AE, *this);
+
+ // For now, treat all the arguments to C11 atomics as escaping.
+ // FIXME: Ideally we should model the behavior of the atomics precisely here.
+
+ ExplodedNodeSet AfterInvalidateSet;
+ StmtNodeBuilder Bldr(AfterPreSet, AfterInvalidateSet, *currBldrCtx);
+
+ for (ExplodedNodeSet::iterator I = AfterPreSet.begin(), E = AfterPreSet.end();
+ I != E; ++I) {
+ ProgramStateRef State = (*I)->getState();
+ const LocationContext *LCtx = (*I)->getLocationContext();
+
+ SmallVector<SVal, 8> ValuesToInvalidate;
+ for (unsigned SI = 0, Count = AE->getNumSubExprs(); SI != Count; SI++) {
+ const Expr *SubExpr = AE->getSubExprs()[SI];
+ SVal SubExprVal = State->getSVal(SubExpr, LCtx);
+ ValuesToInvalidate.push_back(SubExprVal);
+ }
+
+ State = State->invalidateRegions(ValuesToInvalidate, AE,
+ currBldrCtx->blockCount(),
+ LCtx,
+ /*CausedByPointerEscape*/true,
+ /*Symbols=*/nullptr);
+
+ SVal ResultVal = UnknownVal();
+ State = State->BindExpr(AE, LCtx, ResultVal);
+ Bldr.generateNode(AE, *I, State, nullptr,
+ ProgramPoint::PostStmtKind);
+ }
+
+ getCheckerManager().runCheckersForPostStmt(Dst, AfterInvalidateSet, AE, *this);
+}
+
namespace {
class CollectReachableSymbolsCallback final : public SymbolVisitor {
InvalidatedSymbols Symbols;
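
Until AtomicExpr is modeled precisely, the handler above conservatively invalidates everything reachable from the operands, treats them as escaped, and binds an Unknown result, trading precision for the absence of stale-value false positives. The kind of user code this covers, for illustration:

    // clang represents the GNU __atomic_* builtins (and C11 atomic
    // operations) as AtomicExpr nodes.
    int replace(int *slot, int value) {
      // After this call the analyzer forgets what '*slot' held and
      // treats 'slot' as escaped; 'old' is modeled as Unknown.
      int old = __atomic_exchange_n(slot, value, __ATOMIC_SEQ_CST);
      return old;
    }
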
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index 74cc8d2ccbc5..39d88bfda148 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -37,13 +37,12 @@ STATISTIC(NumInlinedCalls,
STATISTIC(NumReachedInlineCountMax,
"The # of times we reached inline count maximum");
-void ExprEngine::processCallEnter(CallEnter CE, ExplodedNode *Pred) {
+void ExprEngine::processCallEnter(NodeBuilderContext& BC, CallEnter CE,
+ ExplodedNode *Pred) {
// Get the entry block in the CFG of the callee.
const StackFrameContext *calleeCtx = CE.getCalleeContext();
PrettyStackTraceLocationContext CrashInfo(calleeCtx);
-
- const CFG *CalleeCFG = calleeCtx->getCFG();
- const CFGBlock *Entry = &(CalleeCFG->getEntry());
+ const CFGBlock *Entry = CE.getEntry();
// Validate the CFG.
assert(Entry->empty());
@@ -57,12 +56,16 @@ void ExprEngine::processCallEnter(CallEnter CE, ExplodedNode *Pred) {
ProgramStateRef state = Pred->getState();
- // Construct a new node and add it to the worklist.
+ // Construct a new node, notify checkers that analysis of the function has
+ // begun, and add the resultant nodes to the worklist.
bool isNew;
ExplodedNode *Node = G.getNode(Loc, state, false, &isNew);
Node->addPredecessor(Pred, G);
- if (isNew)
- Engine.getWorkList()->enqueue(Node);
+ if (isNew) {
+ ExplodedNodeSet DstBegin;
+ processBeginOfFunction(BC, Node, DstBegin, Loc);
+ Engine.enqueue(DstBegin);
+ }
}
// Find the last statement on the path to the exploded node and the
@@ -379,22 +382,6 @@ void ExprEngine::examineStackFrames(const Decl *D, const LocationContext *LCtx,
}
LCtx = LCtx->getParent();
}
-
-}
-
-static bool IsInStdNamespace(const FunctionDecl *FD) {
- const DeclContext *DC = FD->getEnclosingNamespaceContext();
- const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC);
- if (!ND)
- return false;
-
- while (const DeclContext *Parent = ND->getParent()) {
- if (!isa<NamespaceDecl>(Parent))
- break;
- ND = cast<NamespaceDecl>(Parent);
- }
-
- return ND->isStdNamespace();
}
// The GDM component containing the dynamic dispatch bifurcation info. When
@@ -408,7 +395,8 @@ namespace {
DynamicDispatchModeInlined = 1,
DynamicDispatchModeConservative
};
-}
+} // end anonymous namespace
+
REGISTER_TRAIT_WITH_PROGRAMSTATE(DynamicDispatchBifurcationMap,
CLANG_ENTO_PROGRAMSTATE_MAP(const MemRegion *,
unsigned))
@@ -441,7 +429,6 @@ bool ExprEngine::inlineCall(const CallEvent &Call, const Decl *D,
currBldrCtx->getBlock(),
currStmtIdx);
-
CallEnter Loc(CallE, CalleeSFC, CurLC);
// Construct a new state which contains the mapping from actual to
@@ -761,7 +748,7 @@ static bool mayInlineDecl(AnalysisDeclContext *CalleeADC,
// Conditionally control the inlining of C++ standard library functions.
if (!Opts.mayInlineCXXStandardLibrary())
if (Ctx.getSourceManager().isInSystemHeader(FD->getLocation()))
- if (IsInStdNamespace(FD))
+ if (AnalysisDeclContext::isInStdNamespace(FD))
return false;
// Conditionally control the inlining of methods on objects that look
@@ -778,7 +765,6 @@ static bool mayInlineDecl(AnalysisDeclContext *CalleeADC,
if (!Opts.mayInlineCXXSharedPtrDtor())
if (isCXXSharedPtrDtor(FD))
return false;
-
}
}
@@ -988,13 +974,10 @@ void ExprEngine::BifurcateCall(const MemRegion *BifurReg,
conservativeEvalCall(Call, Bldr, Pred, NoIState);
NumOfDynamicDispatchPathSplits++;
- return;
}
-
void ExprEngine::VisitReturnStmt(const ReturnStmt *RS, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
-
ExplodedNodeSet dstPreVisit;
getCheckerManager().runCheckersForPreStmt(dstPreVisit, Pred, RS, *this);
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index b3edb8569bd6..3a18956e4139 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -412,13 +412,13 @@ void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID,
// Output a maximum size.
if (!isa<PathDiagnosticMacroPiece>(P)) {
// Get the string and determine its maximum substring.
- const std::string& Msg = P.getString();
+ const auto &Msg = P.getString();
unsigned max_token = 0;
unsigned cnt = 0;
unsigned len = Msg.size();
- for (std::string::const_iterator I=Msg.begin(), E=Msg.end(); I!=E; ++I)
- switch (*I) {
+ for (char C : Msg)
+ switch (C) {
default:
++cnt;
continue;
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/IssueHash.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/IssueHash.cpp
index 0a3af3dcc7e9..bd5c81179adc 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/IssueHash.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/IssueHash.cpp
@@ -132,8 +132,11 @@ static std::string NormalizeLine(const SourceManager &SM, FullSourceLoc &L,
StringRef Str = GetNthLineOfFile(SM.getBuffer(L.getFileID(), L),
L.getExpansionLineNumber());
- unsigned col = Str.find_first_not_of(Whitespaces);
- col++;
+ StringRef::size_type col = Str.find_first_not_of(Whitespaces);
+ if (col == StringRef::npos)
+ col = 1; // The line only contains whitespace.
+ else
+ col++;
SourceLocation StartOfLine =
SM.translateLineCol(SM.getFileID(L), L.getExpansionLineNumber(), col);
llvm::MemoryBuffer *Buffer =
@@ -180,7 +183,7 @@ std::string clang::GetIssueString(const SourceManager &SM,
return (llvm::Twine(CheckerName) + Delimiter +
GetEnclosingDeclContextSignature(D) + Delimiter +
- llvm::utostr(IssueLoc.getExpansionColumnNumber()) + Delimiter +
+ Twine(IssueLoc.getExpansionColumnNumber()) + Delimiter +
NormalizeLine(SM, IssueLoc, LangOpts) + Delimiter + BugType)
.str();
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index 30052ccacee4..b7b6f42b2910 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -35,7 +35,6 @@ template<typename RegionTy> struct MemRegionManagerTrait;
template <typename RegionTy, typename A1>
RegionTy* MemRegionManager::getRegion(const A1 a1) {
-
const typename MemRegionManagerTrait<RegionTy>::SuperRegionTy *superRegion =
MemRegionManagerTrait<RegionTy>::getSuperRegion(*this, a1);
@@ -46,7 +45,7 @@ RegionTy* MemRegionManager::getRegion(const A1 a1) {
InsertPos));
if (!R) {
- R = (RegionTy*) A.Allocate<RegionTy>();
+ R = A.Allocate<RegionTy>();
new (R) RegionTy(a1, superRegion);
Regions.InsertNode(R, InsertPos);
}
@@ -64,7 +63,7 @@ RegionTy* MemRegionManager::getSubRegion(const A1 a1,
InsertPos));
if (!R) {
- R = (RegionTy*) A.Allocate<RegionTy>();
+ R = A.Allocate<RegionTy>();
new (R) RegionTy(a1, superRegion);
Regions.InsertNode(R, InsertPos);
}
@@ -74,7 +73,6 @@ RegionTy* MemRegionManager::getSubRegion(const A1 a1,
template <typename RegionTy, typename A1, typename A2>
RegionTy* MemRegionManager::getRegion(const A1 a1, const A2 a2) {
-
const typename MemRegionManagerTrait<RegionTy>::SuperRegionTy *superRegion =
MemRegionManagerTrait<RegionTy>::getSuperRegion(*this, a1, a2);
@@ -85,7 +83,7 @@ RegionTy* MemRegionManager::getRegion(const A1 a1, const A2 a2) {
InsertPos));
if (!R) {
- R = (RegionTy*) A.Allocate<RegionTy>();
+ R = A.Allocate<RegionTy>();
new (R) RegionTy(a1, a2, superRegion);
Regions.InsertNode(R, InsertPos);
}
@@ -96,7 +94,6 @@ RegionTy* MemRegionManager::getRegion(const A1 a1, const A2 a2) {
template <typename RegionTy, typename A1, typename A2>
RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2,
const MemRegion *superRegion) {
-
llvm::FoldingSetNodeID ID;
RegionTy::ProfileRegion(ID, a1, a2, superRegion);
void *InsertPos;
@@ -104,7 +101,7 @@ RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2,
InsertPos));
if (!R) {
- R = (RegionTy*) A.Allocate<RegionTy>();
+ R = A.Allocate<RegionTy>();
new (R) RegionTy(a1, a2, superRegion);
Regions.InsertNode(R, InsertPos);
}
@@ -115,7 +112,6 @@ RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2,
template <typename RegionTy, typename A1, typename A2, typename A3>
RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2, const A3 a3,
const MemRegion *superRegion) {
-
llvm::FoldingSetNodeID ID;
RegionTy::ProfileRegion(ID, a1, a2, a3, superRegion);
void *InsertPos;
@@ -123,7 +119,7 @@ RegionTy* MemRegionManager::getSubRegion(const A1 a1, const A2 a2, const A3 a3,
InsertPos));
if (!R) {
- R = (RegionTy*) A.Allocate<RegionTy>();
+ R = A.Allocate<RegionTy>();
new (R) RegionTy(a1, a2, a3, superRegion);
Regions.InsertNode(R, InsertPos);
}
@@ -246,23 +242,23 @@ QualType CXXBaseObjectRegion::getValueType() const {
//===----------------------------------------------------------------------===//
void MemSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const {
- ID.AddInteger((unsigned)getKind());
+ ID.AddInteger(static_cast<unsigned>(getKind()));
}
void StackSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const {
- ID.AddInteger((unsigned)getKind());
+ ID.AddInteger(static_cast<unsigned>(getKind()));
ID.AddPointer(getStackFrame());
}
void StaticGlobalSpaceRegion::Profile(llvm::FoldingSetNodeID &ID) const {
- ID.AddInteger((unsigned)getKind());
+ ID.AddInteger(static_cast<unsigned>(getKind()));
ID.AddPointer(getCodeRegion());
}
void StringRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
const StringLiteral* Str,
const MemRegion* superRegion) {
- ID.AddInteger((unsigned) StringRegionKind);
+ ID.AddInteger(static_cast<unsigned>(StringRegionKind));
ID.AddPointer(Str);
ID.AddPointer(superRegion);
}
@@ -270,7 +266,7 @@ void StringRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
void ObjCStringRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
const ObjCStringLiteral* Str,
const MemRegion* superRegion) {
- ID.AddInteger((unsigned) ObjCStringRegionKind);
+ ID.AddInteger(static_cast<unsigned>(ObjCStringRegionKind));
ID.AddPointer(Str);
ID.AddPointer(superRegion);
}
@@ -278,7 +274,7 @@ void ObjCStringRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
void AllocaRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
const Expr *Ex, unsigned cnt,
const MemRegion *superRegion) {
- ID.AddInteger((unsigned) AllocaRegionKind);
+ ID.AddInteger(static_cast<unsigned>(AllocaRegionKind));
ID.AddPointer(Ex);
ID.AddInteger(cnt);
ID.AddPointer(superRegion);
@@ -295,7 +291,7 @@ void CompoundLiteralRegion::Profile(llvm::FoldingSetNodeID& ID) const {
void CompoundLiteralRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
const CompoundLiteralExpr *CL,
const MemRegion* superRegion) {
- ID.AddInteger((unsigned) CompoundLiteralRegionKind);
+ ID.AddInteger(static_cast<unsigned>(CompoundLiteralRegionKind));
ID.AddPointer(CL);
ID.AddPointer(superRegion);
}
@@ -303,7 +299,7 @@ void CompoundLiteralRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
void CXXThisRegion::ProfileRegion(llvm::FoldingSetNodeID &ID,
const PointerType *PT,
const MemRegion *sRegion) {
- ID.AddInteger((unsigned) CXXThisRegionKind);
+ ID.AddInteger(static_cast<unsigned>(CXXThisRegionKind));
ID.AddPointer(PT);
ID.AddPointer(sRegion);
}
@@ -320,7 +316,7 @@ void ObjCIvarRegion::ProfileRegion(llvm::FoldingSetNodeID& ID,
void DeclRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, const Decl *D,
const MemRegion* superRegion, Kind k) {
- ID.AddInteger((unsigned) k);
+ ID.AddInteger(static_cast<unsigned>(k));
ID.AddPointer(D);
ID.AddPointer(superRegion);
}
@@ -335,7 +331,7 @@ void VarRegion::Profile(llvm::FoldingSetNodeID &ID) const {
void SymbolicRegion::ProfileRegion(llvm::FoldingSetNodeID& ID, SymbolRef sym,
const MemRegion *sreg) {
- ID.AddInteger((unsigned) MemRegion::SymbolicRegionKind);
+ ID.AddInteger(static_cast<unsigned>(MemRegion::SymbolicRegionKind));
ID.Add(sym);
ID.AddPointer(sreg);
}
@@ -438,7 +434,7 @@ void SubRegion::anchor() { }
// Region pretty-printing.
//===----------------------------------------------------------------------===//
-void MemRegion::dump() const {
+LLVM_DUMP_METHOD void MemRegion::dump() const {
dumpToStream(llvm::errs());
}
@@ -454,7 +450,7 @@ void MemRegion::dumpToStream(raw_ostream &os) const {
}
void AllocaRegion::dumpToStream(raw_ostream &os) const {
- os << "alloca{" << (const void*) Ex << ',' << Cnt << '}';
+ os << "alloca{" << static_cast<const void*>(Ex) << ',' << Cnt << '}';
}
void FunctionCodeRegion::dumpToStream(raw_ostream &os) const {
@@ -462,7 +458,7 @@ void FunctionCodeRegion::dumpToStream(raw_ostream &os) const {
}
void BlockCodeRegion::dumpToStream(raw_ostream &os) const {
- os << "block_code{" << (const void*) this << '}';
+ os << "block_code{" << static_cast<const void*>(this) << '}';
}
void BlockDataRegion::dumpToStream(raw_ostream &os) const {
@@ -478,12 +474,12 @@ void BlockDataRegion::dumpToStream(raw_ostream &os) const {
void CompoundLiteralRegion::dumpToStream(raw_ostream &os) const {
// FIXME: More elaborate pretty-printing.
- os << "{ " << (const void*) CL << " }";
+ os << "{ " << static_cast<const void*>(CL) << " }";
}
void CXXTempObjectRegion::dumpToStream(raw_ostream &os) const {
os << "temp_object{" << getValueType().getAsString() << ','
- << (const void*) Ex << '}';
+ << static_cast<const void*>(Ex) << '}';
}
void CXXBaseObjectRegion::dumpToStream(raw_ostream &os) const {
@@ -525,7 +521,7 @@ void VarRegion::dumpToStream(raw_ostream &os) const {
os << *cast<VarDecl>(D);
}
-void RegionRawOffset::dump() const {
+LLVM_DUMP_METHOD void RegionRawOffset::dump() const {
dumpToStream(llvm::errs());
}
@@ -582,12 +578,10 @@ void MemRegion::printPretty(raw_ostream &os) const {
os << "'";
printPrettyAsExpr(os);
os << "'";
- return;
}
void MemRegion::printPrettyAsExpr(raw_ostream &os) const {
llvm_unreachable("This region cannot be printed pretty.");
- return;
}
bool VarRegion::canPrintPrettyAsExpr() const {
@@ -628,7 +622,6 @@ void FieldRegion::printPretty(raw_ostream &os) const {
} else {
os << "field " << "\'" << getDecl()->getName() << "'";
}
- return;
}
bool CXXBaseObjectRegion::canPrintPrettyAsExpr() const {
@@ -639,6 +632,65 @@ void CXXBaseObjectRegion::printPrettyAsExpr(raw_ostream &os) const {
superRegion->printPrettyAsExpr(os);
}
+std::string MemRegion::getDescriptiveName(bool UseQuotes) const {
+ std::string VariableName;
+ std::string ArrayIndices;
+ const MemRegion *R = this;
+ SmallString<50> buf;
+ llvm::raw_svector_ostream os(buf);
+
+ // Obtain array indices to add them to the variable name.
+ const ElementRegion *ER = nullptr;
+ while ((ER = R->getAs<ElementRegion>())) {
+ // Index is a ConcreteInt.
+ if (auto CI = ER->getIndex().getAs<nonloc::ConcreteInt>()) {
+ llvm::SmallString<2> Idx;
+ CI->getValue().toString(Idx);
+ ArrayIndices = (llvm::Twine("[") + Idx.str() + "]" + ArrayIndices).str();
+ }
+ // If not a ConcreteInt, try to obtain the variable
+ // name by calling 'getDescriptiveName' recursively.
+ else {
+ std::string Idx = ER->getDescriptiveName(false);
+ if (!Idx.empty()) {
+ ArrayIndices = (llvm::Twine("[") + Idx + "]" + ArrayIndices).str();
+ }
+ }
+ R = ER->getSuperRegion();
+ }
+
+ // Get variable name.
+ if (R && R->canPrintPrettyAsExpr()) {
+ R->printPrettyAsExpr(os);
+ if (UseQuotes) {
+ return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str();
+ } else {
+ return (llvm::Twine(os.str()) + ArrayIndices).str();
+ }
+ }
+
+ return VariableName;
+}
+
+SourceRange MemRegion::sourceRange() const {
+ const VarRegion *const VR = dyn_cast<VarRegion>(this->getBaseRegion());
+ const FieldRegion *const FR = dyn_cast<FieldRegion>(this);
+
+ // Check for more specific regions first.
+ // FieldRegion
+ if (FR) {
+ return FR->getDecl()->getSourceRange();
+ }
+ // VarRegion
+ else if (VR) {
+ return VR->getDecl()->getSourceRange();
+ }
+ // Return invalid source range (can be checked by client).
+ else {
+ return SourceRange{};
+ }
+}
+
//===----------------------------------------------------------------------===//
// MemRegionManager methods.
//===----------------------------------------------------------------------===//
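
getDescriptiveName walks the ElementRegions collecting indices and then pretty-prints the base region, giving checkers a diagnostic-ready name; sourceRange() similarly recovers a location for variable and field regions. Expected behavior, sketched on a hypothetical region:

    // For the region of arr[1][2] in:
    //   int arr[3][4];
    // getDescriptiveName(/*UseQuotes=*/true)  yields "'arr[1][2]'"
    // getDescriptiveName(/*UseQuotes=*/false) yields "arr[1][2]"
    // A non-constant index is rendered via the recursive lookup, and a
    // base region that cannot be printed prettily yields an empty string.
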
@@ -646,7 +698,7 @@ void CXXBaseObjectRegion::printPrettyAsExpr(raw_ostream &os) const {
template <typename REG>
const REG *MemRegionManager::LazyAllocate(REG*& region) {
if (!region) {
- region = (REG*) A.Allocate<REG>();
+ region = A.Allocate<REG>();
new (region) REG(this);
}
@@ -656,7 +708,7 @@ const REG *MemRegionManager::LazyAllocate(REG*& region) {
template <typename REG, typename ARG>
const REG *MemRegionManager::LazyAllocate(REG*& region, ARG a) {
if (!region) {
- region = (REG*) A.Allocate<REG>();
+ region = A.Allocate<REG>();
new (region) REG(this, a);
}
@@ -892,7 +944,6 @@ MemRegionManager::getCXXStaticTempObjectRegion(const Expr *Ex) {
const CompoundLiteralRegion*
MemRegionManager::getCompoundLiteralRegion(const CompoundLiteralExpr *CL,
const LocationContext *LC) {
-
const MemRegion *sReg = nullptr;
if (CL->isFileScope())
@@ -910,7 +961,6 @@ const ElementRegion*
MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx,
const MemRegion* superRegion,
ASTContext &Ctx){
-
QualType T = Ctx.getCanonicalType(elementType).getUnqualifiedType();
llvm::FoldingSetNodeID ID;
@@ -921,7 +971,7 @@ MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx,
ElementRegion* R = cast_or_null<ElementRegion>(data);
if (!R) {
- R = (ElementRegion*) A.Allocate<ElementRegion>();
+ R = A.Allocate<ElementRegion>();
new (R) ElementRegion(T, Idx, superRegion);
Regions.InsertNode(R, InsertPos);
}
@@ -1342,10 +1392,10 @@ RegionOffset MemRegion::getAsOffset() const {
// Get the field number.
unsigned idx = 0;
for (RecordDecl::field_iterator FI = RD->field_begin(),
- FE = RD->field_end(); FI != FE; ++FI, ++idx)
+ FE = RD->field_end(); FI != FE; ++FI, ++idx) {
if (FR->getDecl() == *FI)
break;
-
+ }
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
// This is offset in bits.
Offset += Layout.getFieldOffset(idx);
@@ -1406,9 +1456,9 @@ void BlockDataRegion::LazyInitializeReferencedVars() {
BumpVectorContext BC(A);
typedef BumpVector<const MemRegion*> VarVec;
- VarVec *BV = (VarVec*) A.Allocate<VarVec>();
+ VarVec *BV = A.Allocate<VarVec>();
new (BV) VarVec(BC, NumBlockVars);
- VarVec *BVOriginal = (VarVec*) A.Allocate<VarVec>();
+ VarVec *BVOriginal = A.Allocate<VarVec>();
new (BVOriginal) VarVec(BC, NumBlockVars);
for (const VarDecl *VD : ReferencedBlockVars) {
@@ -1488,7 +1538,7 @@ void RegionAndSymbolInvalidationTraits::setTrait(const MemRegion *MR,
}
bool RegionAndSymbolInvalidationTraits::hasTrait(SymbolRef Sym,
- InvalidationKinds IK) {
+ InvalidationKinds IK) const {
const_symbol_iterator I = SymTraitsMap.find(Sym);
if (I != SymTraitsMap.end())
return I->second & IK;
@@ -1497,7 +1547,7 @@ bool RegionAndSymbolInvalidationTraits::hasTrait(SymbolRef Sym,
}
bool RegionAndSymbolInvalidationTraits::hasTrait(const MemRegion *MR,
- InvalidationKinds IK) {
+ InvalidationKinds IK) const {
if (!MR)
return false;
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index 504df30de834..217d628a129c 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -61,7 +61,6 @@ PathDiagnosticCallPiece::~PathDiagnosticCallPiece() {}
PathDiagnosticControlFlowPiece::~PathDiagnosticControlFlowPiece() {}
PathDiagnosticMacroPiece::~PathDiagnosticMacroPiece() {}
-
void PathPieces::flattenTo(PathPieces &Primary, PathPieces &Current,
bool ShouldFlattenMacros) const {
for (PathPieces::const_iterator I = begin(), E = end(); I != E; ++I) {
@@ -102,7 +101,6 @@ void PathPieces::flattenTo(PathPieces &Primary, PathPieces &Current,
}
}
-
PathDiagnostic::~PathDiagnostic() {}
PathDiagnostic::PathDiagnostic(StringRef CheckName, const Decl *declWithIssue,
@@ -278,6 +276,7 @@ void PathDiagnosticConsumer::HandlePathDiagnostic(
}
static Optional<bool> comparePath(const PathPieces &X, const PathPieces &Y);
+
static Optional<bool>
compareControlFlow(const PathDiagnosticControlFlowPiece &X,
const PathDiagnosticControlFlowPiece &Y) {
@@ -505,7 +504,6 @@ static SourceLocation getValidSourceLocation(const Stmt* S,
// S might be a temporary statement that does not have a location in the
// source code, so find an enclosing statement and use its location.
if (!L.isValid()) {
-
AnalysisDeclContext *ADC;
if (LAC.is<const LocationContext*>())
ADC = LAC.get<const LocationContext*>()->getAnalysisDeclContext();
@@ -578,22 +576,20 @@ getLocationForCaller(const StackFrameContext *SFC,
llvm_unreachable("Unknown CFGElement kind");
}
-
PathDiagnosticLocation
- PathDiagnosticLocation::createBegin(const Decl *D,
- const SourceManager &SM) {
+PathDiagnosticLocation::createBegin(const Decl *D,
+ const SourceManager &SM) {
return PathDiagnosticLocation(D->getLocStart(), SM, SingleLocK);
}
PathDiagnosticLocation
- PathDiagnosticLocation::createBegin(const Stmt *S,
- const SourceManager &SM,
- LocationOrAnalysisDeclContext LAC) {
+PathDiagnosticLocation::createBegin(const Stmt *S,
+ const SourceManager &SM,
+ LocationOrAnalysisDeclContext LAC) {
return PathDiagnosticLocation(getValidSourceLocation(S, LAC),
SM, SingleLocK);
}
-
PathDiagnosticLocation
PathDiagnosticLocation::createEnd(const Stmt *S,
const SourceManager &SM,
@@ -605,13 +601,13 @@ PathDiagnosticLocation::createEnd(const Stmt *S,
}
PathDiagnosticLocation
- PathDiagnosticLocation::createOperatorLoc(const BinaryOperator *BO,
- const SourceManager &SM) {
+PathDiagnosticLocation::createOperatorLoc(const BinaryOperator *BO,
+ const SourceManager &SM) {
return PathDiagnosticLocation(BO->getOperatorLoc(), SM, SingleLocK);
}
PathDiagnosticLocation
- PathDiagnosticLocation::createConditionalColonLoc(
+PathDiagnosticLocation::createConditionalColonLoc(
const ConditionalOperator *CO,
const SourceManager &SM) {
return PathDiagnosticLocation(CO->getColonLoc(), SM, SingleLocK);
@@ -619,28 +615,28 @@ PathDiagnosticLocation
PathDiagnosticLocation
- PathDiagnosticLocation::createMemberLoc(const MemberExpr *ME,
- const SourceManager &SM) {
+PathDiagnosticLocation::createMemberLoc(const MemberExpr *ME,
+ const SourceManager &SM) {
return PathDiagnosticLocation(ME->getMemberLoc(), SM, SingleLocK);
}
PathDiagnosticLocation
- PathDiagnosticLocation::createBeginBrace(const CompoundStmt *CS,
- const SourceManager &SM) {
+PathDiagnosticLocation::createBeginBrace(const CompoundStmt *CS,
+ const SourceManager &SM) {
SourceLocation L = CS->getLBracLoc();
return PathDiagnosticLocation(L, SM, SingleLocK);
}
PathDiagnosticLocation
- PathDiagnosticLocation::createEndBrace(const CompoundStmt *CS,
- const SourceManager &SM) {
+PathDiagnosticLocation::createEndBrace(const CompoundStmt *CS,
+ const SourceManager &SM) {
SourceLocation L = CS->getRBracLoc();
return PathDiagnosticLocation(L, SM, SingleLocK);
}
PathDiagnosticLocation
- PathDiagnosticLocation::createDeclBegin(const LocationContext *LC,
- const SourceManager &SM) {
+PathDiagnosticLocation::createDeclBegin(const LocationContext *LC,
+ const SourceManager &SM) {
// FIXME: Should handle CXXTryStmt if analyser starts supporting C++.
if (const CompoundStmt *CS =
dyn_cast_or_null<CompoundStmt>(LC->getDecl()->getBody()))
@@ -653,16 +649,15 @@ PathDiagnosticLocation
}
PathDiagnosticLocation
- PathDiagnosticLocation::createDeclEnd(const LocationContext *LC,
- const SourceManager &SM) {
+PathDiagnosticLocation::createDeclEnd(const LocationContext *LC,
+ const SourceManager &SM) {
SourceLocation L = LC->getDecl()->getBodyRBrace();
return PathDiagnosticLocation(L, SM, SingleLocK);
}
PathDiagnosticLocation
- PathDiagnosticLocation::create(const ProgramPoint& P,
- const SourceManager &SMng) {
-
+PathDiagnosticLocation::create(const ProgramPoint& P,
+ const SourceManager &SMng) {
const Stmt* S = nullptr;
if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
const CFGBlock *BSrc = BE->getSrc();
@@ -1062,7 +1057,6 @@ void PathDiagnosticLocation::Profile(llvm::FoldingSetNodeID &ID) const {
ID.AddInteger(Range.getBegin().getRawEncoding());
ID.AddInteger(Range.getEnd().getRawEncoding());
ID.AddInteger(Loc.getRawEncoding());
- return;
}
void PathDiagnosticPiece::Profile(llvm::FoldingSetNodeID &ID) const {
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index 55e1222e0ac6..8ad931acdf7f 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -124,7 +124,7 @@ static void ReportControlFlow(raw_ostream &o,
--indent;
// Output any helper text.
- const std::string& s = P.getString();
+ const auto &s = P.getString();
if (!s.empty()) {
Indent(o, indent) << "<key>alternate</key>";
EmitString(o, s) << '\n';
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 100fa75c5f42..adda7af08db8 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -439,7 +439,7 @@ void ProgramState::printDOT(raw_ostream &Out) const {
print(Out, "\\l", "\\|");
}
-void ProgramState::dump() const {
+LLVM_DUMP_METHOD void ProgramState::dump() const {
print(llvm::errs());
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index a63f6e496272..0d173c464481 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -14,6 +14,7 @@
// parameters are created lazily.
//
//===----------------------------------------------------------------------===//
+
#include "clang/AST/Attr.h"
#include "clang/AST/CharUnits.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
@@ -29,6 +30,7 @@
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"
+#include <utility>
using namespace clang;
using namespace ento;
@@ -665,10 +667,9 @@ protected:
public:
ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr,
- RegionBindingsRef b )
- : RM(rm), Ctx(StateMgr.getContext()),
- svalBuilder(StateMgr.getSValBuilder()),
- B(b) {}
+ RegionBindingsRef b)
+ : RM(rm), Ctx(StateMgr.getContext()),
+ svalBuilder(StateMgr.getSValBuilder()), B(std::move(b)) {}
RegionBindingsRef getRegionBindings() const { return B; }
@@ -1130,11 +1131,10 @@ void invalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
// Check offset is not symbolic and within array's boundaries.
// Handles arrays of 0 elements and of 0-sized elements as well.
if (!ROffset ||
- (ROffset &&
- ((*ROffset >= LowerOffset && *ROffset < UpperOffset) ||
- (UpperOverflow &&
- (*ROffset >= LowerOffset || *ROffset < UpperOffset)) ||
- (LowerOffset == UpperOffset && *ROffset == LowerOffset)))) {
+ ((*ROffset >= LowerOffset && *ROffset < UpperOffset) ||
+ (UpperOverflow &&
+ (*ROffset >= LowerOffset || *ROffset < UpperOffset)) ||
+ (LowerOffset == UpperOffset && *ROffset == LowerOffset))) {
B = B.removeBinding(I.getKey());
// Bound symbolic regions need to be invalidated for dead symbol
// detection.
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index 18315225a99d..72bcdd9ecb06 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -367,6 +367,11 @@ SVal SValBuilder::evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op,
if (lhs.isUnknown() || rhs.isUnknown())
return UnknownVal();
+ if (lhs.getAs<nonloc::LazyCompoundVal>() ||
+ rhs.getAs<nonloc::LazyCompoundVal>()) {
+ return UnknownVal();
+ }
+
if (Optional<Loc> LV = lhs.getAs<Loc>()) {
if (Optional<Loc> RV = rhs.getAs<Loc>())
return evalBinOpLL(state, op, *LV, *RV, type);
@@ -451,7 +456,7 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val,
NonLoc FromVal = val.castAs<NonLoc>();
QualType CmpTy = getConditionType();
NonLoc CompVal =
- evalBinOpNN(state, BO_LT, FromVal, ToTypeMaxVal, CmpTy).castAs<NonLoc>();
+ evalBinOpNN(state, BO_LE, FromVal, ToTypeMaxVal, CmpTy).castAs<NonLoc>();
ProgramStateRef IsNotTruncated, IsTruncated;
std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal);
if (!IsNotTruncated && IsTruncated) {
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp
index dffee6c8c57b..a30beed688b7 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SVals.cpp
@@ -236,7 +236,7 @@ SVal loc::ConcreteInt::evalBinOp(BasicValueFactory& BasicVals,
// Pretty-Printing.
//===----------------------------------------------------------------------===//
-void SVal::dump() const { dumpToStream(llvm::errs()); }
+LLVM_DUMP_METHOD void SVal::dump() const { dumpToStream(llvm::errs()); }
void SVal::dumpToStream(raw_ostream &os) const {
switch (getBaseKind()) {
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
index 2dd252c223fd..b8b4af1179e5 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -23,7 +23,7 @@ using namespace ento;
void SymExpr::anchor() { }
-void SymExpr::dump() const {
+LLVM_DUMP_METHOD void SymExpr::dump() const {
dumpToStream(llvm::errs());
}
diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index d1446855e01f..8ac229fc6583 100644
--- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -14,11 +14,11 @@
#include "clang/StaticAnalyzer/Frontend/AnalysisConsumer.h"
#include "ModelInjector.h"
#include "clang/AST/ASTConsumer.h"
-#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/ParentMap.h"
+#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/CallGraph.h"
@@ -47,10 +47,10 @@
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <queue>
+#include <utility>
using namespace clang;
using namespace ento;
-using llvm::SmallPtrSet;
#define DEBUG_TYPE "AnalysisConsumer"
@@ -185,13 +185,12 @@ public:
/// translation unit.
FunctionSummariesTy FunctionSummaries;
- AnalysisConsumer(const Preprocessor& pp,
- const std::string& outdir,
- AnalyzerOptionsRef opts,
- ArrayRef<std::string> plugins,
+ AnalysisConsumer(const Preprocessor &pp, const std::string &outdir,
+ AnalyzerOptionsRef opts, ArrayRef<std::string> plugins,
CodeInjector *injector)
- : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr), PP(pp),
- OutDir(outdir), Opts(opts), Plugins(plugins), Injector(injector) {
+ : RecVisitorMode(0), RecVisitorBR(nullptr), Ctx(nullptr), PP(pp),
+ OutDir(outdir), Opts(std::move(opts)), Plugins(plugins),
+ Injector(injector) {
DigestAnalyzerOptions();
if (Opts->PrintStats) {
llvm::EnableStatistics();
@@ -274,7 +273,7 @@ public:
llvm::errs() << ": " << Loc.getFilename();
if (isa<FunctionDecl>(D) || isa<ObjCMethodDecl>(D)) {
const NamedDecl *ND = cast<NamedDecl>(D);
- llvm::errs() << ' ' << *ND << '\n';
+ llvm::errs() << ' ' << ND->getQualifiedNameAsString() << '\n';
}
else if (isa<BlockDecl>(D)) {
llvm::errs() << ' ' << "block(line:" << Loc.getLine() << ",col:"
@@ -799,10 +798,7 @@ UbigraphViz::~UbigraphViz() {
std::string Ubiviz;
if (auto Path = llvm::sys::findProgramByName("ubiviz"))
Ubiviz = *Path;
- std::vector<const char*> args;
- args.push_back(Ubiviz.c_str());
- args.push_back(Filename.c_str());
- args.push_back(nullptr);
+ const char *args[] = {Ubiviz.c_str(), Filename.c_str(), nullptr};
if (llvm::sys::ExecuteAndWait(Ubiviz, &args[0], nullptr, nullptr, 0, 0,
&ErrMsg)) {
diff --git a/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp b/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp
index 82f560140085..5a44061cbd4c 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp
@@ -62,7 +62,7 @@ public:
: Compilations(std::move(Compilations)) {}
void appendArgumentsAdjuster(ArgumentsAdjuster Adjuster) {
- Adjusters.push_back(Adjuster);
+ Adjusters.push_back(std::move(Adjuster));
}
std::vector<CompileCommand>
@@ -118,6 +118,8 @@ CommonOptionsParser::CommonOptionsParser(
Compilations.reset(FixedCompilationDatabase::loadFromCommandLine(argc, argv));
cl::ParseCommandLineOptions(argc, argv, Overview);
+ cl::PrintOptionValues();
+
SourcePathList = SourcePaths;
if ((OccurrencesFlag == cl::ZeroOrMore || OccurrencesFlag == cl::Optional) &&
SourcePathList.empty())
diff --git a/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp b/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp
index 957e40137eac..8fc4a1fe5beb 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp
@@ -139,9 +139,8 @@ private:
;
}
- for (driver::ActionList::const_iterator I = A->begin(), E = A->end();
- I != E; ++I)
- runImpl(*I, CollectChildren);
+ for (const driver::Action *AI : A->inputs())
+ runImpl(AI, CollectChildren);
}
};
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Core/QualTypeNames.cpp b/contrib/llvm/tools/clang/lib/Tooling/Core/QualTypeNames.cpp
new file mode 100644
index 000000000000..619dae1ee106
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Tooling/Core/QualTypeNames.cpp
@@ -0,0 +1,479 @@
+//===------- QualTypeNames.cpp - Generate Complete QualType Names ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Core/QualTypeNames.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/DeclarationName.h"
+#include "clang/AST/GlobalDecl.h"
+#include "clang/AST/Mangle.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <stdio.h>
+#include <memory>
+
+namespace clang {
+
+namespace TypeName {
+/// \brief Generates a QualType that can be used to name the same type
+/// if used at the end of the current translation unit. This ignores
+/// issues such as type shadowing.
+///
+/// \param[in] QT - the type for which the fully qualified type will be
+/// returned.
+/// \param[in] Ctx - the ASTContext to be used.
+/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
+/// specifier "::" should be prepended or not.
+static QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx,
+ bool WithGlobalNsPrefix);
+
+/// \brief Create a NestedNameSpecifier for Namesp and its enclosing
+/// scopes.
+///
+/// \param[in] Ctx - the AST Context to be used.
+/// \param[in] Namesp - the NamespaceDecl for which a NestedNameSpecifier
+/// is requested.
+/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
+/// specifier "::" should be prepended or not.
+static NestedNameSpecifier *createNestedNameSpecifier(
+ const ASTContext &Ctx,
+ const NamespaceDecl *Namesp,
+ bool WithGlobalNsPrefix);
+
+/// \brief Create a NestedNameSpecifier for TagDecl and its enclosing
+/// scopes.
+///
+/// \param[in] Ctx - the AST Context to be used.
+/// \param[in] TD - the TagDecl for which a NestedNameSpecifier is
+/// requested.
+/// \param[in] FullyQualify - Convert all template arguments into fully
+/// qualified names.
+/// \param[in] WithGlobalNsPrefix - Indicate whether the global namespace
+/// specifier "::" should be prepended or not.
+static NestedNameSpecifier *createNestedNameSpecifier(
+ const ASTContext &Ctx, const TypeDecl *TD,
+ bool FullyQualify, bool WithGlobalNsPrefix);
+
+static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
+ const ASTContext &Ctx, const Decl *decl,
+ bool FullyQualified, bool WithGlobalNsPrefix);
+
+static NestedNameSpecifier *getFullyQualifiedNestedNameSpecifier(
+ const ASTContext &Ctx, NestedNameSpecifier *scope, bool WithGlobalNsPrefix);
+
+static bool getFullyQualifiedTemplateName(const ASTContext &Ctx,
+ TemplateName &TName,
+ bool WithGlobalNsPrefix) {
+ bool Changed = false;
+ NestedNameSpecifier *NNS = nullptr;
+
+ TemplateDecl *ArgTDecl = TName.getAsTemplateDecl();
+ // ArgTDecl won't be NULL because we asserted that this isn't a
+ // dependent context very early in the call chain.
+ assert(ArgTDecl != nullptr);
+ QualifiedTemplateName *QTName = TName.getAsQualifiedTemplateName();
+
+ if (QTName && !QTName->hasTemplateKeyword()) {
+ NNS = QTName->getQualifier();
+ NestedNameSpecifier *QNNS = getFullyQualifiedNestedNameSpecifier(
+ Ctx, NNS, WithGlobalNsPrefix);
+ if (QNNS != NNS) {
+ Changed = true;
+ NNS = QNNS;
+ } else {
+ NNS = nullptr;
+ }
+ } else {
+ NNS = createNestedNameSpecifierForScopeOf(
+ Ctx, ArgTDecl, true, WithGlobalNsPrefix);
+ }
+ if (NNS) {
+ TName = Ctx.getQualifiedTemplateName(NNS,
+ /*TemplateKeyword=*/false, ArgTDecl);
+ Changed = true;
+ }
+ return Changed;
+}
+
+static bool getFullyQualifiedTemplateArgument(const ASTContext &Ctx,
+ TemplateArgument &Arg,
+ bool WithGlobalNsPrefix) {
+ bool Changed = false;
+
+ // Note: we do not handle TemplateArgument::Expression, to replace it
+ // we need the information for the template instance decl.
+
+ if (Arg.getKind() == TemplateArgument::Template) {
+ TemplateName TName = Arg.getAsTemplate();
+ Changed = getFullyQualifiedTemplateName(Ctx, TName, WithGlobalNsPrefix);
+ if (Changed) {
+ Arg = TemplateArgument(TName);
+ }
+ } else if (Arg.getKind() == TemplateArgument::Type) {
+ QualType SubTy = Arg.getAsType();
+ // Check if the type needs more desugaring and recurse.
+ QualType QTFQ = getFullyQualifiedType(SubTy, Ctx, WithGlobalNsPrefix);
+ if (QTFQ != SubTy) {
+ Arg = TemplateArgument(QTFQ);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static const Type *getFullyQualifiedTemplateType(const ASTContext &Ctx,
+ const Type *TypePtr,
+ bool WithGlobalNsPrefix) {
+ // DependentTemplateTypes exist within template declarations and
+ // definitions. Therefore we shouldn't encounter them at the end of
+ // a translation unit. If we do, the caller has made an error.
+ assert(!isa<DependentTemplateSpecializationType>(TypePtr));
+ // In case of template specializations, iterate over the arguments
+ // and fully qualify them as well.
+ if (const auto *TST = dyn_cast<const TemplateSpecializationType>(TypePtr)) {
+ bool MightHaveChanged = false;
+ SmallVector<TemplateArgument, 4> FQArgs;
+ for (TemplateSpecializationType::iterator I = TST->begin(), E = TST->end();
+ I != E; ++I) {
+ // Cheap to copy and potentially modified by
+      // getFullyQualifiedTemplateArgument.
+ TemplateArgument Arg(*I);
+ MightHaveChanged |= getFullyQualifiedTemplateArgument(
+ Ctx, Arg, WithGlobalNsPrefix);
+ FQArgs.push_back(Arg);
+ }
+
+ // If a fully qualified arg is different from the unqualified arg,
+ // allocate new type in the AST.
+ if (MightHaveChanged) {
+ QualType QT = Ctx.getTemplateSpecializationType(
+ TST->getTemplateName(), FQArgs,
+ TST->getCanonicalTypeInternal());
+ // getTemplateSpecializationType returns a fully qualified
+ // version of the specialization itself, so no need to qualify
+ // it.
+ return QT.getTypePtr();
+ }
+ } else if (const auto *TSTRecord = dyn_cast<const RecordType>(TypePtr)) {
+ // We are asked to fully qualify and we have a Record Type,
+ // which can point to a template instantiation with no sugar in any of
+ // its template argument, however we still need to fully qualify them.
+
+ if (const auto *TSTDecl =
+ dyn_cast<ClassTemplateSpecializationDecl>(TSTRecord->getDecl())) {
+ const TemplateArgumentList &TemplateArgs = TSTDecl->getTemplateArgs();
+
+ bool MightHaveChanged = false;
+ SmallVector<TemplateArgument, 4> FQArgs;
+ for (unsigned int I = 0, E = TemplateArgs.size(); I != E; ++I) {
+        // Cheap to copy and potentially modified by
+        // getFullyQualifiedTemplateArgument.
+ TemplateArgument Arg(TemplateArgs[I]);
+ MightHaveChanged |= getFullyQualifiedTemplateArgument(
+ Ctx, Arg, WithGlobalNsPrefix);
+ FQArgs.push_back(Arg);
+ }
+
+ // If a fully qualified arg is different from the unqualified arg,
+ // allocate new type in the AST.
+ if (MightHaveChanged) {
+ TemplateName TN(TSTDecl->getSpecializedTemplate());
+ QualType QT = Ctx.getTemplateSpecializationType(
+ TN, FQArgs,
+ TSTRecord->getCanonicalTypeInternal());
+ // getTemplateSpecializationType returns a fully qualified
+ // version of the specialization itself, so no need to qualify
+ // it.
+ return QT.getTypePtr();
+ }
+ }
+ }
+ return TypePtr;
+}
+
+static NestedNameSpecifier *createOuterNNS(const ASTContext &Ctx, const Decl *D,
+ bool FullyQualify,
+ bool WithGlobalNsPrefix) {
+ const DeclContext *DC = D->getDeclContext();
+ if (const auto *NS = dyn_cast<NamespaceDecl>(DC)) {
+ while (NS && NS->isInline()) {
+      // Ignore inline namespaces.
+ NS = dyn_cast<NamespaceDecl>(NS->getDeclContext());
+ }
+    if (NS && NS->getDeclName()) {
+ return createNestedNameSpecifier(Ctx, NS, WithGlobalNsPrefix);
+ }
+ return nullptr; // no starting '::', no anonymous
+ } else if (const auto *TD = dyn_cast<TagDecl>(DC)) {
+ return createNestedNameSpecifier(Ctx, TD, FullyQualify, WithGlobalNsPrefix);
+ } else if (const auto *TDD = dyn_cast<TypedefNameDecl>(DC)) {
+ return createNestedNameSpecifier(
+ Ctx, TDD, FullyQualify, WithGlobalNsPrefix);
+ } else if (WithGlobalNsPrefix && DC->isTranslationUnit()) {
+ return NestedNameSpecifier::GlobalSpecifier(Ctx);
+ }
+ return nullptr; // no starting '::' if |WithGlobalNsPrefix| is false
+}
+
+/// \brief Return a fully qualified version of this name specifier.
+static NestedNameSpecifier *getFullyQualifiedNestedNameSpecifier(
+ const ASTContext &Ctx, NestedNameSpecifier *Scope,
+ bool WithGlobalNsPrefix) {
+ switch (Scope->getKind()) {
+ case NestedNameSpecifier::Global:
+ // Already fully qualified
+ return Scope;
+ case NestedNameSpecifier::Namespace:
+ return TypeName::createNestedNameSpecifier(
+ Ctx, Scope->getAsNamespace(), WithGlobalNsPrefix);
+ case NestedNameSpecifier::NamespaceAlias:
+ // Namespace aliases are only valid for the duration of the
+ // scope where they were introduced, and therefore are often
+    // invalid at the end of the TU. So use the underlying namespace's
+    // name, which is more likely to be valid at the end of the TU.
+ return TypeName::createNestedNameSpecifier(
+ Ctx,
+ Scope->getAsNamespaceAlias()->getNamespace()->getCanonicalDecl(),
+ WithGlobalNsPrefix);
+ case NestedNameSpecifier::Identifier:
+ // A function or some other construct that makes it un-namable
+ // at the end of the TU. Skip the current component of the name,
+    // but use the name of its prefix.
+ return getFullyQualifiedNestedNameSpecifier(
+ Ctx, Scope->getPrefix(), WithGlobalNsPrefix);
+ case NestedNameSpecifier::Super:
+ case NestedNameSpecifier::TypeSpec:
+ case NestedNameSpecifier::TypeSpecWithTemplate: {
+ const Type *Type = Scope->getAsType();
+ // Find decl context.
+ const TagDecl *TD = nullptr;
+ if (const TagType *TagDeclType = Type->getAs<TagType>()) {
+ TD = TagDeclType->getDecl();
+ } else {
+ TD = Type->getAsCXXRecordDecl();
+ }
+ if (TD) {
+ return TypeName::createNestedNameSpecifier(Ctx, TD,
+ true /*FullyQualified*/,
+ WithGlobalNsPrefix);
+ } else if (const auto *TDD = dyn_cast<TypedefType>(Type)) {
+ return TypeName::createNestedNameSpecifier(Ctx, TDD->getDecl(),
+ true /*FullyQualified*/,
+ WithGlobalNsPrefix);
+ }
+ return Scope;
+ }
+ }
+ llvm_unreachable("bad NNS kind");
+}
+
+/// \brief Create a nested name specifier for the declaring context of
+/// the type.
+static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
+ const ASTContext &Ctx, const Decl *Decl,
+ bool FullyQualified, bool WithGlobalNsPrefix) {
+ assert(Decl);
+
+ const DeclContext *DC = Decl->getDeclContext()->getRedeclContext();
+ const auto *Outer = dyn_cast_or_null<NamedDecl>(DC);
+ const auto *OuterNS = dyn_cast_or_null<NamespaceDecl>(DC);
+ if (Outer && !(OuterNS && OuterNS->isAnonymousNamespace())) {
+ if (const auto *CxxDecl = dyn_cast<CXXRecordDecl>(DC)) {
+ if (ClassTemplateDecl *ClassTempl =
+ CxxDecl->getDescribedClassTemplate()) {
+ // We are in the case of a type(def) that was declared in a
+ // class template but is *not* type dependent. In clang, it
+ // gets attached to the class template declaration rather than
+        // any specific class template instantiation. This results in an
+        // 'odd' fully qualified type name:
+ //
+ // vector<_Tp,_Alloc>::size_type
+ //
+        // Make the situation 'usable', if a bit odd-looking, by
+ // picking a random instance as the declaring context.
+ if (ClassTempl->spec_begin() != ClassTempl->spec_end()) {
+ Decl = *(ClassTempl->spec_begin());
+ Outer = dyn_cast<NamedDecl>(Decl);
+ OuterNS = dyn_cast<NamespaceDecl>(Decl);
+ }
+ }
+ }
+
+ if (OuterNS) {
+ return createNestedNameSpecifier(Ctx, OuterNS, WithGlobalNsPrefix);
+ } else if (const auto *TD = dyn_cast<TagDecl>(Outer)) {
+ return createNestedNameSpecifier(
+ Ctx, TD, FullyQualified, WithGlobalNsPrefix);
+    } else if (isa<TranslationUnitDecl>(Outer)) {
+ // Context is the TU. Nothing needs to be done.
+ return nullptr;
+ } else {
+ // Decl's context was neither the TU, a namespace, nor a
+ // TagDecl, which means it is a type local to a scope, and not
+ // accessible at the end of the TU.
+ return nullptr;
+ }
+ } else if (WithGlobalNsPrefix && DC->isTranslationUnit()) {
+ return NestedNameSpecifier::GlobalSpecifier(Ctx);
+ }
+ return nullptr;
+}
+
+/// \brief Create a nested name specifier for the declaring context of
+/// the type.
+static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
+ const ASTContext &Ctx, const Type *TypePtr,
+ bool FullyQualified, bool WithGlobalNsPrefix) {
+ if (!TypePtr) return nullptr;
+
+ Decl *Decl = nullptr;
+ // There are probably other cases ...
+ if (const auto *TDT = dyn_cast<TypedefType>(TypePtr)) {
+ Decl = TDT->getDecl();
+ } else if (const auto *TagDeclType = dyn_cast<TagType>(TypePtr)) {
+ Decl = TagDeclType->getDecl();
+ } else if (const auto *TST = dyn_cast<TemplateSpecializationType>(TypePtr)) {
+ Decl = TST->getTemplateName().getAsTemplateDecl();
+ } else {
+ Decl = TypePtr->getAsCXXRecordDecl();
+ }
+
+ if (!Decl) return nullptr;
+
+ return createNestedNameSpecifierForScopeOf(
+ Ctx, Decl, FullyQualified, WithGlobalNsPrefix);
+}
+
+NestedNameSpecifier *createNestedNameSpecifier(const ASTContext &Ctx,
+ const NamespaceDecl *Namespace,
+ bool WithGlobalNsPrefix) {
+ while (Namespace && Namespace->isInline()) {
+    // Ignore inline namespaces.
+ Namespace = dyn_cast<NamespaceDecl>(Namespace->getDeclContext());
+ }
+ if (!Namespace) return nullptr;
+
+ bool FullyQualified = true; // doesn't matter, DeclContexts are namespaces
+ return NestedNameSpecifier::Create(
+ Ctx,
+ createOuterNNS(Ctx, Namespace, FullyQualified, WithGlobalNsPrefix),
+ Namespace);
+}
+
+NestedNameSpecifier *createNestedNameSpecifier(const ASTContext &Ctx,
+ const TypeDecl *TD,
+ bool FullyQualify,
+ bool WithGlobalNsPrefix) {
+ return NestedNameSpecifier::Create(
+ Ctx,
+ createOuterNNS(Ctx, TD, FullyQualify, WithGlobalNsPrefix),
+ false /*No TemplateKeyword*/,
+ TD->getTypeForDecl());
+}
+
+/// \brief Return the fully qualified type, including fully-qualified
+/// versions of any template parameters.
+QualType getFullyQualifiedType(QualType QT, const ASTContext &Ctx,
+ bool WithGlobalNsPrefix) {
+ // In case of myType* we need to strip the pointer first, fully
+ // qualify and attach the pointer once again.
+ if (isa<PointerType>(QT.getTypePtr())) {
+ // Get the qualifiers.
+ Qualifiers Quals = QT.getQualifiers();
+ QT = getFullyQualifiedType(QT->getPointeeType(), Ctx, WithGlobalNsPrefix);
+ QT = Ctx.getPointerType(QT);
+ // Add back the qualifiers.
+ QT = Ctx.getQualifiedType(QT, Quals);
+ return QT;
+ }
+
+ // In case of myType& we need to strip the reference first, fully
+ // qualify and attach the reference once again.
+ if (isa<ReferenceType>(QT.getTypePtr())) {
+ // Get the qualifiers.
+ bool IsLValueRefTy = isa<LValueReferenceType>(QT.getTypePtr());
+ Qualifiers Quals = QT.getQualifiers();
+ QT = getFullyQualifiedType(QT->getPointeeType(), Ctx, WithGlobalNsPrefix);
+ // Add the r- or l-value reference type back to the fully
+ // qualified one.
+ if (IsLValueRefTy)
+ QT = Ctx.getLValueReferenceType(QT);
+ else
+ QT = Ctx.getRValueReferenceType(QT);
+ // Add back the qualifiers.
+ QT = Ctx.getQualifiedType(QT, Quals);
+ return QT;
+ }
+
+ // Remove the part of the type related to the type being a template
+  // parameter (we won't report it as part of the 'type name'), since it
+  // would only make the code below more complex by having to handle
+  // those cases.
+ while (isa<SubstTemplateTypeParmType>(QT.getTypePtr())) {
+ // Get the qualifiers.
+ Qualifiers Quals = QT.getQualifiers();
+
+    QT = cast<SubstTemplateTypeParmType>(QT.getTypePtr())->desugar();
+
+ // Add back the qualifiers.
+ QT = Ctx.getQualifiedType(QT, Quals);
+ }
+
+ NestedNameSpecifier *Prefix = nullptr;
+ // Local qualifiers are attached to the QualType outside of the
+ // elaborated type. Retrieve them before descending into the
+ // elaborated type.
+ Qualifiers PrefixQualifiers = QT.getLocalQualifiers();
+ QT = QualType(QT.getTypePtr(), 0);
+ ElaboratedTypeKeyword Keyword = ETK_None;
+ if (const auto *ETypeInput = dyn_cast<ElaboratedType>(QT.getTypePtr())) {
+ QT = ETypeInput->getNamedType();
+ assert(!QT.hasLocalQualifiers());
+ Keyword = ETypeInput->getKeyword();
+ }
+ // Create a nested name specifier if needed.
+ Prefix = createNestedNameSpecifierForScopeOf(Ctx, QT.getTypePtr(),
+ true /*FullyQualified*/,
+ WithGlobalNsPrefix);
+
+ // In case of template specializations iterate over the arguments and
+ // fully qualify them as well.
+ if (isa<const TemplateSpecializationType>(QT.getTypePtr()) ||
+ isa<const RecordType>(QT.getTypePtr())) {
+ // We are asked to fully qualify and we have a Record Type (which
+ // may point to a template specialization) or Template
+ // Specialization Type. We need to fully qualify their arguments.
+
+ const Type *TypePtr = getFullyQualifiedTemplateType(
+ Ctx, QT.getTypePtr(), WithGlobalNsPrefix);
+ QT = QualType(TypePtr, 0);
+ }
+ if (Prefix || Keyword != ETK_None) {
+ QT = Ctx.getElaboratedType(Keyword, Prefix, QT);
+ }
+ QT = Ctx.getQualifiedType(QT, PrefixQualifiers);
+ return QT;
+}
+
+std::string getFullyQualifiedName(QualType QT,
+ const ASTContext &Ctx,
+ bool WithGlobalNsPrefix) {
+ PrintingPolicy Policy(Ctx.getPrintingPolicy());
+ Policy.SuppressScope = false;
+ Policy.AnonymousTagLocations = false;
+ Policy.PolishForDeclaration = true;
+ Policy.SuppressUnwrittenScope = true;
+ QualType FQQT = getFullyQualifiedType(QT, Ctx, WithGlobalNsPrefix);
+ return FQQT.getAsString(Policy);
+}
+
+} // end namespace TypeName
+} // end namespace clang
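+
+// A minimal usage sketch (illustrative only, not part of this file): given a
+// QualType and its ASTContext, getFullyQualifiedName() above yields a name
+// that stays valid at the end of the translation unit. `handleType` is a
+// hypothetical caller.
+//
+//   void handleType(clang::QualType QT, const clang::ASTContext &Ctx) {
+//     // E.g. "::std::vector<int>" for a suitably sugared vector type.
+//     std::string Name = clang::TypeName::getFullyQualifiedName(
+//         QT, Ctx, /*WithGlobalNsPrefix=*/true);
+//     llvm::errs() << Name << "\n";
+//   }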
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp b/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp
index 47bbdeb470ee..4f130709ac16 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/Core/Replacement.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Core/Replacement.h"
+
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
@@ -18,7 +20,6 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "clang/Rewrite/Core/Rewriter.h"
-#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_os_ostream.h"
@@ -57,14 +58,8 @@ bool Replacement::apply(Rewriter &Rewrite) const {
const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
if (!Entry)
return false;
- FileID ID;
- // FIXME: Use SM.translateFile directly.
- SourceLocation Location = SM.translateFileLineCol(Entry, 1, 1);
- ID = Location.isValid() ?
- SM.getFileID(Location) :
- SM.createFileID(Entry, SourceLocation(), SrcMgr::C_User);
- // FIXME: We cannot check whether Offset + Length is in the file, as
- // the remapping API is not public in the RewriteBuffer.
+
+ FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
const SourceLocation Start =
SM.getLocForStartOfFile(ID).
getLocWithOffset(ReplacementRange.getOffset());
@@ -254,7 +249,11 @@ bool applyAllReplacements(const std::vector<Replacement> &Replaces,
return Result;
}
-std::string applyAllReplacements(StringRef Code, const Replacements &Replaces) {
+llvm::Expected<std::string> applyAllReplacements(StringRef Code,
+ const Replacements &Replaces) {
+ if (Replaces.empty())
+ return Code.str();
+
IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
new vfs::InMemoryFileSystem);
FileManager Files(FileSystemOptions(), InMemoryFileSystem);
@@ -272,7 +271,9 @@ std::string applyAllReplacements(StringRef Code, const Replacements &Replaces) {
Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
I->getReplacementText());
if (!Replace.apply(Rewrite))
- return "";
+ return llvm::make_error<llvm::StringError>(
+ "Failed to apply replacement: " + Replace.toString(),
+ llvm::inconvertibleErrorCode());
}
std::string Result;
llvm::raw_string_ostream OS(Result);
@@ -281,6 +282,55 @@ std::string applyAllReplacements(StringRef Code, const Replacements &Replaces) {
return Result;
}
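+// Caller-side sketch (hypothetical, not part of this patch): with the return
+// type now llvm::Expected<std::string>, failures must be consumed rather than
+// detected by comparing against "":
+//
+//   llvm::Expected<std::string> NewCode = applyAllReplacements(Code, Replaces);
+//   if (!NewCode) {
+//     llvm::errs() << llvm::toString(NewCode.takeError()) << "\n";
+//     return;
+//   }
+//   processResult(*NewCode); // processResult is a placeholder.
+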
+// Merge and sort overlapping ranges in \p Ranges.
+static std::vector<Range> mergeAndSortRanges(std::vector<Range> Ranges) {
+ std::sort(Ranges.begin(), Ranges.end(),
+ [](const Range &LHS, const Range &RHS) {
+ if (LHS.getOffset() != RHS.getOffset())
+ return LHS.getOffset() < RHS.getOffset();
+ return LHS.getLength() < RHS.getLength();
+ });
+ std::vector<Range> Result;
+ for (const auto &R : Ranges) {
+ if (Result.empty() ||
+ Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
+ Result.push_back(R);
+ } else {
+ unsigned NewEnd =
+ std::max(Result.back().getOffset() + Result.back().getLength(),
+ R.getOffset() + R.getLength());
+ Result[Result.size() - 1] =
+ Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
+ }
+ }
+ return Result;
+}
+
+std::vector<Range> calculateChangedRanges(const Replacements &Replaces) {
+ std::vector<Range> ChangedRanges;
+ int Shift = 0;
+ for (const Replacement &R : Replaces) {
+ unsigned Offset = R.getOffset() + Shift;
+ unsigned Length = R.getReplacementText().size();
+ Shift += Length - R.getLength();
+ ChangedRanges.push_back(Range(Offset, Length));
+ }
+ return mergeAndSortRanges(ChangedRanges);
+}
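+// Worked example (illustrative): replacing 2 characters at offset 0 with
+// "abcd" gives Range(0, 4) and Shift becomes +2; a later deletion of 3
+// characters at offset 10 then maps to Range(12, 0) in the changed text.
+// mergeAndSortRanges keeps the two ranges separate since they do not touch.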
+
+std::vector<Range>
+calculateRangesAfterReplacements(const Replacements &Replaces,
+ const std::vector<Range> &Ranges) {
+ auto MergedRanges = mergeAndSortRanges(Ranges);
+ tooling::Replacements FakeReplaces;
+ for (const auto &R : MergedRanges)
+ FakeReplaces.insert(Replacement(Replaces.begin()->getFilePath(),
+ R.getOffset(), R.getLength(),
+ std::string(R.getLength(), ' ')));
+ tooling::Replacements NewReplaces = mergeReplacements(FakeReplaces, Replaces);
+ return calculateChangedRanges(NewReplaces);
+}
+
namespace {
// Represents a merged replacement, i.e. a replacement consisting of multiple
// overlapping replacements from 'First' and 'Second' in mergeReplacements.
@@ -314,7 +364,7 @@ public:
// Merges the next element 'R' into this merged element. As we always merge
// from 'First' into 'Second' or vice versa, the MergedReplacement knows what
- // set the next element is coming from.
+ // set the next element is coming from.
void merge(const Replacement &R) {
if (MergeSecond) {
unsigned REnd = R.getOffset() + Delta + R.getLength();
@@ -377,6 +427,15 @@ private:
};
} // namespace
+std::map<std::string, Replacements>
+groupReplacementsByFile(const Replacements &Replaces) {
+ std::map<std::string, Replacements> FileToReplaces;
+ for (const auto &Replace : Replaces) {
+ FileToReplaces[Replace.getFilePath()].insert(Replace);
+ }
+ return FileToReplaces;
+}
+
Replacements mergeReplacements(const Replacements &First,
const Replacements &Second) {
if (First.empty() || Second.empty())
@@ -416,4 +475,3 @@ Replacements mergeReplacements(const Replacements &First,
} // end namespace tooling
} // end namespace clang
-
diff --git a/contrib/llvm/tools/clang/lib/Tooling/FixIt.cpp b/contrib/llvm/tools/clang/lib/Tooling/FixIt.cpp
new file mode 100644
index 000000000000..70942c5ac845
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Tooling/FixIt.cpp
@@ -0,0 +1,31 @@
+//===--- FixIt.cpp - FixIt Hint utilities -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains implementations of utilities to ease source code rewriting
+// by providing helper functions related to FixItHint.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/FixIt.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace tooling {
+namespace fixit {
+
+namespace internal {
+StringRef getText(SourceRange Range, const ASTContext &Context) {
+ return Lexer::getSourceText(CharSourceRange::getTokenRange(Range),
+ Context.getSourceManager(),
+ Context.getLangOpts());
+}
+} // end namespace internal
+
+} // end namespace fixit
+} // end namespace tooling
+} // end namespace clang
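+
+// Usage sketch (illustrative): the public entry point is the getText(Node,
+// Context) template in clang/Tooling/FixIt.h, which forwards
+// Node.getSourceRange() to the helper above. With `E` a matched Expr:
+//
+//   llvm::StringRef Source = clang::tooling::fixit::getText(*E, Context);
+//   // Source holds the original spelling of E, e.g. for building a
+//   // FixItHint that duplicates or reorders it.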
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp b/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp
index d32452f6f293..28d535aeb45f 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/Refactoring.cpp
@@ -14,6 +14,7 @@
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Lex/Lexer.h"
#include "clang/Rewrite/Core/Rewriter.h"
@@ -61,5 +62,33 @@ int RefactoringTool::saveRewrittenFiles(Rewriter &Rewrite) {
return Rewrite.overwriteChangedFiles() ? 1 : 0;
}
+bool formatAndApplyAllReplacements(const Replacements &Replaces,
+ Rewriter &Rewrite, StringRef Style) {
+ SourceManager &SM = Rewrite.getSourceMgr();
+ FileManager &Files = SM.getFileManager();
+
+ auto FileToReplaces = groupReplacementsByFile(Replaces);
+
+ bool Result = true;
+ for (const auto &FileAndReplaces : FileToReplaces) {
+ const std::string &FilePath = FileAndReplaces.first;
+ auto &CurReplaces = FileAndReplaces.second;
+
+ const FileEntry *Entry = Files.getFile(FilePath);
+ FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
+ StringRef Code = SM.getBufferData(ID);
+
+ format::FormatStyle CurStyle = format::getStyle(Style, FilePath, "LLVM");
+ auto NewReplacements =
+ format::formatReplacements(Code, CurReplaces, CurStyle);
+ if (!NewReplacements) {
+ llvm::errs() << llvm::toString(NewReplacements.takeError()) << "\n";
+ return false;
+ }
+ Result = applyAllReplacements(*NewReplacements, Rewrite) && Result;
+ }
+ return Result;
+}
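+
+// Typical call site (illustrative sketch): after a RefactoringTool run has
+// populated Tool.getReplacements(), reformat the touched regions with the
+// per-file style ("file" reads .clang-format, falling back to LLVM) and
+// write the result:
+//
+//   Rewriter Rewrite(SM, LangOptions());
+//   if (!formatAndApplyAllReplacements(Tool.getReplacements(), Rewrite,
+//                                      "file"))
+//     return 1;
+//   return Rewrite.overwriteChangedFiles() ? 1 : 0;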
+
} // end namespace tooling
} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp
index fd5596ec2ded..4c7fed1e617c 100644
--- a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp
+++ b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
+#include <utility>
#define DEBUG_TYPE "clang-tooling"
@@ -49,8 +50,9 @@ FrontendActionFactory::~FrontendActionFactory() {}
static clang::driver::Driver *newDriver(
clang::DiagnosticsEngine *Diagnostics, const char *BinaryName,
IntrusiveRefCntPtr<vfs::FileSystem> VFS) {
- clang::driver::Driver *CompilerDriver = new clang::driver::Driver(
- BinaryName, llvm::sys::getDefaultTargetTriple(), *Diagnostics, VFS);
+ clang::driver::Driver *CompilerDriver =
+ new clang::driver::Driver(BinaryName, llvm::sys::getDefaultTargetTriple(),
+ *Diagnostics, std::move(VFS));
CompilerDriver->setTitle("clang_based_tool");
return CompilerDriver;
}
@@ -103,14 +105,16 @@ bool runToolOnCode(clang::FrontendAction *ToolAction, const Twine &Code,
const Twine &FileName,
std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
return runToolOnCodeWithArgs(ToolAction, Code, std::vector<std::string>(),
- FileName, PCHContainerOps);
+ FileName, "clang-tool",
+ std::move(PCHContainerOps));
}
static std::vector<std::string>
-getSyntaxOnlyToolArgs(const std::vector<std::string> &ExtraArgs,
+getSyntaxOnlyToolArgs(const Twine &ToolName,
+ const std::vector<std::string> &ExtraArgs,
StringRef FileName) {
std::vector<std::string> Args;
- Args.push_back("clang-tool");
+ Args.push_back(ToolName.str());
Args.push_back("-fsyntax-only");
Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end());
Args.push_back(FileName.str());
@@ -120,6 +124,7 @@ getSyntaxOnlyToolArgs(const std::vector<std::string> &ExtraArgs,
bool runToolOnCodeWithArgs(
clang::FrontendAction *ToolAction, const Twine &Code,
const std::vector<std::string> &Args, const Twine &FileName,
+ const Twine &ToolName,
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
const FileContentMappings &VirtualMappedFiles) {
@@ -132,8 +137,9 @@ bool runToolOnCodeWithArgs(
OverlayFileSystem->pushOverlay(InMemoryFileSystem);
llvm::IntrusiveRefCntPtr<FileManager> Files(
new FileManager(FileSystemOptions(), OverlayFileSystem));
- ToolInvocation Invocation(getSyntaxOnlyToolArgs(Args, FileNameRef),
- ToolAction, Files.get(), PCHContainerOps);
+ ToolInvocation Invocation(getSyntaxOnlyToolArgs(ToolName, Args, FileNameRef),
+ ToolAction, Files.get(),
+ std::move(PCHContainerOps));
SmallString<1024> CodeStorage;
InMemoryFileSystem->addFile(FileNameRef, 0,
@@ -206,14 +212,16 @@ ToolInvocation::ToolInvocation(
std::vector<std::string> CommandLine, ToolAction *Action,
FileManager *Files, std::shared_ptr<PCHContainerOperations> PCHContainerOps)
: CommandLine(std::move(CommandLine)), Action(Action), OwnsAction(false),
- Files(Files), PCHContainerOps(PCHContainerOps), DiagConsumer(nullptr) {}
+ Files(Files), PCHContainerOps(std::move(PCHContainerOps)),
+ DiagConsumer(nullptr) {}
ToolInvocation::ToolInvocation(
std::vector<std::string> CommandLine, FrontendAction *FAction,
FileManager *Files, std::shared_ptr<PCHContainerOperations> PCHContainerOps)
: CommandLine(std::move(CommandLine)),
Action(new SingleFrontendActionFactory(FAction)), OwnsAction(true),
- Files(Files), PCHContainerOps(PCHContainerOps), DiagConsumer(nullptr) {}
+ Files(Files), PCHContainerOps(std::move(PCHContainerOps)),
+ DiagConsumer(nullptr) {}
ToolInvocation::~ToolInvocation() {
if (OwnsAction)
@@ -260,7 +268,7 @@ bool ToolInvocation::run() {
Input.release());
}
return runInvocation(BinaryName, Compilation.get(), Invocation.release(),
- PCHContainerOps);
+ std::move(PCHContainerOps));
}
bool ToolInvocation::runInvocation(
@@ -274,7 +282,7 @@ bool ToolInvocation::runInvocation(
llvm::errs() << "\n";
}
- return Action->runInvocation(Invocation, Files, PCHContainerOps,
+ return Action->runInvocation(Invocation, Files, std::move(PCHContainerOps),
DiagConsumer);
}
@@ -283,7 +291,7 @@ bool FrontendActionFactory::runInvocation(
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticConsumer *DiagConsumer) {
// Create a compiler instance to handle the actual work.
- clang::CompilerInstance Compiler(PCHContainerOps);
+ clang::CompilerInstance Compiler(std::move(PCHContainerOps));
Compiler.setInvocation(Invocation);
Compiler.setFileManager(Files);
@@ -309,7 +317,7 @@ ClangTool::ClangTool(const CompilationDatabase &Compilations,
ArrayRef<std::string> SourcePaths,
std::shared_ptr<PCHContainerOperations> PCHContainerOps)
: Compilations(Compilations), SourcePaths(SourcePaths),
- PCHContainerOps(PCHContainerOps),
+ PCHContainerOps(std::move(PCHContainerOps)),
OverlayFileSystem(new vfs::OverlayFileSystem(vfs::getRealFileSystem())),
InMemoryFileSystem(new vfs::InMemoryFileSystem),
Files(new FileManager(FileSystemOptions(), OverlayFileSystem)),
@@ -327,26 +335,32 @@ void ClangTool::mapVirtualFile(StringRef FilePath, StringRef Content) {
void ClangTool::appendArgumentsAdjuster(ArgumentsAdjuster Adjuster) {
if (ArgsAdjuster)
- ArgsAdjuster = combineAdjusters(ArgsAdjuster, Adjuster);
+ ArgsAdjuster =
+ combineAdjusters(std::move(ArgsAdjuster), std::move(Adjuster));
else
- ArgsAdjuster = Adjuster;
+ ArgsAdjuster = std::move(Adjuster);
}
void ClangTool::clearArgumentsAdjusters() {
ArgsAdjuster = nullptr;
}
+static void injectResourceDir(CommandLineArguments &Args, const char *Argv0,
+ void *MainAddr) {
+ // Allow users to override the resource dir.
+ for (StringRef Arg : Args)
+ if (Arg.startswith("-resource-dir"))
+ return;
+
+ // If there's no override in place add our resource dir.
+ Args.push_back("-resource-dir=" +
+ CompilerInvocation::GetResourcesPath(Argv0, MainAddr));
+}
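+// For example (illustrative): {"clang++", "-c", "foo.cpp"} becomes
+// {"clang++", "-c", "foo.cpp", "-resource-dir=<path derived from this
+// tool's binary>"}, while a command line that already contains
+// -resource-dir is left untouched.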
+
int ClangTool::run(ToolAction *Action) {
// Exists solely for the purpose of lookup of the resource path.
// This just needs to be some symbol in the binary.
static int StaticSymbol;
- // The driver detects the builtin header path based on the path of the
- // executable.
- // FIXME: On linux, GetMainExecutable is independent of the value of the
- // first argument, thus allowing ClangTool and runToolOnCode to just
- // pass in made-up names here. Make sure this works on other platforms.
- std::string MainExecutable =
- llvm::sys::fs::getMainExecutable("clang_tool", &StaticSymbol);
llvm::SmallString<128> InitialDirectory;
if (std::error_code EC = llvm::sys::fs::current_path(InitialDirectory))
@@ -411,7 +425,17 @@ int ClangTool::run(ToolAction *Action) {
if (ArgsAdjuster)
CommandLine = ArgsAdjuster(CommandLine, CompileCommand.Filename);
assert(!CommandLine.empty());
- CommandLine[0] = MainExecutable;
+
+ // Add the resource dir based on the binary of this tool. argv[0] in the
+ // compilation database may refer to a different compiler and we want to
+ // pick up the very same standard library that compiler is using. The
+ // builtin headers in the resource dir need to match the exact clang
+ // version the tool is using.
+ // FIXME: On linux, GetMainExecutable is independent of the value of the
+ // first argument, thus allowing ClangTool and runToolOnCode to just
+ // pass in made-up names here. Make sure this works on other platforms.
+ injectResourceDir(CommandLine, "clang_tool", &StaticSymbol);
+
// FIXME: We need a callback mechanism for the tool writer to output a
// customized message for each file.
DEBUG({ llvm::dbgs() << "Processing: " << File << ".\n"; });
@@ -446,7 +470,7 @@ public:
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticConsumer *DiagConsumer) override {
std::unique_ptr<ASTUnit> AST = ASTUnit::LoadFromCompilerInvocation(
- Invocation, PCHContainerOps,
+ Invocation, std::move(PCHContainerOps),
CompilerInstance::createDiagnostics(&Invocation->getDiagnosticOpts(),
DiagConsumer,
/*ShouldOwnClient=*/false),
@@ -458,7 +482,6 @@ public:
return true;
}
};
-
}
int ClangTool::buildASTs(std::vector<std::unique_ptr<ASTUnit>> &ASTs) {
@@ -470,12 +493,12 @@ std::unique_ptr<ASTUnit>
buildASTFromCode(const Twine &Code, const Twine &FileName,
std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
return buildASTFromCodeWithArgs(Code, std::vector<std::string>(), FileName,
- PCHContainerOps);
+ "clang-tool", std::move(PCHContainerOps));
}
std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
const Twine &Code, const std::vector<std::string> &Args,
- const Twine &FileName,
+ const Twine &FileName, const Twine &ToolName,
std::shared_ptr<PCHContainerOperations> PCHContainerOps) {
SmallString<16> FileNameStorage;
StringRef FileNameRef = FileName.toNullTerminatedStringRef(FileNameStorage);
@@ -489,8 +512,8 @@ std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
OverlayFileSystem->pushOverlay(InMemoryFileSystem);
llvm::IntrusiveRefCntPtr<FileManager> Files(
new FileManager(FileSystemOptions(), OverlayFileSystem));
- ToolInvocation Invocation(getSyntaxOnlyToolArgs(Args, FileNameRef), &Action,
- Files.get(), PCHContainerOps);
+ ToolInvocation Invocation(getSyntaxOnlyToolArgs(ToolName, Args, FileNameRef),
+ &Action, Files.get(), std::move(PCHContainerOps));
SmallString<1024> CodeStorage;
InMemoryFileSystem->addFile(FileNameRef, 0,